Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
f5a2f00b
Commit
f5a2f00b
authored
Dec 08, 2022
by
chenweitao
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'working' into 'master'
唯一化客户端初始化补充 See merge request
!228
parents
6f8872eb
d8188e3c
Show whitespace changes
Inline
Side-by-side
Showing
32 changed files
with
72 additions
and
63 deletions
+72
-63
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/BiliComprehensiveHotCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/BililiCrawler.java
+3
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
+3
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/FengHuangSearchCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/HotSearch36KrCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/HuXiuHotSearchCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/KuaiShouHotSearchCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/MaiMaiHotSearchCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/MaiMaiTopicCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/SouhuTopicCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/TaoBaoHotSearchCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/ToutiaoHotSearchCrawler.java
+4
-4
src/main/java/com/zhiwei/searchhotcrawler/crawler/WangYiHotSearchCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoBrandCrawler.java
+9
-9
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchBoxHotWordsCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchHotWordsCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiShiHotSearchCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboEntertainmentCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+5
-5
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboNewsCrawler.java
+3
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboOutCircleCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboVideoCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/XinLangHotSearchCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuTopicSearchCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/util/HttpClientUtils.java
+11
-2
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -44,7 +44,7 @@ public class BaiDuHotSearchCrawler {
String
url
=
"http://top.baidu.com/buzz?b=1&fr=topindex"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析百度风云榜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/BiliComprehensiveHotCrawler.java
View file @
f5a2f00b
...
...
@@ -55,7 +55,7 @@ public class BiliComprehensiveHotCrawler {
for
(
int
i
=
0
;
i
<
urlList
.
size
();
i
++)
{
Request
request
=
RequestUtils
.
wrapGet
(
urlList
.
get
(
i
));
//发送请求每次获取20条数据
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
fmt
.
format
(
date
)+
":第"
+
i
+
1
+
"次请求解析B站综合热门时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/BililiCrawler.java
View file @
f5a2f00b
...
...
@@ -45,7 +45,7 @@ public class BililiCrawler {
String
url
=
"https://api.bilibili.com/x/web-interface/ranking/v2?rid=0&type=all"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"B站排行榜页面连接失败"
,
cause
.
fillInStackTrace
());
...
...
@@ -136,7 +136,7 @@ public class BililiCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
try
{
System
.
setProperty
(
"https.protocols"
,
"TLSv1,TLSv1.1,TLSv1.2,SSLv3"
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
String
htmlBody
=
response
.
bodyString
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"v-wrap"
))
{
Document
document
=
Jsoup
.
parse
(
htmlBody
);
...
...
@@ -184,7 +184,7 @@ public class BililiCrawler {
String
url
=
"https://app.biliapi.com/x/v2/search/square?build=616050&limit=10"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"B站热搜页面连接失败"
,
cause
.
fillInStackTrace
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -48,7 +48,7 @@ public class DouyinHotSearchCrawler {
String
url
=
"https://api.amemv.com/aweme/v1/hot/search/list/"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取抖音热搜榜时出现问题:{}"
,
cause
);
...
...
@@ -92,7 +92,7 @@ public class DouyinHotSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
x
=
0
;
x
<
3
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取抖音热搜榜链接时出现问题:{}"
,
cause
);
...
...
@@ -127,7 +127,7 @@ public class DouyinHotSearchCrawler {
String
url
=
"https://api5-normal-c-lq.amemv.com/aweme/v1/hot/search/list/?board_type=2&board_sub_type=2&version_code=140900"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取抖音娱乐榜榜时出现问题:{}"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/FengHuangSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -36,7 +36,7 @@ public class FengHuangSearchCrawler {
String
url
=
"https://nine.ifeng.com/hotspotlist?gv=7.9.1&page="
+
page
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"凤凰新闻热榜页面连接异常..."
,
cause
);
...
...
@@ -76,7 +76,7 @@ public class FengHuangSearchCrawler {
String
url
=
"https://shankapi.ifeng.com/autumn/sogouSearchHotword"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"凤凰新闻热搜页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/HotSearch36KrCrawler.java
View file @
f5a2f00b
...
...
@@ -48,7 +48,7 @@ public class HotSearch36KrCrawler {
headerMap
.
put
(
"sec-fetch-dest"
,
"empty"
);
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析36Kr人气榜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/HuXiuHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -44,7 +44,7 @@ public class HuXiuHotSearchCrawler {
headerMap
.
put
(
"sec-ch-ua"
,
" Not A;Brand\";v=\"99\", \"Chromium\";v=\"101\", \"Microsoft Edge\";v=\"101"
);
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析虎嗅热文推荐时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/KuaiShouHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -45,7 +45,7 @@ public class KuaiShouHotSearchCrawler {
headerMap
.
put
(
"sec-ch-ua"
,
"Microsoft Edge\";v=\"107\", \"Chromium\";v=\"107\", \"Not=A?Brand\";v=\"24"
);
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析快手热榜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/MaiMaiHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -37,7 +37,7 @@ public class MaiMaiHotSearchCrawler {
String
url
=
"https://open.taou.com/maimai/feed/v6/hot_list_entry/feeds?page_version=2&version=6.2.34&u=232258287&access_token=1.4c82e8ad6d6b4e03262a48f334dea336"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"脉脉热榜页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/MaiMaiTopicCrawler.java
View file @
f5a2f00b
...
...
@@ -30,7 +30,7 @@ public class MaiMaiTopicCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
x
=
0
;
x
<=
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"脉脉话题页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -48,7 +48,7 @@ public class SougoHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
cause
);
...
...
@@ -92,7 +92,7 @@ public class SougoHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headMap
);
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/SouhuTopicCrawler.java
View file @
f5a2f00b
...
...
@@ -33,7 +33,7 @@ public class SouhuTopicCrawler {
String
url
=
"https://api.k.sohu.com/api/news/moment/v2/list.go?pageSize=50&v=6.4.4"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"搜狐话题页面连接失败"
,
cause
.
fillInStackTrace
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/TaoBaoHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -37,7 +37,7 @@ public class TaoBaoHotSearchCrawler {
String
urls
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign=&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Request
request1
=
RequestUtils
.
wrapGet
(
urls
);
String
token
=
null
;
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request1
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request1
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
...
...
@@ -56,7 +56,7 @@ public class TaoBaoHotSearchCrawler {
String
sign
=
MD5Util
.
getMD5
(
signs
).
toLowerCase
();
String
url
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign="
+
sign
+
"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response1
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response1
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response1
.
hasCause
()){
Throwable
cause
=
response1
.
cause
();
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
View file @
f5a2f00b
...
...
@@ -37,7 +37,7 @@ public class TengXunCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
response
.
cause
().
printStackTrace
();
...
...
@@ -101,7 +101,7 @@ public class TengXunCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
response
.
cause
().
printStackTrace
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ToutiaoHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -51,7 +51,7 @@ public class ToutiaoHotSearchCrawler {
String
jsUrl
=
"https://s3.pstatp.com/toutiao/feoffline/hot_list/resource/hot_list/js/index.45f50250.chunk.js"
;
Request
jsRequest
=
RequestUtils
.
wrapGet
(
jsUrl
);
String
jsBody
=
null
;
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
jsRequest
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
jsRequest
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"获取今日头条实时热搜头部信息标识失败"
,
cause
);
...
...
@@ -67,7 +67,7 @@ public class ToutiaoHotSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response1
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response1
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response1
.
hasCause
())
{
Throwable
cause
=
response1
.
cause
();
log
.
error
(
"解析今日头条实时热搜时出现连接失败"
,
cause
);
...
...
@@ -167,7 +167,7 @@ public class ToutiaoHotSearchCrawler {
String
htmlBody
=
null
;
String
url
=
hotSearchList
.
getUrl
();
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析今日头条热搜详情页面出现连接失败"
,
cause
);
...
...
@@ -228,7 +228,7 @@ public class ToutiaoHotSearchCrawler {
headerMap
.
put
(
"User-Agent"
,
"com.ss.android.article.news/8770 (Linux; U; Android 9; zh_CN; Redmi 8; Build/PKQ1.190319.001; Cronet/TTNetVersion:a867b489 2022-03-11 QuicVersion:b314d107 2021-11-24) Accept-Encoding: gzip, deflate, br"
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
String
htmlBody
=
null
;
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"获取今日头条榜单出错"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WangYiHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -43,7 +43,7 @@ public class WangYiHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"网易新闻实时热榜页面连接异常..."
,
cause
);
...
...
@@ -86,7 +86,7 @@ public class WangYiHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"网易新闻跟贴热议页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoBrandCrawler.java
View file @
f5a2f00b
...
...
@@ -44,7 +44,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌总榜时出现连接失败"
,
cause
);
...
...
@@ -78,7 +78,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌汽车榜时出现连接失败"
,
cause
);
...
...
@@ -112,7 +112,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌手机榜时出现连接失败"
,
cause
);
...
...
@@ -146,7 +146,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌美妆榜时出现连接失败"
,
cause
);
...
...
@@ -180,7 +180,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌奢侈品榜时出现连接失败"
,
cause
);
...
...
@@ -214,7 +214,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌食品饮料榜时出现连接失败"
,
cause
);
...
...
@@ -248,7 +248,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌家电榜时出现连接失败"
,
cause
);
...
...
@@ -282,7 +282,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌服装鞋帽榜时出现连接失败"
,
cause
);
...
...
@@ -316,7 +316,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌服装鞋帽榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchBoxHotWordsCrawler.java
View file @
f5a2f00b
...
...
@@ -38,7 +38,7 @@ public class WeiBoSearchBoxHotWordsCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博搜索框热词时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchHotWordsCrawler.java
View file @
f5a2f00b
...
...
@@ -37,7 +37,7 @@ public class WeiBoSearchHotWordsCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiShiHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -46,7 +46,7 @@ public class WeiShiHotSearchCrawler {
headerMap
.
put
(
"Host"
,
"api.weishi.qq.com"
);
Request
request
=
RequestUtils
.
wrapPost
(
url
,
headerMap
,
RequestBody
.
create
(
MediaType
.
get
(
"application/json"
),
"{\"req_body\":{\"hotRankID\":\"\",\"attachInfo\":\"\",\"hotRankType\":1,\"sourceID\":\"WSSearchH5\"}}"
));
for
(
int
count
=
0
;
count
<=
3
;
count
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微视热榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboEntertainmentCrawler.java
View file @
f5a2f00b
...
...
@@ -39,7 +39,7 @@ public class WeiboEntertainmentCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博娱乐榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -69,7 +69,7 @@ public class WeiboHotSearchCrawler {
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
i
==
2
)
{
return
list
;
...
...
@@ -265,7 +265,7 @@ public class WeiboHotSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博时热搜时出现连接失败"
,
cause
);
...
...
@@ -363,7 +363,7 @@ public class WeiboHotSearchCrawler {
String
url
=
"https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博热搜时出现连接失败"
,
cause
);
...
...
@@ -421,7 +421,7 @@ public class WeiboHotSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博热搜详情页面时出现连接失败"
,
cause
);
...
...
@@ -519,7 +519,7 @@ public class WeiboHotSearchCrawler {
//该cookie有效期一年,微博pc端获取游客cookie链接 https://s.weibo.com/top/summary?cate=realtimehot
headerMap
.
put
(
"Cookie"
,
"SUB=_2AkMUShJMf8NxqwJRmP0RyWvgb4RwwgnEieKiFuOXJRMxHRl-yT92qlQvtRB6P8o8oso9Ew-s6vf16fdCca-Xz6DwwAMH; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFdAobr6HdAbgQQ9vbUQKDx"
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博时热搜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboNewsCrawler.java
View file @
f5a2f00b
...
...
@@ -50,7 +50,7 @@ public class WeiboNewsCrawler {
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
List
<
HotSearchList
>
result
=
new
ArrayList
();
//发送第一次请求获取前20条数据
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request1
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request1
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"第一次请求解析微博要闻榜时出现连接失败"
,
cause
);
...
...
@@ -72,7 +72,7 @@ public class WeiboNewsCrawler {
continue
;
}
//发送第二次请求获取中间20条数据
Response
response1
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request2
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response1
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request2
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response1
.
hasCause
()){
Throwable
cause
=
response1
.
cause
();
log
.
error
(
"第二次请求解析微博要闻榜时出现连接失败"
,
cause
);
...
...
@@ -95,7 +95,7 @@ public class WeiboNewsCrawler {
continue
;
}
//发送第三次请求获取最后10条数据
Response
response2
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request3
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response2
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request3
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response2
.
hasCause
()){
Throwable
cause
=
response2
.
cause
();
log
.
error
(
"第三次请求解析微博要闻榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboOutCircleCrawler.java
View file @
f5a2f00b
...
...
@@ -44,7 +44,7 @@ public class WeiboOutCircleCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
for
(
int
x
=
0
;
x
<=
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博出圈榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
View file @
f5a2f00b
...
...
@@ -66,7 +66,7 @@ public class WeiboSuperTopicCrawler {
String
htmlBody
=
null
;
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"获取榜单列表页面时出现错误,错误为:{}"
,
cause
);
...
...
@@ -142,7 +142,7 @@ public class WeiboSuperTopicCrawler {
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid="
+
id
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析榜单详情页面时出现错误,错误为:{}"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
View file @
f5a2f00b
...
...
@@ -141,7 +141,7 @@ public class WeiboTopicCrawler {
String
htmlBody
=
null
;
// Up to five attempts (the loop below runs retryTimes from 1 through 5)
for
(
int
retryTimes
=
1
;
retryTimes
<=
5
;
retryTimes
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"下载榜单列表页面时出现错误,错误为:{}"
,
cause
);
...
...
@@ -230,7 +230,7 @@ public class WeiboTopicCrawler {
String
htmlBody
=
null
;
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"下载榜单列表页面时出现错误,错误为:{}"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboVideoCrawler.java
View file @
f5a2f00b
...
...
@@ -43,7 +43,7 @@ public class WeiboVideoCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
x
=
0
;
x
<=
2
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博视频榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/XinLangHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -41,7 +41,7 @@ public class XinLangHotSearchCrawler {
String
htmlBody
=
null
;
JSONObject
jsonObject
=
null
;
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"新浪热榜页面连接异常..."
,
cause
);
...
...
@@ -117,7 +117,7 @@ public class XinLangHotSearchCrawler {
String
htmlBody
=
null
;
JSONArray
dataJson
=
null
;
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"新浪热点页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -43,7 +43,7 @@ public class ZhihuChildHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
response
.
cause
().
printStackTrace
();
}
else
{
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -102,7 +102,7 @@ public class ZhihuHotSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
for
(
int
x
=
0
;
x
<=
5
;
x
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
cause
);
...
...
@@ -175,7 +175,7 @@ public class ZhihuHotSearchCrawler {
Map
.
put
(
"cookie"
,
"_xsrf=7NFWM5qBcOutfs8MaW7bhQQH65t3Xia4"
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
Map
);
try
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"单条知乎热搜数据页面连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuTopicSearchCrawler.java
View file @
f5a2f00b
...
...
@@ -27,7 +27,7 @@ public class ZhihuTopicSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
getHttpBoot
()
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"知乎热搜页面连接异常"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/util/HttpClientUtils.java
View file @
f5a2f00b
...
...
@@ -33,12 +33,21 @@ public class HttpClientUtils {
//private static final HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(2).build();
// Shared HTTP client: a single HttpBoot built with retryTimes(2).
public
static
final
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
2
).
build
();
// Set to true once init() has been run (by the static initializer below or by getHttpBoot()).
static
boolean
isInit
=
false
;
// Static initializer: runs init() and then records that in isInit.
// Field initializers and this block execute in textual order, so it must stay after both fields.
static
{
init
();
isInit
=
true
;
}
/**
 * Returns the shared {@code httpBoot} client, calling {@code init()} first when the
 * {@code isInit} flag has not been set yet.
 *
 * @return the class-level {@code httpBoot} instance
 */
public static HttpBoot getHttpBoot() {
    // Fast path: initialisation has already been recorded.
    if (isInit) {
        return httpBoot;
    }
    // Not yet flagged as initialised: run init() once and remember that it ran.
    init();
    isInit = true;
    return httpBoot;
}
public
static
void
init
(){
public
static
synchronized
void
init
(){
ApplicationConfig
applicationConfig
=
new
ApplicationConfig
();
applicationConfig
.
setName
(
"hot_search-project"
);
RegistryConfig
registryConfig
=
new
RegistryConfig
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment