Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
ea714ae2
Commit
ea714ae2
authored
Nov 09, 2022
by
chenweitao
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'working' into 'master'
Working See merge request
!210
parents
a8cc6e0b
3b65725a
Hide whitespace changes
Inline
Side-by-side
Showing
48 changed files
with
361 additions
and
173 deletions
+361
-173
pom.xml
+14
-2
src/main/java/com/zhiwei/searchhotcrawler/config/ProxyConfig.java
+37
-18
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/BiliComprehensiveHotCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/BililiCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/FengHuangSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/HotSearch36KrCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/HuXiuHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/KuaiShouHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/MaiMaiHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/SouhuTopicCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/TaoBaoHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/ToutiaoHotSearchCrawler.java
+21
-25
src/main/java/com/zhiwei/searchhotcrawler/crawler/WangYiHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoBrandCrawler.java
+10
-9
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchBoxHotWordsCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchHotWordsCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiShiHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboEntertainmentCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+6
-5
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboNewsCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboOutCircleCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboVideoCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/XinLangHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuTopicSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+40
-11
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearch36KrCrawlerTest.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchRunTest.java
+3
-4
src/main/java/com/zhiwei/searchhotcrawler/test/HuXiuHotSearchCrawlerTest.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/test/Job51Test.java
+5
-5
src/main/java/com/zhiwei/searchhotcrawler/test/KuaiShouHotSearchCrawlerTest.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/test/TaoBaoHotSearchCrawlerTest.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/test/TaoBaoRunTest.java
+3
-4
src/main/java/com/zhiwei/searchhotcrawler/test/WeiboEntertainmentCrawlerTest.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
+10
-5
src/main/resources/proxyip.properties
+12
-4
src/test/java/InfoTest/InfoTest.java
+4
-4
src/test/java/baiduTest/BaiduTest.java
+5
-5
src/test/java/hotSaerchTest/HotSearchTest.java
+14
-14
src/test/java/proxy/ProxyTest.java
+86
-0
src/test/java/weiboTest/WeiboHotSearchTest.java
+11
-10
No files found.
pom.xml
View file @
ea714ae2
...
@@ -15,6 +15,11 @@
...
@@ -15,6 +15,11 @@
<log4j.version>
2.15.0
</log4j.version>
<log4j.version>
2.15.0
</log4j.version>
<commons-lang3.version>
3.12.0
</commons-lang3.version>
<commons-lang3.version>
3.12.0
</commons-lang3.version>
<http-boot.version>
0.1.0.8-SNAPSHOT
</http-boot.version>
<cynomys-consumer.version>
0.0.5-SNAPSHOT
</cynomys-consumer.version>
<proxy-client.version>
2.0.1-SNAPSHOT
</proxy-client.version>
<maven.compiler.source>
8
</maven.compiler.source>
<maven.compiler.source>
8
</maven.compiler.source>
<maven.compiler.target>
8
</maven.compiler.target>
<maven.compiler.target>
8
</maven.compiler.target>
</properties>
</properties>
...
@@ -56,7 +61,7 @@
...
@@ -56,7 +61,7 @@
<dependency>
<dependency>
<groupId>
com.zhiwei.http
</groupId>
<groupId>
com.zhiwei.http
</groupId>
<artifactId>
http-boot
</artifactId>
<artifactId>
http-boot
</artifactId>
<version>
0.0.8.2-SNAPSHOT
</version>
<version>
${http-boot.version}
</version>
</dependency>
</dependency>
<dependency>
<dependency>
<groupId>
org.apache.commons
</groupId>
<groupId>
org.apache.commons
</groupId>
...
@@ -67,8 +72,15 @@
...
@@ -67,8 +72,15 @@
<dependency>
<dependency>
<groupId>
com.zhiwei.crawler
</groupId>
<groupId>
com.zhiwei.crawler
</groupId>
<artifactId>
proxy-client
</artifactId>
<artifactId>
proxy-client
</artifactId>
<version>
1.1.5-SNAPSHOT
</version>
<version>
${proxy-client.version}
</version>
</dependency>
<dependency>
<groupId>
com.zhiwei.network
</groupId>
<artifactId>
cynomys-consumer
</artifactId>
<version>
${cynomys-consumer.version}
</version>
</dependency>
</dependency>
<!-- https://mvnrepository.com/artifact/org.conscrypt/conscrypt-openjdk-uber -->
<!-- https://mvnrepository.com/artifact/org.conscrypt/conscrypt-openjdk-uber -->
<dependency>
<dependency>
<groupId>
org.conscrypt
</groupId>
<groupId>
org.conscrypt
</groupId>
...
...
src/main/java/com/zhiwei/searchhotcrawler/config/ProxyConfig.java
View file @
ea714ae2
...
@@ -4,23 +4,42 @@ import java.io.InputStream;
...
@@ -4,23 +4,42 @@ import java.io.InputStream;
import
java.util.Properties
;
import
java.util.Properties
;
public
class
ProxyConfig
{
public
class
ProxyConfig
{
static
{
static
{
Properties
conf
=
null
;
Properties
conf
=
null
;
try
{
try
{
InputStream
is
=
Thread
.
currentThread
().
getContextClassLoader
()
InputStream
is
=
Thread
.
currentThread
().
getContextClassLoader
()
.
getResourceAsStream
(
"proxyip.properties"
);
.
getResourceAsStream
(
"proxyip.properties"
);
conf
=
new
Properties
();
conf
=
new
Properties
();
conf
.
load
(
is
);
conf
.
load
(
is
);
is
.
close
();
is
.
close
();
registry
=
conf
.
getProperty
(
"registry"
);
localRegistry
=
conf
.
getProperty
(
"local.registry"
);
group
=
conf
.
getProperty
(
"group"
);
localGroup
=
conf
.
getProperty
(
"local.group"
);
}
catch
(
Exception
e
)
{
localUsername
=
conf
.
getProperty
(
"local.username"
);
e
.
printStackTrace
();
localPassword
=
conf
.
getProperty
(
"local.password"
);
}
}
hangzhouRegistry
=
conf
.
getProperty
(
"hangzhou.registry"
);
hangzhouGroup
=
conf
.
getProperty
(
"hangzhou.group"
);
hangzhouUsername
=
conf
.
getProperty
(
"hangzhou.username"
);
public
static
String
registry
;
hangzhouPassword
=
conf
.
getProperty
(
"hangzhou.password"
);
public
static
String
group
;
isLocal
=
Boolean
.
parseBoolean
(
conf
.
getProperty
(
"isLocal"
));
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
}
public
static
boolean
isLocal
;
public
static
String
localRegistry
;
public
static
String
localGroup
;
public
static
String
localUsername
;
public
static
String
localPassword
;
public
static
String
hangzhouRegistry
;
public
static
String
hangzhouGroup
;
public
static
String
hangzhouUsername
;
public
static
String
hangzhouPassword
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
View file @
ea714ae2
...
@@ -6,6 +6,7 @@ import java.util.*;
...
@@ -6,6 +6,7 @@ import java.util.*;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
...
@@ -40,7 +41,7 @@ public class BaiDuHotSearchCrawler {
...
@@ -40,7 +41,7 @@ public class BaiDuHotSearchCrawler {
String
url
=
"http://top.baidu.com/buzz?b=1&fr=topindex"
;
String
url
=
"http://top.baidu.com/buzz?b=1&fr=topindex"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析百度风云榜时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析百度风云榜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/BiliComprehensiveHotCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -53,7 +54,7 @@ public class BiliComprehensiveHotCrawler {
...
@@ -53,7 +54,7 @@ public class BiliComprehensiveHotCrawler {
for
(
int
i
=
0
;
i
<
urlList
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
urlList
.
size
();
i
++)
{
Request
request
=
RequestUtils
.
wrapGet
(
urlList
.
get
(
i
));
Request
request
=
RequestUtils
.
wrapGet
(
urlList
.
get
(
i
));
//发送请求每次获取20条数据
//发送请求每次获取20条数据
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
fmt
.
format
(
date
)+
":第"
+
i
+
1
+
"次请求解析B站综合热门时出现连接失败"
,
cause
);
log
.
error
(
fmt
.
format
(
date
)+
":第"
+
i
+
1
+
"次请求解析B站综合热门时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/BililiCrawler.java
View file @
ea714ae2
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -43,7 +44,7 @@ public class BililiCrawler {
...
@@ -43,7 +44,7 @@ public class BililiCrawler {
String
url
=
"https://api.bilibili.com/x/web-interface/ranking/v2?rid=0&type=all"
;
String
url
=
"https://api.bilibili.com/x/web-interface/ranking/v2?rid=0&type=all"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"B站排行榜页面连接失败"
,
cause
.
fillInStackTrace
());
log
.
error
(
"B站排行榜页面连接失败"
,
cause
.
fillInStackTrace
());
...
@@ -133,7 +134,7 @@ public class BililiCrawler {
...
@@ -133,7 +134,7 @@ public class BililiCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
try
{
try
{
System
.
setProperty
(
"https.protocols"
,
"TLSv1,TLSv1.1,TLSv1.2,SSLv3"
);
System
.
setProperty
(
"https.protocols"
,
"TLSv1,TLSv1.1,TLSv1.2,SSLv3"
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
String
htmlBody
=
response
.
bodyString
();
String
htmlBody
=
response
.
bodyString
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"v-wrap"
))
{
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"v-wrap"
))
{
Document
document
=
Jsoup
.
parse
(
htmlBody
);
Document
document
=
Jsoup
.
parse
(
htmlBody
);
...
@@ -181,7 +182,7 @@ public class BililiCrawler {
...
@@ -181,7 +182,7 @@ public class BililiCrawler {
String
url
=
"https://app.biliapi.com/x/v2/search/square?build=616050&limit=10"
;
String
url
=
"https://app.biliapi.com/x/v2/search/square?build=616050&limit=10"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"B站热搜页面连接失败"
,
cause
.
fillInStackTrace
());
log
.
error
(
"B站热搜页面连接失败"
,
cause
.
fillInStackTrace
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
View file @
ea714ae2
...
@@ -5,6 +5,7 @@ import java.util.*;
...
@@ -5,6 +5,7 @@ import java.util.*;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
...
@@ -46,7 +47,7 @@ public class DouyinHotSearchCrawler {
...
@@ -46,7 +47,7 @@ public class DouyinHotSearchCrawler {
String
url
=
"https://api.amemv.com/aweme/v1/hot/search/list/"
;
String
url
=
"https://api.amemv.com/aweme/v1/hot/search/list/"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取抖音热搜榜时出现问题:{}"
,
cause
);
log
.
debug
(
"获取抖音热搜榜时出现问题:{}"
,
cause
);
...
@@ -90,7 +91,7 @@ public class DouyinHotSearchCrawler {
...
@@ -90,7 +91,7 @@ public class DouyinHotSearchCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
x
=
0
;
x
<
3
;
x
++)
{
for
(
int
x
=
0
;
x
<
3
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取抖音热搜榜链接时出现问题:{}"
,
cause
);
log
.
debug
(
"获取抖音热搜榜链接时出现问题:{}"
,
cause
);
...
@@ -125,7 +126,7 @@ public class DouyinHotSearchCrawler {
...
@@ -125,7 +126,7 @@ public class DouyinHotSearchCrawler {
String
url
=
"https://api5-normal-c-lq.amemv.com/aweme/v1/hot/search/list/?board_type=2&board_sub_type=2&version_code=140900"
;
String
url
=
"https://api5-normal-c-lq.amemv.com/aweme/v1/hot/search/list/?board_type=2&board_sub_type=2&version_code=140900"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取抖音娱乐榜榜时出现问题:{}"
,
cause
);
log
.
debug
(
"获取抖音娱乐榜榜时出现问题:{}"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/FengHuangSearchCrawler.java
View file @
ea714ae2
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -34,7 +35,7 @@ public class FengHuangSearchCrawler {
...
@@ -34,7 +35,7 @@ public class FengHuangSearchCrawler {
String
url
=
"https://nine.ifeng.com/hotspotlist?gv=7.9.1&page="
+
page
;
String
url
=
"https://nine.ifeng.com/hotspotlist?gv=7.9.1&page="
+
page
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"凤凰新闻热榜页面连接异常..."
,
cause
);
log
.
error
(
"凤凰新闻热榜页面连接异常..."
,
cause
);
...
@@ -74,7 +75,7 @@ public class FengHuangSearchCrawler {
...
@@ -74,7 +75,7 @@ public class FengHuangSearchCrawler {
String
url
=
"https://shankapi.ifeng.com/autumn/sogouSearchHotword"
;
String
url
=
"https://shankapi.ifeng.com/autumn/sogouSearchHotword"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"凤凰新闻热搜页面连接异常..."
,
cause
);
log
.
error
(
"凤凰新闻热搜页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/HotSearch36KrCrawler.java
View file @
ea714ae2
...
@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.crawler;
...
@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.crawler;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -46,7 +47,7 @@ public class HotSearch36KrCrawler {
...
@@ -46,7 +47,7 @@ public class HotSearch36KrCrawler {
headerMap
.
put
(
"sec-fetch-dest"
,
"empty"
);
headerMap
.
put
(
"sec-fetch-dest"
,
"empty"
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析36Kr人气榜时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析36Kr人气榜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/HuXiuHotSearchCrawler.java
View file @
ea714ae2
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -42,7 +43,7 @@ public class HuXiuHotSearchCrawler {
...
@@ -42,7 +43,7 @@ public class HuXiuHotSearchCrawler {
headerMap
.
put
(
"sec-ch-ua"
,
" Not A;Brand\";v=\"99\", \"Chromium\";v=\"101\", \"Microsoft Edge\";v=\"101"
);
headerMap
.
put
(
"sec-ch-ua"
,
" Not A;Brand\";v=\"99\", \"Chromium\";v=\"101\", \"Microsoft Edge\";v=\"101"
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析虎嗅热文推荐时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析虎嗅热文推荐时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/KuaiShouHotSearchCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -36,7 +37,7 @@ public class KuaiShouHotSearchCrawler {
...
@@ -36,7 +37,7 @@ public class KuaiShouHotSearchCrawler {
String
url
=
"https://video.kuaishou.com/?utm_source=aa&utm_medium=05&utm_campaign=aa_05_pp_yr&plan_id=138090084&unit_id=5205658029&creative_id=43661481717&keyword_id=202928529242&keyword=202928529242&bd_vid=11937382025080724791"
;
String
url
=
"https://video.kuaishou.com/?utm_source=aa&utm_medium=05&utm_campaign=aa_05_pp_yr&plan_id=138090084&unit_id=5205658029&creative_id=43661481717&keyword_id=202928529242&keyword=202928529242&bd_vid=11937382025080724791"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析快手热榜时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析快手热榜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/MaiMaiHotSearchCrawler.java
View file @
ea714ae2
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -35,7 +36,7 @@ public class MaiMaiHotSearchCrawler {
...
@@ -35,7 +36,7 @@ public class MaiMaiHotSearchCrawler {
String
url
=
"https://open.taou.com/maimai/feed/v6/hot_posts_list?tab=profession&count=15&version=5.3.34&u=232258287&access_token=1.4c82e8ad6d6b4e03262a48f334dea336"
;
String
url
=
"https://open.taou.com/maimai/feed/v6/hot_posts_list?tab=profession&count=15&version=5.3.34&u=232258287&access_token=1.4c82e8ad6d6b4e03262a48f334dea336"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"脉脉热榜页面连接异常..."
,
cause
);
log
.
error
(
"脉脉热榜页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
View file @
ea714ae2
...
@@ -6,6 +6,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -6,6 +6,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.util.HeaderTool
;
import
com.zhiwei.searchhotcrawler.util.HeaderTool
;
...
@@ -46,7 +47,7 @@ public class SougoHotSearchCrawler {
...
@@ -46,7 +47,7 @@ public class SougoHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
cause
);
...
@@ -90,7 +91,7 @@ public class SougoHotSearchCrawler {
...
@@ -90,7 +91,7 @@ public class SougoHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headMap
);
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/SouhuTopicCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -31,7 +32,7 @@ public class SouhuTopicCrawler {
...
@@ -31,7 +32,7 @@ public class SouhuTopicCrawler {
String
url
=
"https://api.k.sohu.com/api/news/moment/v2/list.go?pageSize=50&v=6.4.4"
;
String
url
=
"https://api.k.sohu.com/api/news/moment/v2/list.go?pageSize=50&v=6.4.4"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"搜狐话题页面连接失败"
,
cause
.
fillInStackTrace
());
log
.
error
(
"搜狐话题页面连接失败"
,
cause
.
fillInStackTrace
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/TaoBaoHotSearchCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -35,7 +36,7 @@ public class TaoBaoHotSearchCrawler {
...
@@ -35,7 +36,7 @@ public class TaoBaoHotSearchCrawler {
String
urls
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign=&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
String
urls
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign=&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Request
request1
=
RequestUtils
.
wrapGet
(
urls
);
Request
request1
=
RequestUtils
.
wrapGet
(
urls
);
String
token
=
null
;
String
token
=
null
;
Response
response
=
httpBoot
.
syncCall
(
request1
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request1
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
...
@@ -54,7 +55,7 @@ public class TaoBaoHotSearchCrawler {
...
@@ -54,7 +55,7 @@ public class TaoBaoHotSearchCrawler {
String
sign
=
MD5Util
.
getMD5
(
signs
).
toLowerCase
();
String
sign
=
MD5Util
.
getMD5
(
signs
).
toLowerCase
();
String
url
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign="
+
sign
+
"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
String
url
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign="
+
sign
+
"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response1
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response1
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response1
.
hasCause
()){
if
(
response1
.
hasCause
()){
Throwable
cause
=
response1
.
cause
();
Throwable
cause
=
response1
.
cause
();
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -35,7 +36,7 @@ public class TengXunCrawler {
...
@@ -35,7 +36,7 @@ public class TengXunCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//采集为空最多重试3次
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
response
.
cause
().
printStackTrace
();
response
.
cause
().
printStackTrace
();
...
@@ -99,7 +100,7 @@ public class TengXunCrawler {
...
@@ -99,7 +100,7 @@ public class TengXunCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//采集为空最多重试3次
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
response
.
cause
().
printStackTrace
();
response
.
cause
().
printStackTrace
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ToutiaoHotSearchCrawler.java
View file @
ea714ae2
...
@@ -6,6 +6,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -6,6 +6,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -49,7 +50,7 @@ public class ToutiaoHotSearchCrawler {
...
@@ -49,7 +50,7 @@ public class ToutiaoHotSearchCrawler {
String
jsUrl
=
"https://s3.pstatp.com/toutiao/feoffline/hot_list/resource/hot_list/js/index.45f50250.chunk.js"
;
String
jsUrl
=
"https://s3.pstatp.com/toutiao/feoffline/hot_list/resource/hot_list/js/index.45f50250.chunk.js"
;
Request
jsRequest
=
RequestUtils
.
wrapGet
(
jsUrl
);
Request
jsRequest
=
RequestUtils
.
wrapGet
(
jsUrl
);
String
jsBody
=
null
;
String
jsBody
=
null
;
Response
response
=
httpBoot
.
syncCall
(
jsRequest
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
jsRequest
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"获取今日头条实时热搜头部信息标识失败"
,
cause
);
log
.
error
(
"获取今日头条实时热搜头部信息标识失败"
,
cause
);
...
@@ -65,7 +66,7 @@ public class ToutiaoHotSearchCrawler {
...
@@ -65,7 +66,7 @@ public class ToutiaoHotSearchCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response1
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response1
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response1
.
hasCause
())
{
if
(
response1
.
hasCause
())
{
Throwable
cause
=
response1
.
cause
();
Throwable
cause
=
response1
.
cause
();
log
.
error
(
"解析今日头条实时热搜时出现连接失败"
,
cause
);
log
.
error
(
"解析今日头条实时热搜时出现连接失败"
,
cause
);
...
@@ -165,34 +166,29 @@ public class ToutiaoHotSearchCrawler {
...
@@ -165,34 +166,29 @@ public class ToutiaoHotSearchCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
String
url
=
hotSearchList
.
getUrl
();
String
url
=
hotSearchList
.
getUrl
();
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
i
=
0
;
i
<=
5
;
i
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析今日头条热搜详情页面出现连接失败"
,
cause
);
log
.
error
(
"解析今日头条热搜详情页面出现连接失败"
,
cause
);
}
else
{
}
else
{
htmlBody
=
response
.
bodyString
();
htmlBody
=
response
.
bodyString
();
}
}
if
(
StringUtils
.
isNotBlank
(
htmlBody
)&&
htmlBody
.
contains
(
"data"
))
{
if
(
StringUtils
.
isNotBlank
(
htmlBody
))
{
try
{
Document
document
=
Jsoup
.
parse
(
htmlBody
);
String
substring
=
htmlBody
.
substring
(
htmlBody
.
indexOf
(
"read_count"
)+
12
,
htmlBody
.
indexOf
(
"search_bar_controll"
));
Elements
elements
=
document
.
select
(
".result-content .cs-view .cs-topone-tail .cs-view .margin-bottom-m .margin-left-m"
);
String
s
=
substring
.
split
(
","
)[
0
];
if
(
Objects
.
nonNull
(
elements
)
&&
!
elements
.
isEmpty
())
{
Long
commentCount
=
Long
.
valueOf
(
s
);
Element
element
=
elements
.
first
();
hotSearchList
.
setCommentCount
(
commentCount
);
String
readCount
=
element
.
text
().
replaceAll
(
"阅读"
,
""
);
hotSearchListDAO
.
updateTouTiaoReadCount
(
hotSearchList
);
Long
count
=
TipsUtils
.
getHotCount
(
readCount
);
return
hotSearchList
;
log
.
info
(
"{},阅读量:{}"
,
hotSearchList
.
getName
(),
count
);
}
catch
(
Exception
e
)
{
hotSearchList
.
setCommentCount
(
count
);
e
.
printStackTrace
();
hotSearchListDAO
.
updateTouTiaoReadCount
(
hotSearchList
);
return
hotSearchList
;
}
}
}
ZhiWeiTools
.
sleep
(
1000L
);
}
}
}
}
return
hotSearchList
;
return
hotSearchList
;
}
}
/**
/**
* 热搜类型
* 热搜类型
*
*
...
@@ -231,7 +227,7 @@ public class ToutiaoHotSearchCrawler {
...
@@ -231,7 +227,7 @@ public class ToutiaoHotSearchCrawler {
headerMap
.
put
(
"User-Agent"
,
"com.ss.android.article.news/8770 (Linux; U; Android 9; zh_CN; Redmi 8; Build/PKQ1.190319.001; Cronet/TTNetVersion:a867b489 2022-03-11 QuicVersion:b314d107 2021-11-24) Accept-Encoding: gzip, deflate, br"
);
headerMap
.
put
(
"User-Agent"
,
"com.ss.android.article.news/8770 (Linux; U; Android 9; zh_CN; Redmi 8; Build/PKQ1.190319.001; Cronet/TTNetVersion:a867b489 2022-03-11 QuicVersion:b314d107 2021-11-24) Accept-Encoding: gzip, deflate, br"
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"获取今日头条榜单出错"
,
cause
);
log
.
error
(
"获取今日头条榜单出错"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WangYiHotSearchCrawler.java
View file @
ea714ae2
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -41,7 +42,7 @@ public class WangYiHotSearchCrawler {
...
@@ -41,7 +42,7 @@ public class WangYiHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"网易新闻实时热榜页面连接异常..."
,
cause
);
log
.
error
(
"网易新闻实时热榜页面连接异常..."
,
cause
);
...
@@ -84,7 +85,7 @@ public class WangYiHotSearchCrawler {
...
@@ -84,7 +85,7 @@ public class WangYiHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"网易新闻跟贴热议页面连接异常..."
,
cause
);
log
.
error
(
"网易新闻跟贴热议页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoBrandCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -42,7 +43,7 @@ public class WeiBoBrandCrawler {
...
@@ -42,7 +43,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌总榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博品牌总榜时出现连接失败"
,
cause
);
...
@@ -76,7 +77,7 @@ public class WeiBoBrandCrawler {
...
@@ -76,7 +77,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌汽车榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博品牌汽车榜时出现连接失败"
,
cause
);
...
@@ -110,7 +111,7 @@ public class WeiBoBrandCrawler {
...
@@ -110,7 +111,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌手机榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博品牌手机榜时出现连接失败"
,
cause
);
...
@@ -144,7 +145,7 @@ public class WeiBoBrandCrawler {
...
@@ -144,7 +145,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌美妆榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博品牌美妆榜时出现连接失败"
,
cause
);
...
@@ -178,7 +179,7 @@ public class WeiBoBrandCrawler {
...
@@ -178,7 +179,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌奢侈品榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博品牌奢侈品榜时出现连接失败"
,
cause
);
...
@@ -212,7 +213,7 @@ public class WeiBoBrandCrawler {
...
@@ -212,7 +213,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌食品饮料榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博品牌食品饮料榜时出现连接失败"
,
cause
);
...
@@ -246,7 +247,7 @@ public class WeiBoBrandCrawler {
...
@@ -246,7 +247,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌家电榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博品牌家电榜时出现连接失败"
,
cause
);
...
@@ -280,7 +281,7 @@ public class WeiBoBrandCrawler {
...
@@ -280,7 +281,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌服装鞋帽榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博品牌服装鞋帽榜时出现连接失败"
,
cause
);
...
@@ -314,7 +315,7 @@ public class WeiBoBrandCrawler {
...
@@ -314,7 +315,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌服装鞋帽榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博品牌服装鞋帽榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchBoxHotWordsCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoSearchBoxHotWords
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoSearchBoxHotWords
;
...
@@ -36,7 +37,7 @@ public class WeiBoSearchBoxHotWordsCrawler {
...
@@ -36,7 +37,7 @@ public class WeiBoSearchBoxHotWordsCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博搜索框热词时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析微博搜索框热词时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchHotWordsCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoSearchBoxHotWords
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoSearchBoxHotWords
;
...
@@ -35,7 +36,7 @@ public class WeiBoSearchHotWordsCrawler {
...
@@ -35,7 +36,7 @@ public class WeiBoSearchHotWordsCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiShiHotSearchCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -44,7 +45,7 @@ public class WeiShiHotSearchCrawler {
...
@@ -44,7 +45,7 @@ public class WeiShiHotSearchCrawler {
headerMap
.
put
(
"Host"
,
"api.weishi.qq.com"
);
headerMap
.
put
(
"Host"
,
"api.weishi.qq.com"
);
Request
request
=
RequestUtils
.
wrapPost
(
url
,
headerMap
,
RequestBody
.
create
(
MediaType
.
get
(
"application/json"
),
"{\"req_body\":{\"hotRankID\":\"\",\"attachInfo\":\"\",\"hotRankType\":1,\"sourceID\":\"WSSearchH5\"}}"
));
Request
request
=
RequestUtils
.
wrapPost
(
url
,
headerMap
,
RequestBody
.
create
(
MediaType
.
get
(
"application/json"
),
"{\"req_body\":{\"hotRankID\":\"\",\"attachInfo\":\"\",\"hotRankType\":1,\"sourceID\":\"WSSearchH5\"}}"
));
for
(
int
count
=
0
;
count
<=
3
;
count
++)
{
for
(
int
count
=
0
;
count
<=
3
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微视热榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微视热榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboEntertainmentCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -37,7 +38,7 @@ public class WeiboEntertainmentCrawler {
...
@@ -37,7 +38,7 @@ public class WeiboEntertainmentCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博娱乐榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博娱乐榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
ea714ae2
...
@@ -12,6 +12,7 @@ import java.util.stream.Collectors;
...
@@ -12,6 +12,7 @@ import java.util.stream.Collectors;
import
com.alibaba.fastjson.JSON
;
import
com.alibaba.fastjson.JSON
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.*
;
import
com.zhiwei.searchhotcrawler.bean.*
;
...
@@ -67,7 +68,7 @@ public class WeiboHotSearchCrawler {
...
@@ -67,7 +68,7 @@ public class WeiboHotSearchCrawler {
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
if
(
i
==
2
)
{
if
(
i
==
2
)
{
return
list
;
return
list
;
...
@@ -263,7 +264,7 @@ public class WeiboHotSearchCrawler {
...
@@ -263,7 +264,7 @@ public class WeiboHotSearchCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博时热搜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博时热搜时出现连接失败"
,
cause
);
...
@@ -361,7 +362,7 @@ public class WeiboHotSearchCrawler {
...
@@ -361,7 +362,7 @@ public class WeiboHotSearchCrawler {
String
url
=
"https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot"
;
String
url
=
"https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博热搜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博热搜时出现连接失败"
,
cause
);
...
@@ -419,7 +420,7 @@ public class WeiboHotSearchCrawler {
...
@@ -419,7 +420,7 @@ public class WeiboHotSearchCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博热搜详情页面时出现连接失败"
,
cause
);
log
.
error
(
"解析微博热搜详情页面时出现连接失败"
,
cause
);
...
@@ -517,7 +518,7 @@ public class WeiboHotSearchCrawler {
...
@@ -517,7 +518,7 @@ public class WeiboHotSearchCrawler {
//该cookie有效期一年,微博pc端获取游客cookie链接 https://s.weibo.com/top/summary?cate=realtimehot
//该cookie有效期一年,微博pc端获取游客cookie链接 https://s.weibo.com/top/summary?cate=realtimehot
headerMap
.
put
(
"Cookie"
,
"SUB=_2AkMUShJMf8NxqwJRmP0RyWvgb4RwwgnEieKiFuOXJRMxHRl-yT92qlQvtRB6P8o8oso9Ew-s6vf16fdCca-Xz6DwwAMH; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFdAobr6HdAbgQQ9vbUQKDx"
);
headerMap
.
put
(
"Cookie"
,
"SUB=_2AkMUShJMf8NxqwJRmP0RyWvgb4RwwgnEieKiFuOXJRMxHRl-yT92qlQvtRB6P8o8oso9Ew-s6vf16fdCca-Xz6DwwAMH; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFdAobr6HdAbgQQ9vbUQKDx"
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博时热搜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博时热搜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboNewsCrawler.java
View file @
ea714ae2
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -48,7 +49,7 @@ public class WeiboNewsCrawler {
...
@@ -48,7 +49,7 @@ public class WeiboNewsCrawler {
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
List
<
HotSearchList
>
result
=
new
ArrayList
();
List
<
HotSearchList
>
result
=
new
ArrayList
();
//发送第一次请求获取前20条数据
//发送第一次请求获取前20条数据
Response
response
=
httpBoot
.
syncCall
(
request1
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request1
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"第一次请求解析微博要闻榜时出现连接失败"
,
cause
);
log
.
error
(
"第一次请求解析微博要闻榜时出现连接失败"
,
cause
);
...
@@ -70,7 +71,7 @@ public class WeiboNewsCrawler {
...
@@ -70,7 +71,7 @@ public class WeiboNewsCrawler {
continue
;
continue
;
}
}
//发送第二次请求获取中间20条数据
//发送第二次请求获取中间20条数据
Response
response1
=
httpBoot
.
syncCall
(
request2
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response1
=
httpBoot
.
syncCall
(
request2
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response1
.
hasCause
()){
if
(
response1
.
hasCause
()){
Throwable
cause
=
response1
.
cause
();
Throwable
cause
=
response1
.
cause
();
log
.
error
(
"第二次请求解析微博要闻榜时出现连接失败"
,
cause
);
log
.
error
(
"第二次请求解析微博要闻榜时出现连接失败"
,
cause
);
...
@@ -93,7 +94,7 @@ public class WeiboNewsCrawler {
...
@@ -93,7 +94,7 @@ public class WeiboNewsCrawler {
continue
;
continue
;
}
}
//发送第三次请求获取最后10条数据
//发送第三次请求获取最后10条数据
Response
response2
=
httpBoot
.
syncCall
(
request3
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response2
=
httpBoot
.
syncCall
(
request3
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response2
.
hasCause
()){
if
(
response2
.
hasCause
()){
Throwable
cause
=
response2
.
cause
();
Throwable
cause
=
response2
.
cause
();
log
.
error
(
"第三次请求解析微博要闻榜时出现连接失败"
,
cause
);
log
.
error
(
"第三次请求解析微博要闻榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboOutCircleCrawler.java
View file @
ea714ae2
...
@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.crawler;
...
@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.crawler;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -42,7 +43,7 @@ public class WeiboOutCircleCrawler {
...
@@ -42,7 +43,7 @@ public class WeiboOutCircleCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
for
(
int
x
=
0
;
x
<=
2
;
x
++)
{
for
(
int
x
=
0
;
x
<=
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博出圈榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博出圈榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
View file @
ea714ae2
...
@@ -10,6 +10,7 @@ import java.util.Objects;
...
@@ -10,6 +10,7 @@ import java.util.Objects;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic
;
import
com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic
;
...
@@ -64,7 +65,7 @@ public class WeiboSuperTopicCrawler {
...
@@ -64,7 +65,7 @@ public class WeiboSuperTopicCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
//重试三次
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"获取榜单列表页面时出现错误,错误为:{}"
,
cause
);
log
.
error
(
"获取榜单列表页面时出现错误,错误为:{}"
,
cause
);
...
@@ -140,7 +141,7 @@ public class WeiboSuperTopicCrawler {
...
@@ -140,7 +141,7 @@ public class WeiboSuperTopicCrawler {
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid="
+
id
;
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid="
+
id
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析榜单详情页面时出现错误,错误为:{}"
,
cause
);
log
.
error
(
"解析榜单详情页面时出现错误,错误为:{}"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
View file @
ea714ae2
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -139,7 +140,7 @@ public class WeiboTopicCrawler {
...
@@ -139,7 +140,7 @@ public class WeiboTopicCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
//重试三次
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
5
;
retryTimes
++)
{
for
(
int
retryTimes
=
1
;
retryTimes
<=
5
;
retryTimes
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"下载榜单列表页面时出现错误,错误为:{}"
,
cause
);
log
.
error
(
"下载榜单列表页面时出现错误,错误为:{}"
,
cause
);
...
@@ -228,7 +229,7 @@ public class WeiboTopicCrawler {
...
@@ -228,7 +229,7 @@ public class WeiboTopicCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
//重试三次
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"下载榜单列表页面时出现错误,错误为:{}"
,
cause
);
log
.
error
(
"下载榜单列表页面时出现错误,错误为:{}"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboVideoCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -41,7 +42,7 @@ public class WeiboVideoCrawler {
...
@@ -41,7 +42,7 @@ public class WeiboVideoCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
x
=
0
;
x
<=
2
;
x
++)
{
for
(
int
x
=
0
;
x
<=
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博视频榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博视频榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/XinLangHotSearchCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -39,7 +40,7 @@ public class XinLangHotSearchCrawler {
...
@@ -39,7 +40,7 @@ public class XinLangHotSearchCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
JSONObject
jsonObject
=
null
;
JSONObject
jsonObject
=
null
;
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"新浪热榜页面连接异常..."
,
cause
);
log
.
error
(
"新浪热榜页面连接异常..."
,
cause
);
...
@@ -115,7 +116,7 @@ public class XinLangHotSearchCrawler {
...
@@ -115,7 +116,7 @@ public class XinLangHotSearchCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
JSONArray
dataJson
=
null
;
JSONArray
dataJson
=
null
;
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"新浪热点页面连接异常..."
,
cause
);
log
.
error
(
"新浪热点页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
View file @
ea714ae2
...
@@ -7,6 +7,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -7,6 +7,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -41,7 +42,7 @@ public class ZhihuChildHotSearchCrawler {
...
@@ -41,7 +42,7 @@ public class ZhihuChildHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
//采集为空最多重试3次
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
response
.
cause
().
printStackTrace
();
response
.
cause
().
printStackTrace
();
}
else
{
}
else
{
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
ea714ae2
...
@@ -5,6 +5,7 @@ import java.util.*;
...
@@ -5,6 +5,7 @@ import java.util.*;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
io.netty.handler.ssl.SslProvider
;
import
io.netty.handler.ssl.SslProvider
;
...
@@ -99,7 +100,7 @@ public class ZhihuHotSearchCrawler {
...
@@ -99,7 +100,7 @@ public class ZhihuHotSearchCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
for
(
int
x
=
0
;
x
<=
5
;
x
++)
{
for
(
int
x
=
0
;
x
<=
5
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
cause
);
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
cause
);
...
@@ -170,7 +171,7 @@ public class ZhihuHotSearchCrawler {
...
@@ -170,7 +171,7 @@ public class ZhihuHotSearchCrawler {
Map
.
put
(
"cookie"
,
"_xsrf=7NFWM5qBcOutfs8MaW7bhQQH65t3Xia4"
);
Map
.
put
(
"cookie"
,
"_xsrf=7NFWM5qBcOutfs8MaW7bhQQH65t3Xia4"
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
Map
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
Map
);
try
{
try
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"单条知乎热搜数据页面连接失败"
,
cause
);
log
.
error
(
"单条知乎热搜数据页面连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuTopicSearchCrawler.java
View file @
ea714ae2
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -25,7 +26,7 @@ public class ZhihuTopicSearchCrawler {
...
@@ -25,7 +26,7 @@ public class ZhihuTopicSearchCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"知乎热搜页面连接异常"
,
cause
);
log
.
error
(
"知乎热搜页面连接异常"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
ea714ae2
package
com
.
zhiwei
.
searchhotcrawler
.
run
;
package
com
.
zhiwei
.
searchhotcrawler
.
run
;
import
com.zhiwei.http.proxy.ProxyFactory
;
import
com.zhiwei.http.proxy.CynomysFactory
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.network.cynomys.consumer.CynomysConsumer
;
import
com.zhiwei.network.cynomys.consumer.CynomysConsumerFactory
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.timer.*
;
import
com.zhiwei.searchhotcrawler.timer.*
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
org.apache.dubbo.config.ApplicationConfig
;
import
org.apache.dubbo.config.ConsumerConfig
;
import
org.apache.dubbo.config.RegistryConfig
;
import
org.springframework.context.ApplicationContext
;
import
org.springframework.context.ApplicationContext
;
import
org.springframework.context.support.ClassPathXmlApplicationContext
;
import
org.springframework.context.support.ClassPathXmlApplicationContext
;
...
@@ -14,15 +18,40 @@ public class HotSearchRun {
...
@@ -14,15 +18,40 @@ public class HotSearchRun {
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
ApplicationContext
context
=
new
ClassPathXmlApplicationContext
(
"applicationContext.xml"
);
ApplicationContext
context
=
new
ClassPathXmlApplicationContext
(
"applicationContext.xml"
);
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
// ProxyFactory.init(simpleConfig);
ApplicationConfig
applicationConfig
=
new
ApplicationConfig
();
applicationConfig
.
setName
(
"hot_search-project"
);
RegistryConfig
registryConfig
=
new
RegistryConfig
();
ConsumerConfig
consumerConfig
=
new
ConsumerConfig
();
String
username
=
null
;
String
password
=
null
;
if
(
ProxyConfig
.
isLocal
)
{
registryConfig
.
setAddress
(
ProxyConfig
.
localRegistry
);
// 设置分组
consumerConfig
.
setGroup
(
ProxyConfig
.
localGroup
);
username
=
ProxyConfig
.
localUsername
;
password
=
ProxyConfig
.
localPassword
;
}
else
{
registryConfig
.
setAddress
(
ProxyConfig
.
hangzhouRegistry
);
// 设置分组
consumerConfig
.
setGroup
(
ProxyConfig
.
hangzhouGroup
);
username
=
ProxyConfig
.
hangzhouUsername
;
password
=
ProxyConfig
.
hangzhouPassword
;
}
// 创建 consumer,applicationConfig 非必需参数
CynomysConsumer
consumer
=
CynomysConsumerFactory
.
create
(
applicationConfig
,
registryConfig
,
consumerConfig
,
username
,
password
);
new
UpdateWechatUserRun
().
start
();
// 初始化 http-boot 桥接
ZhiWeiTools
.
sleep
(
10000
);
CynomysFactory
.
init
(
consumer
);
new
UpdateWechatUserRun
().
start
();
ZhiWeiTools
.
sleep
(
10000
);
// new CacheListener().startListen();
// new CacheListener().startListen();
//推送程序启动
//推送程序启动
// new SendWeiboHotSearchRun().start();
// new SendWeiboHotSearchRun().start();
// new SendZhihuHotSearchRun().start();
// new SendZhihuHotSearchRun().start();
...
@@ -41,7 +70,7 @@ public class HotSearchRun {
...
@@ -41,7 +70,7 @@ public class HotSearchRun {
// scheduledThreadPool.scheduleAtFixedRate(new WeiboTopicRun(), 0, 1, TimeUnit.DAYS);
// scheduledThreadPool.scheduleAtFixedRate(new WeiboTopicRun(), 0, 1, TimeUnit.DAYS);
//采集程序启动
//采集程序启动
// new WeiboHotSearchRun().start();
// new WeiboHotSearchRun().start();
// new BaiduHotSearchRun().start();
// new BaiduHotSearchRun().start();
//// new SougoHotSearchRun().start();
//// new SougoHotSearchRun().start();
...
@@ -56,5 +85,5 @@ public class HotSearchRun {
...
@@ -56,5 +85,5 @@ public class HotSearchRun {
//// //抖音链接更新
//// //抖音链接更新
// new DouYinUrlHotSearchRun().start();
// new DouYinUrlHotSearchRun().start();
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearch36KrCrawlerTest.java
View file @
ea714ae2
...
@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.test;
...
@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.test;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -40,7 +41,7 @@ public class HotSearch36KrCrawlerTest {
...
@@ -40,7 +41,7 @@ public class HotSearch36KrCrawlerTest {
String
url
=
"https://www.36kr.com/hot-list/catalog"
;
String
url
=
"https://www.36kr.com/hot-list/catalog"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析36Kr人气榜时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析36Kr人气榜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchRunTest.java
View file @
ea714ae2
...
@@ -2,7 +2,6 @@ package com.zhiwei.searchhotcrawler.test;
...
@@ -2,7 +2,6 @@ package com.zhiwei.searchhotcrawler.test;
import
com.zhiwei.http.proxy.ProxyFactory
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun
;
...
@@ -13,9 +12,9 @@ import java.text.ParseException;
...
@@ -13,9 +12,9 @@ import java.text.ParseException;
public
class
HotSearchRunTest
{
public
class
HotSearchRunTest
{
public
static
void
main
(
String
[]
args
)
throws
ParseException
{
public
static
void
main
(
String
[]
args
)
throws
ParseException
{
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
//
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
//
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
//
ProxyFactory.init(simpleConfig);
//微博热搜开始采集
//微博热搜开始采集
// new WeiboHotSearchRun().start();
// new WeiboHotSearchRun().start();
...
...
src/main/java/com/zhiwei/searchhotcrawler/test/HuXiuHotSearchCrawlerTest.java
View file @
ea714ae2
...
@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.test;
...
@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.test;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -39,7 +40,7 @@ public class HuXiuHotSearchCrawlerTest {
...
@@ -39,7 +40,7 @@ public class HuXiuHotSearchCrawlerTest {
String
url
=
"https://www.huxiu.com/"
;
String
url
=
"https://www.huxiu.com/"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析虎嗅热文推荐时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析虎嗅热文推荐时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/test/Job51Test.java
View file @
ea714ae2
...
@@ -7,7 +7,7 @@ import com.mongodb.client.MongoDatabase;
...
@@ -7,7 +7,7 @@ import com.mongodb.client.MongoDatabase;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.Proxy
Factory
;
import
com.zhiwei.http.proxy.Proxy
ServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.proxy.config.SimpleConfig
;
...
@@ -36,9 +36,9 @@ public class Job51Test {
...
@@ -36,9 +36,9 @@ public class Job51Test {
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
// ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
// ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
//
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
//
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
//
ProxyFactory.init(simpleConfig);
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
// MongoDatabase mongoDBLocal = MongoDBLocalTemplate.getDB(DBConfig.dbName);
// MongoDatabase mongoDBLocal = MongoDBLocalTemplate.getDB(DBConfig.dbName);
...
@@ -64,7 +64,7 @@ public class Job51Test {
...
@@ -64,7 +64,7 @@ public class Job51Test {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
header
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
header
);
for
(
int
t
=
0
;
t
<
1
&&
jsonObject
==
null
;
t
++)
{
for
(
int
t
=
0
;
t
<
1
&&
jsonObject
==
null
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"知乎热搜页面连接异常"
,
cause
);
log
.
error
(
"知乎热搜页面连接异常"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/test/KuaiShouHotSearchCrawlerTest.java
View file @
ea714ae2
...
@@ -6,6 +6,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -6,6 +6,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -37,7 +38,7 @@ public class KuaiShouHotSearchCrawlerTest {
...
@@ -37,7 +38,7 @@ public class KuaiShouHotSearchCrawlerTest {
String
url
=
"https://video.kuaishou.com/?utm_source=aa&utm_medium=05&utm_campaign=aa_05_pp_yr&plan_id=138090084&unit_id=5205658029&creative_id=43661481717&keyword_id=202928529242&keyword=202928529242&bd_vid=11937382025080724791"
;
String
url
=
"https://video.kuaishou.com/?utm_source=aa&utm_medium=05&utm_campaign=aa_05_pp_yr&plan_id=138090084&unit_id=5205658029&creative_id=43661481717&keyword_id=202928529242&keyword=202928529242&bd_vid=11937382025080724791"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析快手热榜时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析快手热榜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/test/TaoBaoHotSearchCrawlerTest.java
View file @
ea714ae2
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -37,7 +38,7 @@ public class TaoBaoHotSearchCrawlerTest {
...
@@ -37,7 +38,7 @@ public class TaoBaoHotSearchCrawlerTest {
String
urls
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign=&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
String
urls
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign=&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Request
request1
=
RequestUtils
.
wrapGet
(
urls
);
Request
request1
=
RequestUtils
.
wrapGet
(
urls
);
String
token
=
null
;
String
token
=
null
;
Response
response1
=
httpBoot
.
syncCall
(
request1
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response1
=
httpBoot
.
syncCall
(
request1
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response1
.
hasCause
()){
if
(
response1
.
hasCause
()){
Throwable
cause
=
response1
.
cause
();
Throwable
cause
=
response1
.
cause
();
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
...
@@ -56,7 +57,7 @@ public class TaoBaoHotSearchCrawlerTest {
...
@@ -56,7 +57,7 @@ public class TaoBaoHotSearchCrawlerTest {
String
sign
=
MD5Util
.
getMD5
(
signs
).
toLowerCase
();
String
sign
=
MD5Util
.
getMD5
(
signs
).
toLowerCase
();
String
url
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign="
+
sign
+
"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
String
url
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign="
+
sign
+
"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/test/TaoBaoRunTest.java
View file @
ea714ae2
package
com
.
zhiwei
.
searchhotcrawler
.
test
;
package
com
.
zhiwei
.
searchhotcrawler
.
test
;
import
com.zhiwei.http.proxy.ProxyFactory
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun
;
...
@@ -13,9 +12,9 @@ import java.text.ParseException;
...
@@ -13,9 +12,9 @@ import java.text.ParseException;
public
class
TaoBaoRunTest
{
public
class
TaoBaoRunTest
{
public
static
void
main
(
String
[]
args
)
throws
ParseException
{
public
static
void
main
(
String
[]
args
)
throws
ParseException
{
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
//
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
//
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
//
ProxyFactory.init(simpleConfig);
//微博热搜开始采集
//微博热搜开始采集
// new WeiboHotSearchRun().start();
// new WeiboHotSearchRun().start();
...
...
src/main/java/com/zhiwei/searchhotcrawler/test/WeiboEntertainmentCrawlerTest.java
View file @
ea714ae2
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
...
@@ -41,7 +42,7 @@ public class WeiboEntertainmentCrawlerTest {
...
@@ -41,7 +42,7 @@ public class WeiboEntertainmentCrawlerTest {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博娱乐榜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博娱乐榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
View file @
ea714ae2
...
@@ -113,8 +113,13 @@ public class GatherTimer {
...
@@ -113,8 +113,13 @@ public class GatherTimer {
log
.
info
(
"{}, 今日头条此轮采集到的数据量为:{}"
,
new
Date
(),
toutiaoList
!=
null
?
toutiaoList
.
size
()
:
0
);
log
.
info
(
"{}, 今日头条此轮采集到的数据量为:{}"
,
new
Date
(),
toutiaoList
!=
null
?
toutiaoList
.
size
()
:
0
);
TipsUtils
.
addHotList
(
HotSearchType
.
今日头条热搜
.
name
(),
toutiaoList
);
TipsUtils
.
addHotList
(
HotSearchType
.
今日头条热搜
.
name
(),
toutiaoList
);
log
.
info
(
"今日头条热搜采集结束..."
);
log
.
info
(
"今日头条热搜采集结束..."
);
log
.
info
(
"今日头条热搜详情趋势阅读量更新..."
);
//暂停今日头条阅读量更新
TouTiaoExecutor
.
countTouTiaoReadCount
(
toutiaoList
);
// log.info("今日头条热搜详情趋势阅读量更新开始...");
// //TouTiaoExecutor.countTouTiaoReadCount(toutiaoList);
// for (HotSearchList hotSearchList : toutiaoList) {
// ToutiaoHotSearchCrawler.toutiaoReadCount(hotSearchList);
// }
// log.info("今日头条热搜详情趋势阅读量更新结束...");
}
}
/**
/**
...
@@ -362,7 +367,7 @@ public class GatherTimer {
...
@@ -362,7 +367,7 @@ public class GatherTimer {
* 知乎热搜数码分类采集
* 知乎热搜数码分类采集
*/
*/
@Async
(
value
=
"myScheduler"
)
@Async
(
value
=
"myScheduler"
)
@Scheduled
(
cron
=
"20 * * * * ? "
)
//
@Scheduled(cron = "20 * * * * ? ")
public
void
crawlerZhiHuDigital
(){
public
void
crawlerZhiHuDigital
(){
this
.
crawlerZhiHuChild
(
DIGITAL
);
this
.
crawlerZhiHuChild
(
DIGITAL
);
}
}
...
@@ -428,7 +433,7 @@ public class GatherTimer {
...
@@ -428,7 +433,7 @@ public class GatherTimer {
* 微博超话的采集
* 微博超话的采集
*/
*/
@Async
(
value
=
"myScheduler"
)
@Async
(
value
=
"myScheduler"
)
@Scheduled
(
cron
=
"0 0 0/3 * * ? "
)
//
@Scheduled(cron = "0 0 0/3 * * ? ")
public
void
crawlerWeiBoSuperTopic
(){
public
void
crawlerWeiBoSuperTopic
(){
log
.
info
(
"微博超话采集开始........"
);
log
.
info
(
"微博超话采集开始........"
);
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
...
@@ -675,7 +680,7 @@ public class GatherTimer {
...
@@ -675,7 +680,7 @@ public class GatherTimer {
*微博热词采集
*微博热词采集
*/
*/
@Async
(
value
=
"myScheduler"
)
@Async
(
value
=
"myScheduler"
)
@Scheduled
(
cron
=
"0 0 0/1 * * ? "
)
//
@Scheduled(cron = "0 0 0/1 * * ? ")
public
void
WeiBoSearchHotWordsCrawler
(){
public
void
WeiBoSearchHotWordsCrawler
(){
log
.
info
(
"微博热词采集开始........"
);
log
.
info
(
"微博热词采集开始........"
);
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
...
...
src/main/resources/proxyip.properties
View file @
ea714ae2
registry
=
zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182&timeout=60000
isLocal
=
false
group
=
hangzhou
hangzhou.registry
=
zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182&timeout=60000
hangzhou.group
=
hangzhou
hangzhou.username
=
hot-search
hangzhou.password
=
gRG9QJ6QghuLcCC9
########################################################
########################################################
#registry=zookeeper://192.168.0.35:2181?backup=192.168.0.30:2181,192.168.0.11:2181&timeout=60000
local.registry
=
zookeeper://192.168.0.35:2181?backup=192.168.0.30:2181,192.168.0.11:2181&timeout=60000
#group=local
local.group
=
local
#local.username=15139460980
#local.password=lllq2w3e4r
local.username
=
15757871020
local.password
=
Cwt1q2w3e4r@
src/test/java/InfoTest/InfoTest.java
View file @
ea714ae2
package
InfoTest
;
package
InfoTest
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoCollection
;
import
com.zhiwei.http.proxy.ProxyFactory
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
...
@@ -36,9 +36,9 @@ public class InfoTest {
...
@@ -36,9 +36,9 @@ public class InfoTest {
@Test
@Test
public
void
testMaimai
(){
public
void
testMaimai
(){
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
//
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
//
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
//
ProxyFactory.init(simpleConfig);
List
<
HotSearchList
>
hotSearchLists
=
MaiMaiHotSearchCrawler
.
getMaiMaiHotData
(
new
Date
());
List
<
HotSearchList
>
hotSearchLists
=
MaiMaiHotSearchCrawler
.
getMaiMaiHotData
(
new
Date
());
...
...
src/test/java/baiduTest/BaiduTest.java
View file @
ea714ae2
package
baiduTest
;
package
baiduTest
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.proxy.ProxyFactory
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.proxy.config.SimpleConfig
;
...
@@ -62,9 +62,9 @@ public class BaiduTest {
...
@@ -62,9 +62,9 @@ public class BaiduTest {
@Test
@Test
public
void
test
(){
public
void
test
(){
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
//
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
//
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
//
ProxyFactory.init(simpleConfig);
List
<
HotSearchList
>
hotSearchLists
=
baiduHotSearch
(
new
Date
());
List
<
HotSearchList
>
hotSearchLists
=
baiduHotSearch
(
new
Date
());
...
@@ -122,7 +122,7 @@ public class BaiduTest {
...
@@ -122,7 +122,7 @@ public class BaiduTest {
// headers.put("Content-type","text/html; charset=gb2312");
// headers.put("Content-type","text/html; charset=gb2312");
// Request request = RequestUtils.wrapGet(url, HeadersUtils.convertRepeatably(headers, Charset.forName("gb2312")));
// Request request = RequestUtils.wrapGet(url, HeadersUtils.convertRepeatably(headers, Charset.forName("gb2312")));
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
// try(Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY)) {
// try(Response response = httpBoot.syncCall(request, ProxyS
erverS
upplier.NAT_HEAVY_PROXY)) {
// htmlBody = response.body().string();
// htmlBody = response.body().string();
// } catch (Exception e) {
// } catch (Exception e) {
// log.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
// log.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
...
...
src/test/java/hotSaerchTest/HotSearchTest.java
View file @
ea714ae2
...
@@ -5,7 +5,7 @@ import com.mongodb.client.MongoCollection;
...
@@ -5,7 +5,7 @@ import com.mongodb.client.MongoCollection;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.Proxy
Factory
;
import
com.zhiwei.http.proxy.Proxy
ServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.proxy.config.SimpleConfig
;
...
@@ -51,9 +51,9 @@ public class HotSearchTest {
...
@@ -51,9 +51,9 @@ public class HotSearchTest {
@Test
@Test
public
void
kuaiShouTestCrawler
()
{
public
void
kuaiShouTestCrawler
()
{
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
//
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
//
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
//
ProxyFactory.init(simpleConfig);
List
<
HotSearchList
>
hotSearchLists
=
KuaiShouHotSearchCrawlerTest
.
KuaiShouHotSearchCrawler
(
new
Date
());
List
<
HotSearchList
>
hotSearchLists
=
KuaiShouHotSearchCrawlerTest
.
KuaiShouHotSearchCrawler
(
new
Date
());
System
.
out
.
println
(
hotSearchLists
);
System
.
out
.
println
(
hotSearchLists
);
...
@@ -64,15 +64,15 @@ public class HotSearchTest {
...
@@ -64,15 +64,15 @@ public class HotSearchTest {
@Test
@Test
public
void
WeiBoUpdate
()
{
public
void
WeiBoUpdate
()
{
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
//
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
//
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
//
ProxyFactory.init(simpleConfig);
Document
document
=
new
Document
();
Document
document
=
new
Document
();
//String url = "https://m.weibo.cn/api/container/getIndex?containerid=100103type%3D1%26t%3D10%26q%3D%23我国新冠疫苗接种剂次超9亿%23";
//String url = "https://m.weibo.cn/api/container/getIndex?containerid=100103type%3D1%26t%3D10%26q%3D%23我国新冠疫苗接种剂次超9亿%23";
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid=231522type%3D1%26q%3D%23可口可乐回应C罗拒绝与可乐同框%23"
;
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid=231522type%3D1%26q%3D%23可口可乐回应C罗拒绝与可乐同框%23"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博热搜详情页面时出现连接失败"
,
cause
);
log
.
error
(
"解析微博热搜详情页面时出现连接失败"
,
cause
);
...
@@ -135,9 +135,9 @@ public class HotSearchTest {
...
@@ -135,9 +135,9 @@ public class HotSearchTest {
@Test
@Test
public
void
taoBaoTestCrawler
()
{
public
void
taoBaoTestCrawler
()
{
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
//
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
//
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
//
ProxyFactory.init(simpleConfig);
List
<
HotSearchList
>
hotSearchLists
=
TaoBaoHotSearchCrawlerTest
.
taoBaoHotSearch
(
new
Date
());
List
<
HotSearchList
>
hotSearchLists
=
TaoBaoHotSearchCrawlerTest
.
taoBaoHotSearch
(
new
Date
());
...
@@ -152,9 +152,9 @@ public class HotSearchTest {
...
@@ -152,9 +152,9 @@ public class HotSearchTest {
@Test
@Test
public
void
baiDuTestCrawler
()
{
public
void
baiDuTestCrawler
()
{
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
//
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
//
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
//
ProxyFactory.init(simpleConfig);
List
<
HotSearchList
>
hotSearchLists
=
BaiDuHotSearchCrawler
.
baiduHotSearch
(
new
Date
());
List
<
HotSearchList
>
hotSearchLists
=
BaiDuHotSearchCrawler
.
baiduHotSearch
(
new
Date
());
System
.
out
.
println
(
hotSearchLists
);
System
.
out
.
println
(
hotSearchLists
);
...
...
src/test/java/proxy/ProxyTest.java
0 → 100644
View file @
ea714ae2
package
proxy
;
import
com.zhiwei.http.proxy.CynomysFactory
;
import
com.zhiwei.network.cynomys.consumer.CynomysConsumer
;
import
com.zhiwei.network.cynomys.consumer.CynomysConsumerFactory
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.crawler.HotSearch36KrCrawler
;
import
lombok.extern.log4j.Log4j2
;
import
org.apache.dubbo.config.ApplicationConfig
;
import
org.apache.dubbo.config.ConsumerConfig
;
import
org.apache.dubbo.config.RegistryConfig
;
import
org.junit.Test
;
import
org.junit.runner.RunWith
;
import
org.springframework.test.context.ContextConfiguration
;
import
org.springframework.test.context.junit4.SpringJUnit4ClassRunner
;
import
java.util.Date
;
import
java.util.List
;
/**
 * Spring-context tests around the proxy bridge configuration.
 *
 * <p>The class runs under {@link SpringJUnit4ClassRunner} with
 * {@code classpath:applicationContext.xml}. NOTE(review): both tests appear to
 * need the real registry / proxy environment to be reachable — confirm before
 * enabling them in CI.
 *
 * @author cwt
 * @date 2022/11/8 10:22
 */
@Log4j2
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = {"classpath:applicationContext.xml"})
public class ProxyTest {

    /**
     * Builds a {@link CynomysConsumer} from the values exposed by {@link ProxyConfig},
     * initializes the http-boot bridge with it, waits one second, and then prints every
     * entry returned by {@code HotSearch36KrCrawler.hotSearch36Kr(new Date())}.
     *
     * <p>{@code ProxyConfig.isLocal} selects between the {@code local*} and the
     * {@code hangzhou*} registry, group and credentials.
     */
    @Test
    public void initTest() {
        ApplicationConfig applicationConfig = new ApplicationConfig();
        applicationConfig.setName("hot_search-project");

        RegistryConfig registryConfig = new RegistryConfig();
        ConsumerConfig consumerConfig = new ConsumerConfig();

        // Assigned exactly once by the exhaustive if/else below.
        final String username;
        final String password;
        if (ProxyConfig.isLocal) {
            registryConfig.setAddress(ProxyConfig.localRegistry);
            // Set the consumer group.
            consumerConfig.setGroup(ProxyConfig.localGroup);
            username = ProxyConfig.localUsername;
            password = ProxyConfig.localPassword;
        } else {
            registryConfig.setAddress(ProxyConfig.hangzhouRegistry);
            // Set the consumer group.
            consumerConfig.setGroup(ProxyConfig.hangzhouGroup);
            username = ProxyConfig.hangzhouUsername;
            password = ProxyConfig.hangzhouPassword;
        }

        // Create the consumer; applicationConfig is an optional parameter.
        CynomysConsumer consumer = CynomysConsumerFactory.create(
                applicationConfig, registryConfig, consumerConfig, username, password);

        // Initialize the http-boot bridge.
        CynomysFactory.init(consumer);
        log.info("桥接初始化完成");

        // Presumably gives the freshly initialized bridge time to settle — TODO confirm.
        try {
            Thread.sleep(1000L);
        } catch (InterruptedException e) {
            // Restore the interrupt flag instead of swallowing it, and report through the logger.
            Thread.currentThread().interrupt();
            log.warn("Interrupted while waiting after bridge initialization", e);
        }

        List<HotSearchList> hotSearchLists = HotSearch36KrCrawler.hotSearch36Kr(new Date());
        hotSearchLists.forEach(System.out::println);
    }

    /**
     * Prints {@code ProxyConfig.isLocal} and {@code ProxyConfig.hangzhouGroup} to stdout
     * so the loaded configuration values can be inspected by eye.
     */
    @Test
    public void configTest() {
        System.out.println(ProxyConfig.isLocal);
        System.out.println(ProxyConfig.hangzhouGroup);
    }
}
src/test/java/weiboTest/WeiboHotSearchTest.java
View file @
ea714ae2
...
@@ -6,7 +6,8 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -6,7 +6,8 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.HttpBoot
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.boot.Response
;
import
com.zhiwei.http.proxy.ProxyFactory
;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.proxy.config.SimpleConfig
;
...
@@ -69,9 +70,9 @@ public class WeiboHotSearchTest {
...
@@ -69,9 +70,9 @@ public class WeiboHotSearchTest {
@Test
@Test
public
void
testHotWeibo
()
{
public
void
testHotWeibo
()
{
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
//
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
//
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
//
ProxyFactory.init(simpleConfig);
while
(
true
)
{
while
(
true
)
{
try
{
try
{
...
@@ -110,9 +111,9 @@ public class WeiboHotSearchTest {
...
@@ -110,9 +111,9 @@ public class WeiboHotSearchTest {
//org.bson.Document document
//org.bson.Document document
// @Test
// @Test
public
void
test12
(
org
.
bson
.
Document
document
)
{
public
void
test12
(
org
.
bson
.
Document
document
)
{
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
//
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
//
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory
.
init
(
simpleConfig
);
//
ProxyFactory.init(simpleConfig);
// org.bson.Document document = new org.bson.Document();
// org.bson.Document document = new org.bson.Document();
// document.put("name","新疆人讲述真实的新疆");
// document.put("name","新疆人讲述真实的新疆");
// document.put("url","https://m.weibo.cn/search?containerid=100103type%3D1%26t%3D10%26q%3D%23%E6%96%B0%E7%96%86%E4%BA%BA%E8%AE%B2%E8%BF%B0%E7%9C%9F%E5%AE%9E%E7%9A%84%E6%96%B0%E7%96%86%23&isnewpage=1&extparam=seat%3D1%26filter_type%3Drealtimehot%26dgr%3D0%26cate%3D0%26pos%3D1%26realpos%3D2%26flag%3D1%26c_type%3D31%26display_time%3D1622705918&luicode=10000011&lfid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot");
// document.put("url","https://m.weibo.cn/search?containerid=100103type%3D1%26t%3D10%26q%3D%23%E6%96%B0%E7%96%86%E4%BA%BA%E8%AE%B2%E8%BF%B0%E7%9C%9F%E5%AE%9E%E7%9A%84%E6%96%B0%E7%96%86%23&isnewpage=1&extparam=seat%3D1%26filter_type%3Drealtimehot%26dgr%3D0%26cate%3D0%26pos%3D1%26realpos%3D2%26flag%3D1%26c_type%3D31%26display_time%3D1622705918&luicode=10000011&lfid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot");
...
@@ -123,7 +124,7 @@ public class WeiboHotSearchTest {
...
@@ -123,7 +124,7 @@ public class WeiboHotSearchTest {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
2
;
count
++)
{
for
(
int
count
=
0
;
count
<=
2
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博热搜详情页面时出现连接失败"
,
cause
);
log
.
error
(
"解析微博热搜详情页面时出现连接失败"
,
cause
);
...
@@ -216,7 +217,7 @@ public class WeiboHotSearchTest {
...
@@ -216,7 +217,7 @@ public class WeiboHotSearchTest {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
2
;
count
++)
{
for
(
int
count
=
0
;
count
<=
2
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博热搜详情页面时出现连接失败"
,
cause
);
log
.
error
(
"解析微博热搜详情页面时出现连接失败"
,
cause
);
...
@@ -532,7 +533,7 @@ public class WeiboHotSearchTest {
...
@@ -532,7 +533,7 @@ public class WeiboHotSearchTest {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxySupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyS
erverS
upplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博时热搜时出现连接失败"
,
cause
);
log
.
error
(
"解析微博时热搜时出现连接失败"
,
cause
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment