Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
9f6d8158
Commit
9f6d8158
authored
Nov 14, 2022
by
chenweitao
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'working' into 'master'
唯一化客户端 See merge request
!212
parents
235fffdd
12b6ad6b
Show whitespace changes
Inline
Side-by-side
Showing
32 changed files
with
124 additions
and
93 deletions
+124
-93
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/BiliComprehensiveHotCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/BililiCrawler.java
+5
-4
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
+5
-4
src/main/java/com/zhiwei/searchhotcrawler/crawler/FengHuangSearchCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/HotSearch36KrCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/HuXiuHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/KuaiShouHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/MaiMaiHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/SouhuTopicCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/TaoBaoHotSearchCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/ToutiaoHotSearchCrawler.java
+6
-5
src/main/java/com/zhiwei/searchhotcrawler/crawler/WangYiHotSearchCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoBrandCrawler.java
+11
-10
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchBoxHotWordsCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchHotWordsCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiShiHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboEntertainmentCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+7
-6
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboNewsCrawler.java
+5
-4
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboOutCircleCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboVideoCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/XinLangHotSearchCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuTopicSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/util/HttpClientUtils.java
+1
-1
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.boot.Response;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
org.apache.commons.lang3.StringUtils
;
...
...
@@ -29,7 +30,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
public
class
BaiDuHotSearchCrawler
{
//private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
connectTimeout
(
Duration
.
ofSeconds
(
60
)).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
/**
* @return void 返回类型
...
...
@@ -41,7 +42,7 @@ public class BaiDuHotSearchCrawler {
String
url
=
"http://top.baidu.com/buzz?b=1&fr=topindex"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析百度风云榜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/BiliComprehensiveHotCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
org.apache.commons.lang3.StringUtils
;
...
...
@@ -28,7 +29,7 @@ import java.util.*;
public
class
BiliComprehensiveHotCrawler
{
//private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
...
...
@@ -54,7 +55,7 @@ public class BiliComprehensiveHotCrawler {
for
(
int
i
=
0
;
i
<
urlList
.
size
();
i
++)
{
Request
request
=
RequestUtils
.
wrapGet
(
urlList
.
get
(
i
));
//发送请求每次获取20条数据
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
fmt
.
format
(
date
)+
":第"
+
i
+
1
+
"次请求解析B站综合热门时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/BililiCrawler.java
View file @
9f6d8158
...
...
@@ -10,6 +10,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
...
...
@@ -28,7 +29,7 @@ import java.util.concurrent.Executors;
@Log4j2
public
class
BililiCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* B站排行榜的采集
...
...
@@ -44,7 +45,7 @@ public class BililiCrawler {
String
url
=
"https://api.bilibili.com/x/web-interface/ranking/v2?rid=0&type=all"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"B站排行榜页面连接失败"
,
cause
.
fillInStackTrace
());
...
...
@@ -134,7 +135,7 @@ public class BililiCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
try
{
System
.
setProperty
(
"https.protocols"
,
"TLSv1,TLSv1.1,TLSv1.2,SSLv3"
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
String
htmlBody
=
response
.
bodyString
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"v-wrap"
))
{
Document
document
=
Jsoup
.
parse
(
htmlBody
);
...
...
@@ -182,7 +183,7 @@ public class BililiCrawler {
String
url
=
"https://app.biliapi.com/x/v2/search/square?build=616050&limit=10"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"B站热搜页面连接失败"
,
cause
.
fillInStackTrace
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -8,6 +8,7 @@ import com.zhiwei.http.boot.Response;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
org.apache.commons.lang3.StringUtils
;
...
...
@@ -29,7 +30,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
@Log4j2
public
class
DouyinHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
public
static
List
<
HotSearchList
>
list
=
new
ArrayList
<>();
...
...
@@ -47,7 +48,7 @@ public class DouyinHotSearchCrawler {
String
url
=
"https://api.amemv.com/aweme/v1/hot/search/list/"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取抖音热搜榜时出现问题:{}"
,
cause
);
...
...
@@ -91,7 +92,7 @@ public class DouyinHotSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
x
=
0
;
x
<
3
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取抖音热搜榜链接时出现问题:{}"
,
cause
);
...
...
@@ -126,7 +127,7 @@ public class DouyinHotSearchCrawler {
String
url
=
"https://api5-normal-c-lq.amemv.com/aweme/v1/hot/search/list/?board_type=2&board_sub_type=2&version_code=140900"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取抖音娱乐榜榜时出现问题:{}"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/FengHuangSearchCrawler.java
View file @
9f6d8158
...
...
@@ -10,6 +10,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -22,7 +23,7 @@ import java.util.List;
@Log4j2
public
class
FengHuangSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 获取凤凰新闻热榜
...
...
@@ -35,7 +36,7 @@ public class FengHuangSearchCrawler {
String
url
=
"https://nine.ifeng.com/hotspotlist?gv=7.9.1&page="
+
page
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"凤凰新闻热榜页面连接异常..."
,
cause
);
...
...
@@ -75,7 +76,7 @@ public class FengHuangSearchCrawler {
String
url
=
"https://shankapi.ifeng.com/autumn/sogouSearchHotword"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"凤凰新闻热搜页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/HotSearch36KrCrawler.java
View file @
9f6d8158
...
...
@@ -7,6 +7,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -28,7 +29,7 @@ import java.util.*;
@Log4j2
public
class
HotSearch36KrCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
connectTimeout
(
Duration
.
ofSeconds
(
60
)).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
/**
* @return void 返回类型
...
...
@@ -47,7 +48,7 @@ public class HotSearch36KrCrawler {
headerMap
.
put
(
"sec-fetch-dest"
,
"empty"
);
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析36Kr人气榜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/HuXiuHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -10,6 +10,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -30,7 +31,7 @@ import java.util.*;
*/
@Log4j2
public
class
HuXiuHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
connectTimeout
(
Duration
.
ofSeconds
(
60
)).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
public
static
List
<
HotSearchList
>
HuXiuHotArticleRecommended
(
Date
date
){
...
...
@@ -43,7 +44,7 @@ public class HuXiuHotSearchCrawler {
headerMap
.
put
(
"sec-ch-ua"
,
" Not A;Brand\";v=\"99\", \"Chromium\";v=\"101\", \"Microsoft Edge\";v=\"101"
);
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析虎嗅热文推荐时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/KuaiShouHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -25,7 +26,7 @@ import java.util.*;
*/
@Log4j2
public
class
KuaiShouHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
connectTimeout
(
Duration
.
ofSeconds
(
60
)).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
/**
* @return void 返回类型
...
...
@@ -37,7 +38,7 @@ public class KuaiShouHotSearchCrawler {
String
url
=
"https://video.kuaishou.com/?utm_source=aa&utm_medium=05&utm_campaign=aa_05_pp_yr&plan_id=138090084&unit_id=5205658029&creative_id=43661481717&keyword_id=202928529242&keyword=202928529242&bd_vid=11937382025080724791"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析快手热榜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/MaiMaiHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -10,6 +10,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -24,7 +25,7 @@ import java.util.List;
@Log4j2
public
class
MaiMaiHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 获取maimai热榜
...
...
@@ -36,7 +37,7 @@ public class MaiMaiHotSearchCrawler {
String
url
=
"https://open.taou.com/maimai/feed/v6/hot_list_entry/feeds?page_version=2&version=6.2.34&u=232258287&access_token=1.4c82e8ad6d6b4e03262a48f334dea336"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"脉脉热榜页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -10,6 +10,7 @@ import com.zhiwei.http.proxy.ProxyServerSupplier;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.util.HeaderTool
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -32,7 +33,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
@Log4j2
public
class
SougoHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* @Title: SougoHotSearchTest
...
...
@@ -47,7 +48,7 @@ public class SougoHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
cause
);
...
...
@@ -91,7 +92,7 @@ public class SougoHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headMap
);
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/SouhuTopicCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
...
...
@@ -22,7 +23,7 @@ import java.util.List;
@Log4j2
public
class
SouhuTopicCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
public
static
List
<
HotSearchList
>
getSouhuTopic
(
Date
date
){
List
<
HotSearchList
>
hotSearchLists
=
new
ArrayList
<>();
...
...
@@ -32,7 +33,7 @@ public class SouhuTopicCrawler {
String
url
=
"https://api.k.sohu.com/api/news/moment/v2/list.go?pageSize=50&v=6.4.4"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"搜狐话题页面连接失败"
,
cause
.
fillInStackTrace
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/TaoBaoHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.searchhotcrawler.util.MD5Util
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -25,7 +26,7 @@ import java.util.*;
*/
@Log4j2
public
class
TaoBaoHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
connectTimeout
(
Duration
.
ofSeconds
(
60
)).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
public
static
List
<
HotSearchList
>
taoBaoHotSearch
(
Date
date
)
{
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
...
...
@@ -36,7 +37,7 @@ public class TaoBaoHotSearchCrawler {
String
urls
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign=&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Request
request1
=
RequestUtils
.
wrapGet
(
urls
);
String
token
=
null
;
Response
response
=
httpBoot
.
syncCall
(
request1
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request1
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
...
...
@@ -55,7 +56,7 @@ public class TaoBaoHotSearchCrawler {
String
sign
=
MD5Util
.
getMD5
(
signs
).
toLowerCase
();
String
url
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign="
+
sign
+
"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response1
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response1
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response1
.
hasCause
()){
Throwable
cause
=
response1
.
cause
();
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -21,7 +22,7 @@ import java.util.List;
@Log4j2
public
class
TengXunCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 腾讯热榜数据采集
...
...
@@ -36,7 +37,7 @@ public class TengXunCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
response
.
cause
().
printStackTrace
();
...
...
@@ -100,7 +101,7 @@ public class TengXunCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
response
.
cause
().
printStackTrace
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ToutiaoHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -12,6 +12,7 @@ import com.zhiwei.http.util.RequestUtils;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
@@ -37,7 +38,7 @@ import java.util.*;
*/
@Log4j2
public
class
ToutiaoHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* @return void 返回类型
...
...
@@ -50,7 +51,7 @@ public class ToutiaoHotSearchCrawler {
String
jsUrl
=
"https://s3.pstatp.com/toutiao/feoffline/hot_list/resource/hot_list/js/index.45f50250.chunk.js"
;
Request
jsRequest
=
RequestUtils
.
wrapGet
(
jsUrl
);
String
jsBody
=
null
;
Response
response
=
httpBoot
.
syncCall
(
jsRequest
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
jsRequest
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"获取今日头条实时热搜头部信息标识失败"
,
cause
);
...
...
@@ -66,7 +67,7 @@ public class ToutiaoHotSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response1
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response1
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response1
.
hasCause
())
{
Throwable
cause
=
response1
.
cause
();
log
.
error
(
"解析今日头条实时热搜时出现连接失败"
,
cause
);
...
...
@@ -166,7 +167,7 @@ public class ToutiaoHotSearchCrawler {
String
htmlBody
=
null
;
String
url
=
hotSearchList
.
getUrl
();
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析今日头条热搜详情页面出现连接失败"
,
cause
);
...
...
@@ -227,7 +228,7 @@ public class ToutiaoHotSearchCrawler {
headerMap
.
put
(
"User-Agent"
,
"com.ss.android.article.news/8770 (Linux; U; Android 9; zh_CN; Redmi 8; Build/PKQ1.190319.001; Cronet/TTNetVersion:a867b489 2022-03-11 QuicVersion:b314d107 2021-11-24) Accept-Encoding: gzip, deflate, br"
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
String
htmlBody
=
null
;
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"获取今日头条榜单出错"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WangYiHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -10,6 +10,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
...
...
@@ -29,7 +30,7 @@ import java.util.List;
*/
@Log4j2
public
class
WangYiHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 网易新闻实时热榜的采集
...
...
@@ -42,7 +43,7 @@ public class WangYiHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"网易新闻实时热榜页面连接异常..."
,
cause
);
...
...
@@ -85,7 +86,7 @@ public class WangYiHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
for
(
int
t
=
0
;
t
<
3
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"网易新闻跟贴热议页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoBrandCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -27,7 +28,7 @@ import java.util.Objects;
@Log4j2
public
class
WeiBoBrandCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
...
...
@@ -43,7 +44,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌总榜时出现连接失败"
,
cause
);
...
...
@@ -77,7 +78,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌汽车榜时出现连接失败"
,
cause
);
...
...
@@ -111,7 +112,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌手机榜时出现连接失败"
,
cause
);
...
...
@@ -145,7 +146,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌美妆榜时出现连接失败"
,
cause
);
...
...
@@ -179,7 +180,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌奢侈品榜时出现连接失败"
,
cause
);
...
...
@@ -213,7 +214,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌食品饮料榜时出现连接失败"
,
cause
);
...
...
@@ -247,7 +248,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌家电榜时出现连接失败"
,
cause
);
...
...
@@ -281,7 +282,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌服装鞋帽榜时出现连接失败"
,
cause
);
...
...
@@ -315,7 +316,7 @@ public class WeiBoBrandCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//重试两次
for
(
int
x
=
0
;
x
<
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博品牌服装鞋帽榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchBoxHotWordsCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoSearchBoxHotWords
;
import
com.zhiwei.searchhotcrawler.dao.WeiBoSearchBoxHotWordsDao
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -28,7 +29,7 @@ import java.util.Objects;
*/
@Log4j2
public
class
WeiBoSearchBoxHotWordsCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
connectTimeout
(
Duration
.
ofSeconds
(
60
)).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
static
WeiBoSearchBoxHotWordsDao
weiBoSearchDao
=
new
WeiBoSearchBoxHotWordsDao
();
public
static
void
weiBoSearchBoxHotWords
(
Date
date
){
...
...
@@ -37,7 +38,7 @@ public class WeiBoSearchBoxHotWordsCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博搜索框热词时出现解析错误,页面结构有问题"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchHotWordsCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoSearchBoxHotWords
;
import
com.zhiwei.searchhotcrawler.dao.WeiBoSearchBoxHotWordsDao
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -27,7 +28,7 @@ import java.util.Objects;
*/
@Log4j2
public
class
WeiBoSearchHotWordsCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
connectTimeout
(
Duration
.
ofSeconds
(
60
)).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
static
WeiBoSearchBoxHotWordsDao
weiBoSearchDao
=
new
WeiBoSearchBoxHotWordsDao
();
public
static
void
weiBoSearchHotWords
(
Date
date
){
...
...
@@ -36,7 +37,7 @@ public class WeiBoSearchHotWordsCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiShiHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.MediaType
;
import
okhttp3.Request
;
...
...
@@ -27,7 +28,7 @@ import java.util.*;
@Log4j2
public
class
WeiShiHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
...
...
@@ -45,7 +46,7 @@ public class WeiShiHotSearchCrawler {
headerMap
.
put
(
"Host"
,
"api.weishi.qq.com"
);
Request
request
=
RequestUtils
.
wrapPost
(
url
,
headerMap
,
RequestBody
.
create
(
MediaType
.
get
(
"application/json"
),
"{\"req_body\":{\"hotRankID\":\"\",\"attachInfo\":\"\",\"hotRankType\":1,\"sourceID\":\"WSSearchH5\"}}"
));
for
(
int
count
=
0
;
count
<=
3
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微视热榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboEntertainmentCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
org.apache.commons.lang3.StringUtils
;
...
...
@@ -24,7 +25,7 @@ import java.util.*;
@Log4j2
public
class
WeiboEntertainmentCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* @return void 返回类型
...
...
@@ -38,7 +39,7 @@ public class WeiboEntertainmentCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博娱乐榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -20,6 +20,7 @@ import com.zhiwei.searchhotcrawler.config.RedisConfig;
import
com.zhiwei.searchhotcrawler.dao.RedisDao
;
import
com.zhiwei.searchhotcrawler.dao.WeiBoMassageDao
;
import
com.zhiwei.searchhotcrawler.dao.WeiBoUserDao
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -47,7 +48,7 @@ import static java.util.Objects.nonNull;
@Log4j2
public
class
WeiboHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
private
static
RedisDao
redisDao
=
new
RedisDao
();
...
...
@@ -68,7 +69,7 @@ public class WeiboHotSearchCrawler {
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
if
(
i
==
2
)
{
return
list
;
...
...
@@ -264,7 +265,7 @@ public class WeiboHotSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博时热搜时出现连接失败"
,
cause
);
...
...
@@ -362,7 +363,7 @@ public class WeiboHotSearchCrawler {
String
url
=
"https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博热搜时出现连接失败"
,
cause
);
...
...
@@ -420,7 +421,7 @@ public class WeiboHotSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博热搜详情页面时出现连接失败"
,
cause
);
...
...
@@ -518,7 +519,7 @@ public class WeiboHotSearchCrawler {
//该cookie有效期一年,微博pc端获取游客cookie链接 https://s.weibo.com/top/summary?cate=realtimehot
headerMap
.
put
(
"Cookie"
,
"SUB=_2AkMUShJMf8NxqwJRmP0RyWvgb4RwwgnEieKiFuOXJRMxHRl-yT92qlQvtRB6P8o8oso9Ew-s6vf16fdCca-Xz6DwwAMH; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFdAobr6HdAbgQQ9vbUQKDx"
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博时热搜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboNewsCrawler.java
View file @
9f6d8158
...
...
@@ -10,6 +10,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -28,7 +29,7 @@ import java.util.*;
@Log4j2
public
class
WeiboNewsCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
...
...
@@ -49,7 +50,7 @@ public class WeiboNewsCrawler {
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
List
<
HotSearchList
>
result
=
new
ArrayList
();
//发送第一次请求获取前20条数据
Response
response
=
httpBoot
.
syncCall
(
request1
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request1
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"第一次请求解析微博要闻榜时出现连接失败"
,
cause
);
...
...
@@ -71,7 +72,7 @@ public class WeiboNewsCrawler {
continue
;
}
//发送第二次请求获取中间20条数据
Response
response1
=
httpBoot
.
syncCall
(
request2
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response1
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request2
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response1
.
hasCause
()){
Throwable
cause
=
response1
.
cause
();
log
.
error
(
"第二次请求解析微博要闻榜时出现连接失败"
,
cause
);
...
...
@@ -94,7 +95,7 @@ public class WeiboNewsCrawler {
continue
;
}
//发送第三次请求获取最后10条数据
Response
response2
=
httpBoot
.
syncCall
(
request3
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response2
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request3
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response2
.
hasCause
()){
Throwable
cause
=
response2
.
cause
();
log
.
error
(
"第三次请求解析微博要闻榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboOutCircleCrawler.java
View file @
9f6d8158
...
...
@@ -7,6 +7,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
org.apache.commons.lang3.StringUtils
;
...
...
@@ -27,7 +28,7 @@ import java.util.*;
@Log4j2
public
class
WeiboOutCircleCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
...
...
@@ -43,7 +44,7 @@ public class WeiboOutCircleCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
for
(
int
x
=
0
;
x
<=
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博出圈榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
View file @
9f6d8158
...
...
@@ -14,6 +14,7 @@ import com.zhiwei.http.proxy.ProxyServerSupplier;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
org.apache.commons.lang3.StringUtils
;
...
...
@@ -33,7 +34,7 @@ import com.alibaba.fastjson.JSONObject;
@Log4j2
public
class
WeiboSuperTopicCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
private
static
Map
<
String
,
String
>
headMap
=
new
HashMap
<>();
static
{
...
...
@@ -65,7 +66,7 @@ public class WeiboSuperTopicCrawler {
String
htmlBody
=
null
;
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"获取榜单列表页面时出现错误,错误为:{}"
,
cause
);
...
...
@@ -141,7 +142,7 @@ public class WeiboSuperTopicCrawler {
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid="
+
id
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析榜单详情页面时出现错误,错误为:{}"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
View file @
9f6d8158
...
...
@@ -11,6 +11,7 @@ import com.zhiwei.http.util.RequestUtils;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
lombok.extern.log4j.Log4j2
;
...
...
@@ -34,7 +35,7 @@ import java.util.regex.Pattern;
*/
@Log4j2
public
class
WeiboTopicCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
private
static
Map
<
String
,
String
>
headMap
=
new
HashMap
<>();
static
{
...
...
@@ -140,7 +141,7 @@ public class WeiboTopicCrawler {
String
htmlBody
=
null
;
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
5
;
retryTimes
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"下载榜单列表页面时出现错误,错误为:{}"
,
cause
);
...
...
@@ -229,7 +230,7 @@ public class WeiboTopicCrawler {
String
htmlBody
=
null
;
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"下载榜单列表页面时出现错误,错误为:{}"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboVideoCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
org.apache.commons.lang3.StringUtils
;
...
...
@@ -28,7 +29,7 @@ import java.util.List;
@Log4j2
public
class
WeiboVideoCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
...
...
@@ -42,7 +43,7 @@ public class WeiboVideoCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
x
=
0
;
x
<=
2
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"解析微博视频榜时出现连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/XinLangHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
@@ -26,7 +27,7 @@ import java.util.*;
@Log4j2
public
class
XinLangHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 新浪热榜的采集
...
...
@@ -40,7 +41,7 @@ public class XinLangHotSearchCrawler {
String
htmlBody
=
null
;
JSONObject
jsonObject
=
null
;
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"新浪热榜页面连接异常..."
,
cause
);
...
...
@@ -116,7 +117,7 @@ public class XinLangHotSearchCrawler {
String
htmlBody
=
null
;
JSONArray
dataJson
=
null
;
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"新浪热点页面连接异常..."
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -12,6 +12,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
...
...
@@ -24,7 +25,7 @@ import java.util.*;
@Log4j2
public
class
ZhihuChildHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 知乎子级分类数据采集
...
...
@@ -42,7 +43,7 @@ public class ZhihuChildHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
response
.
cause
().
printStackTrace
();
}
else
{
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
9f6d8158
...
...
@@ -8,6 +8,7 @@ import com.zhiwei.http.boot.Response;
import
com.zhiwei.http.proxy.ProxyServerSupplier
;
import
com.zhiwei.http.proxy.ProxySupplier
;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
io.netty.handler.ssl.SslProvider
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
...
...
@@ -35,7 +36,7 @@ import static java.util.Objects.nonNull;
@Log4j2
public
class
ZhihuHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
//private static HttpBoot httpBoot = HttpBoot.newBuilder().sslProvider(SslProvider.CONSCRYPT).retryTimes(3).build();
/**
* @Title: getZhihuHotList
...
...
@@ -100,7 +101,7 @@ public class ZhihuHotSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
for
(
int
x
=
0
;
x
<=
5
;
x
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
cause
);
...
...
@@ -171,7 +172,7 @@ public class ZhihuHotSearchCrawler {
Map
.
put
(
"cookie"
,
"_xsrf=7NFWM5qBcOutfs8MaW7bhQQH65t3Xia4"
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
Map
);
try
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
())
{
Throwable
cause
=
response
.
cause
();
log
.
error
(
"单条知乎热搜数据页面连接失败"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuTopicSearchCrawler.java
View file @
9f6d8158
...
...
@@ -9,6 +9,7 @@ import com.zhiwei.http.proxy.ProxySupplier;
import
com.zhiwei.http.util.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.HttpClientUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
org.jsoup.Jsoup
;
...
...
@@ -17,7 +18,7 @@ import java.util.*;
@Log4j2
public
class
ZhihuTopicSearchCrawler
{
private
static
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
3
).
build
();
//
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
public
static
List
<
HotSearchList
>
getZhihuTopicSearch
(
Date
date
){
List
<
HotSearchList
>
list
=
new
ArrayList
<>();
...
...
@@ -26,7 +27,7 @@ public class ZhihuTopicSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
Response
response
=
HttpClientUtils
.
httpBoot
.
syncCall
(
request
,
ProxyServerSupplier
.
NAT_HEAVY_PROXY
);
if
(
response
.
hasCause
()){
Throwable
cause
=
response
.
cause
();
log
.
error
(
"知乎热搜页面连接异常"
,
cause
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
9f6d8158
...
...
@@ -7,13 +7,14 @@ import com.zhiwei.network.cynomys.consumer.CynomysConsumerFactory;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.timer.*
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
import
org.apache.dubbo.config.ApplicationConfig
;
import
org.apache.dubbo.config.ConsumerConfig
;
import
org.apache.dubbo.config.RegistryConfig
;
import
org.springframework.context.ApplicationContext
;
import
org.springframework.context.support.ClassPathXmlApplicationContext
;
@Log4j2
public
class
HotSearchRun
{
public
static
void
main
(
String
[]
args
)
{
...
...
@@ -46,7 +47,7 @@ public class HotSearchRun {
// 初始化 http-boot 桥接
CynomysFactory
.
init
(
consumer
);
log
.
info
(
"http-boot 桥接 成功"
);
new
UpdateWechatUserRun
().
start
();
ZhiWeiTools
.
sleep
(
10000
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/util/HttpClientUtils.java
View file @
9f6d8158
...
...
@@ -25,7 +25,7 @@ public final class HttpClientUtils {
private
static
final
String
URL_QUERY_PARAM_SEPARATOR
=
"?"
;
//private static final HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(2).build();
private
static
final
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
2
).
build
();
public
static
final
HttpBoot
httpBoot
=
HttpBoot
.
newBuilder
().
retryTimes
(
2
).
build
();
public
static
String
sendPost
(
String
url
,
String
jsonParam
){
return
sendPost
(
url
,
jsonParam
,
null
,
Charset
.
forName
(
"UTF-8"
));
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment