Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
60774fb9
Commit
60774fb9
authored
Jul 31, 2020
by
马黎滨
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'mlbWork' into 'master'
代理ip See merge request
!8
parents
d00d9860
02d17aa6
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
24 additions
and
12 deletions
+24
-12
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
+4
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+4
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuTopicSearchCrawler.java
+2
-1
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
View file @
60774fb9
...
@@ -43,7 +43,8 @@ public class BaiDuHotSearchCrawler {
...
@@ -43,7 +43,8 @@ public class BaiDuHotSearchCrawler {
String
url
=
"http://top.baidu.com/buzz?b=1&fr=topindex"
;
String
url
=
"http://top.baidu.com/buzz?b=1&fr=topindex"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//,ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析百度风云榜时出现解析错误,页面结构有问题"
,
e
);
log
.
error
(
"解析百度风云榜时出现解析错误,页面结构有问题"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
View file @
60774fb9
...
@@ -44,7 +44,8 @@ public class DouyinHotSearchCrawler {
...
@@ -44,7 +44,8 @@ public class DouyinHotSearchCrawler {
String
url
=
"https://api.amemv.com/aweme/v1/hot/search/list/"
;
String
url
=
"https://api.amemv.com/aweme/v1/hot/search/list/"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
log
.
debug
(
"获取抖音热搜榜时出现问题:{}"
,
e
);
log
.
debug
(
"获取抖音热搜榜时出现问题:{}"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
View file @
60774fb9
...
@@ -48,7 +48,8 @@ public class SougoHotSearchCrawler {
...
@@ -48,7 +48,8 @@ public class SougoHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headMap
);
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
String
htmlBody
=
null
;
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
e
);
log
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
View file @
60774fb9
...
@@ -35,7 +35,8 @@ public class TengXunCrawler {
...
@@ -35,7 +35,8 @@ public class TengXunCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//采集为空最多重试3次
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
60774fb9
...
@@ -47,7 +47,8 @@ public class WeiboHotSearchCrawler {
...
@@ -47,7 +47,8 @@ public class WeiboHotSearchCrawler {
for
(
int
i
=
0
;
i
<
3
;
i
++){
for
(
int
i
=
0
;
i
<
3
;
i
++){
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//,ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
if
(
i
==
2
){
if
(
i
==
2
){
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
View file @
60774fb9
...
@@ -64,7 +64,8 @@ public class WeiboSuperTopicCrawler {
...
@@ -64,7 +64,8 @@ public class WeiboSuperTopicCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
//重试三次
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//,ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"获取榜单列表页面时出现错误,错误为:{}"
,
e
);
log
.
error
(
"获取榜单列表页面时出现错误,错误为:{}"
,
e
);
...
@@ -141,7 +142,8 @@ public class WeiboSuperTopicCrawler {
...
@@ -141,7 +142,8 @@ public class WeiboSuperTopicCrawler {
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid="
+
id
;
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid="
+
id
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析榜单详情页面时出现错误,错误为:{}"
,
e
);
log
.
error
(
"解析榜单详情页面时出现错误,错误为:{}"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
View file @
60774fb9
...
@@ -137,7 +137,8 @@ public class WeiboTopicCrawler {
...
@@ -137,7 +137,8 @@ public class WeiboTopicCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
//重试三次
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
5
;
retryTimes
++)
{
for
(
int
retryTimes
=
1
;
retryTimes
<=
5
;
retryTimes
++)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
// log.info("pageUrl::{}", pageUrl);
// log.info("pageUrl::{}", pageUrl);
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
View file @
60774fb9
...
@@ -41,7 +41,8 @@ public class ZhihuChildHotSearchCrawler {
...
@@ -41,7 +41,8 @@ public class ZhihuChildHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
//采集为空最多重试3次
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
60774fb9
...
@@ -50,7 +50,8 @@ public class ZhihuHotSearchCrawler {
...
@@ -50,7 +50,8 @@ public class ZhihuHotSearchCrawler {
headerMap
.
put
(
"Referer"
,
rerferer
);
headerMap
.
put
(
"Referer"
,
rerferer
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
);
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
);
...
@@ -94,7 +95,8 @@ public class ZhihuHotSearchCrawler {
...
@@ -94,7 +95,8 @@ public class ZhihuHotSearchCrawler {
headerMap
.
put
(
"authorization"
,
"oauth c3cef7c66a1843f8b3a9e6a1e3160e20"
);
headerMap
.
put
(
"authorization"
,
"oauth c3cef7c66a1843f8b3a9e6a1e3160e20"
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
);
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuTopicSearchCrawler.java
View file @
60774fb9
...
@@ -35,7 +35,8 @@ public class ZhihuTopicSearchCrawler {
...
@@ -35,7 +35,8 @@ public class ZhihuTopicSearchCrawler {
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
log
.
error
(
"知乎热搜页面连接异常"
,
e
);
log
.
error
(
"知乎热搜页面连接异常"
,
e
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment