Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
07e166b9
Commit
07e166b9
authored
Jul 31, 2020
by
马黎滨
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
代理ip恢复
parent
02d17aa6
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
15 additions
and
27 deletions
+15
-27
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
+1
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
+1
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
+1
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
+1
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+1
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
+2
-4
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
+1
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
+1
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+2
-4
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuTopicSearchCrawler.java
+1
-2
src/main/java/com/zhiwei/searchhotcrawler/util/TipsUtils.java
+3
-3
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
View file @
07e166b9
...
...
@@ -43,8 +43,7 @@ public class BaiDuHotSearchCrawler {
String
url
=
"http://top.baidu.com/buzz?b=1&fr=topindex"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//,ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
log
.
error
(
"解析百度风云榜时出现解析错误,页面结构有问题"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
View file @
07e166b9
...
...
@@ -44,8 +44,7 @@ public class DouyinHotSearchCrawler {
String
url
=
"https://api.amemv.com/aweme/v1/hot/search/list/"
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
log
.
debug
(
"获取抖音热搜榜时出现问题:{}"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
View file @
07e166b9
...
...
@@ -48,8 +48,7 @@ public class SougoHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headMap
);
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
log
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
View file @
07e166b9
...
...
@@ -35,8 +35,7 @@ public class TengXunCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
07e166b9
...
...
@@ -47,8 +47,7 @@ public class WeiboHotSearchCrawler {
for
(
int
i
=
0
;
i
<
3
;
i
++){
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
//,ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
if
(
i
==
2
){
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
View file @
07e166b9
...
...
@@ -64,8 +64,7 @@ public class WeiboSuperTopicCrawler {
String
htmlBody
=
null
;
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
//,ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
log
.
error
(
"获取榜单列表页面时出现错误,错误为:{}"
,
e
);
...
...
@@ -142,8 +141,7 @@ public class WeiboSuperTopicCrawler {
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid="
+
id
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
log
.
error
(
"解析榜单详情页面时出现错误,错误为:{}"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
View file @
07e166b9
...
...
@@ -137,8 +137,7 @@ public class WeiboTopicCrawler {
String
htmlBody
=
null
;
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
5
;
retryTimes
++)
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
// log.info("pageUrl::{}", pageUrl);
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
View file @
07e166b9
...
...
@@ -41,8 +41,7 @@ public class ZhihuChildHotSearchCrawler {
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
//采集为空最多重试3次
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
07e166b9
...
...
@@ -50,8 +50,7 @@ public class ZhihuHotSearchCrawler {
headerMap
.
put
(
"Referer"
,
rerferer
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
String
htmlBody
=
null
;
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
);
...
...
@@ -95,8 +94,7 @@ public class ZhihuHotSearchCrawler {
headerMap
.
put
(
"authorization"
,
"oauth c3cef7c66a1843f8b3a9e6a1e3160e20"
);
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuTopicSearchCrawler.java
View file @
07e166b9
...
...
@@ -35,8 +35,7 @@ public class ZhihuTopicSearchCrawler {
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
//, ProxyHolder.NAT_HEAVY_PROXY
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
log
.
error
(
"知乎热搜页面连接异常"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/util/TipsUtils.java
View file @
07e166b9
...
...
@@ -24,9 +24,9 @@ public class TipsUtils {
Date
lastTime
=
hotSearchListDAO
.
getLastTimeByType
(
type
);
if
(
time
.
getTime
()
-
lastTime
.
getTime
()
>
timeDifference
){
//发送预警
String
crawlerContent
=
String
.
format
(
"%s已经连续%s分钟未采集到数据"
,
type
,(
time
.
getTime
()
-
lastTime
.
getTime
())/
1000
/
60
);
QYWechatUtil
.
send
(
key
,
QYWechatUtil
.
MSGTYPE_TEXT
,
crawlerContent
,
null
,
null
);
String
crawlerContent
=
String
.
format
(
"%s已经采集数据异常"
,
type
);
// QYWechatUtil.send(key, QYWechatUtil.MSGTYPE_TEXT, crawlerContent,
// null, null);
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment