Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
a4df8c55
Commit
a4df8c55
authored
Aug 11, 2020
by
马黎滨
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
凤凰新闻热搜采集
parent
8d587614
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
42 additions
and
4 deletions
+42
-4
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchType.java
+1
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/FengHuangSearchCrawler.java
+34
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/ThreadOneRun.java
+7
-4
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchType.java
View file @
a4df8c55
...
@@ -14,6 +14,7 @@ public enum HotSearchType {
...
@@ -14,6 +14,7 @@ public enum HotSearchType {
新浪热点
,
新浪热点
,
搜狐话题
,
搜狐话题
,
凤凰新闻热榜
,
凤凰新闻热榜
,
凤凰新闻热搜
,
网易热榜
,
网易热榜
,
网易跟帖热议
网易跟帖热议
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/FengHuangSearchCrawler.java
View file @
a4df8c55
...
@@ -60,4 +60,38 @@ public class FengHuangSearchCrawler {
...
@@ -60,4 +60,38 @@ public class FengHuangSearchCrawler {
log
.
info
(
"凤凰新闻热榜采集结束"
);
log
.
info
(
"凤凰新闻热榜采集结束"
);
return
list
;
return
list
;
}
}
/**
 * Collects the iFeng (凤凰新闻) hot-search words.
 *
 * <p>Sends a GET request to the {@code sogouSearchHotword} endpoint through {@code httpBoot}
 * with {@code ProxyHolder.NAT_HEAVY_PROXY}, reads the {@code item} array of the first element
 * of the response's {@code data} array, and converts every titled item into a
 * {@link HotSearchList} whose rank is the item's 1-based position in that array.
 *
 * <p>Connection failures and malformed or unexpected payloads are logged rather than thrown,
 * so a bad response cannot abort the caller's collection round. In those cases the list
 * holds whatever entries were built before the failure (usually none).
 *
 * @return a mutable list of hot-search entries; never {@code null}, possibly empty
 */
public static List<HotSearchList> getFengHuangHotData() {
    log.info("凤凰新闻热搜开始采集");
    List<HotSearchList> list = new ArrayList<>();
    String url = "https://shankapi.ifeng.com/autumn/sogouSearchHotword";
    String htmlBody = null;
    Request request = RequestUtils.wrapGet(url);
    try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
        // Guard against a missing response/body instead of failing with a NullPointerException.
        if (response != null && response.body() != null) {
            htmlBody = response.body().string();
        }
    } catch (IOException e) {
        log.error("凤凰新闻热搜页面连接异常...", e);
    }

    if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
        try {
            JSONArray jsonArray = extractFengHuangHotItems(htmlBody);
            if (jsonArray != null) {
                for (int i = 0; i < jsonArray.size(); i++) {
                    JSONObject item = jsonArray.getJSONObject(i);
                    String name = item == null ? null : item.getString("title");
                    if (StringUtils.isBlank(name)) {
                        // An entry without a title carries no hot word; skip it but keep
                        // the positional rank of the remaining entries unchanged.
                        continue;
                    }
                    Integer rank = i + 1;
                    // NOTE(review): the title is appended to the query string without URL
                    // encoding; titles containing '&', '#', '+' or spaces yield a broken
                    // link. Left unchanged because stored URLs may be compared downstream
                    // -- confirm before encoding.
                    String fenghuangUrl = "https://so.ifeng.com/?q=" + name;
                    HotSearchList hotSearchList = new HotSearchList(
                            fenghuangUrl, name, null, rank, HotSearchType.凤凰新闻热搜.name());
                    list.add(hotSearchList);
                }
            }
        } catch (RuntimeException e) {
            // The substring check above does not prove the body is well-formed JSON with the
            // expected shape; parsing problems surface as unchecked exceptions, so they are
            // caught here at the crawler boundary and logged.
            log.error("凤凰新闻热搜数据解析异常...", e);
        }
    }

    log.info("{}, 此轮凤凰新闻热搜采集到的数据量为:{}", new Date(), Integer.valueOf(list.size()));
    log.info("凤凰新闻热搜采集结束");
    return list;
}

/**
 * Extracts the {@code item} array from the first element of the payload's {@code data} array.
 *
 * @param htmlBody raw response body, expected to be a JSON object
 * @return the {@code item} array, or {@code null} when {@code data} is absent or empty,
 *         its first element is null, or that element has no {@code item} array
 */
private static JSONArray extractFengHuangHotItems(String htmlBody) {
    JSONObject root = JSONObject.parseObject(htmlBody);
    if (root == null) {
        return null;
    }
    JSONArray data = root.getJSONArray("data");
    if (data == null || data.isEmpty()) {
        return null;
    }
    JSONObject first = data.getJSONObject(0);
    return first == null ? null : first.getJSONArray("item");
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/ThreadOneRun.java
View file @
a4df8c55
...
@@ -33,17 +33,20 @@ public class ThreadOneRun extends Thread {
...
@@ -33,17 +33,20 @@ public class ThreadOneRun extends Thread {
private
void
getHotList
(){
private
void
getHotList
(){
List
<
HotSearchList
>
tengXunlist
=
TengXunCrawler
.
getTengXunHotList
();
List
<
HotSearchList
>
tengXunlist
=
TengXunCrawler
.
getTengXunHotList
();
TipsUtils
.
addHotList
(
"腾讯新闻"
,
tengXunlist
);
TipsUtils
.
addHotList
(
"腾讯新闻"
,
tengXunlist
);
ZhiWeiTools
.
sleep
(
30
00L
);
ZhiWeiTools
.
sleep
(
15
00L
);
List
<
HotSearchList
>
xinLanglist
=
XinLangHotSearchCrawler
.
getXinLangHotSearch
();
List
<
HotSearchList
>
xinLanglist
=
XinLangHotSearchCrawler
.
getXinLangHotSearch
();
TipsUtils
.
addHotList
(
"新浪热榜"
,
xinLanglist
);
TipsUtils
.
addHotList
(
"新浪热榜"
,
xinLanglist
);
ZhiWeiTools
.
sleep
(
30
00L
);
ZhiWeiTools
.
sleep
(
15
00L
);
List
<
HotSearchList
>
souhuList
=
SouhuTopicCrawler
.
getSouhuTopic
();
List
<
HotSearchList
>
souhuList
=
SouhuTopicCrawler
.
getSouhuTopic
();
TipsUtils
.
addHotList
(
"搜狐话题"
,
souhuList
);
TipsUtils
.
addHotList
(
"搜狐话题"
,
souhuList
);
ZhiWeiTools
.
sleep
(
30
00L
);
ZhiWeiTools
.
sleep
(
15
00L
);
List
<
HotSearchList
>
xinLangHotList
=
XinLangHotSearchCrawler
.
getXinLangHotSpot
();
List
<
HotSearchList
>
xinLangHotList
=
XinLangHotSearchCrawler
.
getXinLangHotSpot
();
TipsUtils
.
addHotList
(
"新浪热点"
,
xinLangHotList
);
TipsUtils
.
addHotList
(
"新浪热点"
,
xinLangHotList
);
ZhiWeiTools
.
sleep
(
30
00L
);
ZhiWeiTools
.
sleep
(
15
00L
);
List
<
HotSearchList
>
fengHuangHotList
=
FengHuangSearchCrawler
.
getFengHuangHotList
();
List
<
HotSearchList
>
fengHuangHotList
=
FengHuangSearchCrawler
.
getFengHuangHotList
();
TipsUtils
.
addHotList
(
"凤凰新闻热榜"
,
fengHuangHotList
);
TipsUtils
.
addHotList
(
"凤凰新闻热榜"
,
fengHuangHotList
);
ZhiWeiTools
.
sleep
(
1500L
);
List
<
HotSearchList
>
fengHuangHotDataList
=
FengHuangSearchCrawler
.
getFengHuangHotData
();
TipsUtils
.
addHotList
(
"凤凰新闻热搜"
,
fengHuangHotDataList
);
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment