Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
1c4d8472
Commit
1c4d8472
authored
Aug 14, 2020
by
马黎滨
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
采集程序恢复线程
parent
444bc287
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
212 additions
and
212 deletions
+212
-212
pom.xml
+10
-11
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+13
-13
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+27
-26
src/main/java/com/zhiwei/searchhotcrawler/timer/DouYinUrlHotSearchRun.java
+19
-19
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+17
-17
src/main/java/com/zhiwei/searchhotcrawler/timer/ThreadOneRun.java
+17
-17
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
+11
-11
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboSuperTopicRun.java
+22
-22
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
+29
-29
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuChildHotSearchRun.java
+44
-44
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
+3
-3
No files found.
pom.xml
View file @
1c4d8472
...
...
@@ -11,7 +11,6 @@
<project.build.sourceEncoding>
UTF-8
</project.build.sourceEncoding>
<project.reporting.outputEncoding>
UTF-8
</project.reporting.outputEncoding>
<spring.version>
4.2.2.RELEASE
</spring.version>
<quartz.version>
2.2.3
</quartz.version>
</properties>
<developers>
...
...
@@ -57,16 +56,16 @@
<artifactId>
crawler-core
</artifactId>
<version>
0.6.0.4-RELEASE
</version>
</dependency>
<dependency
>
<groupId>
org.quartz-scheduler
</groupId
>
<artifactId>
quartz
</artifactId
>
<version>
${quartz.version}
</version
>
</dependency
>
<dependency
>
<groupId>
org.quartz-scheduler
</groupId
>
<artifactId>
quartz-jobs
</artifactId
>
<version>
${quartz.version}
</version
>
</dependency
>
<!-- <dependency>--
>
<!-- <groupId>org.quartz-scheduler</groupId>--
>
<!-- <artifactId>quartz</artifactId>--
>
<!-- <version>${quartz.version}</version>--
>
<!-- </dependency>--
>
<!-- <dependency>--
>
<!-- <groupId>org.quartz-scheduler</groupId>--
>
<!-- <artifactId>quartz-jobs</artifactId>--
>
<!-- <version>${quartz.version}</version>--
>
<!-- </dependency>--
>
<!-- Spring文件配置 -->
<dependency>
<groupId>
org.springframework
</groupId>
...
...
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
1c4d8472
...
...
@@ -46,18 +46,18 @@ public class HotSearchRun {
//采集程序启动
//
new WeiboHotSearchRun().start();
//
new BaiduHotSearchRun().start();
//
//
new SougoHotSearchRun().start();
//
new DouyinHotSearchRun().start();
//
//
new ZhihuHotSearchRun().start();
//
new WeiboSuperTopicRun().start();
//
new WeiboTopicRun().start();
//
//
new ToutiaoHotSearchRun().start();
//
//
new ZhihuTopSearchRun().start();
//
new ZhihuChildHotSearchRun().start();
//
new ThreadOneRun().start();
//
//
//抖音链接更新
//
new DouYinUrlHotSearchRun().start();
new
WeiboHotSearchRun
().
start
();
new
BaiduHotSearchRun
().
start
();
// new SougoHotSearchRun().start();
new
DouyinHotSearchRun
().
start
();
// new ZhihuHotSearchRun().start();
new
WeiboSuperTopicRun
().
start
();
new
WeiboTopicRun
().
start
();
// new ToutiaoHotSearchRun().start();
// new ZhihuTopSearchRun().start();
new
ZhihuChildHotSearchRun
().
start
();
new
ThreadOneRun
().
start
();
// //抖音链接更新
new
DouYinUrlHotSearchRun
().
start
();
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
View file @
1c4d8472
...
...
@@ -42,32 +42,32 @@ public class BaiduHotSearchRun extends Thread{
private
void
getHotList
()
{
//
log.info("百度风云榜采集开始........");
//
//
HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
//
//
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
// List<HotSearchList> baiduList = BaiDuHotSearchCrawler.baiduHotSearch(
);
//
log.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(baiduList != null ? baiduList.size() : 0));
//
//
if(Objects.nonNull(list) && !list.isEmpty()) {
//
//
List<Document> data = hotSearchCacheDAO.addData(list);
//
//
hotSearchDAO.addHotSearchList(data);
//
//
TipsUtils.recoveryTips("百度热搜",new Date());
//
//
} else {
//
//
TipsUtils.sendTips("百度热搜",new Date());
//
//
}
//
TipsUtils.addHotList("百度热搜",baiduList);
//
log.info("百度风云榜采集结束........");
//
ZhiWeiTools.sleep(2000L);
//
log.info("搜狗微信采集开始........");
// List<HotSearchList> sougouList = SougoHotSearchCrawler.sougoHotSearch(
);
//
log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(sougouList != null ? sougouList.size() : 0));
//
TipsUtils.addHotList("搜狗微信热搜",sougouList);
//
log.info("搜狗微信采集结束........");
//
ZhiWeiTools.sleep(2000L);
//
log.info("知乎话题采集开始........");
// List<HotSearchList> zhihuList = ZhihuHotSearchCrawler.getMobileZhihuHotList(
);
//
log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuList != null ? zhihuList.size() : 0));
//
TipsUtils.addHotList("知乎热搜",zhihuList);
//
log.info("知乎话题采集结束........");
log
.
info
(
"百度风云榜采集开始........"
);
// HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List
<
HotSearchList
>
baiduList
=
BaiDuHotSearchCrawler
.
baiduHotSearch
(
new
Date
()
);
log
.
info
(
"{}, 此轮百度风云榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
baiduList
!=
null
?
baiduList
.
size
()
:
0
));
// if(Objects.nonNull(list) && !list.isEmpty()) {
// List<Document> data = hotSearchCacheDAO.addData(list);
// hotSearchDAO.addHotSearchList(data);
// TipsUtils.recoveryTips("百度热搜",new Date());
// } else {
// TipsUtils.sendTips("百度热搜",new Date());
// }
TipsUtils
.
addHotList
(
"百度热搜"
,
baiduList
);
log
.
info
(
"百度风云榜采集结束........"
);
ZhiWeiTools
.
sleep
(
2000L
);
log
.
info
(
"搜狗微信采集开始........"
);
List
<
HotSearchList
>
sougouList
=
SougoHotSearchCrawler
.
sougoHotSearch
(
new
Date
()
);
log
.
info
(
"{}, 此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
sougouList
!=
null
?
sougouList
.
size
()
:
0
));
TipsUtils
.
addHotList
(
"搜狗微信热搜"
,
sougouList
);
log
.
info
(
"搜狗微信采集结束........"
);
ZhiWeiTools
.
sleep
(
2000L
);
log
.
info
(
"知乎话题采集开始........"
);
List
<
HotSearchList
>
zhihuList
=
ZhihuHotSearchCrawler
.
getMobileZhihuHotList
(
new
Date
()
);
log
.
info
(
"{}, 知乎此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
zhihuList
!=
null
?
zhihuList
.
size
()
:
0
));
TipsUtils
.
addHotList
(
"知乎热搜"
,
zhihuList
);
log
.
info
(
"知乎话题采集结束........"
);
}
}
\ No newline at end of file
src/main/java/com/zhiwei/searchhotcrawler/timer/DouYinUrlHotSearchRun.java
View file @
1c4d8472
...
...
@@ -38,24 +38,24 @@ public class DouYinUrlHotSearchRun extends Thread {
* @return void
*/
private
void
getUrlList
()
{
//
log.info("抖音链接更新开始........");
//
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
//
List<HotSearchList> list = DouyinHotSearchRun.list;
//
if(list != null && list.size()>0) {
//
for (int i = 0; i < list.size(); i++) {
//
String name = list.get(i).getName();
//
String id = name+"_"+list.get(i).getType();
//
String url = DouyinHotSearchCrawler.getDouyinUrl("https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword="+name);
//
if(url != null) {
//
Document document = new Document();
//
document.put("id", id);
//
document.put("url", url);
//
hotSearchCacheDAO.updateDouyinUrl(document);
//
}
//
}
//
log.info("抖音链接更新结束........");
//
}else{
//
log.info("抖音链接更新失败,获取抖音数据为空");
//
}
log
.
info
(
"抖音链接更新开始........"
);
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
List
<
HotSearchList
>
list
=
DouyinHotSearchRun
.
list
;
if
(
list
!=
null
&&
list
.
size
()>
0
)
{
for
(
int
i
=
0
;
i
<
list
.
size
();
i
++)
{
String
name
=
list
.
get
(
i
).
getName
();
String
id
=
name
+
"_"
+
list
.
get
(
i
).
getType
();
String
url
=
DouyinHotSearchCrawler
.
getDouyinUrl
(
"https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword="
+
name
);
if
(
url
!=
null
)
{
Document
document
=
new
Document
();
document
.
put
(
"id"
,
id
);
document
.
put
(
"url"
,
url
);
hotSearchCacheDAO
.
updateDouyinUrl
(
document
);
}
}
log
.
info
(
"抖音链接更新结束........"
);
}
else
{
log
.
info
(
"抖音链接更新失败,获取抖音数据为空"
);
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
View file @
1c4d8472
...
...
@@ -48,23 +48,23 @@ public class DouyinHotSearchRun extends Thread{
* @return void
*/
private
void
getHotList
()
{
//
log.info("抖音热搜榜采集开始........");
// list = DouyinHotSearchCrawler.getMobileDouyinHotList(
);
//
log.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
//
TipsUtils.addHotList("抖音热搜",list);
//
log.info("抖音热搜榜采集结束........");
//
ZhiWeiTools.sleep(3000L);
//
log.info("今日头条热搜采集开始........");
// List<HotSearchList> toutiaoList = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(
);
//
log.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), Integer.valueOf(toutiaoList != null ? toutiaoList.size() : 0));
//
TipsUtils.addHotList(HotSearchType.今日头条热搜.name(),toutiaoList);
//
log.info("今日头条热搜采集结束........");
//
ZhiWeiTools.sleep(3000L);
//
log.info("知乎热搜榜单采集开始...");
// List<HotSearchList> zhihuList = ZhihuTopicSearchCrawler.getZhihuTopicSearch(
);
//
log.info("{}, 知乎热搜榜单此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuList != null ? zhihuList.size() : 0));
//
TipsUtils.addHotList(HotSearchType.知乎热搜榜单.name(),zhihuList);
//
log.info("知乎热搜榜单采集结束........");
log
.
info
(
"抖音热搜榜采集开始........"
);
list
=
DouyinHotSearchCrawler
.
getMobileDouyinHotList
(
new
Date
()
);
log
.
info
(
"{}, 抖音热搜榜此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
TipsUtils
.
addHotList
(
"抖音热搜"
,
list
);
log
.
info
(
"抖音热搜榜采集结束........"
);
ZhiWeiTools
.
sleep
(
3000L
);
log
.
info
(
"今日头条热搜采集开始........"
);
List
<
HotSearchList
>
toutiaoList
=
ToutiaoHotSearchCrawler
.
toutiaoHotSearchByPhone
(
new
Date
()
);
log
.
info
(
"{}, 今日头条此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
toutiaoList
!=
null
?
toutiaoList
.
size
()
:
0
));
TipsUtils
.
addHotList
(
HotSearchType
.
今日头条热搜
.
name
(),
toutiaoList
);
log
.
info
(
"今日头条热搜采集结束........"
);
ZhiWeiTools
.
sleep
(
3000L
);
log
.
info
(
"知乎热搜榜单采集开始..."
);
List
<
HotSearchList
>
zhihuList
=
ZhihuTopicSearchCrawler
.
getZhihuTopicSearch
(
new
Date
()
);
log
.
info
(
"{}, 知乎热搜榜单此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
zhihuList
!=
null
?
zhihuList
.
size
()
:
0
));
TipsUtils
.
addHotList
(
HotSearchType
.
知乎热搜榜单
.
name
(),
zhihuList
);
log
.
info
(
"知乎热搜榜单采集结束........"
);
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/ThreadOneRun.java
View file @
1c4d8472
...
...
@@ -31,22 +31,22 @@ public class ThreadOneRun extends Thread {
}
private
void
getHotList
(){
// List<HotSearchList> tengXunlist = TengXunCrawler.getTengXunHotList(
);
//
TipsUtils.addHotList("腾讯新闻",tengXunlist);
//
ZhiWeiTools.sleep(1500L);
// List<HotSearchList> xinLanglist = XinLangHotSearchCrawler.getXinLangHotSearch(
);
//
TipsUtils.addHotList("新浪热榜",xinLanglist);
//
ZhiWeiTools.sleep(1500L);
// List<HotSearchList> souhuList = SouhuTopicCrawler.getSouhuTopic(
);
//
TipsUtils.addHotList("搜狐话题",souhuList);
//
ZhiWeiTools.sleep(1500L);
// List<HotSearchList> xinLangHotList = XinLangHotSearchCrawler.getXinLangHotSpot(
);
//
TipsUtils.addHotList("新浪热点",xinLangHotList);
//
ZhiWeiTools.sleep(1500L);
// List<HotSearchList> fengHuangHotList = FengHuangSearchCrawler.getFengHuangHotList(
);
//
TipsUtils.addHotList("凤凰新闻热榜",fengHuangHotList);
//
ZhiWeiTools.sleep(1500L);
// List<HotSearchList> fengHuangHotDataList = FengHuangSearchCrawler.getFengHuangHotData(
);
//
TipsUtils.addHotList("凤凰新闻热搜",fengHuangHotDataList);
List
<
HotSearchList
>
tengXunlist
=
TengXunCrawler
.
getTengXunHotList
(
new
Date
()
);
TipsUtils
.
addHotList
(
"腾讯新闻"
,
tengXunlist
);
ZhiWeiTools
.
sleep
(
1500L
);
List
<
HotSearchList
>
xinLanglist
=
XinLangHotSearchCrawler
.
getXinLangHotSearch
(
new
Date
()
);
TipsUtils
.
addHotList
(
"新浪热榜"
,
xinLanglist
);
ZhiWeiTools
.
sleep
(
1500L
);
List
<
HotSearchList
>
souhuList
=
SouhuTopicCrawler
.
getSouhuTopic
(
new
Date
()
);
TipsUtils
.
addHotList
(
"搜狐话题"
,
souhuList
);
ZhiWeiTools
.
sleep
(
1500L
);
List
<
HotSearchList
>
xinLangHotList
=
XinLangHotSearchCrawler
.
getXinLangHotSpot
(
new
Date
()
);
TipsUtils
.
addHotList
(
"新浪热点"
,
xinLangHotList
);
ZhiWeiTools
.
sleep
(
1500L
);
List
<
HotSearchList
>
fengHuangHotList
=
FengHuangSearchCrawler
.
getFengHuangHotData
(
new
Date
()
);
TipsUtils
.
addHotList
(
"凤凰新闻热榜"
,
fengHuangHotList
);
ZhiWeiTools
.
sleep
(
1500L
);
List
<
HotSearchList
>
fengHuangHotDataList
=
FengHuangSearchCrawler
.
getFengHuangHotSearch
(
new
Date
()
);
TipsUtils
.
addHotList
(
"凤凰新闻热搜"
,
fengHuangHotDataList
);
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
View file @
1c4d8472
...
...
@@ -34,17 +34,17 @@ public class WeiboHotSearchRun extends Thread{
private
void
getHotList
()
{
//
HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
//
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone(
);
//
log.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
//
if(list == null || list.size() == 0){
//
TipsUtils.sendTips("微博热搜",new Date());
//
}else {
//
List<Document> data = hotSearchCacheDAO.addData(list);
//
hotSearchDAO.addHotSearchList(data);
//
TipsUtils.recoveryTips("微博热搜",new Date());
//
}
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
List
<
HotSearchList
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearchByPhone
(
new
Date
()
);
log
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
if
(
list
==
null
||
list
.
size
()
==
0
){
TipsUtils
.
sendTips
(
"微博热搜"
,
new
Date
());
}
else
{
List
<
Document
>
data
=
hotSearchCacheDAO
.
addData
(
list
);
hotSearchDAO
.
addHotSearchList
(
data
);
TipsUtils
.
recoveryTips
(
"微博热搜"
,
new
Date
());
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboSuperTopicRun.java
View file @
1c4d8472
...
...
@@ -32,28 +32,28 @@ public class WeiboSuperTopicRun extends Thread{
private
void
getTopicList
()
{
//
WeiboSuperTopicDAO weiboTopicDAO = new WeiboSuperTopicDAO();
//
log.info("微博超话采集开始........");
//
List<WeiboSuperTopic> list = WeiboSuperTopicCrawler.startCrawler();
//
log.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
//
List<Document> data = new ArrayList<>();
//
for(WeiboSuperTopic topic : list){
//
log.info("topic::::{}", topic);
//
Document doc = new Document();
//
doc.put("_id", topic.getId());
//
doc.put("name", topic.getTopicName());
//
doc.put("rank", topic.getRank());
//
doc.put("score_num", topic.getScore());
//
doc.put("fensi_num", topic.getFensi());
//
doc.put("post_num", topic.getPostNum());
//
doc.put("type", topic.getType());
//
doc.put("day", topic.getDay());
//
doc.put("time", topic.getTime());
//
doc.put("url", topic.getUrl());
//
data.add(doc);
//
}
//
weiboTopicDAO.addTopicList(data);
//
log.info("微博话题采集结束........");
WeiboSuperTopicDAO
weiboTopicDAO
=
new
WeiboSuperTopicDAO
();
log
.
info
(
"微博超话采集开始........"
);
List
<
WeiboSuperTopic
>
list
=
WeiboSuperTopicCrawler
.
startCrawler
();
log
.
info
(
"{}, 微博超话此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
Document
>
data
=
new
ArrayList
<>();
for
(
WeiboSuperTopic
topic
:
list
){
log
.
info
(
"topic::::{}"
,
topic
);
Document
doc
=
new
Document
();
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"name"
,
topic
.
getTopicName
());
doc
.
put
(
"rank"
,
topic
.
getRank
());
doc
.
put
(
"score_num"
,
topic
.
getScore
());
doc
.
put
(
"fensi_num"
,
topic
.
getFensi
());
doc
.
put
(
"post_num"
,
topic
.
getPostNum
());
doc
.
put
(
"type"
,
topic
.
getType
());
doc
.
put
(
"day"
,
topic
.
getDay
());
doc
.
put
(
"time"
,
topic
.
getTime
());
doc
.
put
(
"url"
,
topic
.
getUrl
());
data
.
add
(
doc
);
}
weiboTopicDAO
.
addTopicList
(
data
);
log
.
info
(
"微博话题采集结束........"
);
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
View file @
1c4d8472
...
...
@@ -33,35 +33,35 @@ public class WeiboTopicRun extends Thread{
private
void
getTopicList
()
{
//
HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO();
//
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
//
log.info("微博话题采集开始........");
// List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone(
);
//
log.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
//
if(list == null || list.size() == 0){
//
TipsUtils.sendTips("微博话题",new Date());
//
}else{
//
TipsUtils.recoveryTips("微博话题",new Date());
//
}
//
List<Document> data = new ArrayList<>();
//
for(HotSearchList topic : list){
//
Document doc = new Document();
//
doc.put("_id", topic.getId());
//
doc.put("name", topic.getName());
//
doc.put("url", topic.getUrl());
//
doc.put("count", topic.getCount());
//
doc.put("hot", topic.getHot());
//
doc.put("day", topic.getDay());
//
doc.put("time", topic.getTime());
//
doc.put("rank", topic.getRank());
//
doc.put("type", topic.getType());
//
doc.put("topic_lead", topic.getTopicLead());
//
doc.put("comment_count", topic.getCommentCount());
//
data.add(doc);
//
hotSearchCacheDAO.addAndUpdateData(doc);
//
}
//
weiboHotSearchDAO.addHotSearchList(data);
//
log.info("微博话题采集结束........");
HotSearchListDAO
weiboHotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
log
.
info
(
"微博话题采集开始........"
);
List
<
HotSearchList
>
list
=
WeiboTopicCrawler
.
startCrawlerByPhone
(
new
Date
()
);
log
.
info
(
"{}, 微博话题此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
if
(
list
==
null
||
list
.
size
()
==
0
){
TipsUtils
.
sendTips
(
"微博话题"
,
new
Date
());
}
else
{
TipsUtils
.
recoveryTips
(
"微博话题"
,
new
Date
());
}
List
<
Document
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
topic
:
list
){
Document
doc
=
new
Document
();
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"name"
,
topic
.
getName
());
doc
.
put
(
"url"
,
topic
.
getUrl
());
doc
.
put
(
"count"
,
topic
.
getCount
());
doc
.
put
(
"hot"
,
topic
.
getHot
());
doc
.
put
(
"day"
,
topic
.
getDay
());
doc
.
put
(
"time"
,
topic
.
getTime
());
doc
.
put
(
"rank"
,
topic
.
getRank
());
doc
.
put
(
"type"
,
topic
.
getType
());
doc
.
put
(
"topic_lead"
,
topic
.
getTopicLead
());
doc
.
put
(
"comment_count"
,
topic
.
getCommentCount
());
data
.
add
(
doc
);
hotSearchCacheDAO
.
addAndUpdateData
(
doc
);
}
weiboHotSearchDAO
.
addHotSearchList
(
data
);
log
.
info
(
"微博话题采集结束........"
);
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuChildHotSearchRun.java
View file @
1c4d8472
...
...
@@ -35,50 +35,50 @@ public class ZhihuChildHotSearchRun extends Thread {
}
private
void
getHotList
()
{
//
HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
//
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
//
for (int i = 0; i < childType.size(); i++) {
//
String name = this.getTypeName(childType.get(i));
//
if (!"".equals(name)) {
//
log.info("知乎{}话题热榜采集开始...", name);
// List<HotSearchList> list = ZhihuChildHotSearchCrawler.getZhihuTopicSearch(childType.get(i), name
);
//
log.info("{}, 知乎{}话题此轮采集到的数据量为:{}", new Date(),name, Integer.valueOf(list != null ? list.size() : 0));
//
if (list == null || list.size() == 0) {
//
TipsUtils.sendTips("知乎热搜"+name+"分类", new Date());
//
}else {
//
List<Document> data = hotSearchCacheDAO.addData(list);
//
hotSearchDAO.addHotSearchList(data);
//
TipsUtils.recoveryTips("知乎热搜"+name+"分类",new Date());
//
}
//
log.info("知乎{}话题热榜采集结束...", name);
//
ZhiWeiTools.sleep(3000);
//
}
//
}
//
//网易实时热榜采集
//
ZhiWeiTools.sleep(3000L);
// List<HotSearchList> wangyiHotSearchList = WangYiHotSearchCrawler.getWangYiHotSearch(
);
//
TipsUtils.addHotList("网易热榜",wangyiHotSearchList);
//
//网易跟帖热议采集
//
ZhiWeiTools.sleep(3000L);
// List<HotSearchList> wangyiComment = WangYiHotSearchCrawler.getWangYicomment(
);
//
TipsUtils.addHotList("网易跟帖热议",wangyiComment);
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
for
(
int
i
=
0
;
i
<
childType
.
size
();
i
++)
{
String
name
=
this
.
getTypeName
(
childType
.
get
(
i
));
if
(!
""
.
equals
(
name
))
{
log
.
info
(
"知乎{}话题热榜采集开始..."
,
name
);
List
<
HotSearchList
>
list
=
ZhihuChildHotSearchCrawler
.
getZhihuTopicSearch
(
childType
.
get
(
i
),
name
,
new
Date
()
);
log
.
info
(
"{}, 知乎{}话题此轮采集到的数据量为:{}"
,
new
Date
(),
name
,
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
if
(
list
==
null
||
list
.
size
()
==
0
)
{
TipsUtils
.
sendTips
(
"知乎热搜"
+
name
+
"分类"
,
new
Date
());
}
else
{
List
<
Document
>
data
=
hotSearchCacheDAO
.
addData
(
list
);
hotSearchDAO
.
addHotSearchList
(
data
);
TipsUtils
.
recoveryTips
(
"知乎热搜"
+
name
+
"分类"
,
new
Date
());
}
log
.
info
(
"知乎{}话题热榜采集结束..."
,
name
);
ZhiWeiTools
.
sleep
(
3000
);
}
}
//网易实时热榜采集
ZhiWeiTools
.
sleep
(
3000L
);
List
<
HotSearchList
>
wangyiHotSearchList
=
WangYiHotSearchCrawler
.
getWangYiHotSearch
(
new
Date
()
);
TipsUtils
.
addHotList
(
"网易热榜"
,
wangyiHotSearchList
);
//网易跟帖热议采集
ZhiWeiTools
.
sleep
(
3000L
);
List
<
HotSearchList
>
wangyiComment
=
WangYiHotSearchCrawler
.
getWangYicomment
(
new
Date
()
);
TipsUtils
.
addHotList
(
"网易跟帖热议"
,
wangyiComment
);
}
//
private String getTypeName(String type){
//
String name;
//
switch (type) {
//
case "digital":
//
name = "数码";
//
break;
//
case "focus":
//
name = "国际";
//
break;
//
case "depth":
//
name = "时事";
//
break;
//
default:
//
name = "";
//
}
//
return name;
//
}
private
String
getTypeName
(
String
type
){
String
name
;
switch
(
type
)
{
case
"digital"
:
name
=
"数码"
;
break
;
case
"focus"
:
name
=
"国际"
;
break
;
case
"depth"
:
name
=
"时事"
;
break
;
default
:
name
=
""
;
}
return
name
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
View file @
1c4d8472
...
...
@@ -21,9 +21,9 @@ import java.util.ArrayList;
import
java.util.Date
;
import
java.util.List
;
@Component
@EnableScheduling
@EnableAsync
//
@Component
//
@EnableScheduling
//
@EnableAsync
public
class
GatherTimer
{
private
Logger
logger
=
LoggerFactory
.
getLogger
(
GatherTimer
.
class
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment