Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
69084be0
Commit
69084be0
authored
Jul 12, 2021
by
chenweitao
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'revert-
37d43810
' into 'working'
Revert "Revert "新增微博话题采集话题贡献者,关于功能"" See merge request
!98
parents
9e1b3d9f
600d1086
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
545 additions
and
512 deletions
+545
-512
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
+6
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+56
-29
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
+483
-483
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
View file @
69084be0
...
...
@@ -73,4 +73,10 @@ public class WeiBoUser implements Serializable {
this
.
profileImageUrl
=
profileImageUrl
;
}
/**
 * Creates a user record from its identifying values.
 *
 * @param userId   value assigned to the {@code userId} field
 * @param userName value assigned to the {@code userName} field
 * @param topic    value assigned to the {@code topic} field
 * @param time     value assigned to the {@code time} field
 */
public WeiBoUser(String userId, String userName, String topic, Date time) {
    this.time = time;
    this.topic = topic;
    this.userName = userName;
    this.userId = userId;
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
69084be0
...
...
@@ -328,17 +328,18 @@ public class WeiboHotSearchCrawler {
}
/**
* 微博热搜数据更新话题贡献者排行,阅读量,讨论量,关于
* 微博热搜数据更新话题贡献者排行,关于
*
* @param document
* @return
*/
public
static
Document
weiboUpdatePC
(
Document
document
)
{
document
.
getString
(
"name"
);
String
name
=
document
.
getString
(
"name"
);
String
gb
=
"#"
+
name
+
"#"
;
String
encode
=
null
;
String
topic
=
document
.
getString
(
"name"
);
String
gb
=
"#"
+
topic
+
"#"
;
String
encode
=
null
;
try
{
encode
=
URLEncoder
.
encode
(
gb
,
"utf-8"
);
encode
=
URLEncoder
.
encode
(
gb
,
"utf-8"
);
}
catch
(
UnsupportedEncodingException
e
)
{
log
.
error
(
"字符解析成URl模式异常"
,
e
);
}
...
...
@@ -356,35 +357,37 @@ public class WeiboHotSearchCrawler {
org
.
jsoup
.
nodes
.
Document
documen
=
Jsoup
.
parse
(
htmlBody
);
//获取贡献者信息
try
{
Elements
li
=
documen
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
);
if
(
Objects
.
isNull
(
weiBoUserDao
))
{
weiBoUserDao
=
new
WeiBoUserDao
();
}
if
(
Objects
.
nonNull
(
li
))
{
Date
date
=
new
Date
();
for
(
Element
element
:
li
)
{
WeiBoUser
weiBoUser
=
new
WeiBoUser
();
//获取用户名
String
userName
=
element
.
select
(
"a.name"
).
text
(
);
//获取用户id
String
attr
=
element
.
select
(
"span.avator"
).
select
(
"a"
).
first
().
attr
(
"href"
);
String
userId
=
attr
.
substring
(
14
);
String
type
=
"话题贡献者"
;
String
id
=
userId
+
"_"
+
type
+
"_"
+
name
;
weiBoUser
.
setType
(
type
);
weiBoUser
.
setId
(
id
);
weiBoUser
.
setUserName
(
userName
);
weiBoUser
.
setUserId
(
userId
)
;
weiBoUser
.
setTopic
(
name
);
weiBoUser
.
setTime
(
date
);
weiBoUserDao
.
addWeiBoUser
(
weiBoUser
);
Elements
cardUser
=
documen
.
select
(
"div.card-user"
);
for
(
Element
element
:
cardUser
)
{
if
(!
element
.
select
(
"div.card-head"
).
text
().
isEmpty
())
{
Elements
li
=
element
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
);
if
(
Objects
.
nonNull
(
li
))
{
//循环获取话题贡献者相关信息
for
(
Element
eleme
:
li
)
{
String
type
=
"话题贡献者"
;
writeUser
(
eleme
,
type
,
topic
);
}
}
}
else
{
Elements
li
=
element
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
)
;
if
(
Objects
.
nonNull
(
li
))
{
//循环获取话题贡献者相关信息
for
(
Element
eleme
:
li
)
{
String
type
=
"当事人"
;
writeUser
(
eleme
,
type
,
topic
);
}
}
}
}
}
catch
(
Exception
e
)
{
log
.
error
(
"话题贡献者排行采集异常"
,
e
);
log
.
error
(
"话题贡献者排行采集异常"
,
e
);
}
Elements
dt
=
documen
.
select
(
"div.card-about"
).
select
(
"dt"
);
if
(
Objects
.
nonNull
(
dt
))
{
//获取微博关于的相关信息
Elements
dd
=
documen
.
select
(
"div.card-about"
).
select
(
"dd"
);
Document
dtDocument
=
new
Document
();
Document
ddDocument
=
new
Document
();
...
...
@@ -407,12 +410,36 @@ public class WeiboHotSearchCrawler {
}
return
docm
;
}
catch
(
Exception
e
)
{
log
.
error
(
"解析微博话题时出现解析错误"
,
e
);
log
.
error
(
"解析微博话题时出现解析错误"
,
e
);
}
}
return
document
;
}
/**
 * Builds a {@link WeiBoUser} from one user list item and stores it through {@code weiBoUserDao}.
 *
 * <p>The user name is the text of {@code a.name}. The user id is the {@code href} of the first
 * anchor under {@code span.avator} with its first 14 characters removed. The record id is
 * {@code userId + "_" + type + "_" + topic}. Items without a usable anchor are skipped.
 *
 * @param eleme list item element describing one user
 * @param type  role label stored on the record and embedded in its id
 * @param topic topic name stored on the record and embedded in its id
 */
private static void writeUser(Element eleme, String type, String topic) {
    // Number of leading href characters dropped to obtain the user id.
    // NOTE(review): presumably the length of a "//weibo.com/u/" prefix - confirm.
    final int hrefPrefixLength = 14;

    // first() yields null when no anchor matches; skip the item rather than throw.
    Element avatarLink = eleme.select("span.avator").select("a").first();
    if (avatarLink == null) {
        log.warn("用户条目缺少头像链接,跳过该条目");
        return;
    }
    String attr = avatarLink.attr("href");
    if (attr.length() < hrefPrefixLength) {
        // substring(14) would throw on a shorter href.
        log.warn("用户链接格式异常,无法解析用户id: " + attr);
        return;
    }

    String userId = attr.substring(hrefPrefixLength);
    String userName = eleme.select("a.name").text();
    String id = userId + "_" + type + "_" + topic;

    // The DAO is created lazily on first use.
    if (Objects.isNull(weiBoUserDao)) {
        weiBoUserDao = new WeiBoUserDao();
    }

    // Argument order must be (userId, userName, topic, time); the previous call passed
    // userName first, which stored the name as the id and the id as the name.
    WeiBoUser weiBoUser = new WeiBoUser(userId, userName, topic, new Date());
    weiBoUser.setType(type);
    weiBoUser.setId(id);
    weiBoUserDao.addWeiBoUser(weiBoUser);
}
/**
* 解析微博信息
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
View file @
69084be0
...
...
@@ -29,496 +29,496 @@ import java.util.*;
@EnableScheduling
@EnableAsync
public
class
GatherTimer
{
//
// private Logger logger = LoggerFactory.getLogger(GatherTimer.class);
//
// private RedisDao redisDao = new RedisDao();
// /** 知乎数码子分类 */
// private String DIGITAL = "digital";
// /** 知乎国际子分类 */
// private String FOCUS = "focus";
// /** 知乎时事子分类 */
// private String DEPTH = "depth";
//
//
// /**
// * 虎嗅热文推荐的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ?")
// public void crawlerHuXiu() {
// logger.info("虎嗅热文推荐开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> huXiuList = HuXiuHotSearchCrawler.HuXiuHotArticleRecommended(date);
// logger.info("{}, 虎嗅热文推荐此轮采集到的数据量为:{}", new Date(), Integer.valueOf(huXiuList != null ? huXiuList.size() : 0));
// TipsUtils.addHotList(HotSearchType.虎嗅热文推荐.name(), huXiuList);
// logger.info("虎嗅热文推荐采集结束...");
//
// /**
// * 36氪人气榜的采集
// */
// logger.info("36氪人气榜开始采集...");
// List<HotSearchList> list36Kr = HotSearch36KrCrawler.hotSearch36Kr(date);
// logger.info("{}, 36氪人气榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list36Kr != null ? list36Kr.size() : 0));
// TipsUtils.addHotList(HotSearchType.人气榜36氪.name(), list36Kr);
// logger.info("36氪人气榜采集结束...");
// }
//
// /**
// * 微博热搜的采集
// */
/** Logger shared by all scheduled jobs of this class. */
private static final Logger logger = LoggerFactory.getLogger(GatherTimer.class);

/** Redis access object used by the jobs in this class. */
private RedisDao redisDao = new RedisDao();

/** Zhihu "digital" sub-category key. */
private static final String DIGITAL = "digital";

/** Zhihu "international" sub-category key. */
private static final String FOCUS = "focus";

/** Zhihu "current affairs" sub-category key. */
private static final String DEPTH = "depth";
/**
* 虎嗅热文推荐的采集
*/
@Async
(
value
=
"myScheduler"
)
@Scheduled
(
cron
=
"0 * * * * ?"
)
public
void
crawlerHuXiu
()
{
logger
.
info
(
"虎嗅热文推荐开始采集..."
);
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
List
<
HotSearchList
>
huXiuList
=
HuXiuHotSearchCrawler
.
HuXiuHotArticleRecommended
(
date
);
logger
.
info
(
"{}, 虎嗅热文推荐此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
huXiuList
!=
null
?
huXiuList
.
size
()
:
0
));
TipsUtils
.
addHotList
(
HotSearchType
.
虎嗅热文推荐
.
name
(),
huXiuList
);
logger
.
info
(
"虎嗅热文推荐采集结束..."
);
/**
* 36氪人气榜的采集
*/
logger
.
info
(
"36氪人气榜开始采集..."
);
List
<
HotSearchList
>
list36Kr
=
HotSearch36KrCrawler
.
hotSearch36Kr
(
date
);
logger
.
info
(
"{}, 36氪人气榜此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list36Kr
!=
null
?
list36Kr
.
size
()
:
0
));
TipsUtils
.
addHotList
(
HotSearchType
.
人气榜
36
氪
.
name
(),
list36Kr
);
logger
.
info
(
"36氪人气榜采集结束..."
);
}
/**
 * Collects the Weibo hot-search list and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.微博热搜}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerWeiBo() {
    logger.info("微博热搜开始采集...");
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = WeiboHotSearchCrawler.weiboHotSearchByPhone(gatherTime);
    // A null result is reported as zero items.
    final int itemCount = (hotItems == null) ? 0 : hotItems.size();
    logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), itemCount);
    TipsUtils.addHotList(HotSearchType.微博热搜.name(), hotItems);
    logger.info("微博热搜采集结束...");
}
/**
 * Refreshes the lead text, read count and discussion count of cached Weibo hot searches.
 *
 * <p>Reads the id set stored under {@code RedisConfig.WEIBO_HOTSEARCHIDS}, removes that key,
 * then for each id loads the cached document, passes it through
 * {@code WeiboHotSearchCrawler.weiboUpdate} and persists it when it contains any of
 * {@code topicLead}, {@code readCount} or {@code discussCount}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "45 0/10 * * * ? ")
public void updateWeiBo() {
    logger.info("微博热搜导语更新...");
    final HotSearchCacheDAO cacheDao = new HotSearchCacheDAO();
    final Set<String> pendingIds = redisDao.getRedisSetData(RedisConfig.WEIBO_HOTSEARCHIDS);
    redisDao.removeRedis(RedisConfig.WEIBO_HOTSEARCHIDS);

    for (String hotSearchId : pendingIds) {
        Document cached = hotSearchCacheDAO(cacheDao, hotSearchId);
        if (cached == null) {
            continue;
        }
        Document updated = WeiboHotSearchCrawler.weiboUpdate(cached);
        boolean hasUpdatableField = updated.containsKey("topicLead")
                || updated.containsKey("readCount")
                || updated.containsKey("discussCount");
        if (hasUpdatableField) {
            cacheDao.updateWeibo(updated, hotSearchId);
        }
        // Pause between documents that were found in the cache.
        ZhiWeiTools.sleep(3000L);
    }
    logger.info("微博热搜导语更新结束...");
}

/** Loads the cached hot-search document for the given id. */
private Document hotSearchCacheDAO(HotSearchCacheDAO cacheDao, String hotSearchId) {
    return cacheDao.getHotSearchById(hotSearchId);
}
/**
 * Collects the Toutiao hot-search list, hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.今日头条热搜}, then passes the same list to
 * {@code TouTiaoExecutor.countTouTiaoReadCount}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerTouTiao() {
    logger.info("今日头条热搜开始采集...");
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(gatherTime);
    final int itemCount = (hotItems == null) ? 0 : hotItems.size();
    logger.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), itemCount);
    TipsUtils.addHotList(HotSearchType.今日头条热搜.name(), hotItems);
    logger.info("今日头条热搜采集结束...");

    // Follow-up step: read-count trend update for the collected items.
    logger.info("今日头条热搜详情趋势阅读量更新...");
    TouTiaoExecutor.countTouTiaoReadCount(hotItems);
}
/**
 * Collects the Baidu hot-search list and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.百度热搜}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerBaiDu() {
    logger.info("百度热搜开始采集...");
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = BaiDuHotSearchCrawler.baiduHotSearch(gatherTime);
    final int itemCount = (hotItems == null) ? 0 : hotItems.size();
    logger.info("{}, 百度热搜此轮采集到的数据量为:{}", new Date(), itemCount);
    TipsUtils.addHotList(HotSearchType.百度热搜.name(), hotItems);
    logger.info("百度热搜采集结束...");
}
/**
 * Collects the Douyin hot-search list and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.抖音热搜}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerDouYin() {
    logger.info("抖音热搜开始采集...");
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = DouyinHotSearchCrawler.getMobileDouyinHotList(gatherTime);
    final int itemCount = (hotItems == null) ? 0 : hotItems.size();
    logger.info("{}, 抖音热搜此轮采集到的数据量为:{}", new Date(), itemCount);
    TipsUtils.addHotList(HotSearchType.抖音热搜.name(), hotItems);
    logger.info("抖音热搜采集结束...");
}
/**
 * Updates the stored URL of each Douyin hot-search entry.
 *
 * <p>Iterates {@code DouyinHotSearchCrawler.list}; for every entry it asks
 * {@code DouyinHotSearchCrawler.getDouyinUrl} for a URL built from the entry name and, when a
 * non-null URL comes back, stores an {@code id}/{@code url} document through
 * {@code HotSearchCacheDAO.updateDouyinUrl}. The id is {@code name + "_" + type}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 0/5 * * * ? ")
public void updateDouYinUrl() {
    logger.info("抖音链接更新开始...");
    final HotSearchCacheDAO cacheDao = new HotSearchCacheDAO();
    final List<HotSearchList> douyinItems = DouyinHotSearchCrawler.list;

    // Nothing to update when the crawler has not produced a list yet.
    if (douyinItems == null || douyinItems.size() <= 0) {
        logger.info("抖音链接更新失败,抖音热搜列表获取为空。");
        return;
    }

    for (int index = 0; index < douyinItems.size(); index++) {
        final String hotWord = douyinItems.get(index).getName();
        final String entryId = hotWord + "_" + douyinItems.get(index).getType();
        final String videoUrl = DouyinHotSearchCrawler.getDouyinUrl(
                "https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword=" + hotWord);
        if (videoUrl == null) {
            continue;
        }
        final Document update = new Document();
        update.put("id", entryId);
        update.put("url", videoUrl);
        cacheDao.updateDouyinUrl(update);
    }
    logger.info("抖音链接更新结束");
}
/**
 * Collects the Zhihu hot list and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.知乎热搜}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerZhihu() {
    logger.info("知乎热搜开始采集...");
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = ZhihuHotSearchCrawler.getMobileZhihuHotList(gatherTime);
    final int itemCount = (hotItems == null) ? 0 : hotItems.size();
    logger.info("{}, 知乎热搜此轮采集到的数据量为:{}", new Date(), itemCount);
    TipsUtils.addHotList(HotSearchType.知乎热搜.name(), hotItems);
    logger.info("知乎热搜采集结束...");
}
/**
 * Collects the Sogou WeChat hot words and hands them to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.搜狗微信热搜}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerWeChat() {
    logger.info("搜狗微信热词开始采集...");
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = SougoHotSearchCrawler.sougoHotSearch(gatherTime);
    final int itemCount = (hotItems == null) ? 0 : hotItems.size();
    logger.info("{}, 搜狗微信热词采集到的数据量为:{}", new Date(), itemCount);
    TipsUtils.addHotList(HotSearchType.搜狗微信热搜.name(), hotItems);
    logger.info("搜狗微信热词采集结束...");
}
/**
 * Collects the Sogou WeChat hot-search list (app-side source) and hands it to
 * {@code TipsUtils.addHotList} under {@code HotSearchType.搜狗微信客户端热搜}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void ceawlerSougouHotData() {
    logger.info("搜狗微信热搜开始采集...");
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = SougoHotSearchCrawler.sougouHotDataCrawler(gatherTime);
    final int itemCount = (hotItems == null) ? 0 : hotItems.size();
    logger.info("{}, 搜狗微信热搜此轮采集到的数据量为:{}", new Date(), itemCount);
    TipsUtils.addHotList(HotSearchType.搜狗微信客户端热搜.name(), hotItems);
    logger.info("搜狗微信热搜采集结束...");
}
/**
 * Collects the Weibo topic list and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.微博话题}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerWeiBoTopic() {
    logger.info("微博话题开始采集...");
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> topicItems = WeiboTopicCrawler.startCrawlerByPhone(gatherTime);
    final int itemCount = (topicItems == null) ? 0 : topicItems.size();
    logger.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), itemCount);
    TipsUtils.addHotList(HotSearchType.微博话题.name(), topicItems);
    logger.info("微博话题采集结束...");
}
/**
 * Collects the Tencent News hot list and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.腾讯新闻}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerTengXun() {
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = TengXunCrawler.getTengXunHotList(gatherTime);
    final String typeName = HotSearchType.腾讯新闻.name();
    TipsUtils.addHotList(typeName, hotItems);
}
/**
 * Collects the Sina hot-spot list and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.新浪热点}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerXinLangHotSpot() {
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = XinLangHotSearchCrawler.getXinLangHotSpot(gatherTime);
    final String typeName = HotSearchType.新浪热点.name();
    TipsUtils.addHotList(typeName, hotItems);
}
/**
 * Collects the Sina hot ranking and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.新浪热榜}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerXinLangHotSearch() {
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = XinLangHotSearchCrawler.getXinLangHotSearch(gatherTime);
    final String typeName = HotSearchType.新浪热榜.name();
    TipsUtils.addHotList(typeName, hotItems);
}
/**
 * Collects the NetEase News hot ranking and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.网易热榜}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerWangYiHotSearch() {
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = WangYiHotSearchCrawler.getWangYiHotSearch(gatherTime);
    final String typeName = HotSearchType.网易热榜.name();
    TipsUtils.addHotList(typeName, hotItems);
}
/**
 * Collects the NetEase News hot comment threads and hands them to
 * {@code TipsUtils.addHotList} under {@code HotSearchType.网易跟帖热议}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerWangYiHotComment() {
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = WangYiHotSearchCrawler.getWangYicomment(gatherTime);
    final String typeName = HotSearchType.网易跟帖热议.name();
    TipsUtils.addHotList(typeName, hotItems);
}
/**
 * Collects the Ifeng News hot ranking and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.凤凰新闻热榜}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerFengHuangHotData() {
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = FengHuangSearchCrawler.getFengHuangHotData(gatherTime);
    final String typeName = HotSearchType.凤凰新闻热榜.name();
    TipsUtils.addHotList(typeName, hotItems);
}
/**
* 凤凰新闻热搜的采集
*/
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerWeiBo(){
// logger.info("微博热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> weiboList = WeiboHotSearchCrawler.weiboHotSearchByPhone(date);
// logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), weiboList != null ? weiboList.size() : 0);
// TipsUtils.addHotList(HotSearchType.微博热搜.name(),weiboList);
// logger.info("微博热搜采集结束...");
// }
//
// /**
// * 微博热搜导语,阅读量,讨论量更新
// */
// @Scheduled(cron = "10 * * * * ? ")
public void crawlerFengHuangHotSearch() {
    // Fetch the Ifeng hot-search list for the current timestamp and hand it to TipsUtils.
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = FengHuangSearchCrawler.getFengHuangHotSearch(gatherTime);
    final String typeName = HotSearchType.凤凰新闻热搜.name();
    TipsUtils.addHotList(typeName, hotItems);
}
/**
 * Collects the Tencent Jiaozhen rumor-refuting list and hands it to
 * {@code TipsUtils.addHotList} under {@code HotSearchType.腾讯较真榜}.
 *
 * <p>A null crawler result is logged as zero items, matching the other jobs in this class,
 * instead of failing on {@code list.size()}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerTengXunVerificationHotSearch() {
    logger.info("{},腾讯较真辟谣榜开始采集", new Date());
    Date date = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> list = TengXunCrawler.getTengXunVerificationList(date);
    // Guard against a null list before reading its size.
    logger.info("腾讯较真辟谣榜本轮采集数量:{}", list != null ? list.size() : 0);
    TipsUtils.addHotList(HotSearchType.腾讯较真榜.name(), list);
    logger.info("{},腾讯较真辟谣榜采集结束", new Date());
}
/**
 * Collects the Sohu topic list and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.搜狐话题}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerSouHuTopic() {
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> topicItems = SouhuTopicCrawler.getSouhuTopic(gatherTime);
    final String typeName = HotSearchType.搜狐话题.name();
    TipsUtils.addHotList(typeName, topicItems);
}
/**
 * Collects the Zhihu hot-search topics and hands them to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.知乎热搜榜单}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhihuHotTopic() {
    logger.info("知乎热搜话题开始采集...");
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> topicItems = ZhihuTopicSearchCrawler.getZhihuTopicSearch(gatherTime);
    final int itemCount = (topicItems == null) ? 0 : topicItems.size();
    logger.info("{}, 知乎热搜话题此轮采集到的数据量为:{}", new Date(), itemCount);
    TipsUtils.addHotList(HotSearchType.知乎热搜榜单.name(), topicItems);
    logger.info("知乎热搜话题采集结束...");
}
/**
 * Collects the Weibo pre-heat list and hands it to {@code TipsUtils.addHotList}
 * under {@code HotSearchType.微博预热榜}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerWeiBoPreheat() {
    logger.info("微博预热榜开始采集...");
    final Date gatherTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = WeiboHotSearchCrawler.weiboPreheatSearch(gatherTime);
    final int itemCount = (hotItems == null) ? 0 : hotItems.size();
    logger.info("{},微博预热榜此轮采集到的数据量为:{}", new Date(), itemCount);
    TipsUtils.addHotList(HotSearchType.微博预热榜.name(), hotItems);
    logger.info("微博预热榜采集结束...");
}
/**
 * Collects the Zhihu hot-search "digital" category by delegating to
 * {@code crawlerZhiHuChild} with {@code DIGITAL}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuDigital() {
    crawlerZhiHuChild(DIGITAL);
}
/**
 * Collects the Zhihu hot-search "international" category by delegating to
 * {@code crawlerZhiHuChild} with {@code FOCUS}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuFocus() {
    crawlerZhiHuChild(FOCUS);
}
/**
 * Collects the Zhihu hot-search "current affairs" category by delegating to
 * {@code crawlerZhiHuChild} with {@code DEPTH}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuDepth() {
    crawlerZhiHuChild(DEPTH);
}
/**
 * Collects the Maimai hot list, retrying while the result is empty, and hands it to
 * {@code TipsUtils.addHotList} under {@code HotSearchType.脉脉热榜}.
 *
 * <p>Up to 10 retries are made, 5 seconds apart. A null result is treated like an empty one
 * so the retry loop no longer fails on {@code list.size()}.
 * NOTE(review): whether {@code getMaiMaiHotData} can return null is not visible here; other
 * jobs in this class guard their crawler results against null.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "30 0/30 * * * ? ")
public void crawlerMaiMaiHotSearch() {
    final int maxRetries = 10;
    final long retryDelayMillis = 5000L;

    Date date = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
    // Every retry reuses the timestamp taken before the first attempt.
    for (int attempt = 0; (list == null || list.isEmpty()) && attempt < maxRetries; attempt++) {
        ZhiWeiTools.sleep(retryDelayMillis);
        list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
    }
    TipsUtils.addHotList(HotSearchType.脉脉热榜.name(), list);
}
/**
* B站排行榜采集
*/
@Async
(
value
=
"myScheduler"
)
@Scheduled
(
cron
=
"30 * * * * ? "
)
public
void
crawlerBilibiliHotSearch
(){
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
List
<
HotSearchList
>
list
=
BililiCrawler
.
getBilibiliHotSearch
(
date
);
TipsUtils
.
addHotList
(
HotSearchType
.
B
站排行榜
.
name
(),
list
);
}
/**
* B站热搜采集
*/
@Async
(
value
=
"myScheduler"
)
@Scheduled
(
cron
=
"30 * * * * ? "
)
public
void
crawlerBilibiliHotData
()
{
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
List
<
HotSearchList
>
list
=
BililiCrawler
.
getBiHotData
(
date
);
TipsUtils
.
addHotList
(
HotSearchType
.
B
站热搜
.
name
(),
list
);
}
/**
 * Collects the Weibo super topics and stores them through
 * {@code WeiboSuperTopicDAO.addTopicList}.
 *
 * <p>Each {@code WeiboSuperTopic} is converted to a {@code Document} with the keys
 * {@code _id, name, rank, score_num, fensi_num, post_num, type, day, time, url}.
 * When the crawler returns null nothing is stored; previously the loop over the null list
 * threw even though the preceding log line already allowed for null.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 0 0/3 * * ? ")
public void crawlerWeiBoSuperTopic() {
    logger.info("微博超话采集开始........");
    WeiboSuperTopicDAO weiboTopicDAO = new WeiboSuperTopicDAO();
    List<WeiboSuperTopic> list = WeiboSuperTopicCrawler.startCrawler();
    logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);

    if (list != null) {
        List<Document> data = new ArrayList<>(list.size());
        for (WeiboSuperTopic topic : list) {
            logger.info("topic::::{}", topic);
            data.add(toDocument(topic));
        }
        weiboTopicDAO.addTopicList(data);
    }
    // The closing message now names the same job as the opening one (超话, not 话题).
    logger.info("微博超话采集结束........");
}

/** Copies the fields of one super topic into a new document. */
private Document toDocument(WeiboSuperTopic topic) {
    Document doc = new Document();
    doc.put("_id", topic.getId());
    doc.put("name", topic.getTopicName());
    doc.put("rank", topic.getRank());
    doc.put("score_num", topic.getScore());
    doc.put("fensi_num", topic.getFensi());
    doc.put("post_num", topic.getPostNum());
    doc.put("type", topic.getType());
    doc.put("day", topic.getDay());
    doc.put("time", topic.getTime());
    doc.put("url", topic.getUrl());
    return doc;
}
// @Async(value = "myScheduler")
// @Scheduled(cron = "45 0/10 * * * ? ")
// public void updateWeiBo(){
// logger.info("微博热搜导语更新...");
// @Scheduled(cron = "0 05 09 * * ? ")
// public void updateWeiboHistory(){
// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
// Set<String> hotSearchIdSet = redisDao.getRedisSetData(RedisConfig.WEIBO_HOTSEARCHIDS);
// redisDao.removeRedis(RedisConfig.WEIBO_HOTSEARCHIDS);
// Iterator<String> hotSearchIterator = hotSearchIdSet.iterator();
// while (hotSearchIterator.hasNext()){
// String id = hotSearchIterator.next();
// Document document = hotSearchCacheDAO.getHotSearchById(id);
// List<Document> documentList = hotSearchCacheDAO.getHotSearchList();
// int i=0;
// for (Document document : documentList){
// document = WeiboHotSearchCrawler.updateWeiBoTopic(document);
// if(document != null){
// document = WeiboHotSearchCrawler.weiboUpdate(document);
// if(document.containsKey("topicLead") || document.containsKey("readCount") || document.containsKey("discussCount")) {
// hotSearchCacheDAO.updateWeibo(document, id);
// }
// ZhiWeiTools.sleep(3000L);
// }
// }
// logger.info("微博热搜导语更新结束...");
// }
//
// /**
// * 今日头条热搜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerTouTiao(){
// logger.info("今日头条热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> toutiaoList = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(date);
// logger.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), toutiaoList != null ? toutiaoList.size() : 0);
// TipsUtils.addHotList(HotSearchType.今日头条热搜.name(),toutiaoList);
// logger.info("今日头条热搜采集结束...");
// logger.info("今日头条热搜详情趋势阅读量更新...");
// TouTiaoExecutor.countTouTiaoReadCount(toutiaoList);
// }
//
// /**
// * 百度热搜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerBaiDu(){
// logger.info("百度热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> baiduList = BaiDuHotSearchCrawler.baiduHotSearch(date);
// logger.info("{}, 百度热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(baiduList != null ? baiduList.size() : 0));
// TipsUtils.addHotList(HotSearchType.百度热搜.name(),baiduList);
// logger.info("百度热搜采集结束...");
// }
//
// /**
// * 抖音热搜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerDouYin(){
// logger.info("抖音热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> douyinList = DouyinHotSearchCrawler.getMobileDouyinHotList(date);
// logger.info("{}, 抖音热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(douyinList != null ? douyinList.size() : 0));
// TipsUtils.addHotList(HotSearchType.抖音热搜.name(),douyinList);
// logger.info("抖音热搜采集结束...");
// }
//
// /**
// * 抖音链接的更新
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 0/5 * * * ? ")
// public void updateDouYinUrl(){
// logger.info("抖音链接更新开始...");
// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
// List<HotSearchList> douyinList = DouyinHotSearchCrawler.list;
// if(douyinList!=null && douyinList.size()>0){
// for(int i=0; i<douyinList.size(); i++){
// String name = douyinList.get(i).getName();
// String id = name+"_"+douyinList.get(i).getType();
// String url = DouyinHotSearchCrawler.getDouyinUrl("https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword="+name);
// if(url != null) {
// Document document = new Document();
// document.put("id", id);
// document.put("url", url);
// hotSearchCacheDAO.updateDouyinUrl(document);
// }
// hotSearchCacheDAO.updateWeibo(document,document.getString("_id"));
// ZhiWeiTools.sleep(500L);
// }
// logger.info("抖音链接更新结束");
// }else{
// logger.info("抖音链接更新失败,抖音热搜列表获取为空。");
// }
// }
//
// /**
// * 知乎热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerZhihu(){
// logger.info("知乎热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> zhihuList = ZhihuHotSearchCrawler.getMobileZhihuHotList(date);
// logger.info("{}, 知乎热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuList != null ? zhihuList.size() : 0));
// TipsUtils.addHotList(HotSearchType.知乎热搜.name(),zhihuList);
// logger.info("知乎热搜采集结束...");
// }
//
// /**
// * 搜狗微信热词的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerWeChat(){
// logger.info("搜狗微信热词开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch(date);
// logger.info("{}, 搜狗微信热词采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.搜狗微信热搜.name(),list);
// logger.info("搜狗微信热词采集结束...");
// }
//
// /**
// * 搜狗微信热搜的采集(app端采集链接)
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void ceawlerSougouHotData(){
// logger.info("搜狗微信热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = SougoHotSearchCrawler.sougouHotDataCrawler(date);
// logger.info("{}, 搜狗微信热搜此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
// TipsUtils.addHotList(HotSearchType.搜狗微信客户端热搜.name(),list);
// logger.info("搜狗微信热搜采集结束...");
// }
//
// /**
// * 微博话题的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerWeiBoTopic(){
// logger.info("微博话题开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone(date);
// logger.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.微博话题.name(),list);
// logger.info("微博话题采集结束...");
// }
//
// /**
// * 腾讯新闻热点的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerTengXun(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = TengXunCrawler.getTengXunHotList(date);
// TipsUtils.addHotList(HotSearchType.腾讯新闻.name(),list);
// }
//
// /**
// * 新浪热点的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerXinLangHotSpot(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = XinLangHotSearchCrawler.getXinLangHotSpot(date);
// TipsUtils.addHotList(HotSearchType.新浪热点.name(),list);
// }
//
// /**
// * 新浪热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerXinLangHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = XinLangHotSearchCrawler.getXinLangHotSearch(date);
// TipsUtils.addHotList(HotSearchType.新浪热榜.name(),list);
// }
//
// /**
// * 网易新闻热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerWangYiHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WangYiHotSearchCrawler.getWangYiHotSearch(date);
// TipsUtils.addHotList(HotSearchType.网易热榜.name(),list);
// }
//
// /**
// * 网易新闻跟帖热议的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerWangYiHotComment(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WangYiHotSearchCrawler.getWangYicomment(date);
// TipsUtils.addHotList(HotSearchType.网易跟帖热议.name(),list);
// }
//
// /**
// * 凤凰新闻热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerFengHuangHotData(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = FengHuangSearchCrawler.getFengHuangHotData(date);
// TipsUtils.addHotList(HotSearchType.凤凰新闻热榜.name(),list);
// }
//
// /**
// * 凤凰新闻热搜的采集
// */
//// @Async(value = "myScheduler")
//// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerFengHuangHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = FengHuangSearchCrawler.getFengHuangHotSearch(date);
// TipsUtils.addHotList(HotSearchType.凤凰新闻热搜.name(),list);
// }
//
// /**
// * 腾讯较真辟谣榜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerTengXunVerificationHotSearch(){
// logger.info("{},腾讯较真辟谣榜开始采集", new Date());
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = TengXunCrawler.getTengXunVerificationList(date);
// logger.info("腾讯较真辟谣榜本轮采集数量:{}",list.size());
// TipsUtils.addHotList(HotSearchType.腾讯较真榜.name(), list);
// logger.info("{},腾讯较真辟谣榜采集结束", new Date());
// }
//
// /**
// * 搜狐话题的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerSouHuTopic(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = SouhuTopicCrawler.getSouhuTopic(date);
// TipsUtils.addHotList(HotSearchType.搜狐话题.name(),list);
// }
//
// /**
// * 知乎热搜话题的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhihuHotTopic(){
// logger.info("知乎热搜话题开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = ZhihuTopicSearchCrawler.getZhihuTopicSearch(date);
// logger.info("{}, 知乎热搜话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.知乎热搜榜单.name(),list);
// logger.info("知乎热搜话题采集结束...");
// }
//
// /**
// * 微博预热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerWeiBoPreheat(){
// logger.info("微博预热榜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboPreheatSearch(date);
// logger.info("{},微博预热榜此轮采集到的数据量为:{}", new Date(),Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.微博预热榜.name(),list);
// logger.info("微博预热榜采集结束...");
// }
//
// /**
// * 知乎热搜数码分类采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhiHuDigital(){
// this.crawlerZhiHuChild(DIGITAL);
// }
//
// /**
// * 知乎热搜国际分类采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhiHuFocus(){
// this.crawlerZhiHuChild(FOCUS);
// }
//
// /**
// * 知乎热搜时事分类采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhiHuDepth(){
// this.crawlerZhiHuChild(DEPTH);
// }
//
// /**
// * maimai采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "30 0/30 * * * ? ")
// public void crawlerMaiMaiHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
// int i=0;
// while (list.size()==0 && i<10){
// ZhiWeiTools.sleep(5000L);
// list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
// i++;
// logger.info("更新进度:{}",i*100/documentList.size());
// }
// TipsUtils.addHotList(HotSearchType.脉脉热榜.name(),list);
// }
//
// /**
// * B站排行榜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "30 * * * * ? ")
// public void crawlerBilibiliHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list =BililiCrawler.getBilibiliHotSearch(date);
// TipsUtils.addHotList(HotSearchType.B站排行榜.name(),list);
// }
//
// /**
// * B站热搜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "30 * * * * ? ")
// public void crawlerBilibiliHotData() {
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = BililiCrawler.getBiHotData(date);
// TipsUtils.addHotList(HotSearchType.B站热搜.name(),list);
// }
//
// /**
// * 微博超话的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 0 0/3 * * ? ")
// public void crawlerWeiBoSuperTopic(){
// logger.info("微博超话采集开始........");
// Date date = DateUtils.getMillSecondTime(new Date());
// WeiboSuperTopicDAO weiboTopicDAO = new WeiboSuperTopicDAO();
// List<WeiboSuperTopic> list = WeiboSuperTopicCrawler.startCrawler();
// logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// List<Document> data = new ArrayList<>();
// for(WeiboSuperTopic topic : list){
// logger.info("topic::::{}", topic);
// Document doc = new Document();
// doc.put("_id", topic.getId());
// doc.put("name", topic.getTopicName());
// doc.put("rank", topic.getRank());
// doc.put("score_num", topic.getScore());
// doc.put("fensi_num", topic.getFensi());
// doc.put("post_num", topic.getPostNum());
// doc.put("type", topic.getType());
// doc.put("day", topic.getDay());
// doc.put("time", topic.getTime());
// doc.put("url", topic.getUrl());
// data.add(doc);
// }
// weiboTopicDAO.addTopicList(data);
// logger.info("微博话题采集结束........");
// }
//
//
//// @Async(value = "myScheduler")
//// @Scheduled(cron = "0 05 09 * * ? ")
//// public void updateWeiboHistory(){
//// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
//// List<Document> documentList = hotSearchCacheDAO.getHotSearchList();
//// int i=0;
//// for (Document document : documentList){
//// document = WeiboHotSearchCrawler.updateWeiBoTopic(document);
//// if(document != null){
//// hotSearchCacheDAO.updateWeibo(document,document.getString("_id"));
//// ZhiWeiTools.sleep(500L);
//// }
//// i++;
//// logger.info("更新进度:{}",i*100/documentList.size());
//// }
//// logger.info("更新结束");
//// }
//
// /**
// * 知乎子类采集函数
// * @param type
// */
// private void crawlerZhiHuChild(String type){
// Date date = DateUtils.getMillSecondTime(new Date());
// String name = this.getTypeName(type);
// logger.info("知乎{}话题热榜采集开始...", name);
// List<HotSearchList> list = ZhihuChildHotSearchCrawler.getZhihuTopicSearch(type,name,date);
// logger.info("{}, 知乎{}话题此轮采集到的数据量为:{}", new Date(),name, Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(name,list);
// logger.info("知乎{}话题热榜采集结束...", name);
// }
//
// private String getTypeName(String type){
// String name;
// switch (type) {
// case "digital":
// name = "数码";
// break;
// case "focus":
// name = "国际";
// break;
// case "depth":
// name = "时事";
// break;
// default:
// name = "";
// }
// return name;
// }
// /**
// *快手热榜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerKuaiShou(){
// logger.info("快手热榜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> kuaiShouList = KuaiShouHotSearchCrawler.KuaiShouHotSearchCrawler(date);
// logger.info("{}, 快手此轮采集到的数据量为:{}", new Date(), kuaiShouList != null ? kuaiShouList.size() : 0);
// TipsUtils.addHotList(HotSearchType.快手热榜.name(), kuaiShouList);
// logger.info("快手热榜采集结束...");
// logger.info("更新结束");
// }
/**
 * Runs one collection pass for a single Zhihu hot-list sub-category.
 *
 * <p>The display name for {@code type} is resolved through
 * {@link #getTypeName(String)}, the entries are fetched with
 * {@code ZhihuChildHotSearchCrawler.getZhihuTopicSearch}, the number of
 * entries is logged (0 when the crawler returned {@code null}), and the list
 * is handed to {@code TipsUtils.addHotList} under the resolved display name.
 *
 * @param type category key forwarded to the crawler and to
 *             {@link #getTypeName(String)}
 */
private void crawlerZhiHuChild(String type) {
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    String typeName = this.getTypeName(type);
    logger.info("知乎{}话题热榜采集开始...", typeName);

    List<HotSearchList> hotList =
            ZhihuChildHotSearchCrawler.getZhihuTopicSearch(type, typeName, crawlTime);

    // The crawler result may be null; report it as zero collected entries.
    int collected;
    if (hotList == null) {
        collected = 0;
    } else {
        collected = hotList.size();
    }
    logger.info("{}, 知乎{}话题此轮采集到的数据量为:{}", new Date(), typeName, Integer.valueOf(collected));

    // The list is forwarded as-is (including null), exactly as the crawler returned it.
    TipsUtils.addHotList(typeName, hotList);
    logger.info("知乎{}话题热榜采集结束...", typeName);
}
/**
 * Maps a Zhihu sub-category key to the Chinese label used in log messages
 * and as the hot-list name.
 *
 * @param type category key; {@code "digital"}, {@code "focus"} and
 *             {@code "depth"} are recognised. Must not be {@code null},
 *             because switching on a null string throws
 *             {@link NullPointerException}.
 * @return the label for a recognised key, or an empty string for any other key
 */
private String getTypeName(String type) {
    switch (type) {
        case "digital":
            return "数码";
        case "focus":
            return "国际";
        case "depth":
            return "时事";
        default:
            // Unrecognised keys have no label.
            return "";
    }
}
/**
 * Scheduled collection of the Kuaishou hot list.
 *
 * <p>Fetches the current entries with
 * {@code KuaiShouHotSearchCrawler.KuaiShouHotSearchCrawler}, logs how many
 * were returned (0 when the result is {@code null}), and hands the list to
 * {@code TipsUtils.addHotList} under the {@code HotSearchType.快手热榜} name.
 * Under Spring's six-field cron syntax the expression fires at second 0 of
 * every minute; the task is dispatched through the {@code "myScheduler"}
 * async executor.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerKuaiShou() {
    logger.info("快手热榜开始采集...");

    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> entries = KuaiShouHotSearchCrawler.KuaiShouHotSearchCrawler(crawlTime);

    // The crawler result may be null; report it as zero collected entries.
    int collected = (entries == null) ? 0 : entries.size();
    logger.info("{}, 快手此轮采集到的数据量为:{}", new Date(), collected);

    // The list is forwarded as-is (including null), exactly as the crawler returned it.
    TipsUtils.addHotList(HotSearchType.快手热榜.name(), entries);
    logger.info("快手热榜采集结束...");
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment