Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
7e156432
Commit
7e156432
authored
Jul 12, 2021
by
leiliangliang
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
新增微博话题采集话题贡献者,关于功能
parent
f986b5c8
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
545 additions
and
512 deletions
+545
-512
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
+6
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+56
-29
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
+483
-483
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
View file @
7e156432
...
@@ -73,4 +73,10 @@ public class WeiBoUser implements Serializable {
...
@@ -73,4 +73,10 @@ public class WeiBoUser implements Serializable {
this
.
profileImageUrl
=
profileImageUrl
;
this
.
profileImageUrl
=
profileImageUrl
;
}
}
/**
 * Creates a user with only its identity, topic and timestamp populated.
 *
 * <p>All other fields of the bean are left at their default values.
 *
 * @param userId   value stored in the {@code userId} field
 * @param userName value stored in the {@code userName} field
 * @param topic    value stored in the {@code topic} field
 * @param time     value stored in the {@code time} field
 */
public WeiBoUser(String userId, String userName, String topic, Date time) {
    this.time = time;
    this.topic = topic;
    this.userName = userName;
    this.userId = userId;
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
7e156432
...
@@ -328,17 +328,18 @@ public class WeiboHotSearchCrawler {
...
@@ -328,17 +328,18 @@ public class WeiboHotSearchCrawler {
}
}
/**
/**
* 微博热搜数据更新话题贡献者排行,阅读量,讨论量,关于
* 微博热搜数据更新话题贡献者排行,关于
*
* @param document
* @param document
* @return
* @return
*/
*/
public
static
Document
weiboUpdatePC
(
Document
document
)
{
public
static
Document
weiboUpdatePC
(
Document
document
)
{
document
.
getString
(
"name"
);
document
.
getString
(
"name"
);
String
name
=
document
.
getString
(
"name"
);
String
topic
=
document
.
getString
(
"name"
);
String
gb
=
"#"
+
name
+
"#"
;
String
gb
=
"#"
+
topic
+
"#"
;
String
encode
=
null
;
String
encode
=
null
;
try
{
try
{
encode
=
URLEncoder
.
encode
(
gb
,
"utf-8"
);
encode
=
URLEncoder
.
encode
(
gb
,
"utf-8"
);
}
catch
(
UnsupportedEncodingException
e
)
{
}
catch
(
UnsupportedEncodingException
e
)
{
log
.
error
(
"字符解析成URl模式异常"
,
e
);
log
.
error
(
"字符解析成URl模式异常"
,
e
);
}
}
...
@@ -356,35 +357,37 @@ public class WeiboHotSearchCrawler {
...
@@ -356,35 +357,37 @@ public class WeiboHotSearchCrawler {
org
.
jsoup
.
nodes
.
Document
documen
=
Jsoup
.
parse
(
htmlBody
);
org
.
jsoup
.
nodes
.
Document
documen
=
Jsoup
.
parse
(
htmlBody
);
//获取贡献者信息
//获取贡献者信息
try
{
try
{
Elements
li
=
documen
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
);
if
(
Objects
.
isNull
(
weiBoUserDao
))
{
Elements
cardUser
=
documen
.
select
(
"div.card-user"
);
weiBoUserDao
=
new
WeiBoUserDao
();
for
(
Element
element
:
cardUser
)
{
}
if
(!
element
.
select
(
"div.card-head"
).
text
().
isEmpty
())
{
if
(
Objects
.
nonNull
(
li
))
{
Elements
li
=
element
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
);
Date
date
=
new
Date
();
if
(
Objects
.
nonNull
(
li
))
{
for
(
Element
element
:
li
)
{
//循环获取话题贡献者相关信息
WeiBoUser
weiBoUser
=
new
WeiBoUser
();
for
(
Element
eleme
:
li
)
{
//获取用户名
String
type
=
"话题贡献者"
;
String
userName
=
element
.
select
(
"a.name"
).
text
(
);
writeUser
(
eleme
,
type
,
topic
);
//获取用户id
}
String
attr
=
element
.
select
(
"span.avator"
).
select
(
"a"
).
first
().
attr
(
"href"
);
}
String
userId
=
attr
.
substring
(
14
);
}
else
{
String
type
=
"话题贡献者"
;
Elements
li
=
element
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
)
;
String
id
=
userId
+
"_"
+
type
+
"_"
+
name
;
if
(
Objects
.
nonNull
(
li
))
{
weiBoUser
.
setType
(
type
);
weiBoUser
.
setId
(
id
);
//循环获取话题贡献者相关信息
weiBoUser
.
setUserName
(
userName
);
for
(
Element
eleme
:
li
)
{
weiBoUser
.
setUserId
(
userId
)
;
String
type
=
"当事人"
;
weiBoUser
.
setTopic
(
name
);
writeUser
(
eleme
,
type
,
topic
);
weiBoUser
.
setTime
(
date
);
}
weiBoUserDao
.
addWeiBoUser
(
weiBoUser
);
}
}
}
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"话题贡献者排行采集异常"
,
e
);
log
.
error
(
"话题贡献者排行采集异常"
,
e
);
}
}
Elements
dt
=
documen
.
select
(
"div.card-about"
).
select
(
"dt"
);
Elements
dt
=
documen
.
select
(
"div.card-about"
).
select
(
"dt"
);
if
(
Objects
.
nonNull
(
dt
))
{
if
(
Objects
.
nonNull
(
dt
))
{
//获取微博关于的相关信息
Elements
dd
=
documen
.
select
(
"div.card-about"
).
select
(
"dd"
);
Elements
dd
=
documen
.
select
(
"div.card-about"
).
select
(
"dd"
);
Document
dtDocument
=
new
Document
();
Document
dtDocument
=
new
Document
();
Document
ddDocument
=
new
Document
();
Document
ddDocument
=
new
Document
();
...
@@ -407,12 +410,36 @@ public class WeiboHotSearchCrawler {
...
@@ -407,12 +410,36 @@ public class WeiboHotSearchCrawler {
}
}
return
docm
;
return
docm
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析微博话题时出现解析错误"
,
e
);
log
.
error
(
"解析微博话题时出现解析错误"
,
e
);
}
}
}
}
return
document
;
return
document
;
}
}
/**
 * Builds a {@code WeiBoUser} from one user list item and persists it.
 *
 * <p>The user name is the text of {@code a.name}. The user id is the {@code href} of the first
 * link under {@code span.avator} with its first 14 characters removed. The record id is
 * {@code userId + "_" + type + "_" + topic}.
 *
 * <p>Items without an avatar link, or whose link is too short to contain an id, are logged and
 * skipped so that one malformed entry does not abort the caller's loop over the remaining users.
 *
 * @param eleme the list item element describing one user
 * @param type  the role label stored on the record and embedded in its id
 * @param topic the topic name stored on the record and embedded in its id
 */
private static void writeUser(Element eleme, String type, String topic) {
    // Number of leading href characters dropped to obtain the user id.
    // NOTE(review): presumably the length of a fixed prefix such as "//weibo.com/u/" - confirm.
    final int userIdOffset = 14;

    Element avatarLink = eleme.select("span.avator").select("a").first();
    if (avatarLink == null) {
        // first() returns null when nothing matched; the original dereferenced it directly.
        log.warn("话题用户缺少头像链接,跳过写入, topic=" + topic + ", type=" + type);
        return;
    }
    String attr = avatarLink.attr("href");
    if (attr.length() <= userIdOffset) {
        log.warn("话题用户链接无法解析用户id,跳过写入, href=" + attr + ", topic=" + topic);
        return;
    }

    String userName = eleme.select("a.name").text();
    String userId = attr.substring(userIdOffset);
    String id = userId + "_" + type + "_" + topic;

    if (Objects.isNull(weiBoUserDao)) {
        weiBoUserDao = new WeiBoUserDao();
    }

    // Argument order must follow WeiBoUser(userId, userName, topic, time); the previous call
    // passed (userName, userId, ...) and therefore stored the two values swapped.
    WeiBoUser weiBoUser = new WeiBoUser(userId, userName, topic, new Date());
    weiBoUser.setType(type);
    weiBoUser.setId(id);
    weiBoUserDao.addWeiBoUser(weiBoUser);
}
/**
/**
* 解析微博信息
* 解析微博信息
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
View file @
7e156432
...
@@ -29,496 +29,496 @@ import java.util.*;
...
@@ -29,496 +29,496 @@ import java.util.*;
@EnableScheduling
@EnableScheduling
@EnableAsync
@EnableAsync
public
class
GatherTimer
{
public
class
GatherTimer
{
//
// private Logger logger = LoggerFactory.getLogger(GatherTimer.class);
/** Logger shared by all scheduled jobs of this class. */
private static final Logger logger = LoggerFactory.getLogger(GatherTimer.class);

/** Redis access object used by the scheduled jobs. */
private RedisDao redisDao = new RedisDao();

/** Zhihu "digital" sub-category key. */
private static final String DIGITAL = "digital";

/** Zhihu "international" sub-category key. */
private static final String FOCUS = "focus";

/** Zhihu "current affairs" sub-category key. */
private static final String DEPTH = "depth";
//
//
// /**
/**
// * 虎嗅热文推荐的采集
* 虎嗅热文推荐的采集
// */
*/
// @Async(value = "myScheduler")
@Async
(
value
=
"myScheduler"
)
// @Scheduled(cron = "0 * * * * ?")
@Scheduled
(
cron
=
"0 * * * * ?"
)
// public void crawlerHuXiu() {
public
void
crawlerHuXiu
()
{
// logger.info("虎嗅热文推荐开始采集...");
logger
.
info
(
"虎嗅热文推荐开始采集..."
);
// Date date = DateUtils.getMillSecondTime(new Date());
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
// List<HotSearchList> huXiuList = HuXiuHotSearchCrawler.HuXiuHotArticleRecommended(date);
List
<
HotSearchList
>
huXiuList
=
HuXiuHotSearchCrawler
.
HuXiuHotArticleRecommended
(
date
);
// logger.info("{}, 虎嗅热文推荐此轮采集到的数据量为:{}", new Date(), Integer.valueOf(huXiuList != null ? huXiuList.size() : 0));
logger
.
info
(
"{}, 虎嗅热文推荐此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
huXiuList
!=
null
?
huXiuList
.
size
()
:
0
));
// TipsUtils.addHotList(HotSearchType.虎嗅热文推荐.name(), huXiuList);
TipsUtils
.
addHotList
(
HotSearchType
.
虎嗅热文推荐
.
name
(),
huXiuList
);
// logger.info("虎嗅热文推荐采集结束...");
logger
.
info
(
"虎嗅热文推荐采集结束..."
);
//
// /**
/**
// * 36氪人气榜的采集
* 36氪人气榜的采集
// */
*/
// logger.info("36氪人气榜开始采集...");
logger
.
info
(
"36氪人气榜开始采集..."
);
// List<HotSearchList> list36Kr = HotSearch36KrCrawler.hotSearch36Kr(date);
List
<
HotSearchList
>
list36Kr
=
HotSearch36KrCrawler
.
hotSearch36Kr
(
date
);
// logger.info("{}, 36氪人气榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list36Kr != null ? list36Kr.size() : 0));
logger
.
info
(
"{}, 36氪人气榜此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list36Kr
!=
null
?
list36Kr
.
size
()
:
0
));
// TipsUtils.addHotList(HotSearchType.人气榜36氪.name(), list36Kr);
TipsUtils
.
addHotList
(
HotSearchType
.
人气榜
36
氪
.
name
(),
list36Kr
);
// logger.info("36氪人气榜采集结束...");
logger
.
info
(
"36氪人气榜采集结束..."
);
// }
}
//
// /**
/**
// * 微博热搜的采集
* 微博热搜的采集
// */
*/
@Async
(
value
=
"myScheduler"
)
@Scheduled
(
cron
=
"0 * * * * ? "
)
public
void
crawlerWeiBo
(){
logger
.
info
(
"微博热搜开始采集..."
);
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
List
<
HotSearchList
>
weiboList
=
WeiboHotSearchCrawler
.
weiboHotSearchByPhone
(
date
);
logger
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
weiboList
!=
null
?
weiboList
.
size
()
:
0
);
TipsUtils
.
addHotList
(
HotSearchType
.
微博热搜
.
name
(),
weiboList
);
logger
.
info
(
"微博热搜采集结束..."
);
}
/**
 * Refreshes the topic lead, read count and discussion count of cached Weibo hot-search entries.
 *
 * <p>The pending ids are read from the Redis set {@code RedisConfig.WEIBO_HOTSEARCHIDS}, the set
 * is removed, and every id that still has a cached document is re-crawled. A document is written
 * back only when it carries at least one of the refreshed keys. Scheduled at second 45 of every
 * tenth minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "45 0/10 * * * ? ")
public void updateWeiBo() {
    logger.info("微博热搜导语更新...");
    HotSearchCacheDAO cacheDao = new HotSearchCacheDAO();
    Set<String> pendingIds = redisDao.getRedisSetData(RedisConfig.WEIBO_HOTSEARCHIDS);
    redisDao.removeRedis(RedisConfig.WEIBO_HOTSEARCHIDS);

    for (String pendingId : pendingIds) {
        Document cached = cacheDao.getHotSearchById(pendingId);
        if (cached == null) {
            continue;
        }
        Document refreshed = WeiboHotSearchCrawler.weiboUpdate(cached);
        boolean hasRefreshedField = refreshed.containsKey("topicLead")
                || refreshed.containsKey("readCount")
                || refreshed.containsKey("discussCount");
        if (hasRefreshedField) {
            cacheDao.updateWeibo(refreshed, pendingId);
        }
        // Pause between entries that were actually re-crawled.
        ZhiWeiTools.sleep(3000L);
    }
    logger.info("微博热搜导语更新结束...");
}
/**
 * Collects the Toutiao hot-search list, stores it, and then triggers the read-count update for
 * the collected items.
 *
 * <p>Scheduled at second 0 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerTouTiao() {
    logger.info("今日头条热搜开始采集...");
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(crawlTime);
    logger.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), hotItems == null ? 0 : hotItems.size());
    TipsUtils.addHotList(HotSearchType.今日头条热搜.name(), hotItems);
    logger.info("今日头条热搜采集结束...");

    // Follow-up step: update the detail/trend read counts for the same items.
    logger.info("今日头条热搜详情趋势阅读量更新...");
    TouTiaoExecutor.countTouTiaoReadCount(hotItems);
}
/**
 * Collects the Baidu hot-search list and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 0 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerBaiDu() {
    logger.info("百度热搜开始采集...");
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = BaiDuHotSearchCrawler.baiduHotSearch(crawlTime);
    // Autoboxing replaces the explicit Integer.valueOf of the count.
    logger.info("{}, 百度热搜此轮采集到的数据量为:{}", new Date(), hotItems == null ? 0 : hotItems.size());
    TipsUtils.addHotList(HotSearchType.百度热搜.name(), hotItems);
    logger.info("百度热搜采集结束...");
}
/**
 * Collects the Douyin hot-search list and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 0 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerDouYin() {
    logger.info("抖音热搜开始采集...");
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = DouyinHotSearchCrawler.getMobileDouyinHotList(crawlTime);
    logger.info("{}, 抖音热搜此轮采集到的数据量为:{}", new Date(), hotItems == null ? 0 : hotItems.size());
    TipsUtils.addHotList(HotSearchType.抖音热搜.name(), hotItems);
    logger.info("抖音热搜采集结束...");
}
/**
 * Resolves and stores a video-list URL for every entry of the most recent Douyin hot-search list.
 *
 * <p>The list is read from the static field {@code DouyinHotSearchCrawler.list}. For each entry
 * a URL is looked up by hot word; when one is found, a document with {@code id}
 * ({@code name + "_" + type}) and {@code url} is passed to
 * {@code HotSearchCacheDAO.updateDouyinUrl}. Scheduled at second 0 of every fifth minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 0/5 * * * ? ")
public void updateDouYinUrl() {
    logger.info("抖音链接更新开始...");
    HotSearchCacheDAO cacheDao = new HotSearchCacheDAO();
    List<HotSearchList> hotWords = DouyinHotSearchCrawler.list;

    // Nothing collected yet: report and stop.
    if (hotWords == null || hotWords.size() <= 0) {
        logger.info("抖音链接更新失败,抖音热搜列表获取为空。");
        return;
    }

    for (int idx = 0; idx < hotWords.size(); idx++) {
        String hotWord = hotWords.get(idx).getName();
        String cacheId = hotWord + "_" + hotWords.get(idx).getType();
        String videoUrl = DouyinHotSearchCrawler.getDouyinUrl(
                "https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword=" + hotWord);
        if (videoUrl == null) {
            continue;
        }
        Document update = new Document();
        update.put("id", cacheId);
        update.put("url", videoUrl);
        cacheDao.updateDouyinUrl(update);
    }
    logger.info("抖音链接更新结束");
}
/**
 * Collects the Zhihu hot list and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 0 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerZhihu() {
    logger.info("知乎热搜开始采集...");
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = ZhihuHotSearchCrawler.getMobileZhihuHotList(crawlTime);
    logger.info("{}, 知乎热搜此轮采集到的数据量为:{}", new Date(), hotItems == null ? 0 : hotItems.size());
    TipsUtils.addHotList(HotSearchType.知乎热搜.name(), hotItems);
    logger.info("知乎热搜采集结束...");
}
/**
 * Collects the Sogou WeChat hot words and hands them to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 0 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerWeChat() {
    logger.info("搜狗微信热词开始采集...");
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotWords = SougoHotSearchCrawler.sougoHotSearch(crawlTime);
    logger.info("{}, 搜狗微信热词采集到的数据量为:{}", new Date(), hotWords == null ? 0 : hotWords.size());
    TipsUtils.addHotList(HotSearchType.搜狗微信热搜.name(), hotWords);
    logger.info("搜狗微信热词采集结束...");
}
/**
 * Collects the Sogou WeChat hot-search list (app-side endpoint) and hands it to
 * {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 20 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void ceawlerSougouHotData() {
    logger.info("搜狗微信热搜开始采集...");
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = SougoHotSearchCrawler.sougouHotDataCrawler(crawlTime);
    logger.info("{}, 搜狗微信热搜此轮采集到的数据量为:{}", new Date(), hotItems == null ? 0 : hotItems.size());
    TipsUtils.addHotList(HotSearchType.搜狗微信客户端热搜.name(), hotItems);
    logger.info("搜狗微信热搜采集结束...");
}
/**
 * Collects the Weibo topic list and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 0 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerWeiBoTopic() {
    logger.info("微博话题开始采集...");
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> topics = WeiboTopicCrawler.startCrawlerByPhone(crawlTime);
    logger.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), topics == null ? 0 : topics.size());
    TipsUtils.addHotList(HotSearchType.微博话题.name(), topics);
    logger.info("微博话题采集结束...");
}
/**
 * Collects the Tencent News hot list and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 10 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerTengXun() {
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = TengXunCrawler.getTengXunHotList(crawlTime);
    final String typeName = HotSearchType.腾讯新闻.name();
    TipsUtils.addHotList(typeName, hotItems);
}
/**
 * Collects the Sina hot-spot list and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 10 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerXinLangHotSpot() {
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotSpots = XinLangHotSearchCrawler.getXinLangHotSpot(crawlTime);
    final String typeName = HotSearchType.新浪热点.name();
    TipsUtils.addHotList(typeName, hotSpots);
}
/**
 * Collects the Sina hot ranking and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 10 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerXinLangHotSearch() {
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> ranking = XinLangHotSearchCrawler.getXinLangHotSearch(crawlTime);
    final String typeName = HotSearchType.新浪热榜.name();
    TipsUtils.addHotList(typeName, ranking);
}
/**
 * Collects the NetEase News hot ranking and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 10 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerWangYiHotSearch() {
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> ranking = WangYiHotSearchCrawler.getWangYiHotSearch(crawlTime);
    final String typeName = HotSearchType.网易热榜.name();
    TipsUtils.addHotList(typeName, ranking);
}
/**
 * Collects the NetEase News hot comment threads and hands them to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 10 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerWangYiHotComment() {
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> threads = WangYiHotSearchCrawler.getWangYicomment(crawlTime);
    final String typeName = HotSearchType.网易跟帖热议.name();
    TipsUtils.addHotList(typeName, threads);
}
/**
 * Collects the ifeng News hot ranking and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 10 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerFengHuangHotData() {
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> ranking = FengHuangSearchCrawler.getFengHuangHotData(crawlTime);
    final String typeName = HotSearchType.凤凰新闻热榜.name();
    TipsUtils.addHotList(typeName, ranking);
}
/**
 * Collects the ifeng News hot-search list and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>The scheduling annotations below are commented out, so this method only runs when it is
 * called explicitly.
 */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
public void crawlerFengHuangHotSearch() {
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = FengHuangSearchCrawler.getFengHuangHotSearch(crawlTime);
    final String typeName = HotSearchType.凤凰新闻热搜.name();
    TipsUtils.addHotList(typeName, hotItems);
}
// TipsUtils.addHotList(HotSearchType.微博热搜.name(),weiboList);
// logger.info("微博热搜采集结束...");
/**
 * Collects the Tencent Jiaozhen fact-check ranking and hands it to
 * {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 10 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerTengXunVerificationHotSearch() {
    logger.info("{},腾讯较真辟谣榜开始采集", new Date());
    Date date = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> list = TengXunCrawler.getTengXunVerificationList(date);
    // The other collectors in this class treat a crawler result as nullable when logging its
    // size; do the same here so a failed crawl does not abort the job with a
    // NullPointerException before the result is handed on.
    logger.info("腾讯较真辟谣榜本轮采集数量:{}", list != null ? list.size() : 0);
    TipsUtils.addHotList(HotSearchType.腾讯较真榜.name(), list);
    logger.info("{},腾讯较真辟谣榜采集结束", new Date());
}
/**
 * Collects the Sohu topic list and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 20 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerSouHuTopic() {
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> topics = SouhuTopicCrawler.getSouhuTopic(crawlTime);
    final String typeName = HotSearchType.搜狐话题.name();
    TipsUtils.addHotList(typeName, topics);
}
/**
 * Collects the Zhihu hot-search topic list and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 20 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhihuHotTopic() {
    logger.info("知乎热搜话题开始采集...");
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> topics = ZhihuTopicSearchCrawler.getZhihuTopicSearch(crawlTime);
    logger.info("{}, 知乎热搜话题此轮采集到的数据量为:{}", new Date(), topics == null ? 0 : topics.size());
    TipsUtils.addHotList(HotSearchType.知乎热搜榜单.name(), topics);
    logger.info("知乎热搜话题采集结束...");
}
/**
 * Collects the Weibo pre-heat ranking and hands it to {@code TipsUtils.addHotList}.
 *
 * <p>Scheduled at second 20 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerWeiBoPreheat() {
    logger.info("微博预热榜开始采集...");
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> ranking = WeiboHotSearchCrawler.weiboPreheatSearch(crawlTime);
    logger.info("{},微博预热榜此轮采集到的数据量为:{}", new Date(), ranking == null ? 0 : ranking.size());
    TipsUtils.addHotList(HotSearchType.微博预热榜.name(), ranking);
    logger.info("微博预热榜采集结束...");
}
/**
 * Collects the Zhihu hot-search "digital" sub-category by delegating to
 * {@code crawlerZhiHuChild} with {@code DIGITAL}.
 *
 * <p>Scheduled at second 20 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuDigital() {
    crawlerZhiHuChild(DIGITAL);
}
/**
 * Collects the Zhihu hot-search "international" sub-category by delegating to
 * {@code crawlerZhiHuChild} with {@code FOCUS}.
 *
 * <p>Scheduled at second 20 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuFocus() {
    crawlerZhiHuChild(FOCUS);
}
/**
 * Collects the Zhihu hot-search "current affairs" sub-category by delegating to
 * {@code crawlerZhiHuChild} with {@code DEPTH}.
 *
 * <p>Scheduled at second 20 of every minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuDepth() {
    crawlerZhiHuChild(DEPTH);
}
/**
 * Collects the Maimai hot list, retrying while the crawl comes back empty, and hands the final
 * result to {@code TipsUtils.addHotList}.
 *
 * <p>After the first attempt, up to 10 further attempts are made with a 5 second pause before
 * each one. Whatever the last attempt returned is passed on. Scheduled at second 30 of every
 * thirtieth minute.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "30 0/30 * * * ? ")
public void crawlerMaiMaiHotSearch() {
    Date date = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
    // Other collectors in this class treat crawler results as nullable, so a null result is
    // retried like an empty one instead of failing on list.size().
    for (int attempt = 0; (list == null || list.isEmpty()) && attempt < 10; attempt++) {
        ZhiWeiTools.sleep(5000L);
        list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
    }
    TipsUtils.addHotList(HotSearchType.脉脉热榜.name(), list);
}
/**
* B站排行榜采集
*/
@Async
(
value
=
"myScheduler"
)
@Scheduled
(
cron
=
"30 * * * * ? "
)
public
void
crawlerBilibiliHotSearch
(){
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
List
<
HotSearchList
>
list
=
BililiCrawler
.
getBilibiliHotSearch
(
date
);
TipsUtils
.
addHotList
(
HotSearchType
.
B
站排行榜
.
name
(),
list
);
}
/**
* B站热搜采集
*/
@Async
(
value
=
"myScheduler"
)
@Scheduled
(
cron
=
"30 * * * * ? "
)
public
void
crawlerBilibiliHotData
()
{
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
List
<
HotSearchList
>
list
=
BililiCrawler
.
getBiHotData
(
date
);
TipsUtils
.
addHotList
(
HotSearchType
.
B
站热搜
.
name
(),
list
);
}
/**
 * Collects the Weibo super topics, converts each one to a document and stores the batch through
 * {@code WeiboSuperTopicDAO.addTopicList}.
 *
 * <p>A null crawl result is handled like an empty one: no documents are built and an empty batch
 * is passed to the DAO. Scheduled at minute 0 of every third hour.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 0 0/3 * * ? ")
public void crawlerWeiBoSuperTopic() {
    logger.info("微博超话采集开始........");
    WeiboSuperTopicDAO weiboTopicDAO = new WeiboSuperTopicDAO();
    List<WeiboSuperTopic> list = WeiboSuperTopicCrawler.startCrawler();
    logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);

    List<Document> data = new ArrayList<>();
    // The size log above already allows for a null result; the loop must allow for it too,
    // otherwise a failed crawl ends in a NullPointerException.
    if (list != null) {
        for (WeiboSuperTopic topic : list) {
            logger.info("topic::::{}", topic);
            data.add(toDocument(topic));
        }
    }
    weiboTopicDAO.addTopicList(data);
    // This job collects super topics, so the closing message now matches the opening one.
    logger.info("微博超话采集结束........");
}

/** Copies the fields of one super topic into the document layout written by the DAO. */
private static Document toDocument(WeiboSuperTopic topic) {
    Document doc = new Document();
    doc.put("_id", topic.getId());
    doc.put("name", topic.getTopicName());
    doc.put("rank", topic.getRank());
    doc.put("score_num", topic.getScore());
    doc.put("fensi_num", topic.getFensi());
    doc.put("post_num", topic.getPostNum());
    doc.put("type", topic.getType());
    doc.put("day", topic.getDay());
    doc.put("time", topic.getTime());
    doc.put("url", topic.getUrl());
    return doc;
}
// @Async(value = "myScheduler")
// @Async(value = "myScheduler")
// @Scheduled(cron = "45 0/10 * * * ? ")
// @Scheduled(cron = "0 05 09 * * ? ")
// public void updateWeiBo(){
// public void updateWeiboHistory(){
// logger.info("微博热搜导语更新...");
// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
// Set<String> hotSearchIdSet = redisDao.getRedisSetData(RedisConfig.WEIBO_HOTSEARCHIDS);
// List<Document> documentList = hotSearchCacheDAO.getHotSearchList();
// redisDao.removeRedis(RedisConfig.WEIBO_HOTSEARCHIDS);
// int i=0;
// Iterator<String> hotSearchIterator = hotSearchIdSet.iterator();
// for (Document document : documentList){
// while (hotSearchIterator.hasNext()){
// document = WeiboHotSearchCrawler.updateWeiBoTopic(document);
// String id = hotSearchIterator.next();
// Document document = hotSearchCacheDAO.getHotSearchById(id);
// if(document != null){
// if(document != null){
// document = WeiboHotSearchCrawler.weiboUpdate(document);
// hotSearchCacheDAO.updateWeibo(document,document.getString("_id"));
// if(document.containsKey("topicLead") || document.containsKey("readCount") || document.containsKey("discussCount")) {
// ZhiWeiTools.sleep(500L);
// hotSearchCacheDAO.updateWeibo(document, id);
// }
// ZhiWeiTools.sleep(3000L);
// }
// }
// logger.info("微博热搜导语更新结束...");
// }
//
// /**
// * 今日头条热搜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerTouTiao(){
// logger.info("今日头条热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> toutiaoList = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(date);
// logger.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), toutiaoList != null ? toutiaoList.size() : 0);
// TipsUtils.addHotList(HotSearchType.今日头条热搜.name(),toutiaoList);
// logger.info("今日头条热搜采集结束...");
// logger.info("今日头条热搜详情趋势阅读量更新...");
// TouTiaoExecutor.countTouTiaoReadCount(toutiaoList);
// }
//
// /**
// * 百度热搜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerBaiDu(){
// logger.info("百度热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> baiduList = BaiDuHotSearchCrawler.baiduHotSearch(date);
// logger.info("{}, 百度热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(baiduList != null ? baiduList.size() : 0));
// TipsUtils.addHotList(HotSearchType.百度热搜.name(),baiduList);
// logger.info("百度热搜采集结束...");
// }
//
// /**
// * 抖音热搜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerDouYin(){
// logger.info("抖音热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> douyinList = DouyinHotSearchCrawler.getMobileDouyinHotList(date);
// logger.info("{}, 抖音热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(douyinList != null ? douyinList.size() : 0));
// TipsUtils.addHotList(HotSearchType.抖音热搜.name(),douyinList);
// logger.info("抖音热搜采集结束...");
// }
//
// /**
// * 抖音链接的更新
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 0/5 * * * ? ")
// public void updateDouYinUrl(){
// logger.info("抖音链接更新开始...");
// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
// List<HotSearchList> douyinList = DouyinHotSearchCrawler.list;
// if(douyinList!=null && douyinList.size()>0){
// for(int i=0; i<douyinList.size(); i++){
// String name = douyinList.get(i).getName();
// String id = name+"_"+douyinList.get(i).getType();
// String url = DouyinHotSearchCrawler.getDouyinUrl("https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword="+name);
// if(url != null) {
// Document document = new Document();
// document.put("id", id);
// document.put("url", url);
// hotSearchCacheDAO.updateDouyinUrl(document);
// }
// }
// }
// logger.info("抖音链接更新结束");
// }else{
// logger.info("抖音链接更新失败,抖音热搜列表获取为空。");
// }
// }
//
// /**
// * 知乎热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerZhihu(){
// logger.info("知乎热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> zhihuList = ZhihuHotSearchCrawler.getMobileZhihuHotList(date);
// logger.info("{}, 知乎热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuList != null ? zhihuList.size() : 0));
// TipsUtils.addHotList(HotSearchType.知乎热搜.name(),zhihuList);
// logger.info("知乎热搜采集结束...");
// }
//
// /**
// * 搜狗微信热词的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerWeChat(){
// logger.info("搜狗微信热词开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch(date);
// logger.info("{}, 搜狗微信热词采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.搜狗微信热搜.name(),list);
// logger.info("搜狗微信热词采集结束...");
// }
//
// /**
// * 搜狗微信热搜的采集(app端采集链接)
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void ceawlerSougouHotData(){
// logger.info("搜狗微信热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = SougoHotSearchCrawler.sougouHotDataCrawler(date);
// logger.info("{}, 搜狗微信热搜此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
// TipsUtils.addHotList(HotSearchType.搜狗微信客户端热搜.name(),list);
// logger.info("搜狗微信热搜采集结束...");
// }
//
// /**
// * 微博话题的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerWeiBoTopic(){
// logger.info("微博话题开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone(date);
// logger.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.微博话题.name(),list);
// logger.info("微博话题采集结束...");
// }
//
// /**
// * 腾讯新闻热点的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerTengXun(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = TengXunCrawler.getTengXunHotList(date);
// TipsUtils.addHotList(HotSearchType.腾讯新闻.name(),list);
// }
//
// /**
// * 新浪热点的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerXinLangHotSpot(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = XinLangHotSearchCrawler.getXinLangHotSpot(date);
// TipsUtils.addHotList(HotSearchType.新浪热点.name(),list);
// }
//
// /**
// * 新浪热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerXinLangHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = XinLangHotSearchCrawler.getXinLangHotSearch(date);
// TipsUtils.addHotList(HotSearchType.新浪热榜.name(),list);
// }
//
// /**
// * 网易新闻热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerWangYiHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WangYiHotSearchCrawler.getWangYiHotSearch(date);
// TipsUtils.addHotList(HotSearchType.网易热榜.name(),list);
// }
//
// /**
// * 网易新闻跟帖热议的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerWangYiHotComment(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WangYiHotSearchCrawler.getWangYicomment(date);
// TipsUtils.addHotList(HotSearchType.网易跟帖热议.name(),list);
// }
//
// /**
// * 凤凰新闻热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerFengHuangHotData(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = FengHuangSearchCrawler.getFengHuangHotData(date);
// TipsUtils.addHotList(HotSearchType.凤凰新闻热榜.name(),list);
// }
//
// /**
// * 凤凰新闻热搜的采集
// */
//// @Async(value = "myScheduler")
//// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerFengHuangHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = FengHuangSearchCrawler.getFengHuangHotSearch(date);
// TipsUtils.addHotList(HotSearchType.凤凰新闻热搜.name(),list);
// }
//
// /**
// * 腾讯较真辟谣榜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerTengXunVerificationHotSearch(){
// logger.info("{},腾讯较真辟谣榜开始采集", new Date());
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = TengXunCrawler.getTengXunVerificationList(date);
// logger.info("腾讯较真辟谣榜本轮采集数量:{}",list.size());
// TipsUtils.addHotList(HotSearchType.腾讯较真榜.name(), list);
// logger.info("{},腾讯较真辟谣榜采集结束", new Date());
// }
//
// /**
// * 搜狐话题的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerSouHuTopic(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = SouhuTopicCrawler.getSouhuTopic(date);
// TipsUtils.addHotList(HotSearchType.搜狐话题.name(),list);
// }
//
// /**
// * 知乎热搜话题的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhihuHotTopic(){
// logger.info("知乎热搜话题开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = ZhihuTopicSearchCrawler.getZhihuTopicSearch(date);
// logger.info("{}, 知乎热搜话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.知乎热搜榜单.name(),list);
// logger.info("知乎热搜话题采集结束...");
// }
//
// /**
// * 微博预热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerWeiBoPreheat(){
// logger.info("微博预热榜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboPreheatSearch(date);
// logger.info("{},微博预热榜此轮采集到的数据量为:{}", new Date(),Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.微博预热榜.name(),list);
// logger.info("微博预热榜采集结束...");
// }
//
// /**
// * 知乎热搜数码分类采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhiHuDigital(){
// this.crawlerZhiHuChild(DIGITAL);
// }
//
// /**
// * 知乎热搜国际分类采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhiHuFocus(){
// this.crawlerZhiHuChild(FOCUS);
// }
//
// /**
// * 知乎热搜时事分类采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhiHuDepth(){
// this.crawlerZhiHuChild(DEPTH);
// }
//
// /**
// * maimai采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "30 0/30 * * * ? ")
// public void crawlerMaiMaiHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
// int i=0;
// while (list.size()==0 && i<10){
// ZhiWeiTools.sleep(5000L);
// list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
// i++;
// i++;
// logger.info("更新进度:{}",i*100/documentList.size());
// }
// }
// TipsUtils.addHotList(HotSearchType.脉脉热榜.name(),list);
// logger.info("更新结束");
// }
//
// /**
// * B站排行榜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "30 * * * * ? ")
// public void crawlerBilibiliHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list =BililiCrawler.getBilibiliHotSearch(date);
// TipsUtils.addHotList(HotSearchType.B站排行榜.name(),list);
// }
//
// /**
// * B站热搜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "30 * * * * ? ")
// public void crawlerBilibiliHotData() {
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = BililiCrawler.getBiHotData(date);
// TipsUtils.addHotList(HotSearchType.B站热搜.name(),list);
// }
//
// /**
// * 微博超话的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 0 0/3 * * ? ")
// public void crawlerWeiBoSuperTopic(){
// logger.info("微博超话采集开始........");
// Date date = DateUtils.getMillSecondTime(new Date());
// WeiboSuperTopicDAO weiboTopicDAO = new WeiboSuperTopicDAO();
// List<WeiboSuperTopic> list = WeiboSuperTopicCrawler.startCrawler();
// logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// List<Document> data = new ArrayList<>();
// for(WeiboSuperTopic topic : list){
// logger.info("topic::::{}", topic);
// Document doc = new Document();
// doc.put("_id", topic.getId());
// doc.put("name", topic.getTopicName());
// doc.put("rank", topic.getRank());
// doc.put("score_num", topic.getScore());
// doc.put("fensi_num", topic.getFensi());
// doc.put("post_num", topic.getPostNum());
// doc.put("type", topic.getType());
// doc.put("day", topic.getDay());
// doc.put("time", topic.getTime());
// doc.put("url", topic.getUrl());
// data.add(doc);
// }
// weiboTopicDAO.addTopicList(data);
// logger.info("微博话题采集结束........");
// }
//
//
//// @Async(value = "myScheduler")
//// @Scheduled(cron = "0 05 09 * * ? ")
//// public void updateWeiboHistory(){
//// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
//// List<Document> documentList = hotSearchCacheDAO.getHotSearchList();
//// int i=0;
//// for (Document document : documentList){
//// document = WeiboHotSearchCrawler.updateWeiBoTopic(document);
//// if(document != null){
//// hotSearchCacheDAO.updateWeibo(document,document.getString("_id"));
//// ZhiWeiTools.sleep(500L);
//// }
//// i++;
//// logger.info("更新进度:{}",i*100/documentList.size());
//// }
//// logger.info("更新结束");
//// }
//
// /**
// * 知乎子类采集函数
// * @param type
// */
// private void crawlerZhiHuChild(String type){
// Date date = DateUtils.getMillSecondTime(new Date());
// String name = this.getTypeName(type);
// logger.info("知乎{}话题热榜采集开始...", name);
// List<HotSearchList> list = ZhihuChildHotSearchCrawler.getZhihuTopicSearch(type,name,date);
// logger.info("{}, 知乎{}话题此轮采集到的数据量为:{}", new Date(),name, Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(name,list);
// logger.info("知乎{}话题热榜采集结束...", name);
// }
//
// private String getTypeName(String type){
// String name;
// switch (type) {
// case "digital":
// name = "数码";
// break;
// case "focus":
// name = "国际";
// break;
// case "depth":
// name = "时事";
// break;
// default:
// name = "";
// }
// return name;
// }
// /**
// *快手热榜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerKuaiShou(){
// logger.info("快手热榜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> kuaiShouList = KuaiShouHotSearchCrawler.KuaiShouHotSearchCrawler(date);
// logger.info("{}, 快手此轮采集到的数据量为:{}", new Date(), kuaiShouList != null ? kuaiShouList.size() : 0);
// TipsUtils.addHotList(HotSearchType.快手热榜.name(), kuaiShouList);
// logger.info("快手热榜采集结束...");
// }
// }
/**
 * Collects the Zhihu hot list for a single sub-category and forwards it to {@code TipsUtils}.
 *
 * <p>The display name for {@code type} is resolved via {@link #getTypeName(String)} and is used
 * both in the log messages and as the list name handed to {@code TipsUtils.addHotList}.
 *
 * @param type sub-category key passed through to the crawler and to {@code getTypeName}
 */
private void crawlerZhiHuChild(String type) {
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final String categoryName = this.getTypeName(type);
    logger.info("知乎{}话题热榜采集开始...", categoryName);

    final List<HotSearchList> hotItems =
            ZhihuChildHotSearchCrawler.getZhihuTopicSearch(type, categoryName, crawlTime);

    // A null result is reported as zero items; the list itself is still forwarded unchanged.
    int collected = 0;
    if (hotItems != null) {
        collected = hotItems.size();
    }
    logger.info("{}, 知乎{}话题此轮采集到的数据量为:{}", new Date(), categoryName, Integer.valueOf(collected));

    TipsUtils.addHotList(categoryName, hotItems);
    logger.info("知乎{}话题热榜采集结束...", categoryName);
}
/**
 * Maps a Zhihu sub-category key to its Chinese display name.
 *
 * @param type sub-category key; must not be {@code null} (a string switch on {@code null}
 *             throws {@link NullPointerException})
 * @return {@code "数码"} for {@code "digital"}, {@code "国际"} for {@code "focus"},
 *         {@code "时事"} for {@code "depth"}, and the empty string for any other key
 */
private String getTypeName(String type) {
    switch (type) {
        case "digital":
            return "数码";
        case "focus":
            return "国际";
        case "depth":
            return "时事";
        default:
            // Unknown keys deliberately map to an empty display name.
            return "";
    }
}
/**
 * Scheduled collection of the Kuaishou hot list.
 *
 * <p>Fetches the list through {@code KuaiShouHotSearchCrawler}, logs how many entries were
 * returned (zero when the crawler returns {@code null}) and forwards the list to
 * {@code TipsUtils.addHotList} under the {@code HotSearchType.快手热榜} name.
 *
 * <p>NOTE(review): the six-field cron expression appears to fire at second 0 of every
 * minute, executed on the "myScheduler" executor — confirm against the scheduler config.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerKuaiShou() {
    logger.info("快手热榜开始采集...");

    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotItems = KuaiShouHotSearchCrawler.KuaiShouHotSearchCrawler(crawlTime);

    // A null result is logged as zero entries; the list is still passed on as-is.
    int collected = 0;
    if (hotItems != null) {
        collected = hotItems.size();
    }
    logger.info("{}, 快手此轮采集到的数据量为:{}", new Date(), collected);

    TipsUtils.addHotList(HotSearchType.快手热榜.name(), hotItems);
    logger.info("快手热榜采集结束...");
}
}
}
chenweitao
@chenweitao
mentioned in commit
37d43810
Jul 12, 2021
mentioned in commit
37d43810
mentioned in commit 37d43810c159c654a5fafab5c08695a669389c77
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment