Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
811c679b
Commit
811c679b
authored
Oct 31, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改定时器启动方式
parent
89981f4d
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
602 additions
and
581 deletions
+602
-581
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+164
-164
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+59
-39
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+65
-64
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+66
-66
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
+58
-58
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
+64
-64
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
+63
-63
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
+63
-63
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
811c679b
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.jsoup.Jsoup
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
org.jsoup.select.Elements
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.mail.SendMailWeibo
;
import
com.zhiwei.searchhotcrawler.mail.SendMailWeibo
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
/**
/**
* @ClassName: WeiboHotSearch
* @ClassName: WeiboHotSearch
* @Description: TODO(微博实时热搜采集)
* @Description: TODO(微博实时热搜采集)
* @author hero
* @author hero
* @date 2017年9月15日 上午10:54:31
* @date 2017年9月15日 上午10:54:31
*/
*/
public
class
WeiboHotSearchCrawler
{
public
class
WeiboHotSearchCrawler
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WeiboHotSearchCrawler
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WeiboHotSearchCrawler
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
/**
/**
* @Title: weiboHotSearchTest
* @Title: weiboHotSearchTest
* @author hero
* @author hero
* @Description: TODO(PC端微博热搜采集)
* @Description: TODO(PC端微博热搜采集)
* @param 设定文件
* @param 设定文件
* @return void 返回类型
* @return void 返回类型
*/
*/
public
static
List
<
HotSearchList
>
weiboHotSearch
(){
public
static
List
<
HotSearchList
>
weiboHotSearch
(){
String
url
=
"https://s.weibo.com/top/summary?cate=realtimehot"
;
String
url
=
"https://s.weibo.com/top/summary?cate=realtimehot"
;
List
<
HotSearchList
>
list
=
new
ArrayList
<
HotSearchList
>();
List
<
HotSearchList
>
list
=
new
ArrayList
<
HotSearchList
>();
for
(
int
i
=
0
;
i
<
3
;
i
++){
for
(
int
i
=
0
;
i
<
3
;
i
++){
String
htmlBody
=
null
;
String
htmlBody
=
null
;
try
{
try
{
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"pl_top_realtimehot"
)){
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"pl_top_realtimehot"
)){
try
{
try
{
// String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0];
// String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0];
// script = script.replace("(", "").replace(")", "");
// script = script.replace("(", "").replace(")", "");
// JSONObject json = JSONObject.parseObject(script);
// JSONObject json = JSONObject.parseObject(script);
// String html = json.getString("html");
// String html = json.getString("html");
Document
document
=
Jsoup
.
parse
(
htmlBody
);
Document
document
=
Jsoup
.
parse
(
htmlBody
);
Elements
elements
=
document
.
select
(
"div#pl_top_realtimehot"
).
select
(
"tbody"
).
select
(
"tr"
);
Elements
elements
=
document
.
select
(
"div#pl_top_realtimehot"
).
select
(
"tbody"
).
select
(
"tr"
);
for
(
Element
element
:
elements
){
for
(
Element
element
:
elements
){
try
{
try
{
String
id
=
"http://s.weibo.com"
+
element
.
select
(
"td.td-02"
).
select
(
"a"
).
attr
(
"href"
);
String
id
=
"http://s.weibo.com"
+
element
.
select
(
"td.td-02"
).
select
(
"a"
).
attr
(
"href"
);
String
name
=
element
.
select
(
"td.td-02"
).
select
(
"a"
).
text
();
String
name
=
element
.
select
(
"td.td-02"
).
select
(
"a"
).
text
();
String
num
=
!
element
.
select
(
"td.td-02"
).
select
(
"span"
).
text
().
equals
(
""
)?
element
.
select
(
"td.td-02"
).
select
(
"span"
).
text
():
"0"
;
String
num
=
!
element
.
select
(
"td.td-02"
).
select
(
"span"
).
text
().
equals
(
""
)?
element
.
select
(
"td.td-02"
).
select
(
"span"
).
text
():
"0"
;
String
rank
=
!
element
.
select
(
"td[class=\"td-01 ranktop\"]"
).
text
().
equals
(
""
)?
element
.
select
(
"td[class=\"td-01 ranktop\"]"
).
text
():
"-1"
;
String
rank
=
!
element
.
select
(
"td[class=\"td-01 ranktop\"]"
).
text
().
equals
(
""
)?
element
.
select
(
"td[class=\"td-01 ranktop\"]"
).
text
():
"-1"
;
int
hotCount
=
Integer
.
valueOf
(
num
);
int
hotCount
=
Integer
.
valueOf
(
num
);
int
rankCount
=
Integer
.
valueOf
(
rank
);
int
rankCount
=
Integer
.
valueOf
(
rank
);
HotSearchList
hotSearch
=
new
HotSearchList
(
id
,
name
,
hotCount
,
true
,
rankCount
,
HotSearchType
.
微博热搜
.
name
());
HotSearchList
hotSearch
=
new
HotSearchList
(
id
,
name
,
hotCount
,
true
,
rankCount
,
HotSearchType
.
微博热搜
.
name
());
list
.
add
(
hotSearch
);
list
.
add
(
hotSearch
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
SendMailWeibo
.
sendMail
(
"微博热搜采集出现问题"
,
"859548429@qq.com"
);
SendMailWeibo
.
sendMail
(
"微博热搜采集出现问题"
,
"859548429@qq.com"
);
logger
.
error
(
"解析微博时时热搜时出现解析错误"
,
e
);
logger
.
error
(
"解析微博时时热搜时出现解析错误"
,
e
);
continue
;
continue
;
}
}
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析微博时时热搜时出现解析错误,数据不是json结构"
,
e
.
fillInStackTrace
());
logger
.
error
(
"解析微博时时热搜时出现解析错误,数据不是json结构"
,
e
.
fillInStackTrace
());
SendMailWeibo
.
sendMail
(
"微博热搜采集出现问题"
,
"859548429@qq.com"
);
SendMailWeibo
.
sendMail
(
"微博热搜采集出现问题"
,
"859548429@qq.com"
);
return
null
;
return
null
;
}
}
}
else
{
}
else
{
SendMailWeibo
.
sendMail
(
"微博热搜采集出现问题"
,
"859548429@qq.com"
);
SendMailWeibo
.
sendMail
(
"微博热搜采集出现问题"
,
"859548429@qq.com"
);
logger
.
info
(
"解析微博时时热搜时出现解析错误,页面结构有问题"
);
logger
.
info
(
"解析微博时时热搜时出现解析错误,页面结构有问题"
);
}
}
break
;
break
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
if
(
i
==
2
){
if
(
i
==
2
){
return
list
;
return
list
;
}
else
{
}
else
{
continue
;
continue
;
}
}
}
}
}
}
return
list
;
return
list
;
}
}
/**
/**
* @Title: weiboHotSearchByPhoneTest
* @Title: weiboHotSearchByPhoneTest
* @author hero
* @author hero
* @Description: TODO(手机端Iphone 微博热搜采集)
* @Description: TODO(手机端Iphone 微博热搜采集)
* @param 设定文件
* @param 设定文件
* @return void 返回类型
* @return void 返回类型
*/
*/
public
static
List
<
HotSearchList
>
weiboHotSearchByPhone
(){
public
static
List
<
HotSearchList
>
weiboHotSearchByPhone
(){
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"
;
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"
;
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
);
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
);
String
htmlBody
;
String
htmlBody
;
try
{
try
{
List
<
HotSearchList
>
result
=
new
ArrayList
<
HotSearchList
>();
List
<
HotSearchList
>
result
=
new
ArrayList
<
HotSearchList
>();
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
();
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
();
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"cards"
)){
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"cards"
)){
try
{
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONObject
(
"data"
);
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONObject
(
"data"
);
JSONArray
cards
=
json
.
getJSONArray
(
"cards"
);
JSONArray
cards
=
json
.
getJSONArray
(
"cards"
);
int
rank
=
1
;
int
rank
=
1
;
for
(
int
i
=
0
;
i
<
cards
.
size
();
i
++){
for
(
int
i
=
0
;
i
<
cards
.
size
();
i
++){
try
{
try
{
JSONObject
card
=
cards
.
getJSONObject
(
i
);
JSONObject
card
=
cards
.
getJSONObject
(
i
);
JSONArray
cardGroup
=
card
.
getJSONArray
(
"card_group"
);
JSONArray
cardGroup
=
card
.
getJSONArray
(
"card_group"
);
String
title
=
card
.
getString
(
"title"
);
String
title
=
card
.
getString
(
"title"
);
boolean
hot
=
true
;
boolean
hot
=
true
;
if
(
title
.
contains
(
"实时上升热点"
)){
if
(
title
.
contains
(
"实时上升热点"
)){
hot
=
false
;
hot
=
false
;
rank
=
50
;
rank
=
50
;
}
}
for
(
int
j
=
0
;
j
<
cardGroup
.
size
();
j
++){
for
(
int
j
=
0
;
j
<
cardGroup
.
size
();
j
++){
JSONObject
cardInfo
=
cardGroup
.
getJSONObject
(
j
);
JSONObject
cardInfo
=
cardGroup
.
getJSONObject
(
j
);
String
name
=
cardInfo
.
getString
(
"desc"
);
String
name
=
cardInfo
.
getString
(
"desc"
);
int
hotCount
=
cardInfo
.
getIntValue
(
"desc_extr"
);
int
hotCount
=
cardInfo
.
getIntValue
(
"desc_extr"
);
String
id
=
"http://s.weibo.com/weibo/"
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
)
+
"&Refer=top"
;
String
id
=
"http://s.weibo.com/weibo/"
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
)
+
"&Refer=top"
;
HotSearchList
hotSearch
=
new
HotSearchList
(
id
,
name
,
hotCount
,
hot
,
rank
,
HotSearchType
.
微博热搜
.
name
());
HotSearchList
hotSearch
=
new
HotSearchList
(
id
,
name
,
hotCount
,
hot
,
rank
,
HotSearchType
.
微博热搜
.
name
());
logger
.
info
(
"采集到的数据:::{}"
,
hotSearch
);
logger
.
info
(
"采集到的数据:::{}"
,
hotSearch
);
result
.
add
(
hotSearch
);
result
.
add
(
hotSearch
);
rank
++;
rank
++;
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析微博时时热搜时出现解析错误"
,
e
.
fillInStackTrace
());
logger
.
error
(
"解析微博时时热搜时出现解析错误"
,
e
);
continue
;
continue
;
}
}
}
}
return
result
;
return
result
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析微博时时热搜时出现解析错误,数据不是json结构"
,
e
.
fillInStackTrace
());
logger
.
error
(
"解析微博时时热搜时出现解析错误,数据不是json结构"
,
e
);
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
}
else
{
}
else
{
logger
.
info
(
"解析微博时时热搜时出现解析错误,页面结构有问题"
);
logger
.
info
(
"解析微博时时热搜时出现解析错误,页面结构有问题"
);
}
}
}
catch
(
IOException
e1
)
{
}
catch
(
IOException
e1
)
{
logger
.
error
(
"解析微博时时热搜时出现连接失败"
,
e1
.
fillInStackTrace
());
logger
.
error
(
"解析微博时时热搜时出现连接失败"
,
e1
);
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
811c679b
package
com
.
zhiwei
.
searchhotcrawler
.
run
;
package
com
.
zhiwei
.
searchhotcrawler
.
run
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.searchhotcrawler.cache.CacheListener
;
import
com.zhiwei.searchhotcrawler.cache.CacheListener
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.DouyinHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.DouyinHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SougoHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SougoHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun
;
import
com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun
;
import
com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.WeiboTopicRun
;
import
com.zhiwei.searchhotcrawler.timer.WeiboTopicRun
;
import
com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
java.util.concurrent.Executors
;
public
class
HotSearchRun
{
import
java.util.concurrent.ScheduledExecutorService
;
import
java.util.concurrent.TimeUnit
;
public
static
void
main
(
String
[]
args
)
{
ProxyFactory
.
init
(
ProxyConfig
.
registry
,
ProxyConfig
.
group
,
GroupType
.
PROVIDER
,
10000013
);
public
class
HotSearchRun
{
new
UpdateWechatUserRun
().
start
();
public
static
void
main
(
String
[]
args
)
{
ZhiWeiTools
.
sleep
(
10000
);
new
CacheListener
().
startListen
();
ProxyFactory
.
init
(
ProxyConfig
.
registry
,
ProxyConfig
.
group
,
GroupType
.
PROVIDER
,
10000013
);
//采集程序启动
new
WeiboHotSearchRun
().
start
();
new
UpdateWechatUserRun
().
start
();
new
BaiduHotSearchRun
().
start
();
ZhiWeiTools
.
sleep
(
10000
);
new
SougoHotSearchRun
().
start
();
new
CacheListener
().
startListen
();
new
DouyinHotSearchRun
().
start
();
new
ZhihuHotSearchRun
().
start
();
ScheduledExecutorService
scheduledThreadPool
=
Executors
.
newScheduledThreadPool
(
6
);
new
WeiboTopicRun
().
start
();
//推送程序启动
scheduledThreadPool
.
scheduleAtFixedRate
(
new
WeiboHotSearchRun
(),
0
,
1
,
TimeUnit
.
MINUTES
);
new
SendWeiboHotSearchRun
().
start
();
new
SendZhihuHotSearchRun
().
start
();
scheduledThreadPool
.
scheduleAtFixedRate
(
new
BaiduHotSearchRun
(),
0
,
5
,
TimeUnit
.
MINUTES
);
}
}
scheduledThreadPool
.
scheduleAtFixedRate
(
new
SougoHotSearchRun
(),
0
,
5
,
TimeUnit
.
MINUTES
);
scheduledThreadPool
.
scheduleAtFixedRate
(
new
DouyinHotSearchRun
(),
0
,
10
,
TimeUnit
.
MINUTES
);
scheduledThreadPool
.
scheduleAtFixedRate
(
new
ZhihuHotSearchRun
(),
0
,
10
,
TimeUnit
.
MINUTES
);
scheduledThreadPool
.
scheduleAtFixedRate
(
new
WeiboTopicRun
(),
0
,
1
,
TimeUnit
.
DAYS
);
//采集程序启动
// new WeiboHotSearchRun().start();
// new BaiduHotSearchRun().start();
// new SougoHotSearchRun().start();
// new DouyinHotSearchRun().start();
// new ZhihuHotSearchRun().start();
// new WeiboTopicRun().start();
//推送程序启动
new
SendWeiboHotSearchRun
().
start
();
new
SendZhihuHotSearchRun
().
start
();
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
View file @
811c679b
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Objects
;
import
java.util.Objects
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
BaiduHotSearchRun
extends
Thread
{
public
class
BaiduHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
BaiduHotSearchRun
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
BaiduHotSearchRun
.
class
);
@Override
@Override
public
void
run
()
{
public
void
run
()
{
boolean
f
=
true
;
// boolean f = true;
while
(
f
)
{
// while(f) {
try
{
try
{
getHotList
();
getHotList
();
TimeUnit
.
MINUTES
.
sleep
(
5
);
// TimeUnit.MINUTES.sleep(5);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
// ZhiWeiTools.sleep(60*60*1000);
}
}
ZhiWeiTools
.
sleep
(
50
);
// ZhiWeiTools.sleep(50);
}
// }
}
}
private
void
getHotList
()
{
private
void
getHotList
()
{
logger
.
info
(
"百度风云榜采集开始........"
);
logger
.
info
(
"百度风云榜采集开始........"
);
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
List
<
HotSearchList
>
list
=
BaiDuHotSearchCrawler
.
baiduHotSearch
();
List
<
HotSearchList
>
list
=
BaiDuHotSearchCrawler
.
baiduHotSearch
();
logger
.
info
(
"{}, 此轮百度风云榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
logger
.
info
(
"{}, 此轮百度风云榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
saveDataList
=
new
ArrayList
<>();
List
<
DBObject
>
saveDataList
=
new
ArrayList
<>();
if
(
Objects
.
nonNull
(
list
)
&&
!
list
.
isEmpty
())
{
if
(
Objects
.
nonNull
(
list
)
&&
!
list
.
isEmpty
())
{
list
.
forEach
(
baiduHotSearch
->{
list
.
forEach
(
baiduHotSearch
->{
int
changeCount
=
hotSearchDAO
.
getChangeCount
(
baiduHotSearch
);
int
changeCount
=
hotSearchDAO
.
getChangeCount
(
baiduHotSearch
);
DBObject
doc
=
new
BasicDBObject
();
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
baiduHotSearch
.
getId
());
doc
.
put
(
"_id"
,
baiduHotSearch
.
getId
());
doc
.
put
(
"name"
,
baiduHotSearch
.
getName
());
doc
.
put
(
"name"
,
baiduHotSearch
.
getName
());
doc
.
put
(
"url"
,
baiduHotSearch
.
getUrl
());
doc
.
put
(
"url"
,
baiduHotSearch
.
getUrl
());
doc
.
put
(
"count"
,
baiduHotSearch
.
getCount
());
doc
.
put
(
"count"
,
baiduHotSearch
.
getCount
());
doc
.
put
(
"day"
,
baiduHotSearch
.
getDay
());
doc
.
put
(
"day"
,
baiduHotSearch
.
getDay
());
doc
.
put
(
"time"
,
baiduHotSearch
.
getTime
());
doc
.
put
(
"time"
,
baiduHotSearch
.
getTime
());
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"rank"
,
baiduHotSearch
.
getRank
());
doc
.
put
(
"rank"
,
baiduHotSearch
.
getRank
());
doc
.
put
(
"type"
,
baiduHotSearch
.
getType
());
doc
.
put
(
"type"
,
baiduHotSearch
.
getType
());
saveDataList
.
add
(
doc
);
saveDataList
.
add
(
doc
);
});
});
}
}
hotSearchDAO
.
addHotSearchList
(
saveDataList
);
hotSearchDAO
.
addHotSearchList
(
saveDataList
);
logger
.
info
(
"百度风云榜采集结束........"
);
logger
.
info
(
"百度风云榜采集结束........"
);
}
}
}
}
\ No newline at end of file
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
View file @
811c679b
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
DouyinHotSearchRun
extends
Thread
{
public
class
DouyinHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
DouyinHotSearchRun
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
DouyinHotSearchRun
.
class
);
@Override
@Override
public
void
run
()
{
public
void
run
()
{
boolean
f
=
true
;
// boolean f = true;
while
(
f
)
{
// while(f) {
try
{
try
{
getHotList
();
getHotList
();
TimeUnit
.
MINUTES
.
sleep
(
10
);
// TimeUnit.MINUTES.sleep(10);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
// ZhiWeiTools.sleep(60*60*1000);
}
}
ZhiWeiTools
.
sleep
(
50
);
// ZhiWeiTools.sleep(50);
}
// }
}
}
/**
/**
* 获取热搜列表
* 获取热搜列表
* TODO
* TODO
* @return void
* @return void
*/
*/
private
void
getHotList
()
{
private
void
getHotList
()
{
logger
.
info
(
"抖音热搜榜采集开始........"
);
logger
.
info
(
"抖音热搜榜采集开始........"
);
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
List
<
HotSearchList
>
list
=
DouyinHotSearchCrawler
.
getMobileDouyinHotList
();
List
<
HotSearchList
>
list
=
DouyinHotSearchCrawler
.
getMobileDouyinHotList
();
logger
.
info
(
"{}, 抖音热搜榜此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
logger
.
info
(
"{}, 抖音热搜榜此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<>();
List
<
DBObject
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
douyinHotSearch
:
list
){
for
(
HotSearchList
douyinHotSearch
:
list
){
int
changeCount
=
hotSearchDAO
.
getChangeCount
(
douyinHotSearch
);
int
changeCount
=
hotSearchDAO
.
getChangeCount
(
douyinHotSearch
);
DBObject
douyin
=
new
BasicDBObject
();
DBObject
douyin
=
new
BasicDBObject
();
douyin
.
put
(
"_id"
,
douyinHotSearch
.
getId
());
douyin
.
put
(
"_id"
,
douyinHotSearch
.
getId
());
douyin
.
put
(
"name"
,
douyinHotSearch
.
getName
());
douyin
.
put
(
"name"
,
douyinHotSearch
.
getName
());
douyin
.
put
(
"rank"
,
douyinHotSearch
.
getRank
());
douyin
.
put
(
"rank"
,
douyinHotSearch
.
getRank
());
douyin
.
put
(
"count"
,
douyinHotSearch
.
getCount
());
douyin
.
put
(
"count"
,
douyinHotSearch
.
getCount
());
douyin
.
put
(
"day"
,
douyinHotSearch
.
getDay
());
douyin
.
put
(
"day"
,
douyinHotSearch
.
getDay
());
douyin
.
put
(
"time"
,
douyinHotSearch
.
getTime
());
douyin
.
put
(
"time"
,
douyinHotSearch
.
getTime
());
douyin
.
put
(
"changeCount"
,
changeCount
);
douyin
.
put
(
"changeCount"
,
changeCount
);
douyin
.
put
(
"url"
,
null
);
douyin
.
put
(
"url"
,
null
);
douyin
.
put
(
"type"
,
douyinHotSearch
.
getType
());
douyin
.
put
(
"type"
,
douyinHotSearch
.
getType
());
data
.
add
(
douyin
);
data
.
add
(
douyin
);
hotSearchDAO
.
addHotSearch
(
douyin
);
hotSearchDAO
.
addHotSearch
(
douyin
);
}
}
logger
.
info
(
"抖音热搜榜采集结束........"
);
logger
.
info
(
"抖音热搜榜采集结束........"
);
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
View file @
811c679b
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
SougoHotSearchRun
extends
Thread
{
public
class
SougoHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
SougoHotSearchRun
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
SougoHotSearchRun
.
class
);
@Override
@Override
public
void
run
()
{
public
void
run
()
{
boolean
f
=
true
;
// boolean f = true;
while
(
f
)
{
// while(f) {
try
{
try
{
getHotList
();
getHotList
();
TimeUnit
.
MINUTES
.
sleep
(
5
);
// TimeUnit.MINUTES.sleep(5);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
// ZhiWeiTools.sleep(60*60*1000);
}
}
ZhiWeiTools
.
sleep
(
50
);
// ZhiWeiTools.sleep(50);
}
// }
}
}
private
void
getHotList
()
{
private
void
getHotList
()
{
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
logger
.
info
(
"搜狗微信采集开始........"
);
logger
.
info
(
"搜狗微信采集开始........"
);
List
<
HotSearchList
>
list
=
SougoHotSearchCrawler
.
sougoHotSearch
();
List
<
HotSearchList
>
list
=
SougoHotSearchCrawler
.
sougoHotSearch
();
logger
.
info
(
"{}, 此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
logger
.
info
(
"{}, 此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<>();
List
<
DBObject
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
sougoHotSearch
:
list
){
for
(
HotSearchList
sougoHotSearch
:
list
){
DBObject
doc
=
new
BasicDBObject
();
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
sougoHotSearch
.
getId
());
doc
.
put
(
"_id"
,
sougoHotSearch
.
getId
());
doc
.
put
(
"name"
,
sougoHotSearch
.
getName
());
doc
.
put
(
"name"
,
sougoHotSearch
.
getName
());
doc
.
put
(
"url"
,
sougoHotSearch
.
getUrl
());
doc
.
put
(
"url"
,
sougoHotSearch
.
getUrl
());
doc
.
put
(
"day"
,
sougoHotSearch
.
getDay
());
doc
.
put
(
"day"
,
sougoHotSearch
.
getDay
());
doc
.
put
(
"time"
,
sougoHotSearch
.
getTime
());
doc
.
put
(
"time"
,
sougoHotSearch
.
getTime
());
doc
.
put
(
"rank"
,
sougoHotSearch
.
getRank
());
doc
.
put
(
"rank"
,
sougoHotSearch
.
getRank
());
doc
.
put
(
"type"
,
sougoHotSearch
.
getType
());
doc
.
put
(
"type"
,
sougoHotSearch
.
getType
());
data
.
add
(
doc
);
data
.
add
(
doc
);
}
}
hotSearchDAO
.
addHotSearchList
(
data
);
hotSearchDAO
.
addHotSearchList
(
data
);
logger
.
info
(
"搜狗微信采集结束........"
);
logger
.
info
(
"搜狗微信采集结束........"
);
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
View file @
811c679b
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
WeiboHotSearchRun
extends
Thread
{
public
class
WeiboHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WeiboHotSearchRun
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WeiboHotSearchRun
.
class
);
@Override
@Override
public
void
run
()
{
public
void
run
()
{
boolean
f
=
true
;
// boolean f = true;
while
(
f
)
{
// while(f) {
try
{
try
{
getHotList
();
getHotList
();
TimeUnit
.
MINUTES
.
sleep
(
1
);
// TimeUnit.MINUTES.sleep(1);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
1000
);
// ZhiWeiTools.sleep(60*1000);
}
}
ZhiWeiTools
.
sleep
(
50
);
// ZhiWeiTools.sleep(50);
}
// }
}
}
private
void
getHotList
()
{
private
void
getHotList
()
{
logger
.
info
(
"微博话题采集开始........"
);
logger
.
info
(
"微博话题采集开始........"
);
HotSearchListDAO
weiboHotSearchDAO
=
new
HotSearchListDAO
();
HotSearchListDAO
weiboHotSearchDAO
=
new
HotSearchListDAO
();
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
List
<
HotSearchList
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearchByPhone
();
List
<
HotSearchList
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearchByPhone
();
logger
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
logger
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<>();
List
<
DBObject
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
weiboHotSearch
:
list
){
for
(
HotSearchList
weiboHotSearch
:
list
){
int
changeCount
=
weiboHotSearchDAO
.
getChangeCount
(
weiboHotSearch
);
int
changeCount
=
weiboHotSearchDAO
.
getChangeCount
(
weiboHotSearch
);
DBObject
doc
=
new
BasicDBObject
();
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
weiboHotSearch
.
getId
());
doc
.
put
(
"_id"
,
weiboHotSearch
.
getId
());
doc
.
put
(
"name"
,
weiboHotSearch
.
getName
());
doc
.
put
(
"name"
,
weiboHotSearch
.
getName
());
doc
.
put
(
"url"
,
weiboHotSearch
.
getUrl
());
doc
.
put
(
"url"
,
weiboHotSearch
.
getUrl
());
doc
.
put
(
"count"
,
weiboHotSearch
.
getCount
());
doc
.
put
(
"count"
,
weiboHotSearch
.
getCount
());
doc
.
put
(
"hot"
,
weiboHotSearch
.
isHot
());
doc
.
put
(
"hot"
,
weiboHotSearch
.
isHot
());
doc
.
put
(
"day"
,
weiboHotSearch
.
getDay
());
doc
.
put
(
"day"
,
weiboHotSearch
.
getDay
());
doc
.
put
(
"time"
,
weiboHotSearch
.
getTime
());
doc
.
put
(
"time"
,
weiboHotSearch
.
getTime
());
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"rank"
,
weiboHotSearch
.
getRank
());
doc
.
put
(
"rank"
,
weiboHotSearch
.
getRank
());
doc
.
put
(
"type"
,
weiboHotSearch
.
getType
());
doc
.
put
(
"type"
,
weiboHotSearch
.
getType
());
data
.
add
(
doc
);
data
.
add
(
doc
);
}
}
weiboHotSearchDAO
.
addHotSearchList
(
data
);
weiboHotSearchDAO
.
addHotSearchList
(
data
);
logger
.
info
(
"微博话题采集结束........"
);
logger
.
info
(
"微博话题采集结束........"
);
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
View file @
811c679b
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.WeiboTopic
;
import
com.zhiwei.searchhotcrawler.bean.WeiboTopic
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboHuatiCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboHuatiCrawler
;
import
com.zhiwei.searchhotcrawler.dao.WeiboTopicDAO
;
import
com.zhiwei.searchhotcrawler.dao.WeiboTopicDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
WeiboTopicRun
extends
Thread
{
public
class
WeiboTopicRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WeiboTopicRun
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WeiboTopicRun
.
class
);
@Override
@Override
public
void
run
()
{
public
void
run
()
{
boolean
f
=
true
;
// boolean f = true;
while
(
f
)
{
// while(f) {
try
{
try
{
getTopicList
();
getTopicList
();
TimeUnit
.
DAYS
.
sleep
(
1
);
// TimeUnit.DAYS.sleep(1);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
// ZhiWeiTools.sleep(60*60*1000);
}
}
ZhiWeiTools
.
sleep
(
50
);
// ZhiWeiTools.sleep(50);
}
// }
}
}
private
void
getTopicList
()
{
private
void
getTopicList
()
{
WeiboTopicDAO
weiboTopicDAO
=
new
WeiboTopicDAO
();
WeiboTopicDAO
weiboTopicDAO
=
new
WeiboTopicDAO
();
logger
.
info
(
"微博超话采集开始........"
);
logger
.
info
(
"微博超话采集开始........"
);
List
<
WeiboTopic
>
list
=
WeiboHuatiCrawler
.
startCrawler
();
List
<
WeiboTopic
>
list
=
WeiboHuatiCrawler
.
startCrawler
();
logger
.
info
(
"{}, 微博超话此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
logger
.
info
(
"{}, 微博超话此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<>();
List
<
DBObject
>
data
=
new
ArrayList
<>();
for
(
WeiboTopic
topic
:
list
){
for
(
WeiboTopic
topic
:
list
){
logger
.
info
(
"topic::::{}"
,
topic
);
logger
.
info
(
"topic::::{}"
,
topic
);
DBObject
doc
=
new
BasicDBObject
();
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"name"
,
topic
.
getTopicName
());
doc
.
put
(
"name"
,
topic
.
getTopicName
());
doc
.
put
(
"rank"
,
topic
.
getRank
());
doc
.
put
(
"rank"
,
topic
.
getRank
());
doc
.
put
(
"score_num"
,
topic
.
getScore
());
doc
.
put
(
"score_num"
,
topic
.
getScore
());
doc
.
put
(
"fensi_num"
,
topic
.
getFensi
());
doc
.
put
(
"fensi_num"
,
topic
.
getFensi
());
doc
.
put
(
"post_num"
,
topic
.
getPostNum
());
doc
.
put
(
"post_num"
,
topic
.
getPostNum
());
doc
.
put
(
"type"
,
topic
.
getType
());
doc
.
put
(
"type"
,
topic
.
getType
());
doc
.
put
(
"day"
,
topic
.
getDay
());
doc
.
put
(
"day"
,
topic
.
getDay
());
doc
.
put
(
"time"
,
topic
.
getTime
());
doc
.
put
(
"time"
,
topic
.
getTime
());
doc
.
put
(
"url"
,
topic
.
getUrl
());
doc
.
put
(
"url"
,
topic
.
getUrl
());
data
.
add
(
doc
);
data
.
add
(
doc
);
}
}
weiboTopicDAO
.
addTopicList
(
data
);
weiboTopicDAO
.
addTopicList
(
data
);
logger
.
info
(
"微博话题采集结束........"
);
logger
.
info
(
"微博话题采集结束........"
);
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
View file @
811c679b
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
ZhihuHotSearchRun
extends
Thread
{
public
class
ZhihuHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
ZhihuHotSearchRun
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
ZhihuHotSearchRun
.
class
);
@Override
@Override
public
void
run
()
{
public
void
run
()
{
boolean
f
=
true
;
// boolean f = true;
while
(
f
)
{
// while(f) {
try
{
try
{
getHotList
();
getHotList
();
TimeUnit
.
MINUTES
.
sleep
(
10
);
// TimeUnit.MINUTES.sleep(10);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
// ZhiWeiTools.sleep(60*60*1000);
}
}
ZhiWeiTools
.
sleep
(
50
);
// ZhiWeiTools.sleep(50);
}
// }
}
}
private
void
getHotList
()
{
private
void
getHotList
()
{
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
logger
.
info
(
"知乎话题采集开始...,当前线程名字:{}"
,
Thread
.
currentThread
().
getName
());
logger
.
info
(
"知乎话题采集开始...,当前线程名字:{}"
,
Thread
.
currentThread
().
getName
());
List
<
HotSearchList
>
list
=
ZhihuHotSearchCrawler
.
getZhihuHotList
();
List
<
HotSearchList
>
list
=
ZhihuHotSearchCrawler
.
getZhihuHotList
();
List
<
HotSearchList
>
mobilelist
=
ZhihuHotSearchCrawler
.
getMobileZhihuHotList
();
List
<
HotSearchList
>
mobilelist
=
ZhihuHotSearchCrawler
.
getMobileZhihuHotList
();
list
.
addAll
(
mobilelist
);
list
.
addAll
(
mobilelist
);
logger
.
info
(
"{}, 知乎此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
logger
.
info
(
"{}, 知乎此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
for
(
HotSearchList
zhihuHotSearch
:
list
){
for
(
HotSearchList
zhihuHotSearch
:
list
){
DBObject
zhihu
=
new
BasicDBObject
();
DBObject
zhihu
=
new
BasicDBObject
();
zhihu
.
put
(
"_id"
,
zhihuHotSearch
.
getId
());
zhihu
.
put
(
"_id"
,
zhihuHotSearch
.
getId
());
zhihu
.
put
(
"name"
,
zhihuHotSearch
.
getName
());
zhihu
.
put
(
"name"
,
zhihuHotSearch
.
getName
());
zhihu
.
put
(
"url"
,
zhihuHotSearch
.
getUrl
());
zhihu
.
put
(
"url"
,
zhihuHotSearch
.
getUrl
());
zhihu
.
put
(
"count"
,
zhihuHotSearch
.
getCount
());
zhihu
.
put
(
"count"
,
zhihuHotSearch
.
getCount
());
zhihu
.
put
(
"hot"
,
zhihuHotSearch
.
isHot
());
zhihu
.
put
(
"hot"
,
zhihuHotSearch
.
isHot
());
zhihu
.
put
(
"day"
,
zhihuHotSearch
.
getDay
());
zhihu
.
put
(
"day"
,
zhihuHotSearch
.
getDay
());
zhihu
.
put
(
"time"
,
zhihuHotSearch
.
getTime
());
zhihu
.
put
(
"time"
,
zhihuHotSearch
.
getTime
());
zhihu
.
put
(
"changeCount"
,
0
);
zhihu
.
put
(
"changeCount"
,
0
);
zhihu
.
put
(
"rank"
,
zhihuHotSearch
.
getRank
());
zhihu
.
put
(
"rank"
,
zhihuHotSearch
.
getRank
());
zhihu
.
put
(
"type"
,
zhihuHotSearch
.
getType
());
zhihu
.
put
(
"type"
,
zhihuHotSearch
.
getType
());
hotSearchDAO
.
addHotSearch
(
zhihu
);
hotSearchDAO
.
addHotSearch
(
zhihu
);
}
}
logger
.
info
(
"知乎话题采集结束........"
);
logger
.
info
(
"知乎话题采集结束........"
);
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment