Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
c6957bee
Commit
c6957bee
authored
Aug 07, 2020
by
马黎滨
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
新浪,搜狐,凤凰采集
parent
5dbdbf3f
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
276 additions
and
27 deletions
+276
-27
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchType.java
+5
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/FengHuangSearchCrawler.java
+63
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/SouhuTopicCrawler.java
+61
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
+2
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/XinLangHotSearchCrawler.java
+117
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
+2
-20
src/main/java/com/zhiwei/searchhotcrawler/timer/ThreadOneRun.java
+26
-6
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchType.java
View file @
c6957bee
...
@@ -9,5 +9,9 @@ public enum HotSearchType {
...
@@ -9,5 +9,9 @@ public enum HotSearchType {
微博话题
,
微博话题
,
今日头条热搜
,
今日头条热搜
,
知乎热搜榜单
,
知乎热搜榜单
,
腾讯新闻
腾讯新闻
,
新浪热榜
,
新浪热点
,
搜狐话题
,
凤凰新闻热榜
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/FengHuangSearchCrawler.java
0 → 100644
View file @
c6957bee
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
okhttp3.Response
;
import
org.apache.commons.lang3.StringUtils
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
@Log4j2
public
class
FengHuangSearchCrawler
{
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
throwException
(
false
).
retryTimes
(
3
).
build
();
/**
* 获取凤凰新闻热榜
* @return
*/
public
static
List
<
HotSearchList
>
getFengHuangHotList
(){
log
.
info
(
"凤凰新闻热榜开始采集"
);
List
<
HotSearchList
>
list
=
new
ArrayList
<>();
for
(
int
page
=
1
;
page
<=
2
;
page
++)
{
String
url
=
"https://nine.ifeng.com/hotspotlist?gv=7.9.1&page="
+
page
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
log
.
error
(
"凤凰新闻热榜页面连接异常..."
,
e
);
}
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"data"
)){
JSONArray
jsonArray
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONObject
(
"data"
).
getJSONArray
(
"list"
);
if
(
jsonArray
!=
null
){
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++){
Integer
rank
=
i
+
1
;
String
name
=
jsonArray
.
getJSONObject
(
i
).
getJSONObject
(
"hotLabel"
).
getString
(
"desp"
);
String
topicLead
=
jsonArray
.
getJSONObject
(
i
).
getString
(
"title"
);
String
fenghuangUrl
=
jsonArray
.
getJSONObject
(
i
).
getJSONObject
(
"link"
).
getString
(
"weburl"
);
String
hotValue
=
jsonArray
.
getJSONObject
(
i
).
getJSONObject
(
"hotLabel"
).
getString
(
"hotGrade"
);
Integer
count
=
TipsUtils
.
getHotCount
(
hotValue
);
Integer
commentCount
=
jsonArray
.
getJSONObject
(
i
).
getIntValue
(
"commentsall"
);
HotSearchList
hotSearchList
=
new
HotSearchList
(
fenghuangUrl
,
name
,
count
,
rank
,
HotSearchType
.
凤凰新闻热榜
.
name
(),
commentCount
,
topicLead
);
list
.
add
(
hotSearchList
);
}
}
}
}
log
.
info
(
"{}, 此轮凤凰新闻热榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
log
.
info
(
"凤凰新闻热榜采集结束"
);
return
list
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/SouhuTopicCrawler.java
0 → 100644
View file @
c6957bee
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
okhttp3.Response
;
import
org.apache.commons.lang3.StringUtils
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
@Log4j2
public
class
SouhuTopicCrawler
{
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
public
static
List
<
HotSearchList
>
getSouhuTopic
(){
List
<
HotSearchList
>
hotSearchLists
=
new
ArrayList
<>();
log
.
info
(
"搜狐话题榜开始采集..."
);
JSONArray
dataJson
=
null
;
String
htmlBody
=
null
;
String
url
=
"https://api.k.sohu.com/api/news/moment/v2/list.go?pageSize=50"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++){
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
log
.
error
(
"搜狐话题页面连接失败"
,
e
);
}
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"data"
)){
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONObject
(
"data"
);
dataJson
=
jsonObject
.
getJSONObject
(
"topicList"
).
getJSONArray
(
"datas"
);
if
(
dataJson
!=
null
)
{
for
(
int
i
=
0
;
i
<
dataJson
.
size
();
i
++){
Integer
rank
=
i
+
1
;
String
name
=
dataJson
.
getJSONObject
(
i
).
getJSONObject
(
"eventNewsInfo"
).
getString
(
"title"
);
String
hotValue
=
dataJson
.
getJSONObject
(
i
).
getString
(
"value"
);
Integer
count
=
TipsUtils
.
getHotCount
(
hotValue
.
substring
(
0
,
hotValue
.
indexOf
(
"观点"
)));
String
souguUrl
=
dataJson
.
getJSONObject
(
i
).
getJSONObject
(
"eventNewsInfo"
).
getString
(
"h5Link"
);
String
icon
=
dataJson
.
getJSONObject
(
i
).
getJSONObject
(
"attrInfo"
).
getString
(
"displayText"
);
HotSearchList
hotSearchList
=
new
HotSearchList
(
souguUrl
,
name
,
count
,
true
,
rank
,
HotSearchType
.
搜狐话题
.
name
(),
icon
);
hotSearchLists
.
add
(
hotSearchList
);
}
log
.
info
(
"{}, 此轮搜狐话题榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
hotSearchLists
!=
null
?
hotSearchLists
.
size
()
:
0
));
log
.
info
(
"搜狐话题榜采集结束"
);
return
hotSearchLists
;
}
}
ZhiWeiTools
.
sleep
(
3000L
);
}
return
hotSearchLists
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/TengXunCrawler.java
View file @
c6957bee
...
@@ -43,6 +43,7 @@ public class TengXunCrawler {
...
@@ -43,6 +43,7 @@ public class TengXunCrawler {
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"idlist"
))
{
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"idlist"
))
{
JSONObject
topSearch
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
topSearch
=
JSONObject
.
parseObject
(
htmlBody
);
dataJson
=
topSearch
.
getJSONArray
(
"idlist"
).
getJSONObject
(
0
).
getJSONArray
(
"newslist"
);
dataJson
=
topSearch
.
getJSONArray
(
"idlist"
).
getJSONObject
(
0
).
getJSONArray
(
"newslist"
);
if
(
dataJson
!=
null
)
{
for
(
int
i
=
1
;
i
<
dataJson
.
size
();
i
++)
{
for
(
int
i
=
1
;
i
<
dataJson
.
size
();
i
++)
{
Integer
rank
=
i
;
Integer
rank
=
i
;
String
name
=
dataJson
.
getJSONObject
(
i
).
getString
(
"title"
);
String
name
=
dataJson
.
getJSONObject
(
i
).
getString
(
"title"
);
...
@@ -71,6 +72,7 @@ public class TengXunCrawler {
...
@@ -71,6 +72,7 @@ public class TengXunCrawler {
list
.
add
(
hotSearchList
);
list
.
add
(
hotSearchList
);
}
}
}
}
}
ZhiWeiTools
.
sleep
(
3000L
);
ZhiWeiTools
.
sleep
(
3000L
);
}
}
log
.
info
(
"{}, 此轮腾讯新闻热榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
log
.
info
(
"{}, 此轮腾讯新闻热榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/XinLangHotSearchCrawler.java
0 → 100644
View file @
c6957bee
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
okhttp3.Response
;
import
org.apache.commons.lang3.StringUtils
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.select.Elements
;
import
java.io.IOException
;
import
java.util.*
;
@Log4j2
public
class
XinLangHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
/**
* 新浪热榜的采集
* @return
*/
public
static
List
<
HotSearchList
>
getXinLangHotSearch
(){
List
<
HotSearchList
>
hotSearchLists
=
new
ArrayList
<>();
log
.
info
(
"新浪热榜开始采集"
);
String
url
=
"https://sinanews.sina.cn/h5/top_news_list.d.html"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
JSONObject
jsonObject
=
null
;
for
(
int
t
=
0
;
t
<
3
&&
jsonObject
==
null
;
t
++)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
log
.
error
(
"新浪热榜页面连接异常..."
,
e
);
}
if
(
htmlBody
!=
null
)
{
Document
document
=
Jsoup
.
parse
(
htmlBody
);
String
html
=
document
.
getElementsByTag
(
"script"
).
last
().
html
();
jsonObject
=
JSONObject
.
parseObject
(
html
.
substring
(
html
.
indexOf
(
"{"
),
html
.
length
()
-
1
));
// log.info(jsonObject);
JSONArray
jsonArray
=
jsonObject
.
getJSONObject
(
"data"
).
getJSONObject
(
"data"
).
getJSONArray
(
"result"
);
if
(
jsonArray
!=
null
)
{
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
String
name
=
jsonArray
.
getJSONObject
(
i
).
getString
(
"text"
);
Integer
rank
=
i
+
1
;
String
hotValue
=
jsonArray
.
getJSONObject
(
i
).
getString
(
"hotValue"
);
Integer
count
=
TipsUtils
.
getHotCount
(
hotValue
);
String
showTags
=
jsonArray
.
getJSONObject
(
i
).
getString
(
"showTags"
);
String
icon
=
null
;
if
(
showTags
.
contains
(
"新"
))
{
icon
=
"新"
;
}
else
if
(
showTags
.
contains
(
"热"
))
{
icon
=
"热"
;
}
else
if
(
showTags
.
contains
(
"沸"
))
{
icon
=
"沸"
;
}
HotSearchList
hotSearchList
=
new
HotSearchList
(
null
,
name
,
count
,
true
,
rank
,
HotSearchType
.
新浪热榜
.
name
(),
icon
);
hotSearchLists
.
add
(
hotSearchList
);
}
log
.
info
(
"{}, 此轮新浪热榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
hotSearchLists
!=
null
?
hotSearchLists
.
size
()
:
0
));
log
.
info
(
"新浪热榜采集结束"
);
return
hotSearchLists
;
}
}
ZhiWeiTools
.
sleep
(
3000L
);
}
return
hotSearchLists
;
}
/**
* 新浪热点的采集
* @return
*/
public
static
List
<
HotSearchList
>
getXinLangHotSpot
(){
List
<
HotSearchList
>
hotSearchLists
=
new
ArrayList
<>();
log
.
info
(
"新浪热点开始采集"
);
String
url
=
"http://interface.sina.cn/wap_api/hot_rank_data.d.json"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
htmlBody
=
null
;
JSONArray
dataJson
=
null
;
for
(
int
t
=
0
;
t
<
3
&&
dataJson
==
null
;
t
++)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
log
.
error
(
"新浪热点页面连接异常..."
,
e
);
}
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"data"
))
{
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONObject
(
"data"
);
dataJson
=
jsonObject
.
getJSONArray
(
"lists"
);
if
(
dataJson
!=
null
)
{
for
(
int
i
=
0
;
i
<
dataJson
.
size
();
i
++)
{
Integer
rank
=
i
+
1
;
String
name
=
dataJson
.
getJSONObject
(
i
).
getString
(
"title"
);
String
xinlangUrl
=
dataJson
.
getJSONObject
(
i
).
getString
(
"wapurl"
);
Integer
hot
=
dataJson
.
getJSONObject
(
i
).
getIntValue
(
"hot_value"
);
HotSearchList
hotSearchList
=
new
HotSearchList
(
xinlangUrl
,
name
,
hot
,
rank
,
HotSearchType
.
新浪热点
.
name
());
hotSearchLists
.
add
(
hotSearchList
);
}
log
.
info
(
"{}, 此轮新浪热点采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
hotSearchLists
!=
null
?
hotSearchLists
.
size
()
:
0
));
log
.
info
(
"新浪热点采集结束"
);
return
hotSearchLists
;
}
}
ZhiWeiTools
.
sleep
(
3000L
);
}
return
hotSearchLists
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuChildHotSearchCrawler.java
View file @
c6957bee
...
@@ -9,6 +9,7 @@ import com.zhiwei.crawler.proxy.ProxyHolder;
...
@@ -9,6 +9,7 @@ import com.zhiwei.crawler.proxy.ProxyHolder;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
okhttp3.Request
;
...
@@ -54,7 +55,7 @@ public class ZhihuChildHotSearchCrawler {
...
@@ -54,7 +55,7 @@ public class ZhihuChildHotSearchCrawler {
Integer
rank
=
i
+
1
;
Integer
rank
=
i
+
1
;
String
name
=
jsonObject
.
getJSONObject
(
"title_area"
).
getString
(
"text"
);
String
name
=
jsonObject
.
getJSONObject
(
"title_area"
).
getString
(
"text"
);
String
hotCountString
=
jsonObject
.
getJSONObject
(
"metrics_area"
).
getString
(
"text"
);
String
hotCountString
=
jsonObject
.
getJSONObject
(
"metrics_area"
).
getString
(
"text"
);
Integer
count
=
getHotCount
(
hotCountString
);
Integer
count
=
TipsUtils
.
getHotCount
(
hotCountString
.
substring
(
0
,
hotCountString
.
indexOf
(
"领域热度"
))
);
String
childUrl
=
jsonObject
.
getJSONObject
(
"link"
).
getString
(
"url"
);
String
childUrl
=
jsonObject
.
getJSONObject
(
"link"
).
getString
(
"url"
);
HotSearchList
hotSearchList
=
new
HotSearchList
(
childUrl
,
name
,
count
,
rank
,
HotSearchType
.
知乎热搜
.
name
()
+
typeName
+
"分类"
);
HotSearchList
hotSearchList
=
new
HotSearchList
(
childUrl
,
name
,
count
,
rank
,
HotSearchType
.
知乎热搜
.
name
()
+
typeName
+
"分类"
);
list
.
add
(
hotSearchList
);
list
.
add
(
hotSearchList
);
...
@@ -64,23 +65,4 @@ public class ZhihuChildHotSearchCrawler {
...
@@ -64,23 +65,4 @@ public class ZhihuChildHotSearchCrawler {
}
}
return
list
;
return
list
;
}
}
/**
 * Extracts the numeric heat value from a display string such as {@code "1.5万领域热度"}.
 *
 * <p>A value followed by 万 is multiplied by 10,000 and a value followed by 亿 by
 * 100,000,000; everything from the unit onwards is ignored. Without a unit, the text
 * before {@code "领域热度"} (or the whole string when that suffix is absent) is parsed.
 * The result is rounded to the nearest integer and clamped to the {@code int} range.
 *
 * @param hotCountString heat text; must not be {@code null}
 * @return the heat value
 * @throws NumberFormatException when the numeric part cannot be parsed
 */
private static Integer getHotCount(String hotCountString) {
    String number = hotCountString;
    double multiplier = 1D;
    if (number.contains("万")) {
        number = number.substring(0, number.indexOf("万"));
        multiplier = 10000D;
    } else if (number.contains("亿")) {
        number = number.substring(0, number.indexOf("亿"));
        // 1 亿 = 10^8.
        multiplier = 100000000D;
    } else {
        int suffixIndex = number.indexOf("领域热度");
        if (suffixIndex >= 0) {
            number = number.substring(0, suffixIndex);
        }
    }
    // Round instead of truncating: 0.29 * 10000 evaluates to 2899.99... in binary floating point.
    long rounded = Math.round(Double.parseDouble(number.trim()) * multiplier);
    return (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, rounded));
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/ThreadOneRun.java
View file @
c6957bee
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.FengHuangSearchCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.SouhuTopicCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.TengXunCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.TengXunCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.XinLangHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
...
@@ -15,13 +18,15 @@ import java.util.concurrent.TimeUnit;
...
@@ -15,13 +18,15 @@ import java.util.concurrent.TimeUnit;
@Log4j2
@Log4j2
public
class
ThreadOneRun
extends
Thread
{
public
class
ThreadOneRun
extends
Thread
{
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
private
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
@Override
@Override
public
void
run
()
{
public
void
run
()
{
boolean
f
=
true
;
boolean
f
=
true
;
while
(
f
)
{
while
(
f
)
{
try
{
try
{
getHotList
();
getHotList
();
TimeUnit
.
MINUTES
.
sleep
(
1
);
TimeUnit
.
MINUTES
.
sleep
(
3
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
1000
);
ZhiWeiTools
.
sleep
(
60
*
1000
);
...
@@ -31,15 +36,30 @@ public class ThreadOneRun extends Thread {
...
@@ -31,15 +36,30 @@ public class ThreadOneRun extends Thread {
}
}
private
void
getHotList
(){
private
void
getHotList
(){
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
List
<
HotSearchList
>
tengXunlist
=
TengXunCrawler
.
getTengXunHotList
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
addHotList
(
"腾讯新闻"
,
tengXunlist
);
List
<
HotSearchList
>
list
=
TengXunCrawler
.
getTengXunHotList
();
ZhiWeiTools
.
sleep
(
3000L
);
List
<
HotSearchList
>
xinLanglist
=
XinLangHotSearchCrawler
.
getXinLangHotSearch
();
addHotList
(
"新浪热榜"
,
xinLanglist
);
ZhiWeiTools
.
sleep
(
3000L
);
List
<
HotSearchList
>
souhuList
=
SouhuTopicCrawler
.
getSouhuTopic
();
addHotList
(
"搜狐话题"
,
souhuList
);
ZhiWeiTools
.
sleep
(
3000L
);
List
<
HotSearchList
>
xinLangHotList
=
XinLangHotSearchCrawler
.
getXinLangHotSpot
();
addHotList
(
"新浪热点"
,
xinLangHotList
);
ZhiWeiTools
.
sleep
(
3000L
);
List
<
HotSearchList
>
fengHuangHotList
=
FengHuangSearchCrawler
.
getFengHuangHotList
();
addHotList
(
"凤凰新闻热榜"
,
fengHuangHotList
);
}
private
void
addHotList
(
String
type
,
List
<
HotSearchList
>
list
){
if
(
list
==
null
||
list
.
size
()
==
0
){
if
(
list
==
null
||
list
.
size
()
==
0
){
TipsUtils
.
sendTips
(
"腾讯新闻"
,
new
Date
());
TipsUtils
.
sendTips
(
type
,
new
Date
());
}
else
{
}
else
{
List
<
Document
>
data
=
hotSearchCacheDAO
.
addData
(
list
);
List
<
Document
>
data
=
hotSearchCacheDAO
.
addData
(
list
);
hotSearchDAO
.
addHotSearchList
(
data
);
hotSearchDAO
.
addHotSearchList
(
data
);
TipsUtils
.
sendTips
(
"腾讯新闻"
,
new
Date
());
TipsUtils
.
recoveryTips
(
type
,
new
Date
());
}
}
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment