Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
66e28559
Commit
66e28559
authored
Jan 10, 2022
by
chenweitao
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'working' into 'master'
新增B站标签采集和知乎热搜标签采集 See merge request
!167
parents
36e2a228
1fd52a37
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
214 additions
and
57 deletions
+214
-57
pom.xml
+6
-1
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchList.java
+11
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/BililiCrawler.java
+97
-24
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+82
-31
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
+18
-0
No files found.
pom.xml
View file @
66e28559
...
@@ -48,7 +48,12 @@
...
@@ -48,7 +48,12 @@
<artifactId>
crawler-core
</artifactId>
<artifactId>
crawler-core
</artifactId>
<version>
0.6.7.4-SNAPSHOT
</version>
<version>
0.6.7.4-SNAPSHOT
</version>
</dependency>
</dependency>
<!-- https://mvnrepository.com/artifact/org.conscrypt/conscrypt-openjdk-uber -->
<dependency>
<groupId>
org.conscrypt
</groupId>
<artifactId>
conscrypt-openjdk-uber
</artifactId>
<version>
2.5.2
</version>
</dependency>
<!-- 日志依赖 -->
<!-- 日志依赖 -->
<!-- https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-core -->
<!-- https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-core -->
<dependency>
<dependency>
...
...
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchList.java
View file @
66e28559
...
@@ -94,7 +94,7 @@ public class HotSearchList implements Serializable{
...
@@ -94,7 +94,7 @@ public class HotSearchList implements Serializable{
private
String
topicResult
;
private
String
topicResult
;
/**
/**
* 观看数(目前近B站排行榜及综合热门使用)
* 观看数(目前近B站排行榜及综合热门
,知乎浏览量
使用)
*/
*/
private
Long
view
;
private
Long
view
;
...
@@ -122,6 +122,16 @@ public class HotSearchList implements Serializable{
...
@@ -122,6 +122,16 @@ public class HotSearchList implements Serializable{
* 内容
* 内容
*/
*/
private
String
content
;
private
String
content
;
/**
* 粉丝数(目前仅B站排行榜和知乎热搜使用)
*/
private
Long
fans
;
/**
* 标签(目前仅B站排行榜和知乎热搜使用)
*/
private
String
tag
;
public
HotSearchList
(){}
public
HotSearchList
(){}
public
HotSearchList
(
String
url
,
String
name
,
Long
count
,
Boolean
hot
,
Integer
rank
,
String
type
,
String
icon
,
Date
date
){
public
HotSearchList
(
String
url
,
String
name
,
Long
count
,
Boolean
hot
,
Integer
rank
,
String
type
,
String
icon
,
Date
date
){
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/BililiCrawler.java
View file @
66e28559
...
@@ -7,19 +7,21 @@ import com.zhiwei.crawler.core.proxy.ProxyHolder;
...
@@ -7,19 +7,21 @@ import com.zhiwei.crawler.core.proxy.ProxyHolder;
import
com.zhiwei.crawler.core.utils.RequestUtils
;
import
com.zhiwei.crawler.core.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
okhttp3.Request
;
import
okhttp3.Response
;
import
okhttp3.Response
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.concurrent.ExecutorService
;
import
java.util.concurrent.Executors
;
@Log4j2
@Log4j2
public
class
BililiCrawler
{
public
class
BililiCrawler
{
...
@@ -32,6 +34,7 @@ public class BililiCrawler {
...
@@ -32,6 +34,7 @@ public class BililiCrawler {
*/
*/
public
static
List
<
HotSearchList
>
getBilibiliHotSearch
(
Date
date
){
public
static
List
<
HotSearchList
>
getBilibiliHotSearch
(
Date
date
){
List
<
HotSearchList
>
hotSearchLists
=
new
ArrayList
<>();
List
<
HotSearchList
>
hotSearchLists
=
new
ArrayList
<>();
ExecutorService
executor
=
Executors
.
newFixedThreadPool
(
10
);
log
.
info
(
"bilibili排行榜开始采集..."
);
log
.
info
(
"bilibili排行榜开始采集..."
);
JSONArray
dataJson
=
null
;
JSONArray
dataJson
=
null
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
...
@@ -43,38 +46,108 @@ public class BililiCrawler {
...
@@ -43,38 +46,108 @@ public class BililiCrawler {
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
log
.
error
(
"B站排行榜页面连接失败"
,
e
.
fillInStackTrace
());
log
.
error
(
"B站排行榜页面连接失败"
,
e
.
fillInStackTrace
());
}
}
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"data"
)){
try
{
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONObject
(
"data"
);
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"data"
)){
dataJson
=
jsonObject
.
getJSONArray
(
"list"
);
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONObject
(
"data"
);
if
(
dataJson
!=
null
)
{
dataJson
=
jsonObject
.
getJSONArray
(
"list"
);
for
(
int
i
=
0
;
i
<
dataJson
.
size
();
i
++)
{
if
(
dataJson
!=
null
)
{
JSONObject
data
=
dataJson
.
getJSONObject
(
i
);
for
(
int
i
=
0
;
i
<
dataJson
.
size
();
i
++)
{
int
rank
=
i
+
1
;
JSONObject
data
=
dataJson
.
getJSONObject
(
i
);
String
name
=
data
.
getString
(
"title"
);
int
rank
=
i
+
1
;
String
topicLead
=
data
.
getString
(
"desc"
);
String
name
=
data
.
getString
(
"title"
);
long
count
=
data
.
getLongValue
(
"score"
);
String
topicLead
=
data
.
getString
(
"desc"
);
String
bvid
=
data
.
getString
(
"bvid"
);
long
count
=
data
.
getLongValue
(
"score"
);
String
pic
=
data
.
getString
(
"pic"
);
String
bvid
=
data
.
getString
(
"bvid"
);
String
bUrl
=
"https://www.bilibili.com/video/"
+
bvid
;
String
pic
=
data
.
getString
(
"pic"
);
Long
view
=
null
;
String
bUrl
=
"https://www.bilibili.com/video/"
+
bvid
;
Long
barrage
=
null
;
Long
view
=
null
;
if
(
data
.
containsKey
(
"stat"
))
{
Long
barrage
=
null
;
JSONObject
stat
=
data
.
getJSONObject
(
"stat"
);
if
(
data
.
containsKey
(
"stat"
))
{
view
=
stat
.
getLongValue
(
"view"
);
JSONObject
stat
=
data
.
getJSONObject
(
"stat"
);
barrage
=
stat
.
getLongValue
(
"danmaku"
);
view
=
stat
.
getLongValue
(
"view"
);
barrage
=
stat
.
getLongValue
(
"danmaku"
);
}
//获取主持人
String
downtext
=
null
;
if
(
data
.
containsKey
(
"owner"
))
{
JSONObject
stat
=
data
.
getJSONObject
(
"owner"
);
downtext
=
stat
.
getString
(
"name"
);
}
HotSearchList
hotSearchList
=
new
HotSearchList
(
bUrl
,
name
,
topicLead
,
count
,
null
,
date
,
rank
,
HotSearchType
.
B
站排行榜
.
name
(),
view
,
barrage
,
pic
);
hotSearchList
.
setDowntext
(
downtext
);
executor
.
execute
(
new
Runnable
()
{
@Override
public
void
run
()
{
HotSearchList
tag
=
getTag
(
bUrl
,
hotSearchList
);
hotSearchLists
.
add
(
tag
);
}
});
}
}
HotSearchList
hotSearchList
=
new
HotSearchList
(
bUrl
,
name
,
topicLead
,
count
,
null
,
date
,
rank
,
HotSearchType
.
B
站排行榜
.
name
(),
view
,
barrage
,
pic
);
//进行多线程任务是否执行完毕 如到达指定时间也结束循环
hotSearchLists
.
add
(
hotSearchList
);
executor
.
shutdown
();
long
time
=
0L
;
while
(
true
){
if
(
executor
.
isTerminated
()){
break
;
}
try
{
Thread
.
sleep
(
3000
);
time
=
3000
+
time
;
if
(
time
>
50000
){
break
;
}
}
catch
(
InterruptedException
e
)
{
e
.
printStackTrace
();
}
}
}
}
}
}
}
catch
(
Exception
e
)
{
log
.
error
(
"B站排行榜页面解析异常:{}"
,
e
);
}
}
ZhiWeiTools
.
sleep
(
3000L
);
}
}
log
.
info
(
"{}, B站排行榜此轮采集到的数据量为:{}"
,
new
Date
(),
hotSearchLists
!=
null
?
hotSearchLists
.
size
()
:
0
);
log
.
info
(
"{}, B站排行榜此轮采集到的数据量为:{}"
,
new
Date
(),
hotSearchLists
!=
null
?
hotSearchLists
.
size
()
:
0
);
log
.
info
(
"B站排行榜采集结束"
);
log
.
info
(
"B站排行榜采集结束"
);
return
hotSearchLists
;
return
hotSearchLists
;
}
}
/**
 * Fetches a single Bilibili video page and enriches the given entry with the
 * video's tags and, when present on the page, the uploader's follower count.
 *
 * <p>Failures are logged and never propagated: the same {@code hotSearchList}
 * instance is always returned, with whatever fields could be filled in.
 *
 * @param url           address of the video page to download
 * @param hotSearchList entry to enrich; mutated in place
 * @return the {@code hotSearchList} argument
 */
private static HotSearchList getTag(String url, HotSearchList hotSearchList) {
    Request request = RequestUtils.wrapGet(url);
    try {
        // NOTE(review): this is a JVM-wide property that is re-set on every call
        // (and from several worker threads); consider moving it to one-time start-up code.
        System.setProperty("https.protocols", "TLSv1,TLSv1.1,TLSv1.2,SSLv3");
        // try-with-resources so the response body/connection is always released.
        try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
            String htmlBody = response.body().string();
            if (htmlBody == null || !htmlBody.contains("v-wrap")) {
                return hotSearchList;
            }
            Document document = Jsoup.parse(htmlBody);

            // Tags: the combined text of all li.tag elements is space separated;
            // rewrite it as `tag1;`tag2;...
            String tags = "`" + document.select("li.tag").text() + ";";
            hotSearchList.setTag(tags.replaceAll(" ", ";`"));

            // Follower count: third space-separated token of the follow button text.
            if (htmlBody.contains("v_upinfo")) {
                String text = document.select("div.follow-btn").select("span").text();
                String[] parts = text.split(" ");
                if (parts.length > 2) {
                    String fan = parts[2];
                    long fanCount;
                    if (fan.contains("万")) {
                        // "万" means ten thousand, e.g. "12.3万" -> 123000.
                        fanCount = (long) (Double.parseDouble(fan.replace("万", "").trim()) * 10000);
                    } else {
                        fanCount = Long.parseLong(fan);
                    }
                    hotSearchList.setFans(fanCount);
                } else {
                    log.warn("Bilibili follower count not found on page: {}", url);
                }
            }
        }
    } catch (Exception e) {
        log.error("单条B站排行榜数据页面连接失败:{}", e);
    }
    return hotSearchList;
}
/**
/**
* B站热搜的采集
* B站热搜的采集
* @param date
* @param date
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
66e28559
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.*
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.Map
;
import
com.zhiwei.crawler.core.config.SslProvider
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
okhttp3.Request
;
import
okhttp3.Response
;
import
okhttp3.Response
;
...
@@ -20,6 +18,12 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
...
@@ -20,6 +18,12 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
static
java
.
util
.
Objects
.
nonNull
;
/**
/**
* @ClassName: ZhihuHotCrawler
* @ClassName: ZhihuHotCrawler
...
@@ -30,7 +34,7 @@ import com.zhiwei.tools.tools.URLCodeUtil;
...
@@ -30,7 +34,7 @@ import com.zhiwei.tools.tools.URLCodeUtil;
@Log4j2
@Log4j2
public
class
ZhihuHotSearchCrawler
{
public
class
ZhihuHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
sslProvider
(
SslProvider
.
CONSCRYPT
).
retryTimes
(
3
).
build
();
/**
/**
* @Title: getZhihuHotList
* @Title: getZhihuHotList
* @author hero
* @author hero
...
@@ -100,37 +104,84 @@ public class ZhihuHotSearchCrawler {
...
@@ -100,37 +104,84 @@ public class ZhihuHotSearchCrawler {
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
);
log
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
);
return
list
;
return
list
;
}
}
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"author"
))
{
try
{
JSONObject
topSearch
=
JSONObject
.
parseObject
(
htmlBody
);
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"author"
))
{
JSONArray
dataJson
=
topSearch
.
getJSONArray
(
"data"
);
JSONObject
topSearch
=
JSONObject
.
parseObject
(
htmlBody
);
String
link
=
null
;
JSONArray
dataJson
=
topSearch
.
getJSONArray
(
"data"
);
String
displayQuery
=
null
;
String
link
=
null
;
Long
hotCount
=
null
;
String
displayQuery
=
null
;
String
hotText
=
null
;
Long
hotCount
=
null
;
for
(
int
i
=
0
;
i
<
dataJson
.
size
();
i
++)
{
String
hotText
=
null
;
JSONObject
data
=
dataJson
.
getJSONObject
(
i
).
getJSONObject
(
"target"
);
for
(
int
i
=
0
;
i
<
dataJson
.
size
();
i
++)
{
displayQuery
=
data
.
getString
(
"title"
);
JSONObject
data
=
dataJson
.
getJSONObject
(
i
).
getJSONObject
(
"target"
);
link
=
"https://www.zhihu.com/question/"
+
data
.
getLongValue
(
"id"
);
displayQuery
=
data
.
getString
(
"title"
);
hotText
=
dataJson
.
getJSONObject
(
i
).
getString
(
"detail_text"
);
link
=
"https://www.zhihu.com/question/"
+
data
.
getLongValue
(
"id"
);
hotText
=
dataJson
.
getJSONObject
(
i
).
getString
(
"detail_text"
);
//计算热度
//计算热度
try
{
try
{
if
(
hotText
.
contains
(
"万"
))
{
if
(
hotText
.
contains
(
"万"
))
{
hotText
=
hotText
.
replaceAll
(
"万.*"
,
""
).
trim
();
hotText
=
hotText
.
replaceAll
(
"万.*"
,
""
).
trim
();
hotCount
=
(
long
)
(
Double
.
parseDouble
(
hotText
)
*
10000
);
hotCount
=
(
long
)
(
Double
.
parseDouble
(
hotText
)
*
10000
);
}
else
if
(
hotText
.
contains
(
"亿"
))
{
}
else
if
(
hotText
.
contains
(
"亿"
))
{
hotText
=
hotText
.
replaceAll
(
"亿.*"
,
""
).
trim
();
hotText
=
hotText
.
replaceAll
(
"亿.*"
,
""
).
trim
();
hotCount
=
(
long
)
(
Double
.
parseDouble
(
hotText
)
*
100000000
);
hotCount
=
(
long
)
(
Double
.
parseDouble
(
hotText
)
*
100000000
);
}
else
{
}
else
{
hotCount
=
Long
.
getLong
(
hotText
);
hotCount
=
Long
.
getLong
(
hotText
);
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
}
}
catch
(
Exception
e
)
{
org
.
bson
.
Document
doc
=
getTag
(
link
);
e
.
printStackTrace
();
String
tog
=
nonNull
(
doc
.
get
(
"tag"
))
?
doc
.
getString
(
"tag"
)
:
null
;
Long
view
=
nonNull
(
doc
.
get
(
"view"
))
?
Long
.
valueOf
(
doc
.
get
(
"view"
).
toString
())
:
null
;
Long
fans
=
nonNull
(
doc
.
get
(
"fans"
))
?
Long
.
valueOf
(
doc
.
get
(
"fans"
).
toString
())
:
null
;
HotSearchList
zhihu
=
new
HotSearchList
(
link
,
displayQuery
,
hotCount
,
i
+
1
,
HotSearchType
.
知乎热搜
.
name
(),
date
);
zhihu
.
setFans
(
fans
);
zhihu
.
setView
(
view
);
zhihu
.
setTag
(
tog
);
list
.
add
(
zhihu
);
}
}
HotSearchList
zhihu
=
new
HotSearchList
(
link
,
displayQuery
,
hotCount
,
i
+
1
,
HotSearchType
.
知乎热搜
.
name
(),
date
);
list
.
add
(
zhihu
);
}
}
}
catch
(
Exception
e
)
{
log
.
info
(
"知乎热搜解析异常"
,
e
);
}
}
return
list
;
return
list
;
}
}
/**
 * Downloads the desktop (PC) page of a Zhihu question and extracts its tags,
 * follower count and view count.
 *
 * <p>The returned document always contains the keys {@code tag}, {@code view}
 * and {@code fans}; each stays {@code null} when the value could not be read.
 * Failures are logged and never propagated.
 *
 * @param url address of the question page
 * @return a BSON document holding {@code tag} (String), {@code fans} (Long) and {@code view} (Long)
 */
private static org.bson.Document getTag(String url) {
    org.bson.Document doc = new org.bson.Document();
    doc.put("tag", null);
    // view count
    doc.put("view", null);
    // follower count
    doc.put("fans", null);

    Map<String, String> headers = HeaderTool.getCommonHead();
    // NOTE(review): hard-coded _xsrf cookie value; presumably needed for the page to render, confirm it does not expire.
    headers.put("cookie", "_xsrf=7NFWM5qBcOutfs8MaW7bhQQH65t3Xia4");
    Request request = RequestUtils.wrapGet(url, headers);

    try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
        String htmlBody = response.body().string();
        if (htmlBody == null || !htmlBody.contains("QuestionHeader")) {
            return doc;
        }
        Document document = Jsoup.parse(htmlBody);

        // Tags: the combined text of all div.Tag elements is space separated;
        // rewrite it as `tag1;`tag2;...
        String content = "`" + document.select("div.Tag").text() + ";";
        String label = content.replaceAll(" ", ";`");
        doc.put("tag", label.trim());

        // The number board text is "<followers> <views>", with thousands separators.
        String strong = document.select("div.NumberBoard-itemInner").select("strong").text();
        String[] count = strong.split(" ");
        // follower count
        doc.put("fans", Long.valueOf(count[0].replaceAll(",", "").trim()));
        // view count; only present when the page exposes a second counter
        if (count.length > 1) {
            doc.put("view", Long.valueOf(count[1].replaceAll(",", "").trim()));
        }
    } catch (Exception e) {
        log.error("单条知乎热搜数据页面连接失败", e);
    }
    return doc;
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
View file @
66e28559
...
@@ -96,6 +96,9 @@ public class HotSearchCacheDAO {
...
@@ -96,6 +96,9 @@ public class HotSearchCacheDAO {
document
.
put
(
"view"
,
hotSearch
.
getView
());
document
.
put
(
"view"
,
hotSearch
.
getView
());
document
.
put
(
"barrage"
,
hotSearch
.
getBarrage
());
document
.
put
(
"barrage"
,
hotSearch
.
getBarrage
());
document
.
put
(
"pictureUrl"
,
hotSearch
.
getPictureUrl
());
document
.
put
(
"pictureUrl"
,
hotSearch
.
getPictureUrl
());
document
.
put
(
"tag"
,
hotSearch
.
getTag
());
document
.
put
(
"downtext"
,
hotSearch
.
getDowntext
());
document
.
put
(
"fans"
,
hotSearch
.
getFans
());
}
}
if
(
"B站综合热门"
.
equals
(
hotSearch
.
getType
()))
{
if
(
"B站综合热门"
.
equals
(
hotSearch
.
getType
()))
{
document
.
put
(
"heatLabel"
,
hotSearch
.
getHeatLabel
());
document
.
put
(
"heatLabel"
,
hotSearch
.
getHeatLabel
());
...
@@ -103,6 +106,11 @@ public class HotSearchCacheDAO {
...
@@ -103,6 +106,11 @@ public class HotSearchCacheDAO {
document
.
put
(
"pictureUrl"
,
hotSearch
.
getPictureUrl
());
document
.
put
(
"pictureUrl"
,
hotSearch
.
getPictureUrl
());
document
.
put
(
"commentCount"
,
hotSearch
.
getCommentCount
());
document
.
put
(
"commentCount"
,
hotSearch
.
getCommentCount
());
}
}
if
(
"知乎热搜"
.
equals
(
hotSearch
.
getType
()))
{
document
.
put
(
"tag"
,
hotSearch
.
getTag
());
document
.
put
(
"view"
,
hotSearch
.
getView
());
document
.
put
(
"fans"
,
hotSearch
.
getFans
());
}
addAndUpdateData
(
document
);
addAndUpdateData
(
document
);
if
(
"百度热搜"
.
equals
(
hotSearch
.
getType
()))
{
if
(
"百度热搜"
.
equals
(
hotSearch
.
getType
()))
{
document
.
remove
(
"topic_lead"
);
document
.
remove
(
"topic_lead"
);
...
@@ -113,6 +121,9 @@ public class HotSearchCacheDAO {
...
@@ -113,6 +121,9 @@ public class HotSearchCacheDAO {
if
(
"网易热榜"
.
equals
(
hotSearch
.
getType
()))
{
if
(
"网易热榜"
.
equals
(
hotSearch
.
getType
()))
{
document
.
remove
(
"downtext"
);
document
.
remove
(
"downtext"
);
}
}
if
(
"B站排行榜"
.
equals
(
hotSearch
.
getType
()))
{
document
.
remove
(
"downtext"
);
}
dataes
.
add
(
document
);
dataes
.
add
(
document
);
}
}
return
dataes
;
return
dataes
;
...
@@ -278,6 +289,13 @@ public class HotSearchCacheDAO {
...
@@ -278,6 +289,13 @@ public class HotSearchCacheDAO {
if
(
"B站综合热门"
.
equals
(
type
))
{
if
(
"B站综合热门"
.
equals
(
type
))
{
nowDoc
.
put
(
"pictureUrl"
,
pictureUrl
);
nowDoc
.
put
(
"pictureUrl"
,
pictureUrl
);
}
}
if
(
"知乎热搜"
.
equals
(
type
))
{
nowDoc
.
put
(
"tag"
,
nonNull
(
document
.
get
(
"tag"
))
?
document
.
getString
(
"tag"
)
:
null
);
}
if
(
"B站排行榜"
.
equals
(
type
))
{
nowDoc
.
put
(
"tag"
,
nonNull
(
document
.
get
(
"tag"
))
?
document
.
getString
(
"tag"
)
:
null
);
nowDoc
.
put
(
"downtext"
,
nonNull
(
document
.
get
(
"downtext"
))
?
document
.
getString
(
"downtext"
)
:
null
);
}
if
(
"微博热搜"
.
equals
(
type
))
{
if
(
"微博热搜"
.
equals
(
type
))
{
nowDoc
=
WeiboHotSearchCrawler
.
weiboUpdate
(
nowDoc
);
nowDoc
=
WeiboHotSearchCrawler
.
weiboUpdate
(
nowDoc
);
//更新微博话题贡献者,关于功能
//更新微博话题贡献者,关于功能
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment