Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
8df0ca9e
Commit
8df0ca9e
authored
Nov 11, 2020
by
chenweitao
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'mlbWork' into 'master'
头条采集入库后更新阅读量 See merge request
!51
parents
f8861322
54207549
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
35 additions
and
23 deletions
+35
-23
src/main/java/com/zhiwei/searchhotcrawler/crawler/ToutiaoHotSearchCrawler.java
+4
-1
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
+10
-10
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchListDAO.java
+14
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/TouTiaoExecutor.java
+3
-8
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
+4
-4
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/ToutiaoHotSearchCrawler.java
View file @
8df0ca9e
...
@@ -8,6 +8,7 @@ import com.zhiwei.crawler.proxy.ProxyHolder;
...
@@ -8,6 +8,7 @@ import com.zhiwei.crawler.proxy.ProxyHolder;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -147,11 +148,12 @@ public class ToutiaoHotSearchCrawler {
...
@@ -147,11 +148,12 @@ public class ToutiaoHotSearchCrawler {
// }
// }
/**
/**
*
获取
今日头条热搜阅读量
*
更新
今日头条热搜阅读量
* @param hotSearchList
* @param hotSearchList
* @return
* @return
*/
*/
public
static
HotSearchList
toutiaoReadCount
(
HotSearchList
hotSearchList
){
public
static
HotSearchList
toutiaoReadCount
(
HotSearchList
hotSearchList
){
HotSearchListDAO
hotSearchListDAO
=
new
HotSearchListDAO
();
if
(
hotSearchList
.
getUrl
()
!=
null
)
{
if
(
hotSearchList
.
getUrl
()
!=
null
)
{
String
htmlBody
=
null
;
String
htmlBody
=
null
;
String
url
=
hotSearchList
.
getUrl
();
String
url
=
hotSearchList
.
getUrl
();
...
@@ -171,6 +173,7 @@ public class ToutiaoHotSearchCrawler {
...
@@ -171,6 +173,7 @@ public class ToutiaoHotSearchCrawler {
Integer
count
=
TipsUtils
.
getHotCount
(
readCount
);
Integer
count
=
TipsUtils
.
getHotCount
(
readCount
);
log
.
info
(
"{},阅读量:{}"
,
hotSearchList
.
getName
(),
count
);
log
.
info
(
"{},阅读量:{}"
,
hotSearchList
.
getName
(),
count
);
hotSearchList
.
setCommentCount
(
count
);
hotSearchList
.
setCommentCount
(
count
);
hotSearchListDAO
.
updateTouTiaoReadCount
(
hotSearchList
);
return
hotSearchList
;
return
hotSearchList
;
}
}
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
View file @
8df0ca9e
...
@@ -48,9 +48,9 @@ public class HotSearchCacheDAO {
...
@@ -48,9 +48,9 @@ public class HotSearchCacheDAO {
document
.
put
(
"topic_lead"
,
hotSearch
.
getTopicLead
());
document
.
put
(
"topic_lead"
,
hotSearch
.
getTopicLead
());
document
.
put
(
"comment_count"
,
hotSearch
.
getCommentCount
());
document
.
put
(
"comment_count"
,
hotSearch
.
getCommentCount
());
}
}
if
(
"今日头条热搜"
.
equals
(
hotSearch
.
getType
())){
//
if("今日头条热搜".equals(hotSearch.getType())){
document
.
put
(
"comment_count"
,
hotSearch
.
getCommentCount
());
//
document.put("comment_count", hotSearch.getCommentCount());
}
//
}
if
(
"腾讯较真榜"
.
equals
(
hotSearch
.
getType
())){
if
(
"腾讯较真榜"
.
equals
(
hotSearch
.
getType
())){
document
.
put
(
"topic_result"
,
hotSearch
.
getTopicResult
());
document
.
put
(
"topic_result"
,
hotSearch
.
getTopicResult
());
}
}
...
@@ -82,7 +82,7 @@ public class HotSearchCacheDAO {
...
@@ -82,7 +82,7 @@ public class HotSearchCacheDAO {
String
topicResult
=
document
.
getString
(
"topic_result"
)!=
null
?
document
.
getString
(
"topic_result"
):
null
;
String
topicResult
=
document
.
getString
(
"topic_result"
)!=
null
?
document
.
getString
(
"topic_result"
):
null
;
String
id
=
name
+
"_"
+
type
;
String
id
=
name
+
"_"
+
type
;
boolean
recommend
=
false
;
boolean
recommend
=
false
;
Integer
readCount
=
document
.
getInteger
(
"comment_count"
);
//
Integer readCount = document.getInteger("comment_count");
if
(
"微博热搜"
.
equals
(
type
)){
if
(
"微博热搜"
.
equals
(
type
)){
String
icon
=
document
.
getString
(
"icon"
);
String
icon
=
document
.
getString
(
"icon"
);
if
(
"recom"
.
equals
(
icon
)
||
"jian"
.
equals
(
icon
)){
if
(
"recom"
.
equals
(
icon
)
||
"jian"
.
equals
(
icon
)){
...
@@ -128,9 +128,9 @@ public class HotSearchCacheDAO {
...
@@ -128,9 +128,9 @@ public class HotSearchCacheDAO {
nowDoc
.
put
(
"preCount"
,
preCount
);
nowDoc
.
put
(
"preCount"
,
preCount
);
nowDoc
.
put
(
"duration"
,
durationNow
);
nowDoc
.
put
(
"duration"
,
durationNow
);
nowDoc
.
put
(
"recommend"
,
recommend
);
nowDoc
.
put
(
"recommend"
,
recommend
);
if
(
readCount
!=
null
){
//
if(readCount != null){
nowDoc
.
put
(
"readCount"
,
readCount
);
//
nowDoc.put("readCount",readCount);
}
//
}
if
(
topicResult
!=
null
){
if
(
topicResult
!=
null
){
nowDoc
.
put
(
"topicResult"
,
topicResult
);
nowDoc
.
put
(
"topicResult"
,
topicResult
);
}
}
...
@@ -154,9 +154,9 @@ public class HotSearchCacheDAO {
...
@@ -154,9 +154,9 @@ public class HotSearchCacheDAO {
nowDoc
.
put
(
"preRank"
,
null
);
nowDoc
.
put
(
"preRank"
,
null
);
nowDoc
.
put
(
"preCount"
,
null
);
nowDoc
.
put
(
"preCount"
,
null
);
nowDoc
.
put
(
"recommend"
,
recommend
);
nowDoc
.
put
(
"recommend"
,
recommend
);
if
(
readCount
!=
null
){
//
if(readCount != null){
nowDoc
.
put
(
"readCount"
,
readCount
);
//
nowDoc.put("readCount",readCount);
}
//
}
if
(
topicResult
!=
null
){
if
(
topicResult
!=
null
){
nowDoc
.
put
(
"topicResult"
,
topicResult
);
nowDoc
.
put
(
"topicResult"
,
topicResult
);
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchListDAO.java
View file @
8df0ca9e
...
@@ -78,5 +78,19 @@ public class HotSearchListDAO{
...
@@ -78,5 +78,19 @@ public class HotSearchListDAO{
}
}
return
null
;
return
null
;
}
}
/**
 * Stores the read count of a Toutiao hot-search entry on its persisted document.
 *
 * <p>Sets the {@code commentCount} field of the document whose {@code _id} equals
 * {@code hotSearchList.getId()} to {@code hotSearchList.getCommentCount()}.
 * The write is issued as a single {@code $set} update rather than fetching the
 * document and replacing it wholesale, so fields changed by other writers between
 * a read and a write cannot be overwritten with stale values. When no document
 * matches the id, nothing is written.
 *
 * @param hotSearchList entry supplying the document id and the count to store
 */
public void updateTouTiaoReadCount(HotSearchList hotSearchList){
    // Match the stored document by its primary key.
    BasicDBObject idFilter = new BasicDBObject();
    idFilter.put("_id", hotSearchList.getId());

    // Touch only the commentCount field; every other field is left as stored.
    BasicDBObject changedFields = new BasicDBObject();
    changedFields.put("commentCount", hotSearchList.getCommentCount());
    BasicDBObject setCommentCount = new BasicDBObject();
    setCommentCount.put("$set", changedFields);

    // updateOne does not upsert by default, so a missing document stays missing.
    mongoCollection.updateOne(idFilter, setCommentCount);
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/TouTiaoExecutor.java
View file @
8df0ca9e
...
@@ -14,8 +14,6 @@ public class TouTiaoExecutor extends Thread {
...
@@ -14,8 +14,6 @@ public class TouTiaoExecutor extends Thread {
private
HotSearchList
hotSearchList
;
private
HotSearchList
hotSearchList
;
private
static
List
<
HotSearchList
>
resultList
;
public
TouTiaoExecutor
(
HotSearchList
hotSearchList
){
public
TouTiaoExecutor
(
HotSearchList
hotSearchList
){
this
.
hotSearchList
=
hotSearchList
;
this
.
hotSearchList
=
hotSearchList
;
}
}
...
@@ -23,8 +21,7 @@ public class TouTiaoExecutor extends Thread {
...
@@ -23,8 +21,7 @@ public class TouTiaoExecutor extends Thread {
@Override
@Override
public
void
run
()
{
public
void
run
()
{
try
{
try
{
hotSearchList
=
ToutiaoHotSearchCrawler
.
toutiaoReadCount
(
hotSearchList
);
hotSearchList
=
ToutiaoHotSearchCrawler
.
toutiaoReadCount
(
hotSearchList
);
resultList
.
add
(
hotSearchList
);
}
catch
(
Exception
e
){
}
catch
(
Exception
e
){
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
...
@@ -35,8 +32,7 @@ public class TouTiaoExecutor extends Thread {
...
@@ -35,8 +32,7 @@ public class TouTiaoExecutor extends Thread {
* @param list
* @param list
* @return
* @return
*/
*/
public
static
List
<
HotSearchList
>
countTouTiaoReadCount
(
List
<
HotSearchList
>
list
){
public
static
void
countTouTiaoReadCount
(
List
<
HotSearchList
>
list
){
resultList
=
new
ArrayList
<>();
// ExecutorService service = Executors.newFixedThreadPool(list.size());
// ExecutorService service = Executors.newFixedThreadPool(list.size());
for
(
int
i
=
0
;
i
<
list
.
size
();
i
++){
for
(
int
i
=
0
;
i
<
list
.
size
();
i
++){
TipsUtils
.
service
.
execute
(
new
TouTiaoExecutor
(
list
.
get
(
i
)));
TipsUtils
.
service
.
execute
(
new
TouTiaoExecutor
(
list
.
get
(
i
)));
...
@@ -44,11 +40,10 @@ public class TouTiaoExecutor extends Thread {
...
@@ -44,11 +40,10 @@ public class TouTiaoExecutor extends Thread {
// TipsUtils.service.shutdown();
// TipsUtils.service.shutdown();
try
{
try
{
if
(!
TipsUtils
.
service
.
awaitTermination
(
1
,
TimeUnit
.
MINUTES
)){
if
(!
TipsUtils
.
service
.
awaitTermination
(
1
,
TimeUnit
.
MINUTES
)){
log
.
info
(
"
查询今日头条阅读量超时
"
);
log
.
info
(
"
今日头条阅读量更新结束
"
);
}
}
}
catch
(
InterruptedException
e
)
{
}
catch
(
InterruptedException
e
)
{
log
.
info
(
e
.
fillInStackTrace
());
log
.
info
(
e
.
fillInStackTrace
());
}
}
return
resultList
;
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
View file @
8df0ca9e
...
@@ -85,11 +85,11 @@ public class GatherTimer {
...
@@ -85,11 +85,11 @@ public class GatherTimer {
logger
.
info
(
"今日头条热搜开始采集..."
);
logger
.
info
(
"今日头条热搜开始采集..."
);
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
Date
date
=
DateUtils
.
getMillSecondTime
(
new
Date
());
List
<
HotSearchList
>
toutiaoList
=
ToutiaoHotSearchCrawler
.
toutiaoHotSearchByPhone
(
date
);
List
<
HotSearchList
>
toutiaoList
=
ToutiaoHotSearchCrawler
.
toutiaoHotSearchByPhone
(
date
);
List
<
HotSearchList
>
toutiaoResult
=
new
ArrayList
<>();
logger
.
info
(
"{}, 今日头条此轮采集到的数据量为:{}"
,
new
Date
(),
toutiaoList
!=
null
?
toutiaoList
.
size
()
:
0
);
toutiaoResult
=
TouTiaoExecutor
.
countTouTiaoReadCount
(
toutiaoList
);
TipsUtils
.
addHotList
(
HotSearchType
.
今日头条热搜
.
name
(),
toutiaoList
);
logger
.
info
(
"{}, 今日头条此轮采集到的数据量为:{}"
,
new
Date
(),
toutiaoResult
!=
null
?
toutiaoResult
.
size
()
:
0
);
TipsUtils
.
addHotList
(
HotSearchType
.
今日头条热搜
.
name
(),
toutiaoResult
);
logger
.
info
(
"今日头条热搜采集结束..."
);
logger
.
info
(
"今日头条热搜采集结束..."
);
logger
.
info
(
"今日头条热搜详情趋势阅读量更新..."
);
TouTiaoExecutor
.
countTouTiaoReadCount
(
toutiaoList
);
}
}
/**
/**
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment