Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
21bf95d3
Commit
21bf95d3
authored
Aug 10, 2020
by
马黎滨
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'mlbWork' into 'master'
Mlb work See merge request
!17
parents
cb69d3bb
eb385cb2
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
164 additions
and
21 deletions
+164
-21
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
+29
-0
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
+14
-0
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+4
-2
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+26
-11
src/main/java/com/zhiwei/searchhotcrawler/timer/DouYinUrlHotSearchRun.java
+61
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+3
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/ThreadOneRun.java
+3
-3
src/main/java/com/zhiwei/searchhotcrawler/util/TipsUtils.java
+24
-4
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
View file @
21bf95d3
...
...
@@ -76,4 +76,33 @@ public class DouyinHotSearchCrawler {
return
list
;
}
/**
 * Requests the Douyin hot-search video list at {@code url} and returns the first
 * non-blank {@code share_url} found among the entries of its {@code aweme_list} array.
 *
 * @param url full request URL of the hot-search video list endpoint
 * @return the first non-blank share URL, or {@code null} when the request fails, the
 *         body cannot be parsed as the expected JSON, or no entry has a usable share URL
 */
public static String getDouyinUrl(String url) {
    String htmlBody = null;
    Request request = RequestUtils.wrapGet(url);
    try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
        htmlBody = response.body().string();
    } catch (IOException e) {
        // The exception is passed as an extra trailing argument so that the logger
        // records its stack trace instead of only formatting it into the placeholder.
        log.debug("获取抖音热搜榜链接时出现问题:{}", e.getMessage(), e);
    }

    if (!StringUtils.isNotBlank(htmlBody) || !htmlBody.contains("aweme_list")) {
        return null;
    }

    try {
        JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("aweme_list");
        if (jsonArray == null) {
            // The key is present in the text but carries a JSON null.
            return null;
        }
        for (int i = 0; i < jsonArray.size(); i++) {
            JSONObject jsonObject = jsonArray.getJSONObject(i);
            if (jsonObject == null) {
                continue;
            }
            String shareUrl = jsonObject.getString("share_url");
            // Keep scanning past null/empty values: callers only test the result for
            // null, so a blank string must never be returned as if it were a URL.
            if (StringUtils.isNotBlank(shareUrl)) {
                return shareUrl;
            }
        }
    } catch (RuntimeException e) {
        // A malformed or unexpectedly shaped body is treated like a failed request.
        log.debug("解析抖音热搜榜链接响应时出现问题:{}", e.getMessage(), e);
    }
    return null;
}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
View file @
21bf95d3
...
...
@@ -141,6 +141,20 @@ public class HotSearchCacheDAO {
}
}
/**
 * Updates the {@code url} field of an existing cached entry.
 *
 * <p>The entry is looked up by {@code _id}, taken from the {@code "id"} value of
 * {@code document}. Only the {@code url} field is written, using a single
 * {@code $set} update, so other fields of the stored entry are left untouched even if
 * another writer modifies them at the same time. Nothing is inserted when no entry
 * with that id exists.
 *
 * @param document carrier holding the target {@code "id"} and the new {@code "url"};
 *                 ignored when it is {@code null} or has no {@code "id"}
 */
public void updateDouyinUrl(Document document) {
    if (document == null) {
        return;
    }
    String id = (String) document.get("id");
    if (id == null) {
        return;
    }
    Document query = new Document("_id", id);
    // A targeted $set replaces the previous read-modify-replace sequence, which wrote
    // back a whole (possibly stale) copy of the stored document.
    Document update = new Document("$set", new Document("url", document.get("url")));
    collection.updateOne(query, update);
}
/**
* 计算热搜时长
...
...
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
21bf95d3
...
...
@@ -45,14 +45,16 @@ public class HotSearchRun {
//采集程序启动
new
WeiboHotSearchRun
().
start
();
new
BaiduHotSearchRun
().
start
();
new
SougoHotSearchRun
().
start
();
//
new SougoHotSearchRun().start();
new
DouyinHotSearchRun
().
start
();
new
ZhihuHotSearchRun
().
start
();
//
new ZhihuHotSearchRun().start();
new
WeiboSuperTopicRun
().
start
();
new
WeiboTopicRun
().
start
();
new
ToutiaoHotSearchRun
().
start
();
new
ZhihuTopSearchRun
().
start
();
new
ZhihuChildHotSearchRun
().
start
();
new
ThreadOneRun
().
start
();
//抖音链接更新
new
DouYinUrlHotSearchRun
().
start
();
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
View file @
21bf95d3
...
...
@@ -6,6 +6,8 @@ import java.util.List;
import
java.util.Objects
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
lombok.extern.log4j.Log4j2
;
...
...
@@ -41,18 +43,30 @@ public class BaiduHotSearchRun extends Thread{
private
void
getHotList
()
{
log
.
info
(
"百度风云榜采集开始........"
);
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
List
<
HotSearchList
>
list
=
BaiDuHotSearchCrawler
.
baiduHotSearch
();
log
.
info
(
"{}, 此轮百度风云榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
if
(
Objects
.
nonNull
(
list
)
&&
!
list
.
isEmpty
())
{
List
<
Document
>
data
=
hotSearchCacheDAO
.
addData
(
list
);
hotSearchDAO
.
addHotSearchList
(
data
);
TipsUtils
.
recoveryTips
(
"百度热搜"
,
new
Date
());
}
else
{
TipsUtils
.
sendTips
(
"百度热搜"
,
new
Date
());
}
// HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List
<
HotSearchList
>
baiduList
=
BaiDuHotSearchCrawler
.
baiduHotSearch
();
log
.
info
(
"{}, 此轮百度风云榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
baiduList
!=
null
?
baiduList
.
size
()
:
0
));
// if(Objects.nonNull(list) && !list.isEmpty()) {
// List<Document> data = hotSearchCacheDAO.addData(list);
// hotSearchDAO.addHotSearchList(data);
// TipsUtils.recoveryTips("百度热搜",new Date());
// } else {
// TipsUtils.sendTips("百度热搜",new Date());
// }
TipsUtils
.
addHotList
(
"百度热搜"
,
baiduList
);
log
.
info
(
"百度风云榜采集结束........"
);
ZhiWeiTools
.
sleep
(
2000L
);
log
.
info
(
"搜狗微信采集开始........"
);
List
<
HotSearchList
>
sougouList
=
SougoHotSearchCrawler
.
sougoHotSearch
();
log
.
info
(
"{}, 此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
sougouList
!=
null
?
sougouList
.
size
()
:
0
));
TipsUtils
.
addHotList
(
"搜狗微信热搜"
,
sougouList
);
log
.
info
(
"搜狗微信采集结束........"
);
log
.
info
(
"知乎话题采集开始........"
);
List
<
HotSearchList
>
zhihuList
=
ZhihuHotSearchCrawler
.
getMobileZhihuHotList
();
log
.
info
(
"{}, 知乎此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
zhihuList
!=
null
?
zhihuList
.
size
()
:
0
));
TipsUtils
.
addHotList
(
"知乎热搜"
,
zhihuList
);
log
.
info
(
"知乎话题采集结束........"
);
}
}
\ No newline at end of file
src/main/java/com/zhiwei/searchhotcrawler/timer/DouYinUrlHotSearchRun.java
0 → 100644
View file @
21bf95d3
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
import
org.apache.dubbo.common.utils.StringUtils
;
import
org.bson.Document
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
@Log4j2
public
class
DouYinUrlHotSearchRun
extends
Thread
{
@Override
public
void
run
()
{
boolean
f
=
true
;
while
(
f
)
{
try
{
getUrlList
();
TimeUnit
.
MINUTES
.
sleep
(
5
);
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
ZhiWeiTools
.
sleep
(
50
);
}
}
/**
* 获取热搜列表
* TODO
* @return void
*/
private
void
getUrlList
()
{
log
.
info
(
"抖音链接更新开始........"
);
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
List
<
HotSearchList
>
list
=
DouyinHotSearchRun
.
list
;
if
(
list
!=
null
&&
list
.
size
()>
0
)
{
for
(
int
i
=
0
;
i
<
list
.
size
();
i
++)
{
String
name
=
list
.
get
(
i
).
getName
();
String
id
=
name
+
"_"
+
list
.
get
(
i
).
getType
();
String
url
=
DouyinHotSearchCrawler
.
getDouyinUrl
(
"https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword="
+
name
);
if
(
url
!=
null
)
{
Document
document
=
new
Document
();
document
.
put
(
"id"
,
id
);
document
.
put
(
"url"
,
url
);
hotSearchCacheDAO
.
updateDouyinUrl
(
document
);
}
}
log
.
info
(
"抖音链接更新结束........"
);
}
else
{
log
.
info
(
"抖音链接更新失败,获取抖音数据为空"
);
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
View file @
21bf95d3
...
...
@@ -22,6 +22,8 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public
class
DouyinHotSearchRun
extends
Thread
{
public
static
List
<
HotSearchList
>
list
=
new
ArrayList
<>();
@Override
public
void
run
()
{
boolean
f
=
true
;
...
...
@@ -46,7 +48,7 @@ public class DouyinHotSearchRun extends Thread{
log
.
info
(
"抖音热搜榜采集开始........"
);
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
List
<
HotSearchList
>
list
=
DouyinHotSearchCrawler
.
getMobileDouyinHotList
();
list
=
DouyinHotSearchCrawler
.
getMobileDouyinHotList
();
log
.
info
(
"{}, 抖音热搜榜此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
if
(
list
==
null
||
list
.
size
()
==
0
){
TipsUtils
.
sendTips
(
"抖音热搜"
,
new
Date
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/ThreadOneRun.java
View file @
21bf95d3
...
...
@@ -18,15 +18,13 @@ import java.util.concurrent.TimeUnit;
@Log4j2
public
class
ThreadOneRun
extends
Thread
{
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
private
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
@Override
public
void
run
()
{
boolean
f
=
true
;
while
(
f
)
{
try
{
getHotList
();
TimeUnit
.
MINUTES
.
sleep
(
3
);
TimeUnit
.
MINUTES
.
sleep
(
1
);
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
1000
);
...
...
@@ -54,6 +52,8 @@ public class ThreadOneRun extends Thread {
private
void
addHotList
(
String
type
,
List
<
HotSearchList
>
list
){
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
if
(
list
==
null
||
list
.
size
()
==
0
){
TipsUtils
.
sendTips
(
type
,
new
Date
());
}
else
{
...
...
src/main/java/com/zhiwei/searchhotcrawler/util/TipsUtils.java
View file @
21bf95d3
package
com
.
zhiwei
.
searchhotcrawler
.
util
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
org.bson.Document
;
import
org.checkerframework.checker.units.qual.A
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -30,8 +33,8 @@ public class TipsUtils {
if
(!
typeTips
.
containsKey
(
type
))
{
//发送预警
String
crawlerContent
=
String
.
format
(
"%s数据采集异常"
,
type
);
QYWechatUtil
.
send
(
key
,
QYWechatUtil
.
MSGTYPE_TEXT
,
crawlerContent
,
null
,
null
);
//
QYWechatUtil.send(key, QYWechatUtil.MSGTYPE_TEXT, crawlerContent,
//
null, null);
}
typeTips
.
put
(
type
,
time
);
}
...
...
@@ -49,8 +52,8 @@ public class TipsUtils {
typeTips
.
remove
(
type
);
//发送恢复通知
String
crawlerContent
=
String
.
format
(
"%s数据采集恢复正常"
,
type
);
QYWechatUtil
.
send
(
key
,
QYWechatUtil
.
MSGTYPE_TEXT
,
crawlerContent
,
null
,
null
);
//
QYWechatUtil.send(key, QYWechatUtil.MSGTYPE_TEXT, crawlerContent,
//
null, null);
}
}
}
...
...
@@ -74,4 +77,21 @@ public class TipsUtils {
}
return
count
;
}
/**
 * Stores one round of collected hot-search entries and updates the tip state for
 * the given source type.
 *
 * <p>When {@code list} is {@code null} or empty, {@code sendTips} is called for
 * {@code type} and nothing is stored. Otherwise the entries are passed to
 * {@code HotSearchCacheDAO.addData}, the documents it returns are passed to
 * {@code HotSearchListDAO.addHotSearchList}, and {@code recoveryTips} is called.
 *
 * @param type name of the hot-search source, used for the tips
 * @param list entries collected in this round; may be {@code null} or empty
 */
public static void addHotList(String type, List<HotSearchList> list) {
    HotSearchListDAO listDao = new HotSearchListDAO();
    HotSearchCacheDAO cacheDao = new HotSearchCacheDAO();

    boolean nothingCollected = (list == null) || list.isEmpty();
    if (nothingCollected) {
        TipsUtils.sendTips(type, new Date());
        return;
    }

    List<Document> cachedDocuments = cacheDao.addData(list);
    listDao.addHotSearchList(cachedDocuments);
    TipsUtils.recoveryTips(type, new Date());
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment