Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
64440cea
Commit
64440cea
authored
Aug 22, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修复知乎及抖音榜单采集中断问题
parent
136bcddb
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
112 additions
and
39 deletions
+112
-39
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+3
-7
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchListDAO.java
+1
-9
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+19
-18
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+16
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+21
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
+16
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
+16
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
+20
-3
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
64440cea
...
...
@@ -65,7 +65,7 @@ public class ZhihuHotSearchCrawler {
}
}
}
catch
(
IOException
e
)
{
logger
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
.
fillInStackTrace
()
);
logger
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
);
return
list
;
}
return
list
;
...
...
@@ -81,7 +81,7 @@ public class ZhihuHotSearchCrawler {
* @return List<ZhihuHotSearch> 返回类型
*/
public
static
List
<
HotSearchList
>
getMobileZhihuHotList
(){
List
<
HotSearchList
>
list
=
n
ull
;
List
<
HotSearchList
>
list
=
n
ew
ArrayList
<>();
;
String
url
=
"https://api.zhihu.com/topstory/hot-list?limit=40&reverse_order=0"
;
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
headerMap
.
put
(
"Host"
,
"api.zhihu.com"
);
...
...
@@ -90,11 +90,9 @@ public class ZhihuHotSearchCrawler {
headerMap
.
put
(
"X-UDID"
,
"AFAC3hv3vgyPTt9ZmNmqTm0yv_8NKY3S3z8="
);
headerMap
.
put
(
"authorization"
,
"oauth c3cef7c66a1843f8b3a9e6a1e3160e20"
);
for
(
int
j
=
0
;
j
<
3
;
j
++){
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"author"
)){
list
=
new
ArrayList
<>();
JSONObject
topSearch
=
JSONObject
.
parseObject
(
htmlBody
);
JSONArray
words
=
topSearch
.
getJSONArray
(
"data"
);
String
link
=
null
;
...
...
@@ -106,13 +104,11 @@ public class ZhihuHotSearchCrawler {
HotSearchList
zhihu
=
new
HotSearchList
(
link
,
displayQuery
,
null
,
i
,
HotSearchType
.
知乎热搜
.
name
());
list
.
add
(
zhihu
);
}
break
;
}
}
catch
(
IOException
e
)
{
logger
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
.
fillInStackTrace
()
);
logger
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
);
return
list
;
}
}
return
list
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchListDAO.java
View file @
64440cea
...
...
@@ -38,28 +38,20 @@ public class HotSearchListDAO extends MongoDBTemplate{
* @param list
*/
public void addHotSearchList(List<DBObject> list) {
    // Try the batch insert at most three times; stop as soon as one attempt succeeds.
    int attempt = 0;
    boolean stored = false;
    while (!stored && attempt < 3) {
        attempt++;
        try {
            this.getReadColl().insert(list);
            // Pause after a successful insert (duration semantics are defined by ZhiWeiTools).
            ZhiWeiTools.sleep(200);
            stored = true;
        } catch (Exception e) {
            // A failed attempt is only logged; the loop then retries until attempts run out.
            logger.error("存储数据时出错,错误为:{}", e);
        }
    }
}
public
void
addHotSearch
(
DBObject
doc
){
for
(
int
i
=
0
;
i
<
3
;
i
++){
try
{
this
.
getReadColl
().
save
(
doc
);
ZhiWeiTools
.
sleep
(
200
);
break
;
this
.
getReadColl
().
insert
(
doc
);
}
catch
(
Exception
e
)
{
logger
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
}
/**
* 查询据上次变化量
...
...
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
64440cea
package
com
.
zhiwei
.
searchhotcrawler
.
run
;
import
java.util.concurrent.Executors
;
import
java.util.concurrent.ScheduledExecutorService
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.searchhotcrawler.cache.CacheListener
;
...
...
@@ -21,19 +17,18 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
public
class
HotSearchRun
{
private
ScheduledExecutorService
scheduExec
;
public
HotSearchRun
()
{
this
.
scheduExec
=
Executors
.
newScheduledThreadPool
(
10
);
}
public
void
showTimer
()
{
scheduExec
.
scheduleAtFixedRate
(
new
WeiboHotSearchRun
(),
0
,
1
,
TimeUnit
.
MINUTES
);
scheduExec
.
scheduleAtFixedRate
(
new
ZhihuHotSearchRun
(),
0
,
1
,
TimeUnit
.
MINUTES
);
scheduExec
.
scheduleAtFixedRate
(
new
BaiduHotSearchRun
(),
0
,
5
,
TimeUnit
.
MINUTES
);
scheduExec
.
scheduleAtFixedRate
(
new
SougoHotSearchRun
(),
0
,
5
,
TimeUnit
.
MINUTES
);
scheduExec
.
scheduleAtFixedRate
(
new
DouyinHotSearchRun
(),
0
,
10
,
TimeUnit
.
MINUTES
);
}
// private ScheduledExecutorService scheduExec;
//
// public HotSearchRun() {
// this.scheduExec = Executors.newScheduledThreadPool(5);
// }
// public void showTimer() {
// scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES);
// scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 10 , TimeUnit.MINUTES);
// scheduExec.scheduleAtFixedRate(new BaiduHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
// scheduExec.scheduleAtFixedRate(new SougoHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
// scheduExec.scheduleAtFixedRate(new DouyinHotSearchRun(), 0, 10 , TimeUnit.MINUTES);
// }
public
static
void
main
(
String
[]
args
)
{
...
...
@@ -41,8 +36,14 @@ public class HotSearchRun {
new
UpdateWechatUserRun
().
start
();
ZhiWeiTools
.
sleep
(
10000
);
new
HotSearchRun
().
showTimer
();
new
CacheListener
().
startListen
();
//采集程序启动
new
WeiboHotSearchRun
().
start
();
new
BaiduHotSearchRun
().
start
();
new
SougoHotSearchRun
().
start
();
new
DouyinHotSearchRun
().
start
();
new
ZhihuHotSearchRun
().
start
();
//推送程序启动
new
SendWeiboHotSearchRun
().
start
();
new
SendZhihuHotSearchRun
().
start
();
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
View file @
64440cea
...
...
@@ -4,6 +4,7 @@ import java.util.ArrayList;
import
java.util.Date
;
import
java.util.List
;
import
java.util.Objects
;
import
java.util.concurrent.TimeUnit
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -14,6 +15,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
BaiduHotSearchRun
extends
Thread
{
...
...
@@ -22,6 +24,20 @@ public class BaiduHotSearchRun extends Thread{
/**
 * Crawl loop for the Baidu hot-search list.
 *
 * <p>Each iteration calls {@code getHotList()} and then sleeps for five minutes. A failure in
 * one round is logged and the loop carries on with the next round. If the thread is
 * interrupted, the interrupt flag is restored and the loop ends instead of spinning.
 */
@Override
public void run() {
    while (!Thread.currentThread().isInterrupted()) {
        try {
            getHotList();
            TimeUnit.MINUTES.sleep(5);
        } catch (InterruptedException e) {
            // Restore the flag so callers can observe the interruption, then stop crawling.
            Thread.currentThread().interrupt();
            logger.warn("百度风云榜采集线程被中断,停止采集");
            break;
        } catch (Exception e) {
            // Previously the exception was dropped via e.fillInStackTrace(); log it so
            // failed rounds are visible, and keep the loop alive.
            logger.error("百度风云榜采集时出现问题", e);
        }
        // Short pause before the next round (unit is defined by ZhiWeiTools.sleep).
        ZhiWeiTools.sleep(50);
    }
}
private
void
getHotList
()
{
logger
.
info
(
"百度风云榜采集开始........"
);
List
<
HotSearchList
>
list
=
BaiDuHotSearchCrawler
.
baiduHotSearch
();
logger
.
info
(
"{}, 此轮百度风云榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
View file @
64440cea
...
...
@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -10,17 +11,36 @@ import org.slf4j.LoggerFactory;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
DouyinHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
DouyinHotSearchRun
.
class
);
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
/**
 * Crawl loop for the Douyin hot-search list.
 *
 * <p>Each iteration calls {@code getHotList()} and then sleeps for ten minutes. A failure in
 * one round is logged and the loop carries on with the next round. If the thread is
 * interrupted, the interrupt flag is restored and the loop ends instead of spinning.
 */
@Override
public void run() {
    while (!Thread.currentThread().isInterrupted()) {
        try {
            getHotList();
            TimeUnit.MINUTES.sleep(10);
        } catch (InterruptedException e) {
            // Restore the flag so callers can observe the interruption, then stop crawling.
            Thread.currentThread().interrupt();
            logger.warn("抖音热搜榜采集线程被中断,停止采集");
            break;
        } catch (Exception e) {
            // Previously the exception was dropped via e.fillInStackTrace(); log it so
            // failed rounds are visible, and keep the loop alive.
            logger.error("抖音热搜榜采集时出现问题", e);
        }
        // Short pause before the next round (unit is defined by ZhiWeiTools.sleep).
        ZhiWeiTools.sleep(50);
    }
}
/**
* 获取热搜列表
* TODO
* @return void
*/
private
void
getHotList
()
{
logger
.
info
(
"抖音热搜榜采集开始........"
);
List
<
HotSearchList
>
list
=
DouyinHotSearchCrawler
.
getMobileDouyinHotList
();
logger
.
info
(
"{}, 抖音热搜榜此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
View file @
64440cea
...
...
@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -10,9 +11,9 @@ import org.slf4j.LoggerFactory;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
SougoHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
SougoHotSearchRun
.
class
);
...
...
@@ -21,6 +22,20 @@ public class SougoHotSearchRun extends Thread {
/**
 * Crawl loop for the Sogou hot-search list.
 *
 * <p>Each iteration calls {@code getHotList()} and then sleeps for five minutes. A failure in
 * one round is logged and the loop carries on with the next round. If the thread is
 * interrupted, the interrupt flag is restored and the loop ends instead of spinning.
 */
@Override
public void run() {
    while (!Thread.currentThread().isInterrupted()) {
        try {
            getHotList();
            TimeUnit.MINUTES.sleep(5);
        } catch (InterruptedException e) {
            // Restore the flag so callers can observe the interruption, then stop crawling.
            Thread.currentThread().interrupt();
            logger.warn("搜狗微信采集线程被中断,停止采集");
            break;
        } catch (Exception e) {
            // Previously the exception was dropped via e.fillInStackTrace(); log it so
            // failed rounds are visible, and keep the loop alive.
            logger.error("搜狗微信采集时出现问题", e);
        }
        // Short pause before the next round (unit is defined by ZhiWeiTools.sleep).
        ZhiWeiTools.sleep(50);
    }
}
private
void
getHotList
()
{
logger
.
info
(
"搜狗微信采集开始........"
);
List
<
HotSearchList
>
list
=
SougoHotSearchCrawler
.
sougoHotSearch
();
logger
.
info
(
"{}, 此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
View file @
64440cea
...
...
@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -13,6 +14,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
WeiboHotSearchRun
extends
Thread
{
...
...
@@ -21,6 +23,20 @@ public class WeiboHotSearchRun extends Thread{
/**
 * Crawl loop for the Weibo hot-search list.
 *
 * <p>Each iteration calls {@code getHotList()} and then sleeps for one minute. A failure in
 * one round is logged and the loop carries on with the next round. If the thread is
 * interrupted, the interrupt flag is restored and the loop ends instead of spinning.
 */
@Override
public void run() {
    while (!Thread.currentThread().isInterrupted()) {
        try {
            getHotList();
            TimeUnit.MINUTES.sleep(1);
        } catch (InterruptedException e) {
            // Restore the flag so callers can observe the interruption, then stop crawling.
            Thread.currentThread().interrupt();
            logger.warn("微博话题采集线程被中断,停止采集");
            break;
        } catch (Exception e) {
            // Previously the exception was dropped via e.fillInStackTrace(); log it so
            // failed rounds are visible, and keep the loop alive.
            logger.error("微博话题采集时出现问题", e);
        }
        // Short pause before the next round (unit is defined by ZhiWeiTools.sleep).
        ZhiWeiTools.sleep(50);
    }
}
private
void
getHotList
()
{
logger
.
info
(
"微博话题采集开始........"
);
List
<
HotSearchList
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearch
();
logger
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
View file @
64440cea
...
...
@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.timer;
import
java.util.Date
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -12,6 +13,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
ZhihuHotSearchRun
extends
Thread
{
...
...
@@ -20,10 +22,25 @@ public class ZhihuHotSearchRun extends Thread{
/**
 * Crawl loop for the Zhihu hot list.
 *
 * <p>Logs a start message once, then repeatedly calls {@code getHotList()} and sleeps for ten
 * minutes. A failure in one round is logged and the loop carries on with the next round. If
 * the thread is interrupted, the interrupt flag is restored and the loop ends instead of
 * spinning.
 */
@Override
public void run() {
    logger.info("知乎话题采集开始........");
    while (!Thread.currentThread().isInterrupted()) {
        try {
            getHotList();
            TimeUnit.MINUTES.sleep(10);
        } catch (InterruptedException e) {
            // Restore the flag so callers can observe the interruption, then stop crawling.
            Thread.currentThread().interrupt();
            logger.warn("知乎话题采集线程被中断,停止采集");
            break;
        } catch (Exception e) {
            // Previously the exception was dropped via e.fillInStackTrace(); log it so
            // failed rounds are visible, and keep the loop alive.
            logger.error("知乎话题采集时出现问题", e);
        }
        // Short pause before the next round (unit is defined by ZhiWeiTools.sleep).
        ZhiWeiTools.sleep(50);
    }
}
private
void
getHotList
()
{
logger
.
info
(
"知乎话题采集开始...,当前线程名字:{}"
,
Thread
.
currentThread
().
getName
());
List
<
HotSearchList
>
list
=
ZhihuHotSearchCrawler
.
getZhihuHotList
();
//
List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
//
list.addAll(mobilelist);
List
<
HotSearchList
>
mobilelist
=
ZhihuHotSearchCrawler
.
getMobileZhihuHotList
();
list
.
addAll
(
mobilelist
);
logger
.
info
(
"{}, 知乎此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
for
(
HotSearchList
zhihuHotSearch
:
list
){
DBObject
zhihu
=
new
BasicDBObject
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment