Commit 64440cea by zhiwei

修复知乎及抖音榜单采集中断问题

parent 136bcddb
......@@ -65,7 +65,7 @@ public class ZhihuHotSearchCrawler {
}
}
} catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace());
logger.debug("获取知乎热搜时出现问题:{}", e);
return list;
}
return list;
......@@ -81,7 +81,7 @@ public class ZhihuHotSearchCrawler {
* @return List<ZhihuHotSearch> 返回类型
*/
public static List<HotSearchList> getMobileZhihuHotList(){
List<HotSearchList> list = null;
List<HotSearchList> list = new ArrayList<>();;
String url = "https://api.zhihu.com/topstory/hot-list?limit=40&reverse_order=0";
Map<String,String> headerMap = HeaderTool.getCommonHead();
headerMap.put("Host", "api.zhihu.com");
......@@ -90,11 +90,9 @@ public class ZhihuHotSearchCrawler {
headerMap.put("X-UDID", "AFAC3hv3vgyPTt9ZmNmqTm0yv_8NKY3S3z8=");
headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20");
for(int j=0;j<3;j++){
try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), ProxyHolder.NAT_HEAVY_PROXY).body().string();
if(htmlBody != null && htmlBody.contains("author")){
list = new ArrayList<>();
JSONObject topSearch = JSONObject.parseObject(htmlBody);
JSONArray words = topSearch.getJSONArray("data");
String link = null;
......@@ -106,13 +104,11 @@ public class ZhihuHotSearchCrawler {
HotSearchList zhihu = new HotSearchList(link, displayQuery, null, i, HotSearchType.知乎热搜.name());
list.add(zhihu);
}
break;
}
} catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace());
logger.debug("获取知乎热搜时出现问题:{}", e);
return list;
}
}
return list;
}
}
......@@ -38,28 +38,20 @@ public class HotSearchListDAO extends MongoDBTemplate{
* @param list
*/
public void addHotSearchList(List<DBObject> list){
for(int i=0; i<3; i++){
try {
this.getReadColl().insert(list);
ZhiWeiTools.sleep(200);
break;
} catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e);
}
}
}
public void addHotSearch(DBObject doc){
for(int i=0; i<3; i++){
try {
this.getReadColl().save(doc);
ZhiWeiTools.sleep(200);
break;
this.getReadColl().insert(doc);
} catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e);
}
}
}
/**
* 查询据上次变化量
......
package com.zhiwei.searchhotcrawler.run;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.searchhotcrawler.cache.CacheListener;
......@@ -21,19 +17,18 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
public class HotSearchRun {
private ScheduledExecutorService scheduExec;
public HotSearchRun() {
this.scheduExec = Executors.newScheduledThreadPool(10);
}
public void showTimer() {
scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 1 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new BaiduHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new SougoHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new DouyinHotSearchRun(), 0, 10 , TimeUnit.MINUTES);
}
// private ScheduledExecutorService scheduExec;
//
// public HotSearchRun() {
// this.scheduExec = Executors.newScheduledThreadPool(5);
// }
// public void showTimer() {
// scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES);
// scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 10 , TimeUnit.MINUTES);
// scheduExec.scheduleAtFixedRate(new BaiduHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
// scheduExec.scheduleAtFixedRate(new SougoHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
// scheduExec.scheduleAtFixedRate(new DouyinHotSearchRun(), 0, 10 , TimeUnit.MINUTES);
// }
public static void main(String[] args) {
......@@ -41,8 +36,14 @@ public class HotSearchRun {
new UpdateWechatUserRun().start();
ZhiWeiTools.sleep(10000);
new HotSearchRun().showTimer();
new CacheListener().startListen();
//采集程序启动
new WeiboHotSearchRun().start();
new BaiduHotSearchRun().start();
new SougoHotSearchRun().start();
new DouyinHotSearchRun().start();
new ZhihuHotSearchRun().start();
//推送程序启动
new SendWeiboHotSearchRun().start();
new SendZhihuHotSearchRun().start();
}
......
......@@ -4,6 +4,7 @@ import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -14,6 +15,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class BaiduHotSearchRun extends Thread{
......@@ -22,6 +24,20 @@ public class BaiduHotSearchRun extends Thread{
/**
 * Collects the Baidu hot-search list in an endless loop, waiting five
 * minutes after each round.
 *
 * <p>A failure in one round is logged and the loop carries on with the next
 * round. If the thread is interrupted while waiting, the interrupt flag is
 * restored and the method returns so the thread can terminate.
 */
@Override
public void run() {
    while (true) {
        try {
            getHotList();
            TimeUnit.MINUTES.sleep(5);
        } catch (InterruptedException e) {
            // Restore the flag for whoever owns this thread and stop polling;
            // carrying on would ignore the stop request.
            Thread.currentThread().interrupt();
            return;
        } catch (Exception e) {
            // Previously e.fillInStackTrace() was called here, which only
            // rewrites the trace and reports nothing, so failures were invisible.
            logger.error("百度风云榜采集出现异常", e);
        }
        // Short pause before the next round (also reached after a failed round).
        ZhiWeiTools.sleep(50);
    }
}
private void getHotList() {
logger.info("百度风云榜采集开始........");
List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch();
logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
......
......@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -10,17 +11,36 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class DouyinHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class);
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
/**
 * Collects the Douyin hot-search list in an endless loop, waiting ten
 * minutes after each round.
 *
 * <p>A failure in one round is logged and the loop carries on with the next
 * round. If the thread is interrupted while waiting, the interrupt flag is
 * restored and the method returns so the thread can terminate.
 */
@Override
public void run() {
    while (true) {
        try {
            getHotList();
            TimeUnit.MINUTES.sleep(10);
        } catch (InterruptedException e) {
            // Restore the flag for whoever owns this thread and stop polling;
            // carrying on would ignore the stop request.
            Thread.currentThread().interrupt();
            return;
        } catch (Exception e) {
            // Previously e.fillInStackTrace() was called here, which only
            // rewrites the trace and reports nothing, so failures were invisible.
            logger.error("抖音热搜榜采集出现异常", e);
        }
        // Short pause before the next round (also reached after a failed round).
        ZhiWeiTools.sleep(50);
    }
}
/**
* 获取热搜列表
* TODO
* @return void
*/
private void getHotList() {
logger.info("抖音热搜榜采集开始........");
List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
......
......@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -10,9 +11,9 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class SougoHotSearchRun extends Thread {
private static Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class);
......@@ -21,6 +22,20 @@ public class SougoHotSearchRun extends Thread {
/**
 * Collects the Sogou hot-search list in an endless loop, waiting five
 * minutes after each round.
 *
 * <p>A failure in one round is logged and the loop carries on with the next
 * round. If the thread is interrupted while waiting, the interrupt flag is
 * restored and the method returns so the thread can terminate.
 */
@Override
public void run() {
    while (true) {
        try {
            getHotList();
            TimeUnit.MINUTES.sleep(5);
        } catch (InterruptedException e) {
            // Restore the flag for whoever owns this thread and stop polling;
            // carrying on would ignore the stop request.
            Thread.currentThread().interrupt();
            return;
        } catch (Exception e) {
            // Previously e.fillInStackTrace() was called here, which only
            // rewrites the trace and reports nothing, so failures were invisible.
            logger.error("搜狗微信采集出现异常", e);
        }
        // Short pause before the next round (also reached after a failed round).
        ZhiWeiTools.sleep(50);
    }
}
private void getHotList() {
logger.info("搜狗微信采集开始........");
List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch();
logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
......
......@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -13,6 +14,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class WeiboHotSearchRun extends Thread{
......@@ -21,6 +23,20 @@ public class WeiboHotSearchRun extends Thread{
/**
 * Collects the Weibo hot-search list in an endless loop, waiting one
 * minute after each round.
 *
 * <p>A failure in one round is logged and the loop carries on with the next
 * round. If the thread is interrupted while waiting, the interrupt flag is
 * restored and the method returns so the thread can terminate.
 */
@Override
public void run() {
    while (true) {
        try {
            getHotList();
            TimeUnit.MINUTES.sleep(1);
        } catch (InterruptedException e) {
            // Restore the flag for whoever owns this thread and stop polling;
            // carrying on would ignore the stop request.
            Thread.currentThread().interrupt();
            return;
        } catch (Exception e) {
            // Previously e.fillInStackTrace() was called here, which only
            // rewrites the trace and reports nothing, so failures were invisible.
            logger.error("微博话题采集出现异常", e);
        }
        // Short pause before the next round (also reached after a failed round).
        ZhiWeiTools.sleep(50);
    }
}
private void getHotList() {
logger.info("微博话题采集开始........");
List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
......
......@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.timer;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -12,6 +13,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class ZhihuHotSearchRun extends Thread{
......@@ -20,10 +22,25 @@ public class ZhihuHotSearchRun extends Thread{
/**
 * Logs a start message once, then collects the Zhihu hot list in an endless
 * loop, waiting ten minutes after each round.
 *
 * <p>A failure in one round is logged and the loop carries on with the next
 * round. If the thread is interrupted while waiting, the interrupt flag is
 * restored and the method returns so the thread can terminate.
 */
@Override
public void run() {
    logger.info("知乎话题采集开始........");
    while (true) {
        try {
            getHotList();
            TimeUnit.MINUTES.sleep(10);
        } catch (InterruptedException e) {
            // Restore the flag for whoever owns this thread and stop polling;
            // carrying on would ignore the stop request.
            Thread.currentThread().interrupt();
            return;
        } catch (Exception e) {
            // Previously e.fillInStackTrace() was called here, which only
            // rewrites the trace and reports nothing, so failures were invisible.
            logger.error("知乎话题采集出现异常", e);
        }
        // Short pause before the next round (also reached after a failed round).
        ZhiWeiTools.sleep(50);
    }
}
private void getHotList() {
logger.info("知乎话题采集开始...,当前线程名字:{}", Thread.currentThread().getName());
List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList();
// List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
// list.addAll(mobilelist);
List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
list.addAll(mobilelist);
logger.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
for(HotSearchList zhihuHotSearch : list){
DBObject zhihu = new BasicDBObject();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment