Commit 64440cea by zhiwei

修复知乎及抖音榜单采集中断问题

parent 136bcddb
...@@ -65,7 +65,7 @@ public class ZhihuHotSearchCrawler { ...@@ -65,7 +65,7 @@ public class ZhihuHotSearchCrawler {
} }
} }
} catch (IOException e) { } catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace()); logger.debug("获取知乎热搜时出现问题:{}", e);
return list; return list;
} }
return list; return list;
...@@ -81,7 +81,7 @@ public class ZhihuHotSearchCrawler { ...@@ -81,7 +81,7 @@ public class ZhihuHotSearchCrawler {
* @return List<ZhihuHotSearch> 返回类型 * @return List<ZhihuHotSearch> 返回类型
*/ */
public static List<HotSearchList> getMobileZhihuHotList(){ public static List<HotSearchList> getMobileZhihuHotList(){
List<HotSearchList> list = null; List<HotSearchList> list = new ArrayList<>();;
String url = "https://api.zhihu.com/topstory/hot-list?limit=40&reverse_order=0"; String url = "https://api.zhihu.com/topstory/hot-list?limit=40&reverse_order=0";
Map<String,String> headerMap = HeaderTool.getCommonHead(); Map<String,String> headerMap = HeaderTool.getCommonHead();
headerMap.put("Host", "api.zhihu.com"); headerMap.put("Host", "api.zhihu.com");
...@@ -90,28 +90,24 @@ public class ZhihuHotSearchCrawler { ...@@ -90,28 +90,24 @@ public class ZhihuHotSearchCrawler {
headerMap.put("X-UDID", "AFAC3hv3vgyPTt9ZmNmqTm0yv_8NKY3S3z8="); headerMap.put("X-UDID", "AFAC3hv3vgyPTt9ZmNmqTm0yv_8NKY3S3z8=");
headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20"); headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20");
for(int j=0;j<3;j++){ try {
try { String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), ProxyHolder.NAT_HEAVY_PROXY).body().string();
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), ProxyHolder.NAT_HEAVY_PROXY).body().string(); if(htmlBody != null && htmlBody.contains("author")){
if(htmlBody != null && htmlBody.contains("author")){ JSONObject topSearch = JSONObject.parseObject(htmlBody);
list = new ArrayList<>(); JSONArray words = topSearch.getJSONArray("data");
JSONObject topSearch = JSONObject.parseObject(htmlBody); String link = null;
JSONArray words = topSearch.getJSONArray("data"); String displayQuery = null;
String link = null; for (int i = 0; i < words.size(); i++) {
String displayQuery = null; JSONObject word = words.getJSONObject(i).getJSONObject("target");
for (int i = 0; i < words.size(); i++) { displayQuery = word.getString("title");
JSONObject word = words.getJSONObject(i).getJSONObject("target"); link = "https://www.zhihu.com/question/"+word.getLongValue("id");
displayQuery = word.getString("title"); HotSearchList zhihu = new HotSearchList(link, displayQuery, null, i, HotSearchType.知乎热搜.name());
link = "https://www.zhihu.com/question/"+word.getLongValue("id"); list.add(zhihu);
HotSearchList zhihu = new HotSearchList(link, displayQuery, null, i, HotSearchType.知乎热搜.name());
list.add(zhihu);
}
break;
} }
} catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace());
return list;
} }
} catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e);
return list;
} }
return list; return list;
} }
......
...@@ -38,26 +38,18 @@ public class HotSearchListDAO extends MongoDBTemplate{ ...@@ -38,26 +38,18 @@ public class HotSearchListDAO extends MongoDBTemplate{
* @param list * @param list
*/ */
public void addHotSearchList(List<DBObject> list){ public void addHotSearchList(List<DBObject> list){
for(int i=0; i<3; i++){ try {
try { this.getReadColl().insert(list);
this.getReadColl().insert(list); } catch (Exception e) {
ZhiWeiTools.sleep(200); logger.error("存储数据时出错,错误为:{}", e);
break;
} catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e);
}
} }
} }
public void addHotSearch(DBObject doc){ public void addHotSearch(DBObject doc){
for(int i=0; i<3; i++){ try {
try { this.getReadColl().insert(doc);
this.getReadColl().save(doc); } catch (Exception e) {
ZhiWeiTools.sleep(200); logger.error("存储数据时出错,错误为:{}", e);
break;
} catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e);
}
} }
} }
......
package com.zhiwei.searchhotcrawler.run; package com.zhiwei.searchhotcrawler.run;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import com.zhiwei.common.config.GroupType; import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.searchhotcrawler.cache.CacheListener; import com.zhiwei.searchhotcrawler.cache.CacheListener;
...@@ -21,19 +17,18 @@ import com.zhiwei.tools.tools.ZhiWeiTools; ...@@ -21,19 +17,18 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
public class HotSearchRun { public class HotSearchRun {
private ScheduledExecutorService scheduExec; // private ScheduledExecutorService scheduExec;
//
public HotSearchRun() { // public HotSearchRun() {
this.scheduExec = Executors.newScheduledThreadPool(10); // this.scheduExec = Executors.newScheduledThreadPool(5);
} // }
// public void showTimer() {
public void showTimer() { // scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES); // scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 10 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 1 , TimeUnit.MINUTES); // scheduExec.scheduleAtFixedRate(new BaiduHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new BaiduHotSearchRun(), 0, 5 , TimeUnit.MINUTES); // scheduExec.scheduleAtFixedRate(new SougoHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new SougoHotSearchRun(), 0, 5 , TimeUnit.MINUTES); // scheduExec.scheduleAtFixedRate(new DouyinHotSearchRun(), 0, 10 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new DouyinHotSearchRun(), 0, 10 , TimeUnit.MINUTES); // }
}
public static void main(String[] args) { public static void main(String[] args) {
...@@ -41,8 +36,14 @@ public class HotSearchRun { ...@@ -41,8 +36,14 @@ public class HotSearchRun {
new UpdateWechatUserRun().start(); new UpdateWechatUserRun().start();
ZhiWeiTools.sleep(10000); ZhiWeiTools.sleep(10000);
new HotSearchRun().showTimer();
new CacheListener().startListen(); new CacheListener().startListen();
//采集程序启动
new WeiboHotSearchRun().start();
new BaiduHotSearchRun().start();
new SougoHotSearchRun().start();
new DouyinHotSearchRun().start();
new ZhihuHotSearchRun().start();
//推送程序启动
new SendWeiboHotSearchRun().start(); new SendWeiboHotSearchRun().start();
new SendZhihuHotSearchRun().start(); new SendZhihuHotSearchRun().start();
} }
......
...@@ -4,6 +4,7 @@ import java.util.ArrayList; ...@@ -4,6 +4,7 @@ import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -14,6 +15,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList; ...@@ -14,6 +15,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class BaiduHotSearchRun extends Thread{ public class BaiduHotSearchRun extends Thread{
...@@ -22,6 +24,20 @@ public class BaiduHotSearchRun extends Thread{ ...@@ -22,6 +24,20 @@ public class BaiduHotSearchRun extends Thread{
@Override @Override
public void run() { public void run() {
boolean f = true;
while(f) {
try {
getHotList();
TimeUnit.MINUTES.sleep(5);
} catch (Exception e) {
e.fillInStackTrace();
}
ZhiWeiTools.sleep(50);
}
}
private void getHotList() {
logger.info("百度风云榜采集开始........"); logger.info("百度风云榜采集开始........");
List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch(); List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch();
logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
......
...@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer; ...@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -10,17 +11,36 @@ import org.slf4j.LoggerFactory; ...@@ -10,17 +11,36 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class DouyinHotSearchRun extends Thread{ public class DouyinHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class);
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Override @Override
public void run() { public void run() {
boolean f = true;
while(f) {
try {
getHotList();
TimeUnit.MINUTES.sleep(10);
} catch (Exception e) {
e.fillInStackTrace();
}
ZhiWeiTools.sleep(50);
}
}
/**
 * Runs one collection pass for the Douyin hot-search list.
 */
private void getHotList() {
logger.info("抖音热搜榜采集开始........"); logger.info("抖音热搜榜采集开始........");
List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList(); List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
......
...@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer; ...@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -10,9 +11,9 @@ import org.slf4j.LoggerFactory; ...@@ -10,9 +11,9 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class SougoHotSearchRun extends Thread { public class SougoHotSearchRun extends Thread {
private static Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class);
...@@ -21,6 +22,20 @@ public class SougoHotSearchRun extends Thread { ...@@ -21,6 +22,20 @@ public class SougoHotSearchRun extends Thread {
@Override @Override
public void run() { public void run() {
boolean f = true;
while(f) {
try {
getHotList();
TimeUnit.MINUTES.sleep(5);
} catch (Exception e) {
e.fillInStackTrace();
}
ZhiWeiTools.sleep(50);
}
}
private void getHotList() {
logger.info("搜狗微信采集开始........"); logger.info("搜狗微信采集开始........");
List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch(); List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch();
logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
......
...@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer; ...@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -13,6 +14,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList; ...@@ -13,6 +14,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class WeiboHotSearchRun extends Thread{ public class WeiboHotSearchRun extends Thread{
...@@ -21,6 +23,20 @@ public class WeiboHotSearchRun extends Thread{ ...@@ -21,6 +23,20 @@ public class WeiboHotSearchRun extends Thread{
@Override @Override
public void run() { public void run() {
boolean f = true;
while(f) {
try {
getHotList();
TimeUnit.MINUTES.sleep(1);
} catch (Exception e) {
e.fillInStackTrace();
}
ZhiWeiTools.sleep(50);
}
}
private void getHotList() {
logger.info("微博话题采集开始........"); logger.info("微博话题采集开始........");
List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch(); List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
......
...@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.timer; ...@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.timer;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -12,6 +13,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList; ...@@ -12,6 +13,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class ZhihuHotSearchRun extends Thread{ public class ZhihuHotSearchRun extends Thread{
...@@ -20,10 +22,25 @@ public class ZhihuHotSearchRun extends Thread{ ...@@ -20,10 +22,25 @@ public class ZhihuHotSearchRun extends Thread{
@Override @Override
public void run() { public void run() {
logger.info("知乎话题采集开始........"); boolean f = true;
while(f) {
try {
getHotList();
TimeUnit.MINUTES.sleep(10);
} catch (Exception e) {
e.fillInStackTrace();
}
ZhiWeiTools.sleep(50);
}
}
private void getHotList() {
logger.info("知乎话题采集开始...,当前线程名字:{}", Thread.currentThread().getName());
List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList(); List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList();
// List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList(); List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
// list.addAll(mobilelist); list.addAll(mobilelist);
logger.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
for(HotSearchList zhihuHotSearch : list){ for(HotSearchList zhihuHotSearch : list){
DBObject zhihu = new BasicDBObject(); DBObject zhihu = new BasicDBObject();
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment