Commit 8df0ca9e by chenweitao

Merge branch 'mlbWork' into 'master'

头条采集入库后更新阅读量

See merge request !51
parents f8861322 54207549
...@@ -8,6 +8,7 @@ import com.zhiwei.crawler.proxy.ProxyHolder; ...@@ -8,6 +8,7 @@ import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -147,11 +148,12 @@ public class ToutiaoHotSearchCrawler { ...@@ -147,11 +148,12 @@ public class ToutiaoHotSearchCrawler {
// } // }
/** /**
* 获取今日头条热搜阅读量 * 更新今日头条热搜阅读量
* @param hotSearchList * @param hotSearchList
* @return * @return
*/ */
public static HotSearchList toutiaoReadCount(HotSearchList hotSearchList){ public static HotSearchList toutiaoReadCount(HotSearchList hotSearchList){
HotSearchListDAO hotSearchListDAO = new HotSearchListDAO();
if (hotSearchList.getUrl() != null) { if (hotSearchList.getUrl() != null) {
String htmlBody = null; String htmlBody = null;
String url = hotSearchList.getUrl(); String url = hotSearchList.getUrl();
...@@ -171,6 +173,7 @@ public class ToutiaoHotSearchCrawler { ...@@ -171,6 +173,7 @@ public class ToutiaoHotSearchCrawler {
Integer count = TipsUtils.getHotCount(readCount); Integer count = TipsUtils.getHotCount(readCount);
log.info("{},阅读量:{}", hotSearchList.getName(), count); log.info("{},阅读量:{}", hotSearchList.getName(), count);
hotSearchList.setCommentCount(count); hotSearchList.setCommentCount(count);
hotSearchListDAO.updateTouTiaoReadCount(hotSearchList);
return hotSearchList; return hotSearchList;
} }
} }
......
...@@ -48,9 +48,9 @@ public class HotSearchCacheDAO { ...@@ -48,9 +48,9 @@ public class HotSearchCacheDAO {
document.put("topic_lead", hotSearch.getTopicLead()); document.put("topic_lead", hotSearch.getTopicLead());
document.put("comment_count", hotSearch.getCommentCount()); document.put("comment_count", hotSearch.getCommentCount());
} }
if("今日头条热搜".equals(hotSearch.getType())){ // if("今日头条热搜".equals(hotSearch.getType())){
document.put("comment_count", hotSearch.getCommentCount()); // document.put("comment_count", hotSearch.getCommentCount());
} // }
if("腾讯较真榜".equals(hotSearch.getType())){ if("腾讯较真榜".equals(hotSearch.getType())){
document.put("topic_result",hotSearch.getTopicResult()); document.put("topic_result",hotSearch.getTopicResult());
} }
...@@ -82,7 +82,7 @@ public class HotSearchCacheDAO { ...@@ -82,7 +82,7 @@ public class HotSearchCacheDAO {
String topicResult = document.getString("topic_result")!=null?document.getString("topic_result"):null; String topicResult = document.getString("topic_result")!=null?document.getString("topic_result"):null;
String id = name + "_" + type; String id = name + "_" + type;
boolean recommend = false; boolean recommend = false;
Integer readCount = document.getInteger("comment_count"); // Integer readCount = document.getInteger("comment_count");
if("微博热搜".equals(type)){ if("微博热搜".equals(type)){
String icon = document.getString("icon"); String icon = document.getString("icon");
if("recom".equals(icon) || "jian".equals(icon)){ if("recom".equals(icon) || "jian".equals(icon)){
...@@ -128,9 +128,9 @@ public class HotSearchCacheDAO { ...@@ -128,9 +128,9 @@ public class HotSearchCacheDAO {
nowDoc.put("preCount", preCount); nowDoc.put("preCount", preCount);
nowDoc.put("duration", durationNow); nowDoc.put("duration", durationNow);
nowDoc.put("recommend",recommend); nowDoc.put("recommend",recommend);
if(readCount != null){ // if(readCount != null){
nowDoc.put("readCount",readCount); // nowDoc.put("readCount",readCount);
} // }
if(topicResult != null){ if(topicResult != null){
nowDoc.put("topicResult",topicResult); nowDoc.put("topicResult",topicResult);
} }
...@@ -154,9 +154,9 @@ public class HotSearchCacheDAO { ...@@ -154,9 +154,9 @@ public class HotSearchCacheDAO {
nowDoc.put("preRank", null); nowDoc.put("preRank", null);
nowDoc.put("preCount", null); nowDoc.put("preCount", null);
nowDoc.put("recommend",recommend); nowDoc.put("recommend",recommend);
if(readCount != null){ // if(readCount != null){
nowDoc.put("readCount",readCount); // nowDoc.put("readCount",readCount);
} // }
if(topicResult != null){ if(topicResult != null){
nowDoc.put("topicResult",topicResult); nowDoc.put("topicResult",topicResult);
} }
......
...@@ -78,5 +78,19 @@ public class HotSearchListDAO{ ...@@ -78,5 +78,19 @@ public class HotSearchListDAO{
} }
return null; return null;
} }
/**
 * Stores the read count of a Toutiao hot-search entry on its persisted document.
 *
 * <p>The document whose {@code _id} equals {@code hotSearchList.getId()} has its
 * {@code commentCount} field set to {@code hotSearchList.getCommentCount()}.
 * When no document matches, nothing is written (no upsert), which matches the
 * previous "only replace if found" behaviour.
 *
 * <p>The write is a single {@code $set} update rather than a find followed by a
 * whole-document replace: only the one field is touched, so fields written to the
 * same document by other writers between a read and a replace can no longer be
 * overwritten with stale values, and one database round trip is saved.
 *
 * @param hotSearchList entry supplying the target {@code _id} and the read count to store
 */
public void updateTouTiaoReadCount(HotSearchList hotSearchList){
    BasicDBObject filter = new BasicDBObject("_id", hotSearchList.getId());
    // $set touches only commentCount and leaves every other field of the document as-is.
    BasicDBObject update = new BasicDBObject("$set",
            new BasicDBObject("commentCount", hotSearchList.getCommentCount()));
    mongoCollection.updateOne(filter, update);
}
} }
...@@ -14,8 +14,6 @@ public class TouTiaoExecutor extends Thread { ...@@ -14,8 +14,6 @@ public class TouTiaoExecutor extends Thread {
private HotSearchList hotSearchList; private HotSearchList hotSearchList;
private static List<HotSearchList> resultList;
public TouTiaoExecutor(HotSearchList hotSearchList){ public TouTiaoExecutor(HotSearchList hotSearchList){
this.hotSearchList = hotSearchList; this.hotSearchList = hotSearchList;
} }
...@@ -23,8 +21,7 @@ public class TouTiaoExecutor extends Thread { ...@@ -23,8 +21,7 @@ public class TouTiaoExecutor extends Thread {
@Override @Override
public void run() { public void run() {
try { try {
hotSearchList = ToutiaoHotSearchCrawler.toutiaoReadCount(hotSearchList); hotSearchList = ToutiaoHotSearchCrawler.toutiaoReadCount(hotSearchList);
resultList.add(hotSearchList);
}catch (Exception e){ }catch (Exception e){
e.printStackTrace(); e.printStackTrace();
} }
...@@ -35,8 +32,7 @@ public class TouTiaoExecutor extends Thread { ...@@ -35,8 +32,7 @@ public class TouTiaoExecutor extends Thread {
* @param list * @param list
* @return * @return
*/ */
public static List<HotSearchList> countTouTiaoReadCount(List<HotSearchList> list){ public static void countTouTiaoReadCount(List<HotSearchList> list){
resultList= new ArrayList<>();
// ExecutorService service = Executors.newFixedThreadPool(list.size()); // ExecutorService service = Executors.newFixedThreadPool(list.size());
for(int i=0; i<list.size(); i++){ for(int i=0; i<list.size(); i++){
TipsUtils.service.execute(new TouTiaoExecutor(list.get(i))); TipsUtils.service.execute(new TouTiaoExecutor(list.get(i)));
...@@ -44,11 +40,10 @@ public class TouTiaoExecutor extends Thread { ...@@ -44,11 +40,10 @@ public class TouTiaoExecutor extends Thread {
// TipsUtils.service.shutdown(); // TipsUtils.service.shutdown();
try { try {
if(!TipsUtils.service.awaitTermination(1, TimeUnit.MINUTES)){ if(!TipsUtils.service.awaitTermination(1, TimeUnit.MINUTES)){
log.info("查询今日头条阅读量超时"); log.info("今日头条阅读量更新结束");
} }
} catch (InterruptedException e) { } catch (InterruptedException e) {
log.info(e.fillInStackTrace()); log.info(e.fillInStackTrace());
} }
return resultList;
} }
} }
...@@ -85,11 +85,11 @@ public class GatherTimer { ...@@ -85,11 +85,11 @@ public class GatherTimer {
logger.info("今日头条热搜开始采集..."); logger.info("今日头条热搜开始采集...");
Date date = DateUtils.getMillSecondTime(new Date()); Date date = DateUtils.getMillSecondTime(new Date());
List<HotSearchList> toutiaoList = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(date); List<HotSearchList> toutiaoList = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(date);
List<HotSearchList> toutiaoResult = new ArrayList<>(); logger.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), toutiaoList != null ? toutiaoList.size() : 0);
toutiaoResult = TouTiaoExecutor.countTouTiaoReadCount(toutiaoList); TipsUtils.addHotList(HotSearchType.今日头条热搜.name(),toutiaoList);
logger.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), toutiaoResult != null ? toutiaoResult.size() : 0);
TipsUtils.addHotList(HotSearchType.今日头条热搜.name(),toutiaoResult);
logger.info("今日头条热搜采集结束..."); logger.info("今日头条热搜采集结束...");
logger.info("今日头条热搜详情趋势阅读量更新...");
TouTiaoExecutor.countTouTiaoReadCount(toutiaoList);
} }
/** /**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment