Commit 21bf95d3 by 马黎滨

Merge branch 'mlbWork' into 'master'

Mlb work

See merge request !17
parents cb69d3bb eb385cb2
...@@ -76,4 +76,33 @@ public class DouyinHotSearchCrawler { ...@@ -76,4 +76,33 @@ public class DouyinHotSearchCrawler {
return list; return list;
} }
/**
 * Requests a Douyin hot-search video list and returns the share link of the first video that has one.
 *
 * <p>The response body is parsed as a JSON object; the link is read from the {@code share_url}
 * field of the entries in its {@code aweme_list} array.
 *
 * @param url request URL of the video list to fetch
 * @return the first non-blank {@code share_url}, or {@code null} when the request fails, the body
 *         carries no {@code aweme_list} array, or no entry has a usable link
 */
public static String getDouyinUrl(String url){
    String htmlBody = null;
    Request request = RequestUtils.wrapGet(url);
    try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
        // Guard against a missing response/body instead of failing with a NullPointerException.
        if (response != null && response.body() != null) {
            htmlBody = response.body().string();
        }
    } catch (IOException e) {
        // The placeholder is filled with the URL; the exception goes last so its stack trace is logged.
        log.debug("获取抖音热搜榜链接时出现问题:{}", url, e);
    }
    if (!StringUtils.isNotBlank(htmlBody) || !htmlBody.contains("aweme_list")) {
        return null;
    }
    // The substring check above does not guarantee a top-level "aweme_list" key, so the array may be null.
    JSONArray videos = JSONObject.parseObject(htmlBody).getJSONArray("aweme_list");
    if (videos == null) {
        return null;
    }
    for (int i = 0; i < videos.size(); i++) {
        JSONObject video = videos.getJSONObject(i);
        if (video == null) {
            continue;
        }
        // Keep scanning past entries whose link is missing, null or blank.
        String shareUrl = video.getString("share_url");
        if (StringUtils.isNotBlank(shareUrl)) {
            return shareUrl;
        }
    }
    return null;
}
} }
...@@ -141,6 +141,20 @@ public class HotSearchCacheDAO { ...@@ -141,6 +141,20 @@ public class HotSearchCacheDAO {
} }
} }
/**
 * Updates the link of a cached Douyin hot-search record.
 *
 * <p>Only the {@code url} field of the record whose {@code _id} equals the given id is written.
 * When no such record exists, nothing is inserted.
 *
 * @param document carrier holding the target record id under {@code "id"} and the new link
 *                 under {@code "url"}; a {@code null} carrier or a missing id is ignored
 */
public void updateDouyinUrl(Document document){
    if (Objects.isNull(document)) {
        return;
    }
    String id = (String) document.get("id");
    if (Objects.isNull(id)) {
        return;
    }
    Document query = new Document("_id", id);
    // A single $set update replaces the former find + replaceOne round trip: it changes only
    // the url field, so fields written to the same record between the read and the write
    // are no longer overwritten with stale values.
    Document change = new Document("$set", new Document("url", document.get("url")));
    collection.updateOne(query, change);
}
/** /**
* 计算热搜时长 * 计算热搜时长
......
...@@ -45,14 +45,16 @@ public class HotSearchRun { ...@@ -45,14 +45,16 @@ public class HotSearchRun {
//采集程序启动 //采集程序启动
new WeiboHotSearchRun().start(); new WeiboHotSearchRun().start();
new BaiduHotSearchRun().start(); new BaiduHotSearchRun().start();
new SougoHotSearchRun().start(); // new SougoHotSearchRun().start();
new DouyinHotSearchRun().start(); new DouyinHotSearchRun().start();
new ZhihuHotSearchRun().start(); // new ZhihuHotSearchRun().start();
new WeiboSuperTopicRun().start(); new WeiboSuperTopicRun().start();
new WeiboTopicRun().start(); new WeiboTopicRun().start();
new ToutiaoHotSearchRun().start(); new ToutiaoHotSearchRun().start();
new ZhihuTopSearchRun().start(); new ZhihuTopSearchRun().start();
new ZhihuChildHotSearchRun().start(); new ZhihuChildHotSearchRun().start();
new ThreadOneRun().start(); new ThreadOneRun().start();
//抖音链接更新
new DouYinUrlHotSearchRun().start();
} }
} }
...@@ -6,6 +6,8 @@ import java.util.List; ...@@ -6,6 +6,8 @@ import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
...@@ -41,18 +43,30 @@ public class BaiduHotSearchRun extends Thread{ ...@@ -41,18 +43,30 @@ public class BaiduHotSearchRun extends Thread{
private void getHotList() { private void getHotList() {
log.info("百度风云榜采集开始........"); log.info("百度风云榜采集开始........");
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); // HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO(); // HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch(); List<HotSearchList> baiduList = BaiDuHotSearchCrawler.baiduHotSearch();
log.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(baiduList != null ? baiduList.size() : 0));
if(Objects.nonNull(list) && !list.isEmpty()) { // if(Objects.nonNull(list) && !list.isEmpty()) {
List<Document> data = hotSearchCacheDAO.addData(list); // List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); // hotSearchDAO.addHotSearchList(data);
TipsUtils.recoveryTips("百度热搜",new Date()); // TipsUtils.recoveryTips("百度热搜",new Date());
} else { // } else {
TipsUtils.sendTips("百度热搜",new Date()); // TipsUtils.sendTips("百度热搜",new Date());
} // }
TipsUtils.addHotList("百度热搜",baiduList);
log.info("百度风云榜采集结束........"); log.info("百度风云榜采集结束........");
ZhiWeiTools.sleep(2000L);
log.info("搜狗微信采集开始........");
List<HotSearchList> sougouList = SougoHotSearchCrawler.sougoHotSearch();
log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(sougouList != null ? sougouList.size() : 0));
TipsUtils.addHotList("搜狗微信热搜",sougouList);
log.info("搜狗微信采集结束........");
log.info("知乎话题采集开始........");
List<HotSearchList> zhihuList = ZhihuHotSearchCrawler.getMobileZhihuHotList();
log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuList != null ? zhihuList.size() : 0));
TipsUtils.addHotList("知乎热搜",zhihuList);
log.info("知乎话题采集结束........");
} }
} }
\ No newline at end of file
package com.zhiwei.searchhotcrawler.timer;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import org.apache.dubbo.common.utils.StringUtils;
import org.bson.Document;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
@Log4j2
public class DouYinUrlHotSearchRun extends Thread {
@Override
public void run() {
boolean f = true;
while (f) {
try {
getUrlList();
TimeUnit.MINUTES.sleep(5);
} catch (Exception e) {
e.fillInStackTrace();
ZhiWeiTools.sleep(60 * 60 * 1000);
}
ZhiWeiTools.sleep(50);
}
}
/**
* 获取热搜列表
* TODO
* @return void
*/
private void getUrlList() {
log.info("抖音链接更新开始........");
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = DouyinHotSearchRun.list;
if(list != null && list.size()>0) {
for (int i = 0; i < list.size(); i++) {
String name = list.get(i).getName();
String id = name+"_"+list.get(i).getType();
String url = DouyinHotSearchCrawler.getDouyinUrl("https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword="+name);
if(url != null) {
Document document = new Document();
document.put("id", id);
document.put("url", url);
hotSearchCacheDAO.updateDouyinUrl(document);
}
}
log.info("抖音链接更新结束........");
}else{
log.info("抖音链接更新失败,获取抖音数据为空");
}
}
}
...@@ -22,6 +22,8 @@ import com.zhiwei.tools.tools.ZhiWeiTools; ...@@ -22,6 +22,8 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2 @Log4j2
public class DouyinHotSearchRun extends Thread{ public class DouyinHotSearchRun extends Thread{
public static List<HotSearchList> list = new ArrayList<>();
@Override @Override
public void run() { public void run() {
boolean f = true; boolean f = true;
...@@ -46,7 +48,7 @@ public class DouyinHotSearchRun extends Thread{ ...@@ -46,7 +48,7 @@ public class DouyinHotSearchRun extends Thread{
log.info("抖音热搜榜采集开始........"); log.info("抖音热搜榜采集开始........");
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO(); HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList(); list = DouyinHotSearchCrawler.getMobileDouyinHotList();
log.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){ if(list == null || list.size() == 0){
TipsUtils.sendTips("抖音热搜",new Date()); TipsUtils.sendTips("抖音热搜",new Date());
......
...@@ -18,15 +18,13 @@ import java.util.concurrent.TimeUnit; ...@@ -18,15 +18,13 @@ import java.util.concurrent.TimeUnit;
@Log4j2 @Log4j2
public class ThreadOneRun extends Thread { public class ThreadOneRun extends Thread {
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
private HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
@Override @Override
public void run() { public void run() {
boolean f = true; boolean f = true;
while(f) { while(f) {
try { try {
getHotList(); getHotList();
TimeUnit.MINUTES.sleep(3); TimeUnit.MINUTES.sleep(1);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*1000); ZhiWeiTools.sleep(60*1000);
...@@ -54,6 +52,8 @@ public class ThreadOneRun extends Thread { ...@@ -54,6 +52,8 @@ public class ThreadOneRun extends Thread {
private void addHotList(String type, List<HotSearchList> list){ private void addHotList(String type, List<HotSearchList> list){
HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
if(list == null || list.size() == 0){ if(list == null || list.size() == 0){
TipsUtils.sendTips(type,new Date()); TipsUtils.sendTips(type,new Date());
} else { } else {
......
package com.zhiwei.searchhotcrawler.util; package com.zhiwei.searchhotcrawler.util;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import org.bson.Document;
import org.checkerframework.checker.units.qual.A; import org.checkerframework.checker.units.qual.A;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -30,8 +33,8 @@ public class TipsUtils { ...@@ -30,8 +33,8 @@ public class TipsUtils {
if (!typeTips.containsKey(type)) { if (!typeTips.containsKey(type)) {
//发送预警 //发送预警
String crawlerContent = String.format("%s数据采集异常", type); String crawlerContent = String.format("%s数据采集异常", type);
QYWechatUtil.send(key, QYWechatUtil.MSGTYPE_TEXT, crawlerContent, // QYWechatUtil.send(key, QYWechatUtil.MSGTYPE_TEXT, crawlerContent,
null, null); // null, null);
} }
typeTips.put(type, time); typeTips.put(type, time);
} }
...@@ -49,8 +52,8 @@ public class TipsUtils { ...@@ -49,8 +52,8 @@ public class TipsUtils {
typeTips.remove(type); typeTips.remove(type);
//发送恢复通知 //发送恢复通知
String crawlerContent = String.format("%s数据采集恢复正常", type); String crawlerContent = String.format("%s数据采集恢复正常", type);
QYWechatUtil.send(key, QYWechatUtil.MSGTYPE_TEXT, crawlerContent, // QYWechatUtil.send(key, QYWechatUtil.MSGTYPE_TEXT, crawlerContent,
null, null); // null, null);
} }
} }
} }
...@@ -74,4 +77,21 @@ public class TipsUtils { ...@@ -74,4 +77,21 @@ public class TipsUtils {
} }
return count; return count;
} }
/**
 * Stores one round of crawled hot-search entries and updates the alert state of the source.
 *
 * <p>An empty or missing list is reported through {@code sendTips}. Otherwise the entries are
 * passed to {@code HotSearchCacheDAO.addData}, the resulting documents are handed to
 * {@code HotSearchListDAO.addHotSearchList}, and {@code recoveryTips} is called for the source.
 *
 * @param type display name of the hot-search source
 * @param list entries crawled in this round; may be {@code null} or empty
 */
public static void addHotList(String type, List<HotSearchList> list){
    final HotSearchListDAO listDao = new HotSearchListDAO();
    final HotSearchCacheDAO cacheDao = new HotSearchCacheDAO();

    final boolean nothingCrawled = (list == null) || list.isEmpty();
    if (nothingCrawled) {
        // Nothing was collected in this round: report the source as failing.
        sendTips(type, new Date());
        return;
    }

    final List<Document> cached = cacheDao.addData(list);
    listDao.addHotSearchList(cached);
    recoveryTips(type, new Date());
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment