Commit fb89c7b3 by 马黎滨

采集程序添加预警

parent e67a2230
...@@ -50,6 +50,11 @@ ...@@ -50,6 +50,11 @@
<artifactId>lombok</artifactId> <artifactId>lombok</artifactId>
<version>1.18.8</version> <version>1.18.8</version>
</dependency> </dependency>
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.6.0.4-RELEASE</version>
</dependency>
</dependencies> </dependencies>
<build> <build>
......
...@@ -6,10 +6,9 @@ import java.util.Date; ...@@ -6,10 +6,9 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import com.mongodb.client.ListIndexesIterable; import com.mongodb.client.*;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.IndexOptions; import com.mongodb.client.model.IndexOptions;
import com.mongodb.client.model.Sorts;
import com.zhiwei.searchhotcrawler.config.DBConfig; import com.zhiwei.searchhotcrawler.config.DBConfig;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate; import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
...@@ -64,5 +63,20 @@ public class HotSearchListDAO{ ...@@ -64,5 +63,20 @@ public class HotSearchListDAO{
log.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
} }
public Date getLastTimeByType(String type){
try {
BasicDBObject basicDBObject = new BasicDBObject();
basicDBObject.put("type", type);
MongoCursor<Document> cursor = mongoCollection.find(basicDBObject).sort(
Sorts.orderBy(Sorts.descending("time"))).skip(0).limit(1).iterator();
while (cursor.hasNext()) {
return (Date) cursor.next().get("time");
}
}catch (Exception e){
log.error("查询数据时出错,错误为:{}",e);
}
return null;
}
} }
...@@ -7,6 +7,7 @@ import java.util.Objects; ...@@ -7,6 +7,7 @@ import java.util.Objects;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -47,6 +48,8 @@ public class BaiduHotSearchRun extends Thread{ ...@@ -47,6 +48,8 @@ public class BaiduHotSearchRun extends Thread{
if(Objects.nonNull(list) && !list.isEmpty()) { if(Objects.nonNull(list) && !list.isEmpty()) {
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
} else {
TipsUtils.sendTips("百度热搜",new Date());
} }
log.info("百度风云榜采集结束........"); log.info("百度风云榜采集结束........");
} }
......
...@@ -6,6 +6,7 @@ import java.util.List; ...@@ -6,6 +6,7 @@ import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -47,6 +48,9 @@ public class DouyinHotSearchRun extends Thread{ ...@@ -47,6 +48,9 @@ public class DouyinHotSearchRun extends Thread{
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO(); HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList(); List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
log.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("抖音热搜",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
log.info("抖音热搜榜采集结束........"); log.info("抖音热搜榜采集结束........");
......
...@@ -7,6 +7,7 @@ import java.util.concurrent.TimeUnit; ...@@ -7,6 +7,7 @@ import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -43,6 +44,9 @@ public class SougoHotSearchRun extends Thread { ...@@ -43,6 +44,9 @@ public class SougoHotSearchRun extends Thread {
log.info("搜狗微信采集开始........"); log.info("搜狗微信采集开始........");
List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch(); List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch();
log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("搜狗微信热搜",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
log.info("搜狗微信采集结束........"); log.info("搜狗微信采集结束........");
......
...@@ -5,6 +5,7 @@ import com.zhiwei.searchhotcrawler.crawler.ToutiaoHotSearchCrawler; ...@@ -5,6 +5,7 @@ import com.zhiwei.searchhotcrawler.crawler.ToutiaoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
...@@ -39,6 +40,9 @@ public class ToutiaoHotSearchRun extends Thread{ ...@@ -39,6 +40,9 @@ public class ToutiaoHotSearchRun extends Thread{
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO(); HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(); List<HotSearchList> list = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone();
log.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("今日头条热搜",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
log.info("今日头条热搜采集结束........"); log.info("今日头条热搜采集结束........");
......
...@@ -6,6 +6,7 @@ import java.util.List; ...@@ -6,6 +6,7 @@ import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
...@@ -37,6 +38,9 @@ public class WeiboHotSearchRun extends Thread{ ...@@ -37,6 +38,9 @@ public class WeiboHotSearchRun extends Thread{
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO(); HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone(); List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone();
log.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("微博热搜",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
} }
......
...@@ -4,6 +4,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList; ...@@ -4,6 +4,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.WeiboTopicCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboTopicCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
...@@ -37,6 +38,9 @@ public class WeiboTopicRun extends Thread{ ...@@ -37,6 +38,9 @@ public class WeiboTopicRun extends Thread{
log.info("微博话题采集开始........"); log.info("微博话题采集开始........");
List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone(); List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone();
log.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("微博话题",new Date());
}
List<Document> data = new ArrayList<>(); List<Document> data = new ArrayList<>();
for(HotSearchList topic : list){ for(HotSearchList topic : list){
Document doc = new Document(); Document doc = new Document();
......
...@@ -6,6 +6,7 @@ import java.util.List; ...@@ -6,6 +6,7 @@ import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -44,6 +45,9 @@ public class ZhihuHotSearchRun extends Thread{ ...@@ -44,6 +45,9 @@ public class ZhihuHotSearchRun extends Thread{
// List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList(); // List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList();
List<HotSearchList> list = ZhihuHotSearchCrawler.getMobileZhihuHotList(); List<HotSearchList> list = ZhihuHotSearchCrawler.getMobileZhihuHotList();
log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("知乎话题",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
log.info("知乎话题采集结束........"); log.info("知乎话题采集结束........");
......
...@@ -4,6 +4,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList; ...@@ -4,6 +4,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.ZhihuTopicSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.ZhihuTopicSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
...@@ -36,6 +37,9 @@ public class ZhihuTopSearchRun extends Thread { ...@@ -36,6 +37,9 @@ public class ZhihuTopSearchRun extends Thread {
log.info("知乎热搜采集开始...,当前线程名字:{}", Thread.currentThread().getName()); log.info("知乎热搜采集开始...,当前线程名字:{}", Thread.currentThread().getName());
List<HotSearchList> list = ZhihuTopicSearchCrawler.getZhihuTopicSearch(); List<HotSearchList> list = ZhihuTopicSearchCrawler.getZhihuTopicSearch();
log.info("{}, 知乎热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 知乎热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("知乎热搜榜单",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
log.info("知乎热搜话题采集结束........"); log.info("知乎热搜话题采集结束........");
......
package com.zhiwei.searchhotcrawler.util;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import okhttp3.MediaType;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Map;
/**
* http请求的工具类
*/
public final class HttpClientUtils {
private static final Logger LOGGER = LogManager.getLogger(HttpClientUtils.class);
private static final String NAME_VALUE_SEPARATOR = "=";
private static final String QUERY_PARAM_SEP = "&";
private static final String URL_QUERY_PARAM_SEPARATOR = "?";
private static final HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(2).build();
public static String sendPost(String url, String jsonParam){
return sendPost(url, jsonParam, null, Charset.forName("UTF-8"));
}
public static String sendPost(String url, String jsonParam, Map<String, String> headers, final Charset charset) {
if (StringUtils.isEmpty(url)) {
LOGGER.error("URL can not be empty or null.");
}
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Post Request:{}", url);
}
String result = null;
Request request= RequestUtils.wrapPost(url, headers, RequestBody.create(MediaType.get("application/json"),
jsonParam));
try(Response response = httpBoot.syncCall(request)) {
result = response.body().string();
}catch (IOException e) {
LOGGER.error("http connection error :" + e.getMessage(), e);
}
return result;
}
}
package com.zhiwei.searchhotcrawler.util;
import com.alibaba.fastjson.JSONObject;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* 企业微信机器人推送工具
*
* @ClassName: QYWechatUtil
* @author: 陈炜涛
* @date: 2019年7月17日 下午2:33:12
*
* @Copyright: 2019 www.zhiweidata.com
*/
public class QYWechatUtil {
/** 推送地址 **/
private static final String SEND_URL = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=";
/** markdown模式 **/
public static final String MSGTYPE_MARKDOWN = "markdown";
/** 文字 **/
public static final String MSGTYPE_TEXT = "text";
/** 图片,需另外封装 **/
public static final String MSGTYPE_IMAGE = "image";
/** 图文,需另外封装 **/
public static final String MSGTYPE_NEWS = "news";
/**
* @param key
* 发送预警的key 目标机器人
* @param content
* @param mentionedList
* '@'对象id集合
* @param mentionedMobileList
* 手机号码集合
* @return
* @return: String
* @throws @author:
* 陈炜涛
* @date: 2019年7月17日 下午2:56:40
*/
public static String send(String key, String msgtype, String content, List<String> mentionedList,
List<String> mentionedMobileList) {
msgtype = msgtype != null && !msgtype.isEmpty() ? msgtype : MSGTYPE_TEXT;
TextBody text = new TextBody(content, mentionedList, mentionedMobileList);
Map<String, Object> dataMap = new HashMap<>();
dataMap.put("msgtype", msgtype);
dataMap.put(msgtype, text);
return HttpClientUtils.sendPost(SEND_URL + key, JSONObject.toJSONString(dataMap));
}
}
/**
* 中转对象仅在此处使用
*
* @ClassName: Body
* @author: 陈炜涛
* @date: 2019年7月17日 下午2:50:19
*
* @Copyright: 2019 www.zhiweidata.com
*/
class TextBody {
/**
* 消息内容
*/
private String content;
/**
* 通知人id
*/
private List<String> mentionedList;
/**
* 通知人手机号
*/
private List<String> mentionedMobileList;
public TextBody() {
super();
}
public TextBody(String content, List<String> mentionedList, List<String> mentionedMobileList) {
super();
this.content = content;
this.mentionedList = mentionedList;
this.mentionedMobileList = mentionedMobileList;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public List<String> getMentionedList() {
return mentionedList;
}
public void setMentionedList(List<String> mentionedList) {
this.mentionedList = mentionedList;
}
public List<String> getMentionedMobileList() {
return mentionedMobileList;
}
public void setMentionedMobileList(List<String> mentionedMobileList) {
this.mentionedMobileList = mentionedMobileList;
}
@Override
public String toString() {
return "TextBody [content=" + content + ", mentionedList=" + mentionedList + ", mentionedMobileList="
+ mentionedMobileList + "]";
}
}
package com.zhiwei.searchhotcrawler.util;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Date;
/**
* 预警发送
*/
public class TipsUtils {
private static Long timeDifference = 5 * 60 * 1000L;
private static String key = "a8e26ce3-8aaa-4d3e-bcf6-30b81526050b";
private Logger logger = LoggerFactory.getLogger(TipsUtils.class);
//未采集到数据发送预警信息
public static void sendTips(String type, Date time){
//1.未采集到的程序触发
//2.获取数据库最后一条数据判断该程序几分钟没有采集到数据
//3.符合条件发送预警
HotSearchListDAO hotSearchListDAO = new HotSearchListDAO();
Date lastTime = hotSearchListDAO.getLastTimeByType(type);
if(time.getTime() - lastTime.getTime() > timeDifference){
String crawlerContent = String.format("%s已经连续%s分钟未采集到数据",type,(time.getTime() - lastTime.getTime())/1000/60);
QYWechatUtil.send(key, QYWechatUtil.MSGTYPE_TEXT, crawlerContent,
null, null);
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment