Commit 95f06f34 by 马黎滨

Merge branch 'mlbWork' into 'master'

Mlb work

See merge request !27
parents a9573994 1c4d8472
......@@ -11,7 +11,6 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<spring.version>4.2.2.RELEASE</spring.version>
<quartz.version>2.2.3</quartz.version>
</properties>
<developers>
......@@ -57,16 +56,16 @@
<artifactId>crawler-core</artifactId>
<version>0.6.0.4-RELEASE</version>
</dependency>
<dependency>
<groupId>org.quartz-scheduler</groupId>
<artifactId>quartz</artifactId>
<version>${quartz.version}</version>
</dependency>
<dependency>
<groupId>org.quartz-scheduler</groupId>
<artifactId>quartz-jobs</artifactId>
<version>${quartz.version}</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.quartz-scheduler</groupId>-->
<!-- <artifactId>quartz</artifactId>-->
<!-- <version>${quartz.version}</version>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>org.quartz-scheduler</groupId>-->
<!-- <artifactId>quartz-jobs</artifactId>-->
<!-- <version>${quartz.version}</version>-->
<!-- </dependency>-->
<!-- Spring文件配置 -->
<dependency>
<groupId>org.springframework</groupId>
......
......@@ -46,18 +46,18 @@ public class HotSearchRun {
//采集程序启动
// new WeiboHotSearchRun().start();
// new BaiduHotSearchRun().start();
//// new SougoHotSearchRun().start();
// new DouyinHotSearchRun().start();
//// new ZhihuHotSearchRun().start();
// new WeiboSuperTopicRun().start();
// new WeiboTopicRun().start();
//// new ToutiaoHotSearchRun().start();
//// new ZhihuTopSearchRun().start();
// new ZhihuChildHotSearchRun().start();
// new ThreadOneRun().start();
//// //抖音链接更新
// new DouYinUrlHotSearchRun().start();
new WeiboHotSearchRun().start();
new BaiduHotSearchRun().start();
// new SougoHotSearchRun().start();
new DouyinHotSearchRun().start();
// new ZhihuHotSearchRun().start();
new WeiboSuperTopicRun().start();
new WeiboTopicRun().start();
// new ToutiaoHotSearchRun().start();
// new ZhihuTopSearchRun().start();
new ZhihuChildHotSearchRun().start();
new ThreadOneRun().start();
// //抖音链接更新
new DouYinUrlHotSearchRun().start();
}
}
......@@ -42,32 +42,32 @@ public class BaiduHotSearchRun extends Thread{
private void getHotList() {
    /*
     * Runs three collection passes back to back: the Baidu hot list, the Sogou WeChat
     * hot search and the Zhihu mobile hot list. Every result (including a null one) is
     * handed to TipsUtils.addHotList under its source label, and ZhiWeiTools.sleep(2000L)
     * is called between passes (presumably a two second pause -- confirm the unit).
     * A previous inline DAO/cache persistence path for the Baidu result had been
     * commented out; that dead code is not reproduced here.
     */

    // Pass 1: Baidu hot list.
    log.info("百度风云榜采集开始........");
    final List<HotSearchList> baiduHits = BaiDuHotSearchCrawler.baiduHotSearch(new Date());
    final int baiduCount = (baiduHits == null) ? 0 : baiduHits.size();
    log.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(baiduCount));
    TipsUtils.addHotList("百度热搜", baiduHits);
    log.info("百度风云榜采集结束........");

    ZhiWeiTools.sleep(2000L);

    // Pass 2: Sogou WeChat hot search.
    log.info("搜狗微信采集开始........");
    final List<HotSearchList> sogouHits = SougoHotSearchCrawler.sougoHotSearch(new Date());
    final int sogouCount = (sogouHits == null) ? 0 : sogouHits.size();
    log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(sogouCount));
    TipsUtils.addHotList("搜狗微信热搜", sogouHits);
    log.info("搜狗微信采集结束........");

    ZhiWeiTools.sleep(2000L);

    // Pass 3: Zhihu mobile hot list.
    log.info("知乎话题采集开始........");
    final List<HotSearchList> zhihuHits = ZhihuHotSearchCrawler.getMobileZhihuHotList(new Date());
    final int zhihuCount = (zhihuHits == null) ? 0 : zhihuHits.size();
    log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuCount));
    TipsUtils.addHotList("知乎热搜", zhihuHits);
    log.info("知乎话题采集结束........");
}
}
\ No newline at end of file
......@@ -38,24 +38,24 @@ public class DouYinUrlHotSearchRun extends Thread {
* @return void
*/
private void getUrlList() {
    // Refreshes the cached Douyin video URL for every entry of the most recent Douyin
    // hot list, which is read from the shared DouyinHotSearchRun.list field.
    log.info("抖音链接更新开始........");
    HotSearchCacheDAO cacheDao = new HotSearchCacheDAO();
    List<HotSearchList> hotList = DouyinHotSearchRun.list;

    // Nothing collected yet (or the last Douyin crawl failed): report it and stop.
    if (hotList == null || hotList.isEmpty()) {
        log.info("抖音链接更新失败,获取抖音数据为空");
        return;
    }

    for (int idx = 0; idx < hotList.size(); idx++) {
        HotSearchList entry = hotList.get(idx);
        String hotWord = entry.getName();
        // Cache key is "<name>_<type>".
        String cacheId = hotWord + "_" + entry.getType();
        // NOTE(review): the hot word is appended to the query string as-is; confirm that
        // getDouyinUrl takes care of URL-encoding non-ASCII keywords.
        String videoUrl = DouyinHotSearchCrawler.getDouyinUrl(
                "https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword=" + hotWord);
        if (videoUrl == null) {
            // No URL resolved for this hot word; leave the cached record untouched.
            continue;
        }
        Document update = new Document();
        update.put("id", cacheId);
        update.put("url", videoUrl);
        cacheDao.updateDouyinUrl(update);
    }
    log.info("抖音链接更新结束........");
}
}
......@@ -48,23 +48,23 @@ public class DouyinHotSearchRun extends Thread{
* @return void
*/
private void getHotList() {
    // Three collection passes in sequence: Douyin hot search, Toutiao hot search and the
    // Zhihu hot-search board. Each result (null included) goes to TipsUtils.addHotList,
    // with ZhiWeiTools.sleep(3000L) called between passes.

    // Pass 1: Douyin. The result is stored in the class-level "list" field, which
    // DouYinUrlHotSearchRun reads as DouyinHotSearchRun.list.
    log.info("抖音热搜榜采集开始........");
    list = DouyinHotSearchCrawler.getMobileDouyinHotList(new Date());
    final int douyinCount = (list == null) ? 0 : list.size();
    log.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(douyinCount));
    TipsUtils.addHotList("抖音热搜", list);
    log.info("抖音热搜榜采集结束........");

    ZhiWeiTools.sleep(3000L);

    // Pass 2: Toutiao.
    log.info("今日头条热搜采集开始........");
    final List<HotSearchList> toutiaoHits = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(new Date());
    final int toutiaoCount = (toutiaoHits == null) ? 0 : toutiaoHits.size();
    log.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), Integer.valueOf(toutiaoCount));
    TipsUtils.addHotList(HotSearchType.今日头条热搜.name(), toutiaoHits);
    log.info("今日头条热搜采集结束........");

    ZhiWeiTools.sleep(3000L);

    // Pass 3: Zhihu hot-search board.
    log.info("知乎热搜榜单采集开始...");
    final List<HotSearchList> zhihuHits = ZhihuTopicSearchCrawler.getZhihuTopicSearch(new Date());
    final int zhihuCount = (zhihuHits == null) ? 0 : zhihuHits.size();
    log.info("{}, 知乎热搜榜单此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuCount));
    TipsUtils.addHotList(HotSearchType.知乎热搜榜单.name(), zhihuHits);
    log.info("知乎热搜榜单采集结束........");
}
}
......@@ -31,22 +31,22 @@ public class ThreadOneRun extends Thread {
}
/**
 * Collects six news hot lists one after another (Tencent, Sina hot search, Sohu topics,
 * Sina hot spots and two Fenghuang lists) and hands each result to
 * {@code TipsUtils.addHotList} under its source label, calling
 * {@code ZhiWeiTools.sleep(1500L)} between consecutive sources.
 */
private void getHotList() {
    // Tencent news
    TipsUtils.addHotList("腾讯新闻", TengXunCrawler.getTengXunHotList(new Date()));
    ZhiWeiTools.sleep(1500L);

    // Sina hot search
    TipsUtils.addHotList("新浪热榜", XinLangHotSearchCrawler.getXinLangHotSearch(new Date()));
    ZhiWeiTools.sleep(1500L);

    // Sohu topics
    TipsUtils.addHotList("搜狐话题", SouhuTopicCrawler.getSouhuTopic(new Date()));
    ZhiWeiTools.sleep(1500L);

    // Sina hot spots
    TipsUtils.addHotList("新浪热点", XinLangHotSearchCrawler.getXinLangHotSpot(new Date()));
    ZhiWeiTools.sleep(1500L);

    // Fenghuang lists.
    // NOTE(review): getFengHuangHotData feeds the "凤凰新闻热榜" label here, whereas the
    // previously commented-out version fed it from getFengHuangHotList and used
    // getFengHuangHotData for "凤凰新闻热搜" -- confirm the crawler methods were renamed
    // rather than the labels swapped.
    TipsUtils.addHotList("凤凰新闻热榜", FengHuangSearchCrawler.getFengHuangHotData(new Date()));
    ZhiWeiTools.sleep(1500L);
    TipsUtils.addHotList("凤凰新闻热搜", FengHuangSearchCrawler.getFengHuangHotSearch(new Date()));
}
}
......@@ -34,17 +34,17 @@ public class WeiboHotSearchRun extends Thread{
private void getHotList() {
    // One Weibo hot-search round: crawl, log the hit count, then either persist the
    // hits and clear the alert, or raise the alert when nothing came back.
    HotSearchListDAO listDao = new HotSearchListDAO();
    HotSearchCacheDAO cacheDao = new HotSearchCacheDAO();

    List<HotSearchList> hits = WeiboHotSearchCrawler.weiboHotSearchByPhone(new Date());
    int hitCount = (hits == null) ? 0 : hits.size();
    log.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(hitCount));

    if (hits != null && !hits.isEmpty()) {
        // The cache DAO turns the hits into documents, which are then written to the list store.
        List<Document> docs = cacheDao.addData(hits);
        listDao.addHotSearchList(docs);
        TipsUtils.recoveryTips("微博热搜", new Date());
    } else {
        // Null or empty result: report the failed round.
        TipsUtils.sendTips("微博热搜", new Date());
    }
}
}
......@@ -32,28 +32,28 @@ public class WeiboSuperTopicRun extends Thread{
private void getTopicList() {
    /*
     * One Weibo super-topic round: crawl the ranking, convert every topic into a
     * Document and pass the batch to WeiboSuperTopicDAO.addTopicList.
     *
     * Fixes over the previous body:
     *  - a null crawler result no longer throws a NullPointerException in the
     *    for-each loop; it is treated like an empty result, matching the count log
     *    line which already allowed for null;
     *  - the closing log line now says "super topic" (微博超话) like the opening one,
     *    instead of the "topic" (微博话题) wording copied from WeiboTopicRun.
     */
    WeiboSuperTopicDAO weiboTopicDAO = new WeiboSuperTopicDAO();
    log.info("微博超话采集开始........");
    List<WeiboSuperTopic> list = WeiboSuperTopicCrawler.startCrawler();
    log.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));

    List<Document> data = new ArrayList<>();
    if (list != null) {
        for (WeiboSuperTopic topic : list) {
            log.info("topic::::{}", topic);
            Document doc = new Document();
            doc.put("_id", topic.getId());
            doc.put("name", topic.getTopicName());
            doc.put("rank", topic.getRank());
            doc.put("score_num", topic.getScore());
            doc.put("fensi_num", topic.getFensi());
            doc.put("post_num", topic.getPostNum());
            doc.put("type", topic.getType());
            doc.put("day", topic.getDay());
            doc.put("time", topic.getTime());
            doc.put("url", topic.getUrl());
            data.add(doc);
        }
    }
    // Same call as before for an empty crawl result: the DAO receives an empty batch.
    weiboTopicDAO.addTopicList(data);
    log.info("微博超话采集结束........");
}
}
......@@ -33,35 +33,35 @@ public class WeiboTopicRun extends Thread{
private void getTopicList() {
    /*
     * One Weibo topic round: crawl the topic list, raise or clear the "微博话题" alert
     * depending on whether anything came back, upsert every topic into the cache and
     * pass the whole batch to HotSearchListDAO.addHotSearchList.
     *
     * Fix over the previous body: a null crawler result was explicitly handled for
     * the alert but then dereferenced by the for-each loop, so the method died with a
     * NullPointerException right after sending the alert. Null is now treated like an
     * empty result.
     */
    HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO();
    HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
    log.info("微博话题采集开始........");
    List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone(new Date());
    log.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));

    if (list == null || list.isEmpty()) {
        TipsUtils.sendTips("微博话题", new Date());
    } else {
        TipsUtils.recoveryTips("微博话题", new Date());
    }

    List<Document> data = new ArrayList<>();
    if (list != null) {
        for (HotSearchList topic : list) {
            Document doc = new Document();
            doc.put("_id", topic.getId());
            doc.put("name", topic.getName());
            doc.put("url", topic.getUrl());
            doc.put("count", topic.getCount());
            doc.put("hot", topic.getHot());
            doc.put("day", topic.getDay());
            doc.put("time", topic.getTime());
            doc.put("rank", topic.getRank());
            doc.put("type", topic.getType());
            doc.put("topic_lead", topic.getTopicLead());
            doc.put("comment_count", topic.getCommentCount());
            data.add(doc);
            // Each topic is also written to the cache individually.
            hotSearchCacheDAO.addAndUpdateData(doc);
        }
    }
    // Same call as before for an empty crawl result: the DAO receives an empty batch.
    weiboHotSearchDAO.addHotSearchList(data);
    log.info("微博话题采集结束........");
}
}
......@@ -35,50 +35,50 @@ public class ZhihuChildHotSearchRun extends Thread {
}
/**
 * Collects the Zhihu hot list of every configured child category, then the two NetEase
 * lists. Child categories whose key has no display name (see {@code getTypeName}) are
 * skipped entirely.
 */
private void getHotList() {
    HotSearchListDAO listDao = new HotSearchListDAO();
    HotSearchCacheDAO cacheDao = new HotSearchCacheDAO();

    for (int idx = 0; idx < childType.size(); idx++) {
        String typeKey = childType.get(idx);
        String name = this.getTypeName(typeKey);
        if ("".equals(name)) {
            // Unknown category key: no crawl, no alert, no pause.
            continue;
        }

        log.info("知乎{}话题热榜采集开始...", name);
        List<HotSearchList> hits = ZhihuChildHotSearchCrawler.getZhihuTopicSearch(typeKey, name, new Date());
        int hitCount = (hits == null) ? 0 : hits.size();
        log.info("{}, 知乎{}话题此轮采集到的数据量为:{}", new Date(), name, Integer.valueOf(hitCount));

        if (hits != null && !hits.isEmpty()) {
            // Persist the hits and clear the alert for this category.
            List<Document> docs = cacheDao.addData(hits);
            listDao.addHotSearchList(docs);
            TipsUtils.recoveryTips("知乎热搜" + name + "分类", new Date());
        } else {
            // Null or empty result: raise the alert for this category.
            TipsUtils.sendTips("知乎热搜" + name + "分类", new Date());
        }

        log.info("知乎{}话题热榜采集结束...", name);
        ZhiWeiTools.sleep(3000);
    }

    // NetEase real-time hot list.
    ZhiWeiTools.sleep(3000L);
    TipsUtils.addHotList("网易热榜", WangYiHotSearchCrawler.getWangYiHotSearch(new Date()));

    // NetEase most-discussed comment threads.
    ZhiWeiTools.sleep(3000L);
    TipsUtils.addHotList("网易跟帖热议", WangYiHotSearchCrawler.getWangYicomment(new Date()));
}
// private String getTypeName(String type){
// String name;
// switch (type) {
// case "digital":
// name = "数码";
// break;
// case "focus":
// name = "国际";
// break;
// case "depth":
// name = "时事";
// break;
// default:
// name = "";
// }
// return name;
// }
/**
 * Maps a Zhihu child-category key to its Chinese display name.
 *
 * @param type category key; must not be null (switching on a null string throws
 *             a NullPointerException)
 * @return the display name for "digital", "focus" or "depth", or an empty string
 *         for any other key
 */
private String getTypeName(String type) {
    switch (type) {
        case "digital":
            return "数码";
        case "focus":
            return "国际";
        case "depth":
            return "时事";
        default:
            return "";
    }
}
}
......@@ -21,9 +21,9 @@ import java.util.ArrayList;
import java.util.Date;
import java.util.List;
@Component
@EnableScheduling
@EnableAsync
//@Component
//@EnableScheduling
//@EnableAsync
public class GatherTimer {
private Logger logger = LoggerFactory.getLogger(GatherTimer.class);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment