Commit 811c679b by zhiwei

修改定时器启动方式

parent 89981f4d
package com.zhiwei.searchhotcrawler.crawler; package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.mail.SendMailWeibo; import com.zhiwei.searchhotcrawler.mail.SendMailWeibo;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
/** /**
* @ClassName: WeiboHotSearch * @ClassName: WeiboHotSearch
* @Description: TODO(微博实时热搜采集) * @Description: TODO(微博实时热搜采集)
* @author hero * @author hero
* @date 2017年9月15日 上午10:54:31 * @date 2017年9月15日 上午10:54:31
*/ */
public class WeiboHotSearchCrawler { public class WeiboHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchCrawler.class); private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: weiboHotSearchTest * @Title: weiboHotSearchTest
* @author hero * @author hero
* @Description: TODO(PC端微博热搜采集) * @Description: TODO(PC端微博热搜采集)
* @param 设定文件 * @param 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<HotSearchList> weiboHotSearch(){ public static List<HotSearchList> weiboHotSearch(){
String url = "https://s.weibo.com/top/summary?cate=realtimehot"; String url = "https://s.weibo.com/top/summary?cate=realtimehot";
List<HotSearchList> list = new ArrayList<HotSearchList>(); List<HotSearchList> list = new ArrayList<HotSearchList>();
for(int i =0; i<3; i++){ for(int i =0; i<3; i++){
String htmlBody = null; String htmlBody = null;
try { try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url),ProxyHolder.NAT_HEAVY_PROXY).body().string(); htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url),ProxyHolder.NAT_HEAVY_PROXY).body().string();
if(htmlBody!=null && htmlBody.contains("pl_top_realtimehot")){ if(htmlBody!=null && htmlBody.contains("pl_top_realtimehot")){
try { try {
// String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0]; // String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0];
// script = script.replace("(", "").replace(")", ""); // script = script.replace("(", "").replace(")", "");
// JSONObject json = JSONObject.parseObject(script); // JSONObject json = JSONObject.parseObject(script);
// String html = json.getString("html"); // String html = json.getString("html");
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
Elements elements = document.select("div#pl_top_realtimehot").select("tbody").select("tr"); Elements elements = document.select("div#pl_top_realtimehot").select("tbody").select("tr");
for(Element element : elements){ for(Element element : elements){
try { try {
String id = "http://s.weibo.com"+element.select("td.td-02").select("a").attr("href"); String id = "http://s.weibo.com"+element.select("td.td-02").select("a").attr("href");
String name = element.select("td.td-02").select("a").text(); String name = element.select("td.td-02").select("a").text();
String num = !element.select("td.td-02").select("span").text().equals("")?element.select("td.td-02").select("span").text():"0"; String num = !element.select("td.td-02").select("span").text().equals("")?element.select("td.td-02").select("span").text():"0";
String rank = !element.select("td[class=\"td-01 ranktop\"]").text().equals("")?element.select("td[class=\"td-01 ranktop\"]").text():"-1"; String rank = !element.select("td[class=\"td-01 ranktop\"]").text().equals("")?element.select("td[class=\"td-01 ranktop\"]").text():"-1";
int hotCount = Integer.valueOf(num); int hotCount = Integer.valueOf(num);
int rankCount = Integer.valueOf(rank); int rankCount = Integer.valueOf(rank);
HotSearchList hotSearch = new HotSearchList(id, name, hotCount,true, rankCount, HotSearchType.微博热搜.name()); HotSearchList hotSearch = new HotSearchList(id, name, hotCount,true, rankCount, HotSearchType.微博热搜.name());
list.add(hotSearch); list.add(hotSearch);
} catch (Exception e) { } catch (Exception e) {
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
logger.error("解析微博时时热搜时出现解析错误", e); logger.error("解析微博时时热搜时出现解析错误", e);
continue; continue;
} }
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析微博时时热搜时出现解析错误,数据不是json结构",e.fillInStackTrace()); logger.error("解析微博时时热搜时出现解析错误,数据不是json结构",e.fillInStackTrace());
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
return null; return null;
} }
}else{ }else{
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
logger.info("解析微博时时热搜时出现解析错误,页面结构有问题"); logger.info("解析微博时时热搜时出现解析错误,页面结构有问题");
} }
break; break;
} catch (Exception e) { } catch (Exception e) {
if(i==2){ if(i==2){
return list; return list;
}else{ }else{
continue; continue;
} }
} }
} }
return list; return list;
} }
/** /**
* @Title: weiboHotSearchByPhoneTest * @Title: weiboHotSearchByPhoneTest
* @author hero * @author hero
* @Description: TODO(手机端Iphone 微博热搜采集) * @Description: TODO(手机端Iphone 微博热搜采集)
* @param 设定文件 * @param 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<HotSearchList> weiboHotSearchByPhone(){ public static List<HotSearchList> weiboHotSearchByPhone(){
String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"; String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
Map<String,String> headerMap = new HashMap<>(); Map<String,String> headerMap = new HashMap<>();
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"); headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
String htmlBody; String htmlBody;
try { try {
List<HotSearchList> result = new ArrayList<HotSearchList>(); List<HotSearchList> result = new ArrayList<HotSearchList>();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string(); htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")){ if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")){
try { try {
JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data"); JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data");
JSONArray cards = json.getJSONArray("cards"); JSONArray cards = json.getJSONArray("cards");
int rank = 1; int rank = 1;
for(int i=0;i<cards.size();i++){ for(int i=0;i<cards.size();i++){
try { try {
JSONObject card = cards.getJSONObject(i); JSONObject card = cards.getJSONObject(i);
JSONArray cardGroup = card.getJSONArray("card_group"); JSONArray cardGroup = card.getJSONArray("card_group");
String title = card.getString("title"); String title = card.getString("title");
boolean hot = true; boolean hot = true;
if(title.contains("实时上升热点")){ if(title.contains("实时上升热点")){
hot = false; hot = false;
rank = 50; rank = 50;
} }
for(int j=0; j<cardGroup.size(); j++){ for(int j=0; j<cardGroup.size(); j++){
JSONObject cardInfo = cardGroup.getJSONObject(j); JSONObject cardInfo = cardGroup.getJSONObject(j);
String name = cardInfo.getString("desc"); String name = cardInfo.getString("desc");
int hotCount = cardInfo.getIntValue("desc_extr"); int hotCount = cardInfo.getIntValue("desc_extr");
String id = "http://s.weibo.com/weibo/"+URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top"; String id = "http://s.weibo.com/weibo/"+URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name()); HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name());
logger.info("采集到的数据:::{}", hotSearch); logger.info("采集到的数据:::{}", hotSearch);
result.add(hotSearch); result.add(hotSearch);
rank++; rank++;
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析微博时时热搜时出现解析错误",e.fillInStackTrace()); logger.error("解析微博时时热搜时出现解析错误",e);
continue; continue;
} }
} }
return result; return result;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析微博时时热搜时出现解析错误,数据不是json结构",e.fillInStackTrace()); logger.error("解析微博时时热搜时出现解析错误,数据不是json结构",e);
return Collections.emptyList(); return Collections.emptyList();
} }
}else{ }else{
logger.info("解析微博时时热搜时出现解析错误,页面结构有问题"); logger.info("解析微博时时热搜时出现解析错误,页面结构有问题");
} }
} catch (IOException e1) { } catch (IOException e1) {
logger.error("解析微博时时热搜时出现连接失败",e1.fillInStackTrace()); logger.error("解析微博时时热搜时出现连接失败",e1);
return Collections.emptyList(); return Collections.emptyList();
} }
return Collections.emptyList(); return Collections.emptyList();
} }
} }
package com.zhiwei.searchhotcrawler.run; package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.common.config.GroupType; import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.searchhotcrawler.cache.CacheListener; import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.config.ProxyConfig; import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun; import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.DouyinHotSearchRun; import com.zhiwei.searchhotcrawler.timer.DouyinHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun; import com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun; import com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SougoHotSearchRun; import com.zhiwei.searchhotcrawler.timer.SougoHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun; import com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun;
import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun; import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.WeiboTopicRun; import com.zhiwei.searchhotcrawler.timer.WeiboTopicRun;
import com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun; import com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import java.util.concurrent.Executors;
public class HotSearchRun { import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
public static void main(String[] args) {
ProxyFactory.init(ProxyConfig.registry, ProxyConfig.group, GroupType.PROVIDER, 10000013); public class HotSearchRun {
new UpdateWechatUserRun().start(); public static void main(String[] args) {
ZhiWeiTools.sleep(10000);
new CacheListener().startListen(); ProxyFactory.init(ProxyConfig.registry, ProxyConfig.group, GroupType.PROVIDER, 10000013);
//采集程序启动
new WeiboHotSearchRun().start(); new UpdateWechatUserRun().start();
new BaiduHotSearchRun().start(); ZhiWeiTools.sleep(10000);
new SougoHotSearchRun().start(); new CacheListener().startListen();
new DouyinHotSearchRun().start();
new ZhihuHotSearchRun().start(); ScheduledExecutorService scheduledThreadPool = Executors.newScheduledThreadPool(6);
new WeiboTopicRun().start();
//推送程序启动 scheduledThreadPool.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES);
new SendWeiboHotSearchRun().start();
new SendZhihuHotSearchRun().start(); scheduledThreadPool.scheduleAtFixedRate(new BaiduHotSearchRun(), 0, 5, TimeUnit.MINUTES);
}
} scheduledThreadPool.scheduleAtFixedRate(new SougoHotSearchRun(), 0, 5, TimeUnit.MINUTES);
scheduledThreadPool.scheduleAtFixedRate(new DouyinHotSearchRun(), 0, 10, TimeUnit.MINUTES);
scheduledThreadPool.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 10, TimeUnit.MINUTES);
scheduledThreadPool.scheduleAtFixedRate(new WeiboTopicRun(), 0, 1, TimeUnit.DAYS);
//采集程序启动
// new WeiboHotSearchRun().start();
// new BaiduHotSearchRun().start();
// new SougoHotSearchRun().start();
// new DouyinHotSearchRun().start();
// new ZhihuHotSearchRun().start();
// new WeiboTopicRun().start();
//推送程序启动
new SendWeiboHotSearchRun().start();
new SendZhihuHotSearchRun().start();
}
}
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class BaiduHotSearchRun extends Thread{ public class BaiduHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(BaiduHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(BaiduHotSearchRun.class);
@Override @Override
public void run() { public void run() {
boolean f = true; // boolean f = true;
while(f) { // while(f) {
try { try {
getHotList(); getHotList();
TimeUnit.MINUTES.sleep(5); // TimeUnit.MINUTES.sleep(5);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000); // ZhiWeiTools.sleep(60*60*1000);
} }
ZhiWeiTools.sleep(50); // ZhiWeiTools.sleep(50);
} // }
} }
private void getHotList() { private void getHotList() {
logger.info("百度风云榜采集开始........"); logger.info("百度风云榜采集开始........");
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch(); List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch();
logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> saveDataList = new ArrayList<>(); List<DBObject> saveDataList = new ArrayList<>();
if(Objects.nonNull(list) && !list.isEmpty()) { if(Objects.nonNull(list) && !list.isEmpty()) {
list.forEach(baiduHotSearch ->{ list.forEach(baiduHotSearch ->{
int changeCount = hotSearchDAO.getChangeCount(baiduHotSearch); int changeCount = hotSearchDAO.getChangeCount(baiduHotSearch);
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
doc.put("_id", baiduHotSearch.getId()); doc.put("_id", baiduHotSearch.getId());
doc.put("name", baiduHotSearch.getName()); doc.put("name", baiduHotSearch.getName());
doc.put("url", baiduHotSearch.getUrl()); doc.put("url", baiduHotSearch.getUrl());
doc.put("count", baiduHotSearch.getCount()); doc.put("count", baiduHotSearch.getCount());
doc.put("day", baiduHotSearch.getDay()); doc.put("day", baiduHotSearch.getDay());
doc.put("time", baiduHotSearch.getTime()); doc.put("time", baiduHotSearch.getTime());
doc.put("changeCount", changeCount); doc.put("changeCount", changeCount);
doc.put("rank", baiduHotSearch.getRank()); doc.put("rank", baiduHotSearch.getRank());
doc.put("type", baiduHotSearch.getType()); doc.put("type", baiduHotSearch.getType());
saveDataList.add(doc); saveDataList.add(doc);
}); });
} }
hotSearchDAO.addHotSearchList(saveDataList); hotSearchDAO.addHotSearchList(saveDataList);
logger.info("百度风云榜采集结束........"); logger.info("百度风云榜采集结束........");
} }
} }
\ No newline at end of file
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class DouyinHotSearchRun extends Thread{ public class DouyinHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class);
@Override @Override
public void run() { public void run() {
boolean f = true; // boolean f = true;
while(f) { // while(f) {
try { try {
getHotList(); getHotList();
TimeUnit.MINUTES.sleep(10); // TimeUnit.MINUTES.sleep(10);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000); // ZhiWeiTools.sleep(60*60*1000);
} }
ZhiWeiTools.sleep(50); // ZhiWeiTools.sleep(50);
} // }
} }
/** /**
* 获取热搜列表 * 获取热搜列表
* TODO * TODO
* @return void * @return void
*/ */
private void getHotList() { private void getHotList() {
logger.info("抖音热搜榜采集开始........"); logger.info("抖音热搜榜采集开始........");
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList(); List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<DBObject> data = new ArrayList<>();
for(HotSearchList douyinHotSearch : list){ for(HotSearchList douyinHotSearch : list){
int changeCount = hotSearchDAO.getChangeCount(douyinHotSearch); int changeCount = hotSearchDAO.getChangeCount(douyinHotSearch);
DBObject douyin = new BasicDBObject(); DBObject douyin = new BasicDBObject();
douyin.put("_id", douyinHotSearch.getId()); douyin.put("_id", douyinHotSearch.getId());
douyin.put("name", douyinHotSearch.getName()); douyin.put("name", douyinHotSearch.getName());
douyin.put("rank", douyinHotSearch.getRank()); douyin.put("rank", douyinHotSearch.getRank());
douyin.put("count", douyinHotSearch.getCount()); douyin.put("count", douyinHotSearch.getCount());
douyin.put("day", douyinHotSearch.getDay()); douyin.put("day", douyinHotSearch.getDay());
douyin.put("time", douyinHotSearch.getTime()); douyin.put("time", douyinHotSearch.getTime());
douyin.put("changeCount", changeCount); douyin.put("changeCount", changeCount);
douyin.put("url", null); douyin.put("url", null);
douyin.put("type", douyinHotSearch.getType()); douyin.put("type", douyinHotSearch.getType());
data.add(douyin); data.add(douyin);
hotSearchDAO.addHotSearch(douyin); hotSearchDAO.addHotSearch(douyin);
} }
logger.info("抖音热搜榜采集结束........"); logger.info("抖音热搜榜采集结束........");
} }
} }
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class SougoHotSearchRun extends Thread { public class SougoHotSearchRun extends Thread {
private static Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class);
@Override @Override
public void run() { public void run() {
boolean f = true; // boolean f = true;
while(f) { // while(f) {
try { try {
getHotList(); getHotList();
TimeUnit.MINUTES.sleep(5); // TimeUnit.MINUTES.sleep(5);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000); // ZhiWeiTools.sleep(60*60*1000);
} }
ZhiWeiTools.sleep(50); // ZhiWeiTools.sleep(50);
} // }
} }
private void getHotList() { private void getHotList() {
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
logger.info("搜狗微信采集开始........"); logger.info("搜狗微信采集开始........");
List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch(); List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch();
logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<DBObject> data = new ArrayList<>();
for(HotSearchList sougoHotSearch : list){ for(HotSearchList sougoHotSearch : list){
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
doc.put("_id", sougoHotSearch.getId()); doc.put("_id", sougoHotSearch.getId());
doc.put("name", sougoHotSearch.getName()); doc.put("name", sougoHotSearch.getName());
doc.put("url", sougoHotSearch.getUrl()); doc.put("url", sougoHotSearch.getUrl());
doc.put("day", sougoHotSearch.getDay()); doc.put("day", sougoHotSearch.getDay());
doc.put("time", sougoHotSearch.getTime()); doc.put("time", sougoHotSearch.getTime());
doc.put("rank", sougoHotSearch.getRank()); doc.put("rank", sougoHotSearch.getRank());
doc.put("type", sougoHotSearch.getType()); doc.put("type", sougoHotSearch.getType());
data.add(doc); data.add(doc);
} }
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
logger.info("搜狗微信采集结束........"); logger.info("搜狗微信采集结束........");
} }
} }
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class WeiboHotSearchRun extends Thread{ public class WeiboHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchRun.class);
@Override @Override
public void run() { public void run() {
boolean f = true; // boolean f = true;
while(f) { // while(f) {
try { try {
getHotList(); getHotList();
TimeUnit.MINUTES.sleep(1); // TimeUnit.MINUTES.sleep(1);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*1000); // ZhiWeiTools.sleep(60*1000);
} }
ZhiWeiTools.sleep(50); // ZhiWeiTools.sleep(50);
} // }
} }
private void getHotList() { private void getHotList() {
logger.info("微博话题采集开始........"); logger.info("微博话题采集开始........");
HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO(); HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO();
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch(); // List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone(); List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone();
logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<DBObject> data = new ArrayList<>();
for(HotSearchList weiboHotSearch : list){ for(HotSearchList weiboHotSearch : list){
int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch); int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch);
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
doc.put("_id", weiboHotSearch.getId()); doc.put("_id", weiboHotSearch.getId());
doc.put("name", weiboHotSearch.getName()); doc.put("name", weiboHotSearch.getName());
doc.put("url", weiboHotSearch.getUrl()); doc.put("url", weiboHotSearch.getUrl());
doc.put("count", weiboHotSearch.getCount()); doc.put("count", weiboHotSearch.getCount());
doc.put("hot", weiboHotSearch.isHot()); doc.put("hot", weiboHotSearch.isHot());
doc.put("day", weiboHotSearch.getDay()); doc.put("day", weiboHotSearch.getDay());
doc.put("time", weiboHotSearch.getTime()); doc.put("time", weiboHotSearch.getTime());
doc.put("changeCount", changeCount); doc.put("changeCount", changeCount);
doc.put("rank", weiboHotSearch.getRank()); doc.put("rank", weiboHotSearch.getRank());
doc.put("type", weiboHotSearch.getType()); doc.put("type", weiboHotSearch.getType());
data.add(doc); data.add(doc);
} }
weiboHotSearchDAO.addHotSearchList(data); weiboHotSearchDAO.addHotSearchList(data);
logger.info("微博话题采集结束........"); logger.info("微博话题采集结束........");
} }
} }
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.WeiboTopic; import com.zhiwei.searchhotcrawler.bean.WeiboTopic;
import com.zhiwei.searchhotcrawler.crawler.WeiboHuatiCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboHuatiCrawler;
import com.zhiwei.searchhotcrawler.dao.WeiboTopicDAO; import com.zhiwei.searchhotcrawler.dao.WeiboTopicDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class WeiboTopicRun extends Thread{ public class WeiboTopicRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(WeiboTopicRun.class); private static Logger logger = LoggerFactory.getLogger(WeiboTopicRun.class);
@Override @Override
public void run() { public void run() {
boolean f = true; // boolean f = true;
while(f) { // while(f) {
try { try {
getTopicList(); getTopicList();
TimeUnit.DAYS.sleep(1); // TimeUnit.DAYS.sleep(1);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000); // ZhiWeiTools.sleep(60*60*1000);
} }
ZhiWeiTools.sleep(50); // ZhiWeiTools.sleep(50);
} // }
} }
private void getTopicList() { private void getTopicList() {
WeiboTopicDAO weiboTopicDAO = new WeiboTopicDAO(); WeiboTopicDAO weiboTopicDAO = new WeiboTopicDAO();
logger.info("微博超话采集开始........"); logger.info("微博超话采集开始........");
List<WeiboTopic> list = WeiboHuatiCrawler.startCrawler(); List<WeiboTopic> list = WeiboHuatiCrawler.startCrawler();
logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<DBObject> data = new ArrayList<>();
for(WeiboTopic topic : list){ for(WeiboTopic topic : list){
logger.info("topic::::{}", topic); logger.info("topic::::{}", topic);
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
doc.put("_id", topic.getId()); doc.put("_id", topic.getId());
doc.put("name", topic.getTopicName()); doc.put("name", topic.getTopicName());
doc.put("rank", topic.getRank()); doc.put("rank", topic.getRank());
doc.put("score_num", topic.getScore()); doc.put("score_num", topic.getScore());
doc.put("fensi_num", topic.getFensi()); doc.put("fensi_num", topic.getFensi());
doc.put("post_num", topic.getPostNum()); doc.put("post_num", topic.getPostNum());
doc.put("type", topic.getType()); doc.put("type", topic.getType());
doc.put("day", topic.getDay()); doc.put("day", topic.getDay());
doc.put("time", topic.getTime()); doc.put("time", topic.getTime());
doc.put("url", topic.getUrl()); doc.put("url", topic.getUrl());
data.add(doc); data.add(doc);
} }
weiboTopicDAO.addTopicList(data); weiboTopicDAO.addTopicList(data);
logger.info("微博话题采集结束........"); logger.info("微博话题采集结束........");
} }
} }
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class ZhihuHotSearchRun extends Thread{ public class ZhihuHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchRun.class);
@Override @Override
public void run() { public void run() {
boolean f = true; // boolean f = true;
while(f) { // while(f) {
try { try {
getHotList(); getHotList();
TimeUnit.MINUTES.sleep(10); // TimeUnit.MINUTES.sleep(10);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000); // ZhiWeiTools.sleep(60*60*1000);
} }
ZhiWeiTools.sleep(50); // ZhiWeiTools.sleep(50);
} // }
} }
private void getHotList() { private void getHotList() {
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
logger.info("知乎话题采集开始...,当前线程名字:{}", Thread.currentThread().getName()); logger.info("知乎话题采集开始...,当前线程名字:{}", Thread.currentThread().getName());
List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList(); List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList();
List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList(); List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
list.addAll(mobilelist); list.addAll(mobilelist);
logger.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
for(HotSearchList zhihuHotSearch : list){ for(HotSearchList zhihuHotSearch : list){
DBObject zhihu = new BasicDBObject(); DBObject zhihu = new BasicDBObject();
zhihu.put("_id", zhihuHotSearch.getId()); zhihu.put("_id", zhihuHotSearch.getId());
zhihu.put("name", zhihuHotSearch.getName()); zhihu.put("name", zhihuHotSearch.getName());
zhihu.put("url", zhihuHotSearch.getUrl()); zhihu.put("url", zhihuHotSearch.getUrl());
zhihu.put("count", zhihuHotSearch.getCount()); zhihu.put("count", zhihuHotSearch.getCount());
zhihu.put("hot", zhihuHotSearch.isHot()); zhihu.put("hot", zhihuHotSearch.isHot());
zhihu.put("day", zhihuHotSearch.getDay()); zhihu.put("day", zhihuHotSearch.getDay());
zhihu.put("time", zhihuHotSearch.getTime()); zhihu.put("time", zhihuHotSearch.getTime());
zhihu.put("changeCount", 0); zhihu.put("changeCount", 0);
zhihu.put("rank", zhihuHotSearch.getRank()); zhihu.put("rank", zhihuHotSearch.getRank());
zhihu.put("type", zhihuHotSearch.getType()); zhihu.put("type", zhihuHotSearch.getType());
hotSearchDAO.addHotSearch(zhihu); hotSearchDAO.addHotSearch(zhihu);
} }
logger.info("知乎话题采集结束........"); logger.info("知乎话题采集结束........");
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment