Commit 811c679b by zhiwei

修改定时器启动方式

parent 89981f4d
package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.mail.SendMailWeibo;
import com.zhiwei.tools.tools.URLCodeUtil;
/**
 * Collects the Weibo real-time hot-search ranking.
 *
 * <p>{@link #weiboHotSearch()} scrapes the desktop HTML page, while
 * {@link #weiboHotSearchByPhone()} reads the m.weibo.cn JSON endpoint. Both methods return an
 * empty list, never {@code null}, when nothing could be collected.
 *
 * @author hero
 */
public class WeiboHotSearchCrawler {
    private static final Logger logger = LoggerFactory.getLogger(WeiboHotSearchCrawler.class);
    private static final HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();

    /** Text handed to {@code SendMailWeibo.sendMail} when the desktop page cannot be processed. */
    private static final String ALERT_MESSAGE = "微博热搜采集出现问题";
    /** Mail address that receives the alert. */
    private static final String ALERT_RECIPIENT = "859548429@qq.com";
    /** How many times the desktop page is requested before giving up. */
    private static final int MAX_ATTEMPTS = 3;

    /**
     * Scrapes the desktop hot-search page.
     *
     * <p>The request is attempted up to {@value #MAX_ATTEMPTS} times; every failed attempt is
     * logged. A page that lacks the {@code pl_top_realtimehot} marker triggers an alert mail and
     * is not retried.
     *
     * @return the parsed entries; an empty list when the page could not be fetched or parsed
     */
    public static List<HotSearchList> weiboHotSearch() {
        String url = "https://s.weibo.com/top/summary?cate=realtimehot";
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_HEAVY_PROXY).body().string();
                if (htmlBody == null || !htmlBody.contains("pl_top_realtimehot")) {
                    SendMailWeibo.sendMail(ALERT_MESSAGE, ALERT_RECIPIENT);
                    logger.info("解析微博时时热搜时出现解析错误,页面结构有问题");
                    break;
                }
                try {
                    return parseDesktopRows(htmlBody);
                } catch (Exception e) {
                    logger.error("解析微博时时热搜时出现解析错误,数据不是json结构", e);
                    SendMailWeibo.sendMail(ALERT_MESSAGE, ALERT_RECIPIENT);
                    // An empty list instead of null keeps callers that iterate the result safe.
                    return new ArrayList<HotSearchList>();
                }
            } catch (Exception e) {
                // Request (or alert mail) failed: record it, then fall through to the next attempt.
                logger.warn("请求微博实时热搜页面失败, 第" + attempt + "次尝试", e);
            }
        }
        return new ArrayList<HotSearchList>();
    }

    /**
     * Extracts one entry per table row of the desktop page. Rows that cannot be parsed are
     * logged and skipped; a single alert mail is sent if at least one row failed.
     */
    private static List<HotSearchList> parseDesktopRows(String htmlBody) {
        List<HotSearchList> rows = new ArrayList<HotSearchList>();
        int failedRows = 0;
        Document document = Jsoup.parse(htmlBody);
        Elements elements = document.select("div#pl_top_realtimehot").select("tbody").select("tr");
        for (Element element : elements) {
            try {
                Elements titleCell = element.select("td.td-02");
                Elements link = titleCell.select("a");
                String id = "http://s.weibo.com" + link.attr("href");
                String name = link.text();
                String num = titleCell.select("span").text();
                String rank = element.select("td[class=\"td-01 ranktop\"]").text();
                // A missing count defaults to 0 and a missing rank cell to -1.
                int hotCount = num.isEmpty() ? 0 : Integer.parseInt(num);
                int rankCount = rank.isEmpty() ? -1 : Integer.parseInt(rank);
                rows.add(new HotSearchList(id, name, hotCount, true, rankCount, HotSearchType.微博热搜.name()));
            } catch (Exception e) {
                failedRows++;
                logger.error("解析微博时时热搜时出现解析错误", e);
            }
        }
        if (failedRows > 0) {
            // One mail per page rather than one per broken row.
            SendMailWeibo.sendMail(ALERT_MESSAGE, ALERT_RECIPIENT);
        }
        return rows;
    }

    /**
     * Reads the hot-search ranking from the m.weibo.cn JSON endpoint.
     *
     * <p>Ranks are numbered from 1 across all cards. When a card's title contains
     * "实时上升热点" its entries are flagged as not hot and numbering restarts at 50. A card that
     * cannot be parsed is logged and skipped.
     *
     * @return the collected entries; an empty list on connection failure, unexpected payload or
     *         malformed JSON
     */
    public static List<HotSearchList> weiboHotSearchByPhone() {
        String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
        Map<String, String> headerMap = new HashMap<>();
        headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");

        String htmlBody;
        try {
            htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
        } catch (IOException e1) {
            logger.error("解析微博时时热搜时出现连接失败", e1);
            return Collections.emptyList();
        }
        if (StringUtils.isBlank(htmlBody) || !htmlBody.contains("cards")) {
            logger.info("解析微博时时热搜时出现解析错误,页面结构有问题");
            return Collections.emptyList();
        }

        List<HotSearchList> result = new ArrayList<HotSearchList>();
        try {
            JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data");
            JSONArray cards = json.getJSONArray("cards");
            int rank = 1;
            for (int i = 0; i < cards.size(); i++) {
                try {
                    JSONObject card = cards.getJSONObject(i);
                    JSONArray cardGroup = card.getJSONArray("card_group");
                    String title = card.getString("title");
                    boolean hot = true;
                    if (title.contains("实时上升热点")) {
                        hot = false;
                        rank = 50;
                    }
                    for (int j = 0; j < cardGroup.size(); j++) {
                        JSONObject cardInfo = cardGroup.getJSONObject(j);
                        String name = cardInfo.getString("desc");
                        int hotCount = cardInfo.getIntValue("desc_extr");
                        String id = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
                        HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name());
                        logger.info("采集到的数据:::{}", hotSearch);
                        result.add(hotSearch);
                        rank++;
                    }
                } catch (Exception e) {
                    // Pass the exception itself; fillInStackTrace() would overwrite its origin.
                    logger.error("解析微博时时热搜时出现解析错误", e);
                }
            }
            return result;
        } catch (Exception e) {
            logger.error("解析微博时时热搜时出现解析错误,数据不是json结构", e);
            return Collections.emptyList();
        }
    }
}
package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.mail.SendMailWeibo;
import com.zhiwei.tools.tools.URLCodeUtil;
/**
 * Collects the Weibo real-time hot-search ranking.
 *
 * <p>{@link #weiboHotSearch()} scrapes the desktop HTML page, while
 * {@link #weiboHotSearchByPhone()} reads the m.weibo.cn JSON endpoint. Both methods return an
 * empty list, never {@code null}, when nothing could be collected.
 *
 * @author hero
 */
public class WeiboHotSearchCrawler {
    private static final Logger logger = LoggerFactory.getLogger(WeiboHotSearchCrawler.class);
    private static final HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();

    /** Text handed to {@code SendMailWeibo.sendMail} when the desktop page cannot be processed. */
    private static final String ALERT_MESSAGE = "微博热搜采集出现问题";
    /** Mail address that receives the alert. */
    private static final String ALERT_RECIPIENT = "859548429@qq.com";
    /** How many times the desktop page is requested before giving up. */
    private static final int MAX_ATTEMPTS = 3;

    /**
     * Scrapes the desktop hot-search page.
     *
     * <p>The request is attempted up to {@value #MAX_ATTEMPTS} times; every failed attempt is
     * logged. A page that lacks the {@code pl_top_realtimehot} marker triggers an alert mail and
     * is not retried.
     *
     * @return the parsed entries; an empty list when the page could not be fetched or parsed
     */
    public static List<HotSearchList> weiboHotSearch() {
        String url = "https://s.weibo.com/top/summary?cate=realtimehot";
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_HEAVY_PROXY).body().string();
                if (htmlBody == null || !htmlBody.contains("pl_top_realtimehot")) {
                    SendMailWeibo.sendMail(ALERT_MESSAGE, ALERT_RECIPIENT);
                    logger.info("解析微博时时热搜时出现解析错误,页面结构有问题");
                    break;
                }
                try {
                    return parseDesktopRows(htmlBody);
                } catch (Exception e) {
                    // Log the exception itself; fillInStackTrace() would overwrite its origin.
                    logger.error("解析微博时时热搜时出现解析错误,数据不是json结构", e);
                    SendMailWeibo.sendMail(ALERT_MESSAGE, ALERT_RECIPIENT);
                    // An empty list instead of null keeps callers that iterate the result safe.
                    return new ArrayList<HotSearchList>();
                }
            } catch (Exception e) {
                // Request (or alert mail) failed: record it, then fall through to the next attempt.
                logger.warn("请求微博实时热搜页面失败, 第" + attempt + "次尝试", e);
            }
        }
        return new ArrayList<HotSearchList>();
    }

    /**
     * Extracts one entry per table row of the desktop page. Rows that cannot be parsed are
     * logged and skipped; a single alert mail is sent if at least one row failed.
     */
    private static List<HotSearchList> parseDesktopRows(String htmlBody) {
        List<HotSearchList> rows = new ArrayList<HotSearchList>();
        int failedRows = 0;
        Document document = Jsoup.parse(htmlBody);
        Elements elements = document.select("div#pl_top_realtimehot").select("tbody").select("tr");
        for (Element element : elements) {
            try {
                Elements titleCell = element.select("td.td-02");
                Elements link = titleCell.select("a");
                String id = "http://s.weibo.com" + link.attr("href");
                String name = link.text();
                String num = titleCell.select("span").text();
                String rank = element.select("td[class=\"td-01 ranktop\"]").text();
                // A missing count defaults to 0 and a missing rank cell to -1.
                int hotCount = num.isEmpty() ? 0 : Integer.parseInt(num);
                int rankCount = rank.isEmpty() ? -1 : Integer.parseInt(rank);
                rows.add(new HotSearchList(id, name, hotCount, true, rankCount, HotSearchType.微博热搜.name()));
            } catch (Exception e) {
                failedRows++;
                logger.error("解析微博时时热搜时出现解析错误", e);
            }
        }
        if (failedRows > 0) {
            // One mail per page rather than one per broken row.
            SendMailWeibo.sendMail(ALERT_MESSAGE, ALERT_RECIPIENT);
        }
        return rows;
    }

    /**
     * Reads the hot-search ranking from the m.weibo.cn JSON endpoint.
     *
     * <p>Ranks are numbered from 1 across all cards. When a card's title contains
     * "实时上升热点" its entries are flagged as not hot and numbering restarts at 50. A card that
     * cannot be parsed is logged and skipped.
     *
     * @return the collected entries; an empty list on connection failure, unexpected payload or
     *         malformed JSON
     */
    public static List<HotSearchList> weiboHotSearchByPhone() {
        String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
        Map<String, String> headerMap = new HashMap<>();
        headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");

        String htmlBody;
        try {
            htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
        } catch (IOException e1) {
            logger.error("解析微博时时热搜时出现连接失败", e1);
            return Collections.emptyList();
        }
        if (StringUtils.isBlank(htmlBody) || !htmlBody.contains("cards")) {
            logger.info("解析微博时时热搜时出现解析错误,页面结构有问题");
            return Collections.emptyList();
        }

        List<HotSearchList> result = new ArrayList<HotSearchList>();
        try {
            JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data");
            JSONArray cards = json.getJSONArray("cards");
            int rank = 1;
            for (int i = 0; i < cards.size(); i++) {
                try {
                    JSONObject card = cards.getJSONObject(i);
                    JSONArray cardGroup = card.getJSONArray("card_group");
                    String title = card.getString("title");
                    boolean hot = true;
                    if (title.contains("实时上升热点")) {
                        hot = false;
                        rank = 50;
                    }
                    for (int j = 0; j < cardGroup.size(); j++) {
                        JSONObject cardInfo = cardGroup.getJSONObject(j);
                        String name = cardInfo.getString("desc");
                        int hotCount = cardInfo.getIntValue("desc_extr");
                        String id = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
                        HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name());
                        logger.info("采集到的数据:::{}", hotSearch);
                        result.add(hotSearch);
                        rank++;
                    }
                } catch (Exception e) {
                    logger.error("解析微博时时热搜时出现解析错误", e);
                }
            }
            return result;
        } catch (Exception e) {
            logger.error("解析微博时时热搜时出现解析错误,数据不是json结构", e);
            return Collections.emptyList();
        }
    }
}
package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.DouyinHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SougoHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun;
import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.WeiboTopicRun;
import com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Application entry point: initialises the proxy factory, starts the WeChat user updater and the
 * cache listener, then starts every crawler and every push task.
 */
public class HotSearchRun {

    /** Calls {@code start()} on each crawler, in the order they are listed. */
    private static void startCrawlers() {
        new WeiboHotSearchRun().start();
        new BaiduHotSearchRun().start();
        new SougoHotSearchRun().start();
        new DouyinHotSearchRun().start();
        new ZhihuHotSearchRun().start();
        new WeiboTopicRun().start();
    }

    /** Calls {@code start()} on each push task. */
    private static void startPushers() {
        new SendWeiboHotSearchRun().start();
        new SendZhihuHotSearchRun().start();
    }

    /**
     * Boots the whole application.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        ProxyFactory.init(ProxyConfig.registry, ProxyConfig.group, GroupType.PROVIDER, 10000013);

        new UpdateWechatUserRun().start();
        // Pause before listening to the cache; presumably 10000 ms - confirm ZhiWeiTools.sleep units.
        ZhiWeiTools.sleep(10000);
        new CacheListener().startListen();

        startCrawlers();
        startPushers();
    }
}
package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.DouyinHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SougoHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun;
import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.WeiboTopicRun;
import com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun;
import com.zhiwei.tools.tools.ZhiWeiTools;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
/**
 * Application entry point: initialises the proxy factory, starts the WeChat user updater and the
 * cache listener, schedules every crawler on a shared scheduled thread pool and finally starts
 * the push tasks.
 */
public class HotSearchRun {

    /** Number of threads in the pool that runs the crawlers. */
    private static final int CRAWLER_POOL_SIZE = 6;

    /** Schedules {@code task} at a fixed rate with no initial delay. */
    private static void scheduleNow(ScheduledExecutorService scheduler, Runnable task, long period, TimeUnit unit) {
        scheduler.scheduleAtFixedRate(task, 0, period, unit);
    }

    /**
     * Boots the whole application.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        ProxyFactory.init(ProxyConfig.registry, ProxyConfig.group, GroupType.PROVIDER, 10000013);

        new UpdateWechatUserRun().start();
        // Pause before listening to the cache; presumably 10000 ms - confirm ZhiWeiTools.sleep units.
        ZhiWeiTools.sleep(10000);
        new CacheListener().startListen();

        // Crawlers: each one is handed to the scheduler as a Runnable.
        ScheduledExecutorService crawlerScheduler = Executors.newScheduledThreadPool(CRAWLER_POOL_SIZE);
        scheduleNow(crawlerScheduler, new WeiboHotSearchRun(), 1, TimeUnit.MINUTES);
        scheduleNow(crawlerScheduler, new BaiduHotSearchRun(), 5, TimeUnit.MINUTES);
        scheduleNow(crawlerScheduler, new SougoHotSearchRun(), 5, TimeUnit.MINUTES);
        scheduleNow(crawlerScheduler, new DouyinHotSearchRun(), 10, TimeUnit.MINUTES);
        scheduleNow(crawlerScheduler, new ZhihuHotSearchRun(), 10, TimeUnit.MINUTES);
        scheduleNow(crawlerScheduler, new WeiboTopicRun(), 1, TimeUnit.DAYS);

        // Push tasks are still started directly.
        new SendWeiboHotSearchRun().start();
        new SendZhihuHotSearchRun().start();
    }
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class BaiduHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(BaiduHotSearchRun.class);
/**
 * Crawls the Baidu ranking in an endless loop, pausing five minutes after each successful round.
 * A failed round is logged and followed by a long back-off ({@code 60*60*1000}, presumably
 * milliseconds, i.e. one hour). An interrupt ends the loop.
 */
@Override
public void run() {
    while (true) {
        try {
            getHotList();
            TimeUnit.MINUTES.sleep(5);
        } catch (InterruptedException e) {
            // Restore the flag and stop instead of treating the interrupt as a crawl failure.
            Thread.currentThread().interrupt();
            return;
        } catch (Exception e) {
            // Previously the exception was dropped silently; make the failure visible.
            logger.error("百度风云榜采集出现异常", e);
            ZhiWeiTools.sleep(60*60*1000);
        }
        ZhiWeiTools.sleep(50);
    }
}
/**
 * Runs one crawl round: fetches the Baidu ranking, converts every entry into a document
 * (including its change count obtained from the DAO) and stores the documents in one batch.
 */
private void getHotList() {
    logger.info("百度风云榜采集开始........");
    HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
    List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch();
    int collected = (list == null) ? 0 : list.size();
    logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), collected);

    List<DBObject> saveDataList = new ArrayList<>();
    if (list != null) {
        for (HotSearchList entry : list) {
            int changeCount = hotSearchDAO.getChangeCount(entry);
            DBObject doc = new BasicDBObject();
            doc.put("_id", entry.getId());
            doc.put("name", entry.getName());
            doc.put("url", entry.getUrl());
            doc.put("count", entry.getCount());
            doc.put("day", entry.getDay());
            doc.put("time", entry.getTime());
            doc.put("changeCount", changeCount);
            doc.put("rank", entry.getRank());
            doc.put("type", entry.getType());
            saveDataList.add(doc);
        }
    }
    // The batch is handed to the DAO even when it is empty.
    hotSearchDAO.addHotSearchList(saveDataList);
    logger.info("百度风云榜采集结束........");
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Task that performs one crawl of the Baidu ranking per invocation of {@link #run()}.
 * Repetition is left to whoever runs the task (for example a scheduled executor).
 */
public class BaiduHotSearchRun extends Thread {
    private static final Logger logger = LoggerFactory.getLogger(BaiduHotSearchRun.class);

    /**
     * Executes a single crawl round. Exceptions are logged and never propagate, so a failing
     * round cannot terminate the caller's schedule.
     */
    @Override
    public void run() {
        try {
            getHotList();
        } catch (Exception e) {
            // Previously the exception was dropped silently; make the failure visible.
            logger.error("百度风云榜采集出现异常", e);
        }
    }

    /**
     * Fetches the Baidu ranking, converts every entry into a document (including its change
     * count obtained from the DAO) and stores the documents in one batch.
     */
    private void getHotList() {
        logger.info("百度风云榜采集开始........");
        HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
        List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch();
        logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);

        List<DBObject> saveDataList = new ArrayList<>();
        if (list != null) {
            for (HotSearchList baiduHotSearch : list) {
                int changeCount = hotSearchDAO.getChangeCount(baiduHotSearch);
                DBObject doc = new BasicDBObject();
                doc.put("_id", baiduHotSearch.getId());
                doc.put("name", baiduHotSearch.getName());
                doc.put("url", baiduHotSearch.getUrl());
                doc.put("count", baiduHotSearch.getCount());
                doc.put("day", baiduHotSearch.getDay());
                doc.put("time", baiduHotSearch.getTime());
                doc.put("changeCount", changeCount);
                doc.put("rank", baiduHotSearch.getRank());
                doc.put("type", baiduHotSearch.getType());
                saveDataList.add(doc);
            }
        }
        hotSearchDAO.addHotSearchList(saveDataList);
        logger.info("百度风云榜采集结束........");
    }
}
\ No newline at end of file
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Thread that repeatedly crawls the Douyin hot-search list and stores every entry.
 */
public class DouyinHotSearchRun extends Thread {
    private static final Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class);

    /**
     * Crawls in an endless loop, pausing ten minutes after each successful round. A failed round
     * is logged and followed by a long back-off ({@code 60*60*1000}, presumably milliseconds,
     * i.e. one hour). An interrupt ends the loop.
     */
    @Override
    public void run() {
        while (true) {
            try {
                getHotList();
                TimeUnit.MINUTES.sleep(10);
            } catch (InterruptedException e) {
                // Restore the flag and stop instead of treating the interrupt as a crawl failure.
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                // Previously the exception was dropped silently; make the failure visible.
                logger.error("抖音热搜榜采集出现异常", e);
                ZhiWeiTools.sleep(60*60*1000);
            }
            ZhiWeiTools.sleep(50);
        }
    }

    /**
     * Runs one crawl round: fetches the Douyin hot list and stores each entry individually,
     * together with its change count obtained from the DAO. A {@code null} crawl result is
     * treated as "nothing collected".
     */
    private void getHotList() {
        logger.info("抖音热搜榜采集开始........");
        HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
        List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
        logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
        if (list != null) {
            for (HotSearchList douyinHotSearch : list) {
                int changeCount = hotSearchDAO.getChangeCount(douyinHotSearch);
                DBObject douyin = new BasicDBObject();
                douyin.put("_id", douyinHotSearch.getId());
                douyin.put("name", douyinHotSearch.getName());
                douyin.put("rank", douyinHotSearch.getRank());
                douyin.put("count", douyinHotSearch.getCount());
                douyin.put("day", douyinHotSearch.getDay());
                douyin.put("time", douyinHotSearch.getTime());
                douyin.put("changeCount", changeCount);
                // The url field is always stored as null for these entries.
                douyin.put("url", null);
                douyin.put("type", douyinHotSearch.getType());
                hotSearchDAO.addHotSearch(douyin);
            }
        }
        logger.info("抖音热搜榜采集结束........");
    }
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Task that performs one crawl of the Douyin hot-search list per invocation of {@link #run()}.
 * Repetition is left to whoever runs the task (for example a scheduled executor).
 */
public class DouyinHotSearchRun extends Thread {
    private static final Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class);

    /**
     * Executes a single crawl round. Exceptions are logged and never propagate, so a failing
     * round cannot terminate the caller's schedule.
     */
    @Override
    public void run() {
        try {
            getHotList();
        } catch (Exception e) {
            // Previously the exception was dropped silently; make the failure visible.
            logger.error("抖音热搜榜采集出现异常", e);
        }
    }

    /**
     * Fetches the Douyin hot list and stores each entry individually, together with its change
     * count obtained from the DAO. A {@code null} crawl result is treated as "nothing collected".
     */
    private void getHotList() {
        logger.info("抖音热搜榜采集开始........");
        HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
        List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
        logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
        if (list != null) {
            for (HotSearchList douyinHotSearch : list) {
                int changeCount = hotSearchDAO.getChangeCount(douyinHotSearch);
                DBObject douyin = new BasicDBObject();
                douyin.put("_id", douyinHotSearch.getId());
                douyin.put("name", douyinHotSearch.getName());
                douyin.put("rank", douyinHotSearch.getRank());
                douyin.put("count", douyinHotSearch.getCount());
                douyin.put("day", douyinHotSearch.getDay());
                douyin.put("time", douyinHotSearch.getTime());
                douyin.put("changeCount", changeCount);
                // The url field is always stored as null for these entries.
                douyin.put("url", null);
                douyin.put("type", douyinHotSearch.getType());
                hotSearchDAO.addHotSearch(douyin);
            }
        }
        logger.info("抖音热搜榜采集结束........");
    }
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Thread that repeatedly crawls the Sogou hot-search list and stores it in one batch per round.
 */
public class SougoHotSearchRun extends Thread {
    private static final Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class);

    /**
     * Crawls in an endless loop, pausing five minutes after each successful round. A failed round
     * is logged and followed by a long back-off ({@code 60*60*1000}, presumably milliseconds,
     * i.e. one hour). An interrupt ends the loop.
     */
    @Override
    public void run() {
        while (true) {
            try {
                getHotList();
                TimeUnit.MINUTES.sleep(5);
            } catch (InterruptedException e) {
                // Restore the flag and stop instead of treating the interrupt as a crawl failure.
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                // Previously the exception was dropped silently; make the failure visible.
                logger.error("搜狗微信采集出现异常", e);
                ZhiWeiTools.sleep(60*60*1000);
            }
            ZhiWeiTools.sleep(50);
        }
    }

    /**
     * Runs one crawl round: fetches the Sogou list, converts every entry into a document and
     * stores the documents in one batch. A {@code null} crawl result is treated like an empty one.
     */
    private void getHotList() {
        HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
        logger.info("搜狗微信采集开始........");
        List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch();
        logger.info("{}, 此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
        List<DBObject> data = new ArrayList<>();
        if (list != null) {
            for (HotSearchList sougoHotSearch : list) {
                DBObject doc = new BasicDBObject();
                doc.put("_id", sougoHotSearch.getId());
                doc.put("name", sougoHotSearch.getName());
                doc.put("url", sougoHotSearch.getUrl());
                doc.put("day", sougoHotSearch.getDay());
                doc.put("time", sougoHotSearch.getTime());
                doc.put("rank", sougoHotSearch.getRank());
                doc.put("type", sougoHotSearch.getType());
                data.add(doc);
            }
        }
        hotSearchDAO.addHotSearchList(data);
        logger.info("搜狗微信采集结束........");
    }
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Task that performs one crawl of the Sogou hot-search list per invocation of {@link #run()}.
 * Repetition is left to whoever runs the task (for example a scheduled executor).
 */
public class SougoHotSearchRun extends Thread {
    private static final Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class);

    /**
     * Executes a single crawl round. Exceptions are logged and never propagate, so a failing
     * round cannot terminate the caller's schedule.
     */
    @Override
    public void run() {
        try {
            getHotList();
        } catch (Exception e) {
            // Previously the exception was dropped silently; make the failure visible.
            logger.error("搜狗微信采集出现异常", e);
        }
    }

    /**
     * Fetches the Sogou list, converts every entry into a document and stores the documents in
     * one batch. A {@code null} crawl result is treated like an empty one.
     */
    private void getHotList() {
        HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
        logger.info("搜狗微信采集开始........");
        List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch();
        logger.info("{}, 此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
        List<DBObject> data = new ArrayList<>();
        if (list != null) {
            for (HotSearchList sougoHotSearch : list) {
                DBObject doc = new BasicDBObject();
                doc.put("_id", sougoHotSearch.getId());
                doc.put("name", sougoHotSearch.getName());
                doc.put("url", sougoHotSearch.getUrl());
                doc.put("day", sougoHotSearch.getDay());
                doc.put("time", sougoHotSearch.getTime());
                doc.put("rank", sougoHotSearch.getRank());
                doc.put("type", sougoHotSearch.getType());
                data.add(doc);
            }
        }
        hotSearchDAO.addHotSearchList(data);
        logger.info("搜狗微信采集结束........");
    }
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Thread that repeatedly crawls the Weibo hot-search list and stores it in one batch per round.
 */
public class WeiboHotSearchRun extends Thread {
    private static final Logger logger = LoggerFactory.getLogger(WeiboHotSearchRun.class);

    /**
     * Crawls in an endless loop, pausing one minute after each successful round. A failed round
     * is logged and followed by a back-off ({@code 60*1000}, presumably milliseconds, i.e. one
     * minute). An interrupt ends the loop.
     */
    @Override
    public void run() {
        while (true) {
            try {
                getHotList();
                TimeUnit.MINUTES.sleep(1);
            } catch (InterruptedException e) {
                // Restore the flag and stop instead of treating the interrupt as a crawl failure.
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                // Previously the exception was dropped silently; make the failure visible.
                logger.error("微博热搜采集出现异常", e);
                ZhiWeiTools.sleep(60*1000);
            }
            ZhiWeiTools.sleep(50);
        }
    }

    /**
     * Runs one crawl round: fetches the list through the mobile endpoint, converts every entry
     * into a document (including its change count obtained from the DAO) and stores the documents
     * in one batch. A {@code null} crawl result is treated like an empty one.
     */
    private void getHotList() {
        logger.info("微博话题采集开始........");
        HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO();
        // WeiboHotSearchCrawler.weiboHotSearch() (desktop page) is the alternative source.
        List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone();
        logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
        List<DBObject> data = new ArrayList<>();
        if (list != null) {
            for (HotSearchList weiboHotSearch : list) {
                int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch);
                DBObject doc = new BasicDBObject();
                doc.put("_id", weiboHotSearch.getId());
                doc.put("name", weiboHotSearch.getName());
                doc.put("url", weiboHotSearch.getUrl());
                doc.put("count", weiboHotSearch.getCount());
                doc.put("hot", weiboHotSearch.isHot());
                doc.put("day", weiboHotSearch.getDay());
                doc.put("time", weiboHotSearch.getTime());
                doc.put("changeCount", changeCount);
                doc.put("rank", weiboHotSearch.getRank());
                doc.put("type", weiboHotSearch.getType());
                data.add(doc);
            }
        }
        weiboHotSearchDAO.addHotSearchList(data);
        logger.info("微博话题采集结束........");
    }
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Task that performs one crawl of the Weibo hot-search list per invocation of {@link #run()}.
 * Repetition is left to whoever runs the task (for example a scheduled executor).
 */
public class WeiboHotSearchRun extends Thread {
    private static final Logger logger = LoggerFactory.getLogger(WeiboHotSearchRun.class);

    /**
     * Executes a single crawl round. Exceptions are logged and never propagate, so a failing
     * round cannot terminate the caller's schedule.
     */
    @Override
    public void run() {
        try {
            getHotList();
        } catch (Exception e) {
            // Previously the exception was dropped silently; make the failure visible.
            logger.error("微博热搜采集出现异常", e);
        }
    }

    /**
     * Fetches the list through the mobile endpoint, converts every entry into a document
     * (including its change count obtained from the DAO) and stores the documents in one batch.
     * A {@code null} crawl result is treated like an empty one.
     */
    private void getHotList() {
        logger.info("微博话题采集开始........");
        HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO();
        // WeiboHotSearchCrawler.weiboHotSearch() (desktop page) is the alternative source.
        List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone();
        logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
        List<DBObject> data = new ArrayList<>();
        if (list != null) {
            for (HotSearchList weiboHotSearch : list) {
                int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch);
                DBObject doc = new BasicDBObject();
                doc.put("_id", weiboHotSearch.getId());
                doc.put("name", weiboHotSearch.getName());
                doc.put("url", weiboHotSearch.getUrl());
                doc.put("count", weiboHotSearch.getCount());
                doc.put("hot", weiboHotSearch.isHot());
                doc.put("day", weiboHotSearch.getDay());
                doc.put("time", weiboHotSearch.getTime());
                doc.put("changeCount", changeCount);
                doc.put("rank", weiboHotSearch.getRank());
                doc.put("type", weiboHotSearch.getType());
                data.add(doc);
            }
        }
        weiboHotSearchDAO.addHotSearchList(data);
        logger.info("微博话题采集结束........");
    }
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.WeiboTopic;
import com.zhiwei.searchhotcrawler.crawler.WeiboHuatiCrawler;
import com.zhiwei.searchhotcrawler.dao.WeiboTopicDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Thread that repeatedly crawls the Weibo super-topic ranking and stores it in one batch per round.
 */
public class WeiboTopicRun extends Thread {
    private static final Logger logger = LoggerFactory.getLogger(WeiboTopicRun.class);

    /**
     * Crawls in an endless loop, pausing one day after each successful round. A failed round is
     * logged and followed by a back-off ({@code 60*60*1000}, presumably milliseconds, i.e. one
     * hour). An interrupt ends the loop.
     */
    @Override
    public void run() {
        while (true) {
            try {
                getTopicList();
                TimeUnit.DAYS.sleep(1);
            } catch (InterruptedException e) {
                // Restore the flag and stop instead of treating the interrupt as a crawl failure.
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                // Previously the exception was dropped silently; make the failure visible.
                logger.error("微博超话采集出现异常", e);
                ZhiWeiTools.sleep(60*60*1000);
            }
            ZhiWeiTools.sleep(50);
        }
    }

    /**
     * Runs one crawl round: fetches the topics, converts every topic into a document and stores
     * the documents in one batch. A {@code null} crawl result is treated like an empty one.
     */
    private void getTopicList() {
        WeiboTopicDAO weiboTopicDAO = new WeiboTopicDAO();
        logger.info("微博超话采集开始........");
        List<WeiboTopic> list = WeiboHuatiCrawler.startCrawler();
        logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
        List<DBObject> data = new ArrayList<>();
        if (list != null) {
            for (WeiboTopic topic : list) {
                logger.info("topic::::{}", topic);
                DBObject doc = new BasicDBObject();
                doc.put("_id", topic.getId());
                doc.put("name", topic.getTopicName());
                doc.put("rank", topic.getRank());
                doc.put("score_num", topic.getScore());
                doc.put("fensi_num", topic.getFensi());
                doc.put("post_num", topic.getPostNum());
                doc.put("type", topic.getType());
                doc.put("day", topic.getDay());
                doc.put("time", topic.getTime());
                doc.put("url", topic.getUrl());
                data.add(doc);
            }
        }
        weiboTopicDAO.addTopicList(data);
        // End message now names the same task as the start message.
        logger.info("微博超话采集结束........");
    }
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.WeiboTopic;
import com.zhiwei.searchhotcrawler.crawler.WeiboHuatiCrawler;
import com.zhiwei.searchhotcrawler.dao.WeiboTopicDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Task that performs one crawl of the Weibo super-topic ranking per invocation of {@link #run()}.
 * Repetition is left to whoever runs the task (for example a scheduled executor).
 */
public class WeiboTopicRun extends Thread {
    private static final Logger logger = LoggerFactory.getLogger(WeiboTopicRun.class);

    /**
     * Executes a single crawl round. Exceptions are logged and never propagate, so a failing
     * round cannot terminate the caller's schedule.
     */
    @Override
    public void run() {
        try {
            getTopicList();
        } catch (Exception e) {
            // Previously the exception was dropped silently; make the failure visible.
            logger.error("微博超话采集出现异常", e);
        }
    }

    /**
     * Fetches the topics, converts every topic into a document and stores the documents in one
     * batch. A {@code null} crawl result is treated like an empty one.
     */
    private void getTopicList() {
        WeiboTopicDAO weiboTopicDAO = new WeiboTopicDAO();
        logger.info("微博超话采集开始........");
        List<WeiboTopic> list = WeiboHuatiCrawler.startCrawler();
        logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
        List<DBObject> data = new ArrayList<>();
        if (list != null) {
            for (WeiboTopic topic : list) {
                logger.info("topic::::{}", topic);
                DBObject doc = new BasicDBObject();
                doc.put("_id", topic.getId());
                doc.put("name", topic.getTopicName());
                doc.put("rank", topic.getRank());
                doc.put("score_num", topic.getScore());
                doc.put("fensi_num", topic.getFensi());
                doc.put("post_num", topic.getPostNum());
                doc.put("type", topic.getType());
                doc.put("day", topic.getDay());
                doc.put("time", topic.getTime());
                doc.put("url", topic.getUrl());
                data.add(doc);
            }
        }
        weiboTopicDAO.addTopicList(data);
        // End message now names the same task as the start message.
        logger.info("微博超话采集结束........");
    }
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Thread that repeatedly crawls the Zhihu hot lists (desktop and mobile) and stores
 * every entry through {@code HotSearchListDAO.addHotSearch}.
 *
 * <p>The loop never terminates on its own: after a successful round it sleeps ten
 * minutes, after a failed round it sleeps one hour, and in both cases a further
 * 50 ms pause follows before the next round.
 */
public class ZhihuHotSearchRun extends Thread {

    private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchRun.class);

    /**
     * Crawl loop. Exceptions from a round are logged with their stack trace
     * (they used to be silently discarded) and the loop backs off before retrying.
     */
    @Override
    public void run() {
        while (true) {
            try {
                getHotList();
                TimeUnit.MINUTES.sleep(10);
            } catch (Exception e) {
                logger.error("知乎话题采集出现异常", e);
                // Back off for one hour after a failed round.
                ZhiWeiTools.sleep(60 * 60 * 1000);
            }
            ZhiWeiTools.sleep(50);
        }
    }

    /**
     * Performs one crawl round: fetches the desktop and the mobile hot list, logs the
     * combined size and stores the desktop entries followed by the mobile entries.
     * Either list may be null; a null list contributes zero entries.
     */
    private void getHotList() {
        HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
        logger.info("知乎话题采集开始...,当前线程名字:{}", Thread.currentThread().getName());
        List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList();
        List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
        // The lists are stored one after the other instead of being merged with addAll,
        // which failed on a null list and modified the list returned by the crawler.
        int total = sizeOf(list) + sizeOf(mobilelist);
        logger.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(total));
        saveAll(hotSearchDAO, list);
        saveAll(hotSearchDAO, mobilelist);
        logger.info("知乎话题采集结束........");
    }

    /** Returns the number of entries, counting a null list as empty. */
    private static int sizeOf(List<HotSearchList> entries) {
        return entries != null ? entries.size() : 0;
    }

    /** Stores every entry of the given list; a null list is skipped. */
    private static void saveAll(HotSearchListDAO hotSearchDAO, List<HotSearchList> entries) {
        if (entries == null) {
            return;
        }
        for (HotSearchList zhihuHotSearch : entries) {
            hotSearchDAO.addHotSearch(toDocument(zhihuHotSearch));
        }
    }

    /** Maps one hot-list entry onto the document layout that is stored. */
    private static DBObject toDocument(HotSearchList zhihuHotSearch) {
        DBObject zhihu = new BasicDBObject();
        zhihu.put("_id", zhihuHotSearch.getId());
        zhihu.put("name", zhihuHotSearch.getName());
        zhihu.put("url", zhihuHotSearch.getUrl());
        zhihu.put("count", zhihuHotSearch.getCount());
        zhihu.put("hot", zhihuHotSearch.isHot());
        zhihu.put("day", zhihuHotSearch.getDay());
        zhihu.put("time", zhihuHotSearch.getTime());
        zhihu.put("changeCount", 0);
        zhihu.put("rank", zhihuHotSearch.getRank());
        zhihu.put("type", zhihuHotSearch.getType());
        return zhihu;
    }
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Thread that performs a single crawl of the Zhihu hot lists (desktop and mobile)
 * and stores every entry through {@code HotSearchListDAO.addHotSearch}.
 *
 * <p>There is no internal loop or sleep.
 * NOTE(review): repetition is presumably driven by an external scheduler - confirm against the caller.
 */
public class ZhihuHotSearchRun extends Thread {

    private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchRun.class);

    /**
     * Runs one crawl-and-store pass. Any exception is logged with its stack trace
     * instead of being silently dropped, and never propagates out of the thread.
     */
    @Override
    public void run() {
        try {
            getHotList();
        } catch (Exception e) {
            // Previously the exception was discarded; log it so failures are visible.
            logger.error("知乎话题采集出现异常", e);
        }
    }

    /**
     * Fetches the desktop and the mobile hot list, logs the combined size and stores
     * the desktop entries followed by the mobile entries.
     * Either list may be null; a null list contributes zero entries.
     */
    private void getHotList() {
        HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
        logger.info("知乎话题采集开始...,当前线程名字:{}", Thread.currentThread().getName());
        List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList();
        List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
        // The lists are stored one after the other instead of being merged with addAll,
        // which failed on a null list and modified the list returned by the crawler.
        int total = sizeOf(list) + sizeOf(mobilelist);
        logger.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(total));
        saveAll(hotSearchDAO, list);
        saveAll(hotSearchDAO, mobilelist);
        logger.info("知乎话题采集结束........");
    }

    /** Returns the number of entries, counting a null list as empty. */
    private static int sizeOf(List<HotSearchList> entries) {
        return entries != null ? entries.size() : 0;
    }

    /** Stores every entry of the given list; a null list is skipped. */
    private static void saveAll(HotSearchListDAO hotSearchDAO, List<HotSearchList> entries) {
        if (entries == null) {
            return;
        }
        for (HotSearchList zhihuHotSearch : entries) {
            hotSearchDAO.addHotSearch(toDocument(zhihuHotSearch));
        }
    }

    /** Maps one hot-list entry onto the document layout that is stored. */
    private static DBObject toDocument(HotSearchList zhihuHotSearch) {
        DBObject zhihu = new BasicDBObject();
        zhihu.put("_id", zhihuHotSearch.getId());
        zhihu.put("name", zhihuHotSearch.getName());
        zhihu.put("url", zhihuHotSearch.getUrl());
        zhihu.put("count", zhihuHotSearch.getCount());
        zhihu.put("hot", zhihuHotSearch.isHot());
        zhihu.put("day", zhihuHotSearch.getDay());
        zhihu.put("time", zhihuHotSearch.getTime());
        zhihu.put("changeCount", 0);
        zhihu.put("rank", zhihuHotSearch.getRank());
        zhihu.put("type", zhihuHotSearch.getType());
        return zhihu;
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment