Commit ca20b119 by zhiwei

将百度热搜、微信热搜、抖音热搜合并到master

parent a65b651d
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * One entry of the Baidu hot-search ranking, stamped with the moment the
 * object was created.
 */
public class BaiDuHotSearch implements Serializable {
    private static final long serialVersionUID = 2076919584659821600L;

    private String id;        // primary key: kw + "_" + creation time in epoch millis
    private String url;       // main link
    private String everurl;   // related link
    private String kw;        // keyword
    private int count;        // search index
    private String day;       // creation day, formatted yyyy-MM-dd
    private Date time;        // creation time
    private int changeCount;  // change compared with the previous minute
    private Integer rank;     // ranking position

    /** Creates an empty entry; every field keeps its default value. */
    public BaiDuHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param rank    ranking position
     * @param kw      keyword
     * @param everurl related link
     * @param count   search index
     */
    public BaiDuHotSearch(Integer rank, String kw, String everurl, int count) {
        // Take the clock reading once so that id, time and day always describe
        // the same instant (separate new Date() calls can straddle a
        // millisecond or even a day boundary).
        Date now = new Date();
        this.id = kw + "_" + now.getTime();
        this.rank = rank;
        this.kw = kw;
        this.count = count;
        this.everurl = everurl;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new BaiDuHotSearch["
                + "id = " + id
                + ", url = " + url
                + ", everurl = " + everurl
                + ", kw = " + kw
                + ", count = " + count
                + ", day = " + day
                + ", time = " + time
                + ", rank = " + rank
                + ", changeCount = " + changeCount
                + "]";
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getEverurl() {
        return everurl;
    }

    public void setEverurl(String everurl) {
        this.everurl = everurl;
    }

    public String getKw() {
        return kw;
    }

    public void setKw(String kw) {
        this.kw = kw;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public int getChangeCount() {
        return changeCount;
    }

    public void setChangeCount(int changeCount) {
        this.changeCount = changeCount;
    }

    /**
     * Returns the ranking position as a primitive.
     *
     * @return the rank
     * @throws NullPointerException if the rank has not been set, because the
     *         underlying {@code Integer} field is unboxed
     */
    public int getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }
}
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
/**
 * One entry of the Douyin hot-search list, stamped with the moment the object
 * was created.
 */
public class DouyinHotSearch implements Serializable {
    private static final long serialVersionUID = -7707110236217797510L;

    private String id;        // primary key: word + "_" + creation time in epoch millis
    private Integer position; // ranking position
    private String word;      // hot-search keyword
    private int hot_value;    // heat value
    private Date time;        // creation time
    private int changeCount;  // change compared with the previous minute

    /** Creates an empty entry; every field keeps its default value. */
    public DouyinHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param position  ranking position
     * @param word      hot-search keyword
     * @param hot_value heat value; it is unboxed into an {@code int}, so
     *                  {@code null} causes a {@link NullPointerException}
     */
    public DouyinHotSearch(Integer position, String word, Integer hot_value) {
        // Take the clock reading once so that the timestamp embedded in the id
        // is always identical to the stored time.
        Date now = new Date();
        this.id = word + "_" + now.getTime();
        this.position = position;
        this.word = word;
        this.hot_value = hot_value;
        this.time = now;
    }

    @Override
    public String toString() {
        return "new DouyinHotSearch["
                + "id = " + id
                + ", position = " + position
                + ", word = " + word
                + ", hot_value = " + hot_value
                + ", time = " + time
                + ", changeCount = " + changeCount
                + "]";
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Integer getPosition() {
        return position;
    }

    public void setPosition(Integer position) {
        this.position = position;
    }

    public String getWord() {
        return word;
    }

    public void setWord(String word) {
        this.word = word;
    }

    public int getHot_value() {
        return hot_value;
    }

    public void setHot_value(int hot_value) {
        this.hot_value = hot_value;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public int getChangeCount() {
        return changeCount;
    }

    public void setChangeCount(int changeCount) {
        this.changeCount = changeCount;
    }
}
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * One entry of the Sogou WeChat hot-keyword list, stamped with the moment the
 * object was created.
 */
public class SougoHotSearch implements Serializable {
    private static final long serialVersionUID = 2076919584659821600L;

    private String id;       // primary key: kw + "_" + creation time in epoch millis
    private String url;      // main link
    private String everurl;  // related link
    private String kw;       // keyword
    private String day;      // creation day, formatted yyyy-MM-dd
    private Date time;       // creation time
    private Integer rank;    // ranking position

    /** Creates an empty entry; every field keeps its default value. */
    public SougoHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param rank    ranking position
     * @param kw      keyword
     * @param everurl related link
     */
    public SougoHotSearch(Integer rank, String kw, String everurl) {
        // Take the clock reading once so that id, time and day always describe
        // the same instant.
        Date now = new Date();
        this.id = kw + "_" + now.getTime();
        this.rank = rank;
        this.kw = kw;
        this.everurl = everurl;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new SougoHotSearch["
                + "id = " + id
                + ", url = " + url
                + ", everurl = " + everurl
                + ", kw = " + kw
                + ", day = " + day
                + ", time = " + time
                + ", rank = " + rank
                + "]";
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getEverurl() {
        return everurl;
    }

    public void setEverurl(String everurl) {
        this.everurl = everurl;
    }

    public String getKw() {
        return kw;
    }

    public void setKw(String kw) {
        this.kw = kw;
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public Integer getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }
}
......@@ -20,6 +20,9 @@ public class Config {
dbName = conf.getProperty("dbName");
collWeiboName = conf.getProperty("collWeiboName");
collZhihuName = conf.getProperty("collZhihuName");
collBaiduName = conf.getProperty("collBaiduName");
collSougoName = conf.getProperty("collSougoName");
collDouyinName = conf.getProperty("collDouyinName");
collWechatUserName = conf.getProperty("collWechatUserName");
} catch (Exception e) {
......@@ -35,7 +38,9 @@ public class Config {
public static String authDB;
public static String dbName;
public static String collWeiboName;
public static String collBaiduName;
public static String collZhihuName;
public static String collWechatUserName;
public static String collSougoName;
public static String collDouyinName;
}
package com.zhiwei.searchhotcrawler.crawler;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch;
/**
 * Crawler for the Baidu hot-search ranking page.
 *
 * @author hero
 * @date 2019-07-10
 */
public class BaiDuHotSearchCrawler {
    private static Logger logger = LoggerFactory.getLogger(BaiDuHotSearchCrawler.class);
    private static HttpBoot httpBoot = new HttpBoot();

    /**
     * Downloads the PC ranking page and converts every ranked table row into
     * a {@link BaiDuHotSearch}.
     *
     * <p>The download is attempted up to three times; only a thrown exception
     * triggers another attempt. A page that was fetched but does not contain
     * the expected marker is logged and not retried.
     *
     * @return the parsed entries; never {@code null}, empty when nothing could
     *         be fetched or parsed
     */
    public static List<BaiDuHotSearch> baiduHotSearch() {
        String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
        List<BaiDuHotSearch> list = new ArrayList<BaiDuHotSearch>();
        for (int i = 0; i < 3; i++) {
            try {
                String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
                if (htmlBody != null && htmlBody.contains("mainBody")) {
                    try {
                        Document document = Jsoup.parse(htmlBody);
                        Elements rows = document.select("table.list-table").select("tr");
                        for (Element row : rows) {
                            try {
                                BaiDuHotSearch hotSearch = parseRow(row);
                                if (hotSearch != null) {
                                    list.add(hotSearch);
                                }
                            } catch (Exception e) {
                                // One malformed row must not discard the rest of the table.
                                logger.error("解析百度风云榜时出现解析错误", e);
                            }
                        }
                    } catch (Exception e) {
                        // Pass the exception itself: fillInStackTrace() would overwrite
                        // the original stack trace with this catch site.
                        logger.error("解析百度风云榜时出现解析错误,数据不是json结构", e);
                        // Return the (empty) list rather than null so that callers
                        // iterating the result do not hit a NullPointerException.
                        return list;
                    }
                } else {
                    logger.info("解析百度风云榜时出现解析错误,页面结构有问题");
                }
                break;
            } catch (Exception e) {
                logger.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
            }
        }
        // The "{}" placeholder is required; without it the size is never printed.
        logger.info("此轮采集的数据量为:{}", list.size());
        return list;
    }

    /**
     * Converts one table row into an entry.
     *
     * @param row a {@code tr} element of the ranking table
     * @return the entry, or {@code null} when the row carries no rank
     * @throws NumberFormatException if the rank or the search index is not an integer
     */
    private static BaiDuHotSearch parseRow(Element row) {
        // The rank sits in one of two differently styled spans.
        Elements firstCell = row.select("td.first");
        Elements topRank = firstCell.select("span.num-top");
        Elements normalRank = firstCell.select("span.num-normal");
        String rankStr = null;
        if (!topRank.isEmpty()) {
            rankStr = topRank.text();
        } else if (!normalRank.isEmpty()) {
            rankStr = normalRank.text();
        }
        if (StringUtils.isBlank(rankStr)) {
            // Rows without a rank are never stored, so skip the remaining work.
            return null;
        }
        Integer rank = Integer.valueOf(rankStr);

        Elements titleLink = row.select("td.keyword").select("a.list-title");
        String kw = titleLink.text();
        logger.info("关键词:{}", kw);
        String everurl = titleLink.attr("href");

        // The search index is rendered inside either a "fall" or a "rise" span.
        Elements lastCell = row.select("td.last");
        Elements falling = lastCell.select("span.icon-fall");
        Elements rising = lastCell.select("span.icon-rise");
        String hot = null;
        if (!falling.isEmpty()) {
            hot = falling.text();
        } else if (!rising.isEmpty()) {
            hot = rising.text();
        }
        int count = 0;
        if (StringUtils.isNotBlank(hot)) {
            count = Integer.parseInt(hot);
        }
        return new BaiDuHotSearch(rank, kw, everurl, count);
    }
}
\ No newline at end of file
package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.DouyinHotSearch;
/**
 * Crawler for the Douyin hot-search list.
 *
 * @author win 10
 * @date 2019-07-11
 */
public class DouyinHotSearchCrawler {
    // Bound to this class so that log lines are attributed to the Douyin crawler.
    private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchCrawler.class);
    private static HttpBoot httpBoot = new HttpBoot();

    /**
     * Fetches the mobile Douyin hot-search list.
     *
     * <p>Up to three attempts are made. An attempt is abandoned when the
     * request fails, when the response does not contain {@code word_list},
     * or when the payload cannot be parsed.
     *
     * @return the parsed entries; never {@code null}, empty when all attempts failed
     */
    public static List<DouyinHotSearch> getMobileDouyinHotList() {
        String url = "https://api.amemv.com/aweme/v1/hot/search/list/";
        for (int j = 0; j < 3; j++) {
            try {
                String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
                if (htmlBody == null || !htmlBody.contains("word_list")) {
                    logger.warn("抖音热搜榜响应中未包含word_list,第{}次尝试", j + 1);
                    continue;
                }
                // A fresh list per attempt: a failure halfway through parsing must
                // not leak partial results into the next attempt.
                List<DouyinHotSearch> list = new ArrayList<DouyinHotSearch>();
                JSONObject data = JSONObject.parseObject(htmlBody);
                JSONArray wordList = data.getJSONObject("data").getJSONArray("word_list");
                for (int i = 0; i < wordList.size(); i++) {
                    JSONObject item = wordList.getJSONObject(i);
                    Integer position = Integer.valueOf(item.getString("position"));
                    String word = item.getString("word");
                    Integer hotValue = Integer.valueOf(item.getString("hot_value"));
                    logger.info("热度为:::{}", hotValue);
                    list.add(new DouyinHotSearch(position, word, hotValue));
                }
                return list;
            } catch (IOException | RuntimeException e) {
                // Runtime failures (unexpected JSON shape, non-numeric fields) are
                // handled like I/O failures: log and retry instead of propagating
                // out of the crawler.
                logger.warn("获取抖音热搜榜时出现问题:{}", e.getMessage(), e);
            }
        }
        return new ArrayList<DouyinHotSearch>();
    }
}
package com.zhiwei.searchhotcrawler.crawler;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.SougoHotSearch;
/**
 * Crawler for the hot keywords shown on the Sogou WeChat search page.
 *
 * @author hero
 * @date 2019-07-10
 */
public class SougoHotSearchCrawler {
    private static Logger logger = LoggerFactory.getLogger(SougoHotSearchCrawler.class);
    private static HttpBoot httpBoot = new HttpBoot();

    /**
     * Downloads the PC page and converts every ranked item of the
     * {@code ol#topwords} list into a {@link SougoHotSearch}.
     *
     * <p>The download is attempted up to three times; only a thrown exception
     * triggers another attempt. A page that was fetched but does not contain
     * the expected marker is logged and not retried.
     *
     * @return the parsed entries; never {@code null}, empty when nothing could
     *         be fetched or parsed
     */
    public static List<SougoHotSearch> sougoHotSearch() {
        String url = "https://weixin.sogou.com";
        List<SougoHotSearch> list = new ArrayList<SougoHotSearch>();
        for (int i = 0; i < 3; i++) {
            try {
                String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
                if (htmlBody != null && htmlBody.contains("topwords")) {
                    try {
                        Document document = Jsoup.parse(htmlBody);
                        Elements items = document.select("ol#topwords").select("li");
                        for (Element item : items) {
                            try {
                                SougoHotSearch hotSearch = parseItem(item);
                                if (hotSearch != null) {
                                    list.add(hotSearch);
                                }
                            } catch (Exception e) {
                                // One malformed item must not discard the rest of the list.
                                logger.error("解析搜狗微信时出现解析错误", e);
                            }
                        }
                    } catch (Exception e) {
                        // Pass the exception itself: fillInStackTrace() would overwrite
                        // the original stack trace with this catch site.
                        logger.error("解析搜狗微信时出现解析错误,数据不是json结构", e);
                        // Return the (empty) list rather than null so that callers
                        // iterating the result do not hit a NullPointerException.
                        return list;
                    }
                } else {
                    logger.info("解析搜狗微信时出现解析错误,页面结构有问题");
                }
                break;
            } catch (Exception e) {
                logger.error("解析搜狗微信时出现解析错误,页面结构有问题", e);
            }
        }
        // The "{}" placeholder is required; without it the size is never printed.
        logger.info("此轮采集的数据量为:{}", list.size());
        return list;
    }

    /**
     * Converts one list item into an entry.
     *
     * @param item an {@code li} element of the hot-keyword list
     * @return the entry, or {@code null} when the item carries no rank
     * @throws NumberFormatException if the rank is not an integer
     */
    private static SougoHotSearch parseItem(Element item) {
        Elements rankNode = item.select("li").select("i");
        String rankStr = null;
        if (!rankNode.isEmpty()) {
            rankStr = rankNode.text();
        }
        if (StringUtils.isBlank(rankStr)) {
            // Items without a rank are never stored, so skip the remaining work.
            return null;
        }
        Integer rank = Integer.valueOf(rankStr);

        Elements link = item.select("li").select("a");
        String kw = link.text();
        logger.info("关键词:{}", kw);
        String everurl = link.attr("href");
        return new SougoHotSearch(rank, kw, everurl);
    }
}
package com.zhiwei.searchhotcrawler.dao;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch;
import com.zhiwei.searchhotcrawler.cache.CacheManager;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * MongoDB access for Baidu hot-search entries, bound to the database and
 * collection named by {@code Config.dbName} and {@code Config.collBaiduName}.
 */
public class BaiduHotSearchDAO extends MongoDBTemplate {

    public BaiduHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collBaiduName);
    }

    /**
     * Inserts a batch of documents, trying up to three times.
     *
     * <p>A failed attempt prints its stack trace; nothing is thrown to the
     * caller. After a successful insert the method pauses for 200 ms.
     *
     * @param list documents to insert; {@code null} or empty is a no-op
     */
    public void addBaiduSearch(List<DBObject> list) {
        // Nothing to store: skip the driver call instead of sending (and then
        // pointlessly retrying) an empty batch.
        if (list == null || list.isEmpty()) {
            return;
        }
        for (int attempt = 0; attempt < 3; attempt++) {
            try {
                // NOTE(review): the write goes through getReadColl(); confirm that
                // this is the intended collection handle for inserts.
                this.getReadColl().insert(list);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Computes how much the search index changed compared with the most
     * recently stored document (by {@code time}) for the same keyword.
     *
     * @param baiduHotSearch the freshly crawled entry
     * @return current count minus the stored count; {@code 0} when no earlier
     *         document exists or the lookup fails
     */
    public int getChangeCount(BaiDuHotSearch baiduHotSearch) {
        int result = 0;
        DBObject query = new BasicDBObject();
        query.put("kw", baiduHotSearch.getKw());
        DBObject sort = new BasicDBObject();
        sort.put("time", -1);
        DBCursor cur = null;
        try {
            cur = this.getReadColl().find(query).sort(sort).limit(1);
            if (cur.hasNext()) {
                DBObject doc = cur.next();
                result = baiduHotSearch.getCount() - Integer.parseInt(doc.get("count").toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the cursor is released even when reading fails.
            if (cur != null) {
                cur.close();
            }
        }
        return result;
    }
}
package com.zhiwei.searchhotcrawler.dao;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.WriteConcern;
import com.zhiwei.searchhotcrawler.bean.DouyinHotSearch;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * MongoDB access for Douyin hot-search entries, bound to the database and
 * collection named by {@code Config.dbName} and {@code Config.collDouyinName}.
 */
public class DouyinHotSearchDAO extends MongoDBTemplate {

    public DouyinHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collDouyinName);
    }

    /**
     * Inserts a single document, trying up to three times.
     *
     * <p>A failed attempt prints its stack trace; nothing is thrown to the
     * caller. After a successful insert the method pauses for 200 ms.
     *
     * @param douyin document to insert; {@code null} is a no-op
     */
    @SuppressWarnings("deprecation")
    public void addDouyinHotSearch(DBObject douyin) {
        if (douyin == null) {
            return;
        }
        for (int attempt = 0; attempt < 3; attempt++) {
            try {
                // NOTE(review): the write goes through getReadColl(); confirm that
                // this is the intended collection handle for inserts.
                this.getReadColl().insert(douyin, WriteConcern.SAFE);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                // Report the failure instead of swallowing it, matching the
                // other hot-search DAOs.
                e.printStackTrace();
            }
        }
    }

    /**
     * Computes how much the heat value changed compared with the most
     * recently stored document (by {@code time}) for the same word.
     *
     * @param douyinHotSearch the freshly crawled entry
     * @return current heat value minus the stored one; {@code 0} when no
     *         earlier document exists or the lookup fails
     */
    public int getChangeCount(DouyinHotSearch douyinHotSearch) {
        int result = 0;
        DBObject query = new BasicDBObject();
        query.put("word", douyinHotSearch.getWord());
        DBObject sort = new BasicDBObject();
        sort.put("time", -1);
        DBCursor cur = null;
        try {
            cur = this.getReadColl().find(query).sort(sort).limit(1);
            if (cur.hasNext()) {
                DBObject doc = cur.next();
                result = douyinHotSearch.getHot_value()
                        - Integer.parseInt(doc.get("hot_value").toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the cursor is released even when reading fails.
            if (cur != null) {
                cur.close();
            }
        }
        return result;
    }
}
package com.zhiwei.searchhotcrawler.dao;
import java.util.List;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * MongoDB access for Sogou WeChat hot-keyword entries, bound to the database
 * and collection named by {@code Config.dbName} and {@code Config.collSougoName}.
 */
public class SougoHotSearchDAO extends MongoDBTemplate {

    public SougoHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collSougoName);
    }

    /**
     * Inserts a batch of documents, trying up to three times.
     *
     * <p>A failed attempt prints its stack trace; nothing is thrown to the
     * caller. After a successful insert the method pauses for 200 ms.
     *
     * @param list documents to insert; {@code null} or empty is a no-op
     */
    public void addSougoSearch(List<DBObject> list) {
        // Nothing to store: skip the driver call instead of sending (and then
        // pointlessly retrying) an empty batch.
        if (list == null || list.isEmpty()) {
            return;
        }
        for (int attempt = 0; attempt < 3; attempt++) {
            try {
                // NOTE(review): the write goes through getReadColl(); confirm that
                // this is the intended collection handle for inserts.
                this.getReadColl().insert(list);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
......@@ -29,11 +29,13 @@ public class MongoDBTemplate {
ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);
if(reader==null)
{
reader = new MongoClient(address, Arrays.asList(credential));
// reader = new MongoClient(address, Arrays.asList(credential));
reader = new MongoClient(address);
}
if(writer==null)
{
writer = new MongoClient(address, Arrays.asList(credential));
// writer = new MongoClient(address, Arrays.asList(credential));
writer = new MongoClient(address);
}
} catch (MongoException e) {
e.printStackTrace();
......
......@@ -5,8 +5,11 @@ import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.DouyinHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SougoHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun;
import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun;
......@@ -22,8 +25,11 @@ public class HotSearchRun {
}
public void showTimer() {
scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 1 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 1 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new BaiduHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new SougoHotSearchRun(), 0, 1 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new DouyinHotSearchRun(), 0, 10 , TimeUnit.MINUTES);
}
public static void main(String[] args) {
......
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.BaiduHotSearchDAO;
/**
 * Task that crawls the Baidu hot-search ranking once and stores the result.
 */
public class BaiduHotSearchRun extends Thread {
    private static Logger logger = LoggerFactory.getLogger(BaiduHotSearchRun.class);
    private BaiduHotSearchDAO baiduHotSearchDAO = new BaiduHotSearchDAO();

    /**
     * Crawls the ranking, computes the change of each entry against the last
     * stored value, and inserts all entries as one batch.
     *
     * <p>No exception leaves this method: when a task is run through
     * {@code ScheduledExecutorService.scheduleAtFixedRate}, an exception
     * thrown by one execution suppresses all subsequent executions.
     */
    @Override
    public void run() {
        logger.info("百度风云榜采集开始........");
        try {
            List<BaiDuHotSearch> list = BaiDuHotSearchCrawler.baiduHotSearch();
            int size = list != null ? list.size() : 0;
            logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(size));
            // The crawler can hand back null; iterating it would throw.
            if (size > 0) {
                List<DBObject> data = new ArrayList<DBObject>(size);
                for (BaiDuHotSearch baiduHotSearch : list) {
                    int changeCount = baiduHotSearchDAO.getChangeCount(baiduHotSearch);
                    DBObject doc = new BasicDBObject();
                    doc.put("_id", baiduHotSearch.getId());
                    doc.put("kw", baiduHotSearch.getKw());
                    doc.put("everurl", baiduHotSearch.getEverurl());
                    doc.put("count", baiduHotSearch.getCount());
                    doc.put("day", baiduHotSearch.getDay());
                    doc.put("time", baiduHotSearch.getTime());
                    doc.put("changeCount", changeCount);
                    doc.put("rank", baiduHotSearch.getRank());
                    data.add(doc);
                }
                baiduHotSearchDAO.addBaiduSearch(data);
            }
        } catch (Exception e) {
            logger.error("百度风云榜采集出现异常", e);
        }
        logger.info("百度风云榜采集结束........");
    }
}
\ No newline at end of file
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.DouyinHotSearch;
import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.DouyinHotSearchDAO;
/**
 * Task that crawls the Douyin hot-search list once and stores the result.
 */
public class DouyinHotSearchRun extends Thread {
    private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class);
    private DouyinHotSearchDAO douyinHotSearchDAO = new DouyinHotSearchDAO();

    /**
     * Crawls the list, computes the change of each entry against the last
     * stored value, and inserts the entries one by one.
     *
     * <p>No exception leaves this method: when a task is run through
     * {@code ScheduledExecutorService.scheduleAtFixedRate}, an exception
     * thrown by one execution suppresses all subsequent executions.
     */
    @Override
    public void run() {
        logger.info("抖音热搜榜采集开始........");
        try {
            List<DouyinHotSearch> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
            int size = list != null ? list.size() : 0;
            logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(size));
            // The crawler can hand back null; iterating it would throw.
            if (size > 0) {
                for (DouyinHotSearch douyinHotSearch : list) {
                    int changeCount = douyinHotSearchDAO.getChangeCount(douyinHotSearch);
                    DBObject douyin = new BasicDBObject();
                    douyin.put("_id", douyinHotSearch.getId());
                    douyin.put("word", douyinHotSearch.getWord());
                    douyin.put("position", douyinHotSearch.getPosition());
                    douyin.put("hot_value", douyinHotSearch.getHot_value());
                    douyin.put("time", douyinHotSearch.getTime());
                    douyin.put("changeCount", changeCount);
                    douyinHotSearchDAO.addDouyinHotSearch(douyin);
                }
            }
        } catch (Exception e) {
            logger.error("抖音热搜榜采集出现异常", e);
        }
        logger.info("抖音热搜榜采集结束........");
    }
}
......@@ -66,7 +66,7 @@ public class SendWeiboHotSearchRun extends Thread {
* @return void 返回类型
*/
public static void sendTemplateByUserIds(String title, String time, String url) {
Map<String, Object> dataMap = new HashMap<>();
Map<String, Object> dataMap = new HashMap<String, Object>();
JSONObject first = new JSONObject();
first.put("value", "您好,有一条来自微博热搜榜的预警通知。");
dataMap.put("first", first);
......
......@@ -66,7 +66,7 @@ public class SendZhihuHotSearchRun extends Thread{
*/
public static void sendTemplateByUserIds(String title,String time, String url) {
Map<String, Object> dataMap = new HashMap<>();
Map<String, Object> dataMap = new HashMap<String, Object>();
JSONObject first = new JSONObject();
first.put("value", "您好,有一条来自知乎热搜榜的预警通知。");
dataMap.put("first", first);
......
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.SougoHotSearch;
import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.SougoHotSearchDAO;
/**
 * Task that crawls the Sogou WeChat hot keywords once and stores the result.
 */
public class SougoHotSearchRun extends Thread {
    private static Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class);
    private SougoHotSearchDAO sougoHotSearchDAO = new SougoHotSearchDAO();

    /**
     * Crawls the keyword list and inserts all entries as one batch.
     *
     * <p>No exception leaves this method: when a task is run through
     * {@code ScheduledExecutorService.scheduleAtFixedRate}, an exception
     * thrown by one execution suppresses all subsequent executions.
     */
    @Override
    public void run() {
        logger.info("搜狗微信采集开始........");
        try {
            List<SougoHotSearch> list = SougoHotSearchCrawler.sougoHotSearch();
            int size = list != null ? list.size() : 0;
            logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(size));
            // The crawler can hand back null; iterating it would throw.
            if (size > 0) {
                List<DBObject> data = new ArrayList<DBObject>(size);
                for (SougoHotSearch sougoHotSearch : list) {
                    DBObject doc = new BasicDBObject();
                    doc.put("_id", sougoHotSearch.getId());
                    doc.put("kw", sougoHotSearch.getKw());
                    doc.put("everurl", sougoHotSearch.getEverurl());
                    doc.put("day", sougoHotSearch.getDay());
                    doc.put("time", sougoHotSearch.getTime());
                    doc.put("rank", sougoHotSearch.getRank());
                    data.add(doc);
                }
                sougoHotSearchDAO.addSougoSearch(data);
            }
        } catch (Exception e) {
            logger.error("搜狗微信采集出现异常", e);
        }
        logger.info("搜狗微信采集结束........");
    }
}
#mongoIp=202.107.192.94
mongoIp=192.168.0.101
mongoPort=30000
mongoIp=192.168.0.247
mongoPort=27017
db.username=zzwno
db.paasword=zzwno1q2w3e4r
db.certifiedDB=admin
db.certifiedDB=oneDB
dbName=NetWork
collWeiboName=weibo_hotsearch2018_10
collZhihuName=zhihu_hotsearch2018_10
collWechatUserName=wechat_user
\ No newline at end of file
collWechatUserName=wechat_user
collBaiduName=baidu_hotsearch2019_07
collSougoName=sougo_hotsearch2019_07
collDouyinName=douyin_hotsearch2019_07
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment