Commit 41dee457 by zhiwei

添加抖音、微信、百度热搜采集

parent b528f200
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * One entry of the Baidu hot-search ranking.
 *
 * <p>The convenience constructor stamps the entry with the current time and
 * derives {@code id} and {@code day} from that same instant.
 */
public class BaiDuHotSearch implements Serializable {

    private static final long serialVersionUID = 2076919584659821600L;

    /** Primary key: keyword + "_" + capture time in epoch milliseconds. */
    private String id;
    /** Main link; not populated by the convenience constructor. */
    private String url;
    /** Related link. */
    private String everurl;
    /** Keyword. */
    private String kw;
    /** Search index. */
    private int count;
    /** Capture day, formatted with the pattern {@code yyyy-MM-dd}. */
    private String day;
    /** Capture time. */
    private Date time;
    /** Change relative to the previous minute. */
    private int changeCount;
    /** Rank; may be {@code null} when unknown. */
    private Integer rank;

    /** Creates an empty entry; every reference field is {@code null}. */
    public BaiDuHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param rank    position in the ranking, may be {@code null}
     * @param kw      keyword
     * @param everurl related link
     * @param count   search index
     */
    public BaiDuHotSearch(Integer rank, String kw, String everurl, int count) {
        // Read the clock once so that id, time and day all describe the same
        // instant (three separate readings could straddle midnight).
        final Date now = new Date();
        this.id = kw + "_" + now.getTime();
        this.rank = rank;
        this.kw = kw;
        this.count = count;
        this.everurl = everurl;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new BaiDuHotSearch["
                + "id = " + id
                + ", url = " + url
                + ", everurl = " + everurl
                + ", kw = " + kw
                + ", count = " + count
                + ", day = " + day
                + ", time = " + time
                + ", rank = " + rank
                + ", changeCount = " + changeCount
                + "]";
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getEverurl() {
        return everurl;
    }

    public void setEverurl(String everurl) {
        this.everurl = everurl;
    }

    public String getKw() {
        return kw;
    }

    public void setKw(String kw) {
        this.kw = kw;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public int getChangeCount() {
        return changeCount;
    }

    public void setChangeCount(int changeCount) {
        this.changeCount = changeCount;
    }

    /**
     * Returns the rank as a primitive.
     *
     * @return the rank
     * @throws NullPointerException if the rank has not been set, because the
     *         {@code Integer} field is auto-unboxed
     */
    public int getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }
}
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * One entry of the Douyin hot-search ranking.
 *
 * <p>The convenience constructor stamps the entry with the current time and
 * derives {@code id} and {@code day} from that same instant.
 */
public class DouyinHotSearch implements Serializable {

    private static final long serialVersionUID = -7707110236217797510L;

    /** Primary key: word + "_" + capture time in epoch milliseconds. */
    private String id;
    /** Rank. */
    private Integer position;
    /** Hot-search keyword. */
    private String word;
    /** Heat value. */
    private int hot_value;
    /** Capture time. */
    private Date time;
    /** Change relative to the previous minute. */
    private int changeCount;
    /** Capture day, formatted with the pattern {@code yyyy-MM-dd}. */
    private String day;

    /** Creates an empty entry; every reference field is {@code null}. */
    public DouyinHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param position  position in the ranking, may be {@code null}
     * @param word      hot-search keyword
     * @param hot_value heat value; must not be {@code null}
     * @throws NullPointerException if {@code hot_value} is {@code null},
     *         because it is auto-unboxed into the primitive field
     */
    public DouyinHotSearch(Integer position, String word, Integer hot_value) {
        // Read the clock once so that id, time and day all describe the same
        // instant (three separate readings could straddle midnight).
        final Date now = new Date();
        this.id = word + "_" + now.getTime();
        this.position = position;
        this.word = word;
        this.hot_value = hot_value;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new DouyinHotSearch["
                + "id = " + id
                + ", position = " + position
                + ", word = " + word
                + ", hot_value = " + hot_value
                + ", time = " + time
                + ", changeCount = " + changeCount
                + "]";
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Integer getPosition() {
        return position;
    }

    public void setPosition(Integer position) {
        this.position = position;
    }

    public String getWord() {
        return word;
    }

    public void setWord(String word) {
        this.word = word;
    }

    public int getHot_value() {
        return hot_value;
    }

    public void setHot_value(int hot_value) {
        this.hot_value = hot_value;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public int getChangeCount() {
        return changeCount;
    }

    public void setChangeCount(int changeCount) {
        this.changeCount = changeCount;
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }
}
...@@ -12,7 +12,7 @@ import java.util.Date; ...@@ -12,7 +12,7 @@ import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
public class WeiboHotSearch implements Serializable{ public class HotSearchList implements Serializable{
private static final long serialVersionUID = 2076919584659821600L; private static final long serialVersionUID = 2076919584659821600L;
...@@ -34,10 +34,13 @@ public class WeiboHotSearch implements Serializable{ ...@@ -34,10 +34,13 @@ public class WeiboHotSearch implements Serializable{
private int rank; //排名 private int rank; //排名
private String type; //分类
public WeiboHotSearch(){}
public WeiboHotSearch(String url, String name, int count,boolean hot,int rank){
public HotSearchList(){}
public HotSearchList(String url, String name, int count,boolean hot,int rank,String type){
this.id = name + "_" + new Date().getTime(); this.id = name + "_" + new Date().getTime();
this.url = url; this.url = url;
this.name = name; this.name = name;
...@@ -46,12 +49,26 @@ public class WeiboHotSearch implements Serializable{ ...@@ -46,12 +49,26 @@ public class WeiboHotSearch implements Serializable{
this.rank = rank; this.rank = rank;
this.time = new Date(); this.time = new Date();
this.day = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd"); this.day = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
this.type = type;
}
public HotSearchList(String url, String name, Integer count,int rank,String type){
this.id = name + "_" + new Date().getTime();
this.url = url;
this.name = name;
this.count = count;
this.hot = true;
this.rank = rank;
this.time = new Date();
this.day = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
this.type = type;
} }
@Override @Override
public String toString(){ public String toString(){
return "new WeiboHotSearch[" return "new HotSearchList["
+ "id = " + id + "id = " + id
+ ", url = " + url + ", url = " + url
+ ", name = " + name + ", name = " + name
...@@ -61,6 +78,7 @@ public class WeiboHotSearch implements Serializable{ ...@@ -61,6 +78,7 @@ public class WeiboHotSearch implements Serializable{
+ ", rank = " + rank + ", rank = " + rank
+ ", day = " + day + ", day = " + day
+ ", changeCount = " + changeCount + ", changeCount = " + changeCount
+ ", type = " + type
+ "]"; + "]";
} }
...@@ -140,10 +158,13 @@ public class WeiboHotSearch implements Serializable{ ...@@ -140,10 +158,13 @@ public class WeiboHotSearch implements Serializable{
public void setRank(int rank) { public void setRank(int rank) {
this.rank = rank; this.rank = rank;
} }
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
} }
package com.zhiwei.searchhotcrawler.bean;
/**
 * Source platforms of a hot-search entry.
 *
 * <p>The crawlers pass {@code name()} of a constant as the {@code type}
 * string of a {@code HotSearchList}, so the constant names themselves are
 * the values that get stored.
 */
public enum HotSearchType {
/** Baidu hot search. */
百度热搜,
/** Weibo hot search. */
微博热搜,
/** Zhihu hot search. */
知乎热搜,
/** Douyin hot search. */
抖音热搜,
/** Sogou WeChat hot search. */
搜狗微信热搜
}
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * One entry of the Sogou WeChat hot-search ranking.
 *
 * <p>The convenience constructor stamps the entry with the current time and
 * derives {@code id} and {@code day} from that same instant.
 */
public class SougoHotSearch implements Serializable {

    private static final long serialVersionUID = 2076919584659821600L;

    /** Primary key: keyword + "_" + capture time in epoch milliseconds. */
    private String id;
    /** Main link; not populated by the convenience constructor. */
    private String url;
    /** Related link. */
    private String everurl;
    /** Keyword. */
    private String kw;
    /** Capture day, formatted with the pattern {@code yyyy-MM-dd}. */
    private String day;
    /** Capture time. */
    private Date time;
    /** Rank; may be {@code null} when unknown. */
    private Integer rank;

    /** Creates an empty entry; every field is {@code null}. */
    public SougoHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param rank    position in the ranking, may be {@code null}
     * @param kw      keyword
     * @param everurl related link
     */
    public SougoHotSearch(Integer rank, String kw, String everurl) {
        // Read the clock once so that id, time and day all describe the same
        // instant (three separate readings could straddle midnight).
        final Date now = new Date();
        this.id = kw + "_" + now.getTime();
        this.rank = rank;
        this.kw = kw;
        this.everurl = everurl;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new SougoHotSearch["
                + "id = " + id
                + ", url = " + url
                + ", everurl = " + everurl
                + ", kw = " + kw
                + ", day = " + day
                + ", time = " + time
                + ", rank = " + rank
                + "]";
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getEverurl() {
        return everurl;
    }

    public void setEverurl(String everurl) {
        this.everurl = everurl;
    }

    public String getKw() {
        return kw;
    }

    public void setKw(String kw) {
        this.kw = kw;
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public Integer getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }
}
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
/**
 * One Zhihu hot-search entry: a link, the keyword, its display form and the
 * time attached to it by the caller.
 */
public class ZhihuHotSearch implements Serializable {

    private static final long serialVersionUID = -7707110236217797510L;

    /** Message link. */
    private String url;
    /** Hot-search keyword. */
    private String query;
    /** Display form of the hot-search keyword. */
    private String displayQuery;
    /** Time associated with the entry. */
    private Date time;

    /** Creates an empty entry; every field is {@code null}. */
    public ZhihuHotSearch() {
    }

    /**
     * Creates a fully populated entry.
     *
     * @param url          message link
     * @param query        hot-search keyword
     * @param displayQuery display form of the keyword
     * @param time         time associated with the entry (stored as-is)
     */
    public ZhihuHotSearch(String url, String query, String displayQuery, Date time) {
        setUrl(url);
        setQuery(query);
        setDisplayQuery(displayQuery);
        setTime(time);
    }

    // ---- accessors ----

    public String getUrl() {
        return this.url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getQuery() {
        return this.query;
    }

    public void setQuery(String query) {
        this.query = query;
    }

    public String getDisplayQuery() {
        return this.displayQuery;
    }

    public void setDisplayQuery(String displayQuery) {
        this.displayQuery = displayQuery;
    }

    public Date getTime() {
        return this.time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    /** Renders all four fields; unset fields appear as {@code null}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("new ZhihuHotSearch[");
        text.append("url = ").append(url);
        text.append(", query = ").append(query);
        text.append(", displayQuery = ").append(displayQuery);
        text.append(", time = ").append(time);
        text.append("]");
        return text.toString();
    }
}
...@@ -18,11 +18,7 @@ public class Config { ...@@ -18,11 +18,7 @@ public class Config {
userPwd = conf.getProperty("db.paasword"); userPwd = conf.getProperty("db.paasword");
authDB = conf.getProperty("db.certifiedDB"); authDB = conf.getProperty("db.certifiedDB");
dbName = conf.getProperty("dbName"); dbName = conf.getProperty("dbName");
collWeiboName = conf.getProperty("collWeiboName"); collName = conf.getProperty("collName");
collZhihuName = conf.getProperty("collZhihuName");
collBaiduName = conf.getProperty("collBaiduName");
collSougoName = conf.getProperty("collSougoName");
collDouyinName = conf.getProperty("collDouyinName");
collWechatUserName = conf.getProperty("collWechatUserName"); collWechatUserName = conf.getProperty("collWechatUserName");
} catch (Exception e) { } catch (Exception e) {
...@@ -37,10 +33,6 @@ public class Config { ...@@ -37,10 +33,6 @@ public class Config {
public static String userPwd; public static String userPwd;
public static String authDB; public static String authDB;
public static String dbName; public static String dbName;
public static String collWeiboName; public static String collName;
public static String collBaiduName;
public static String collZhihuName;
public static String collWechatUserName; public static String collWechatUserName;
public static String collSougoName;
public static String collDouyinName;
} }
...@@ -14,7 +14,8 @@ import org.slf4j.LoggerFactory; ...@@ -14,7 +14,8 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
/** /**
* @ClassName:BaiDuHotSearch * @ClassName:BaiDuHotSearch
...@@ -26,7 +27,7 @@ public class BaiDuHotSearchCrawler { ...@@ -26,7 +27,7 @@ public class BaiDuHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(BaiDuHotSearchCrawler.class); private static Logger logger = LoggerFactory.getLogger(BaiDuHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: BaiDuHotSearchTest * @Title: BaiDuHotSearchTest
* @author hero * @author hero
...@@ -34,7 +35,7 @@ public class BaiDuHotSearchCrawler { ...@@ -34,7 +35,7 @@ public class BaiDuHotSearchCrawler {
* @param 设定文件 * @param 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<BaiDuHotSearch> baiduHotSearch() { public static List<HotSearchList> baiduHotSearch() {
String url = "http://top.baidu.com/buzz?b=1&fr=topindex"; String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
try { try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
...@@ -55,8 +56,8 @@ public class BaiDuHotSearchCrawler { ...@@ -55,8 +56,8 @@ public class BaiDuHotSearchCrawler {
* @param htmlBody * @param htmlBody
* @return * @return
*/ */
private static List<BaiDuHotSearch> ansysData(String htmlBody){ private static List<HotSearchList> ansysData(String htmlBody){
List<BaiDuHotSearch> list = new ArrayList<>(); List<HotSearchList> list = new ArrayList<>();
try { try {
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
Elements elements = document.select("table.list-table").select("tr"); Elements elements = document.select("table.list-table").select("tr");
...@@ -94,8 +95,7 @@ public class BaiDuHotSearchCrawler { ...@@ -94,8 +95,7 @@ public class BaiDuHotSearchCrawler {
if (StringUtils.isNotBlank(hot)) { if (StringUtils.isNotBlank(hot)) {
count = Integer.valueOf(hot); count = Integer.valueOf(hot);
} }
HotSearchList hotSearch = new HotSearchList(everurl, kw, count, rank, HotSearchType.百度热搜.name());
BaiDuHotSearch hotSearch = new BaiDuHotSearch(rank, kw, everurl, count);
if (Objects.nonNull(rank)) { if (Objects.nonNull(rank)) {
list.add(hotSearch); list.add(hotSearch);
} }
......
...@@ -12,7 +12,8 @@ import com.alibaba.fastjson.JSONArray; ...@@ -12,7 +12,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.DouyinHotSearch; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
...@@ -34,13 +35,13 @@ public class DouyinHotSearchCrawler { ...@@ -34,13 +35,13 @@ public class DouyinHotSearchCrawler {
* @param @return 设定文件 * @param @return 设定文件
* @return List<ZhihuHotSearch> 返回类型 * @return List<ZhihuHotSearch> 返回类型
*/ */
public static List<DouyinHotSearch> getMobileDouyinHotList(){ public static List<HotSearchList> getMobileDouyinHotList(){
List<DouyinHotSearch> list = null; List<HotSearchList> list = null;
String url = "https://api.amemv.com/aweme/v1/hot/search/list/"; String url = "https://api.amemv.com/aweme/v1/hot/search/list/";
try { try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("word_list")){ if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("word_list")){
list = new ArrayList<DouyinHotSearch>(); list = new ArrayList<>();
JSONObject data = JSONObject.parseObject(htmlBody); JSONObject data = JSONObject.parseObject(htmlBody);
JSONArray wordList = data.getJSONObject("data").getJSONArray("word_list"); JSONArray wordList = data.getJSONObject("data").getJSONArray("word_list");
String positionStr = null; String positionStr = null;
...@@ -59,7 +60,7 @@ public class DouyinHotSearchCrawler { ...@@ -59,7 +60,7 @@ public class DouyinHotSearchCrawler {
Integer hotValue = null; Integer hotValue = null;
hotValue = Integer.valueOf(hotValueStr); hotValue = Integer.valueOf(hotValueStr);
// logger.info("热度为:::{}", hot_value); // logger.info("热度为:::{}", hot_value);
DouyinHotSearch douyin = new DouyinHotSearch(position, word, hotValue); HotSearchList douyin = new HotSearchList(null,word, hotValue, position,HotSearchType.抖音热搜.name());
list.add(douyin); list.add(douyin);
} }
} }
......
package com.zhiwei.searchhotcrawler.crawler; package com.zhiwei.searchhotcrawler.crawler;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.Collections;
import java.util.Objects; import java.util.List;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.SougoHotSearch; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
/** /**
* @ClassName:SougoHotSearch * @ClassName:SougoHotSearch
* @Description: TODO(搜狗微信关键词采集) * @Description: TODO(搜狗微信关键词采集)
* @author hero * @author hero
* @date 2019年7月10日 上午10:54:31 * @date 2019年7月10日 上午10:54:31
*/ */
public class SougoHotSearchCrawler { public class SougoHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(SougoHotSearchCrawler.class); private static Logger logger = LoggerFactory.getLogger(SougoHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: SougoHotSearchTest * @Title: SougoHotSearchTest
* @author hero * @author hero
* @Description: TODO(PC端搜狗微信关键词采集) * @Description: TODO(PC端搜狗微信关键词采集)
* @param 设定文件 * @param 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<SougoHotSearch> sougoHotSearch(){ public static List<HotSearchList> sougoHotSearch() {
String url = "https://weixin.sogou.com"; String url = "https://weixin.sogou.com";
List<SougoHotSearch> list = new ArrayList<SougoHotSearch>(); List<HotSearchList> list = new ArrayList<>();
for(int i =0; i<3; i++){ for (int i = 0; i < 3; i++) {
String htmlBody = null; String htmlBody = null;
try { try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string(); htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
if(htmlBody!=null && htmlBody.contains("topwords")){ if (htmlBody != null && htmlBody.contains("topwords")) {
try { try {
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
Elements elements = document.select("ol#topwords").select("li"); Elements elements = document.select("ol#topwords").select("li");
for (Element element : elements) { for (Element element : elements) {
try { try {
//获取排名rank // 获取排名rank
String rankStr = null; String rankStr = null;
if(!element.select("li").select("i").isEmpty()) { if (!element.select("li").select("i").isEmpty()) {
rankStr = element.select("li").select("i").text(); rankStr = element.select("li").select("i").text();
} }
Integer rank = null; Integer rank = null;
if(StringUtils.isNoneBlank(rankStr)) { if (StringUtils.isNoneBlank(rankStr)) {
rank = Integer.valueOf(rankStr); rank = Integer.valueOf(rankStr);
} }
//获取关键词(String) // 获取关键词(String)
String kw = element.select("li").select("a").text(); String kw = element.select("li").select("a").text();
logger.info("关键词:{}", kw); logger.info("关键词:{}", kw);
//获取关键词相关链接everurl(String) // 获取关键词相关链接everurl(String)
String everurl = element.select("li").select("a").attr("href"); String everurl = element.select("li").select("a").attr("href");
SougoHotSearch hotSearch = new SougoHotSearch(rank,kw,everurl); HotSearchList hotSearch = new HotSearchList(everurl, kw, null, rank, HotSearchType.搜狗微信热搜.name());
if(Objects.nonNull(rank)) { if (Objects.nonNull(rank)) {
list.add(hotSearch); list.add(hotSearch);
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析搜狗微信时出现解析错误", e); logger.error("解析搜狗微信时出现解析错误", e);
continue; }
}
} }
}catch (Exception e) { } catch (Exception e) {
logger.error("解析搜狗微信时出现解析错误,数据不是json结构",e.fillInStackTrace()); logger.error("解析搜狗微信时出现解析错误,数据不是json结构", e.fillInStackTrace());
return null; return Collections.emptyList();
} }
}else{ } else {
logger.info("解析搜狗微信时出现解析错误,页面结构有问题"); logger.info("解析搜狗微信时出现解析错误,页面结构有问题");
} }
break; break;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析搜狗微信时出现解析错误,页面结构有问题", e); logger.error("解析搜狗微信时出现解析错误,页面结构有问题", e);
} }
} }
logger.info("此轮采集的数据量为:", list.size());
return list; return list;
} }
} }
...@@ -17,7 +17,8 @@ import com.alibaba.fastjson.JSONArray; ...@@ -17,7 +17,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.WeiboHotSearch; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.mail.SendMailWeibo; import com.zhiwei.searchhotcrawler.mail.SendMailWeibo;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
...@@ -38,10 +39,10 @@ public class WeiboHotSearchCrawler { ...@@ -38,10 +39,10 @@ public class WeiboHotSearchCrawler {
* @param 设定文件 * @param 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<WeiboHotSearch> weiboHotSearch(){ public static List<HotSearchList> weiboHotSearch(){
String url = "https://s.weibo.com/top/summary?cate=realtimehot"; String url = "https://s.weibo.com/top/summary?cate=realtimehot";
List<WeiboHotSearch> list = new ArrayList<WeiboHotSearch>(); List<HotSearchList> list = new ArrayList<HotSearchList>();
for(int i =0; i<3; i++){ for(int i =0; i<3; i++){
String htmlBody = null; String htmlBody = null;
try { try {
...@@ -63,7 +64,7 @@ public class WeiboHotSearchCrawler { ...@@ -63,7 +64,7 @@ public class WeiboHotSearchCrawler {
int hotCount = Integer.valueOf(num); int hotCount = Integer.valueOf(num);
int rankCount = Integer.valueOf(rank); int rankCount = Integer.valueOf(rank);
WeiboHotSearch hotSearch = new WeiboHotSearch(id, name, hotCount,true, rankCount); HotSearchList hotSearch = new HotSearchList(id, name, hotCount,true, rankCount, HotSearchType.微博热搜.name());
list.add(hotSearch); list.add(hotSearch);
} catch (Exception e) { } catch (Exception e) {
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
...@@ -103,13 +104,13 @@ public class WeiboHotSearchCrawler { ...@@ -103,13 +104,13 @@ public class WeiboHotSearchCrawler {
* @param 设定文件 * @param 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<WeiboHotSearch> weiboHotSearchByPhone(){ public static List<HotSearchList> weiboHotSearchByPhone(){
String url = ""; String url = "";
Map<String,String> headerMap = new HashMap<String,String>(); Map<String,String> headerMap = new HashMap<String,String>();
headerMap.put("Host", "mapi.weibo.com"); headerMap.put("Host", "mapi.weibo.com");
headerMap.put("User-Agent", "Weibo/8789 (iPhone; iOS 10.3.3; Scale/2.00)"); headerMap.put("User-Agent", "Weibo/8789 (iPhone; iOS 10.3.3; Scale/2.00)");
List<WeiboHotSearch> result = new ArrayList<WeiboHotSearch>(); List<HotSearchList> result = new ArrayList<HotSearchList>();
String htmlBody; String htmlBody;
try { try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string(); htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
...@@ -133,7 +134,7 @@ public class WeiboHotSearchCrawler { ...@@ -133,7 +134,7 @@ public class WeiboHotSearchCrawler {
int rankCount = cardInfo.getIntValue("desc_extr"); int rankCount = cardInfo.getIntValue("desc_extr");
String id = "http://s.weibo.com/weibo/"+URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top"; String id = "http://s.weibo.com/weibo/"+URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
WeiboHotSearch hotSearch = new WeiboHotSearch(id, name, hotCount, hot, rankCount); HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rankCount, HotSearchType.微博热搜.name());
logger.info("采集到的数据:::{}", hotSearch); logger.info("采集到的数据:::{}", hotSearch);
result.add(hotSearch); result.add(hotSearch);
} }
......
...@@ -2,7 +2,6 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -2,7 +2,6 @@ package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -13,7 +12,8 @@ import com.alibaba.fastjson.JSONArray; ...@@ -13,7 +12,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.ZhihuHotSearch; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
...@@ -34,8 +34,8 @@ public class ZhihuHotSearchCrawler { ...@@ -34,8 +34,8 @@ public class ZhihuHotSearchCrawler {
* @param 设定文件 * @param 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<ZhihuHotSearch> getZhihuHotList(){ public static List<HotSearchList> getZhihuHotList(){
List<ZhihuHotSearch> list = null; List<HotSearchList> list = null;
String url = "https://www.zhihu.com/api/v4/search/top_search"; String url = "https://www.zhihu.com/api/v4/search/top_search";
String rerferer = "https://www.zhihu.com/search?type=content&q=%E5%BF%AB%E6%89%8B"; String rerferer = "https://www.zhihu.com/search?type=content&q=%E5%BF%AB%E6%89%8B";
Map<String,String> headerMap = HeaderTool.getCommonHead(); Map<String,String> headerMap = HeaderTool.getCommonHead();
...@@ -47,22 +47,20 @@ public class ZhihuHotSearchCrawler { ...@@ -47,22 +47,20 @@ public class ZhihuHotSearchCrawler {
headerMap.put("Referer", rerferer); headerMap.put("Referer", rerferer);
try { try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
if(htmlBody != null){ if(htmlBody != null && htmlBody.contains("words")){
if(htmlBody.contains("words")){ list = new ArrayList<>();
list = new ArrayList<>(); JSONObject topSearch = JSONObject.parseObject(htmlBody);
JSONObject topSearch = JSONObject.parseObject(htmlBody); JSONArray words = topSearch.getJSONObject("top_search").getJSONArray("words");
JSONArray words = topSearch.getJSONObject("top_search").getJSONArray("words"); String link = null;
String link = null; String displayQuery = null;
String displayQuery = null; String query = null;
String query = null; for (int i = 0; i < words.size(); i++) {
for (int i = 0; i < words.size(); i++) { JSONObject word = words.getJSONObject(i);
JSONObject word = words.getJSONObject(i); query = word.getString("query");
query = word.getString("query"); displayQuery = word.getString("display_query");
displayQuery = word.getString("display_query"); link = "https://www.zhihu.com/search?q="+URLCodeUtil.getURLEncode(query, "utf-8")+"&utm_content=search_hot&utm_medium=organic&utm_source=zhihu&type=content";
link = "https://www.zhihu.com/search?q="+URLCodeUtil.getURLEncode(query, "utf-8")+"&utm_content=search_hot&utm_medium=organic&utm_source=zhihu&type=content"; HotSearchList zhihu = new HotSearchList(link, displayQuery, null, i, HotSearchType.知乎热搜.name());
ZhihuHotSearch zhihu = new ZhihuHotSearch(link, query, displayQuery,new Date()); list.add(zhihu);
list.add(zhihu);
}
} }
} }
} catch (IOException e) { } catch (IOException e) {
...@@ -80,8 +78,8 @@ public class ZhihuHotSearchCrawler { ...@@ -80,8 +78,8 @@ public class ZhihuHotSearchCrawler {
* @param @return 设定文件 * @param @return 设定文件
* @return List<ZhihuHotSearch> 返回类型 * @return List<ZhihuHotSearch> 返回类型
*/ */
public static List<ZhihuHotSearch> getMobileZhihuHotList(){ public static List<HotSearchList> getMobileZhihuHotList(){
List<ZhihuHotSearch> list = null; List<HotSearchList> list = null;
String url = "https://api.zhihu.com/topstory/hot-list?limit=40&reverse_order=0"; String url = "https://api.zhihu.com/topstory/hot-list?limit=40&reverse_order=0";
Map<String,String> headerMap = HeaderTool.getCommonHead(); Map<String,String> headerMap = HeaderTool.getCommonHead();
headerMap.put("Host", "api.zhihu.com"); headerMap.put("Host", "api.zhihu.com");
...@@ -93,26 +91,20 @@ public class ZhihuHotSearchCrawler { ...@@ -93,26 +91,20 @@ public class ZhihuHotSearchCrawler {
for(int j=0;j<3;j++){ for(int j=0;j<3;j++){
try { try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
if(htmlBody != null){ if(htmlBody != null && htmlBody.contains("author")){
if(htmlBody.contains("author")){ list = new ArrayList<>();
list = new ArrayList<ZhihuHotSearch>(); JSONObject topSearch = JSONObject.parseObject(htmlBody);
JSONObject top_search = JSONObject.parseObject(htmlBody); JSONArray words = topSearch.getJSONArray("data");
JSONArray words = top_search.getJSONArray("data"); String link = null;
String link = null; String displayQuery = null;
String display_query = null; for (int i = 0; i < words.size(); i++) {
String query = null; JSONObject word = words.getJSONObject(i).getJSONObject("target");
for (int i = 0; i < words.size(); i++) { displayQuery = word.getString("title");
JSONObject word = words.getJSONObject(i).getJSONObject("target"); link = "https://www.zhihu.com/question/"+word.getLongValue("id");
query = word.getString("title"); HotSearchList zhihu = new HotSearchList(link, displayQuery, null, i, HotSearchType.知乎热搜.name());
display_query = word.getString("title"); list.add(zhihu);
link = "https://www.zhihu.com/question/"+word.getLongValue("id");
ZhihuHotSearch zhihu = new ZhihuHotSearch(link, query, display_query,new Date());
list.add(zhihu);
}
break;
}else{
System.out.println("---------------");
} }
break;
} }
} catch (IOException e) { } catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace()); logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace());
......
package com.zhiwei.searchhotcrawler.dao;
import java.util.Calendar;
import java.util.List;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * MongoDB access object for Baidu hot-search records.
 *
 * <p>The target collection is chosen once, at construction time, as
 * {@code Config.collBaiduName + <year> + "_01"} for January through June and
 * {@code Config.collBaiduName + <year> + "_06"} for July through December.
 */
public class BaiduHotSearchDAO extends MongoDBTemplate {

    /** Number of insert attempts made before giving up. */
    private static final int INSERT_ATTEMPTS = 3;

    public BaiduHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        // Calendar.MONTH and Calendar.YEAR are field selectors (constants 2
        // and 1), not the current date; the actual values must be read from
        // a Calendar instance. Calendar months are 0-based, so "< 6" covers
        // January through June.
        Calendar now = Calendar.getInstance();
        int year = now.get(Calendar.YEAR);
        String halfYearSuffix = now.get(Calendar.MONTH) < 6 ? "_01" : "_06";
        super.setCollName(Config.collBaiduName + year + halfYearSuffix);
    }

    /**
     * Inserts the given documents, retrying up to three times when the
     * attempt throws. Failures are printed and otherwise ignored.
     *
     * @param list documents to insert
     */
    public void addBaiduSearch(List<DBObject> list) {
        for (int attempt = 0; attempt < INSERT_ATTEMPTS; attempt++) {
            try {
                this.getReadColl().insert(list);
                ZhiWeiTools.sleep(200);
                return;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Computes how much the search index of a keyword changed compared with
     * the most recent stored record for the same keyword.
     *
     * @param baiduHotSearch the freshly crawled entry
     * @return current count minus the stored count, or {@code 0} when no
     *         earlier record exists or the lookup fails
     */
    public int getChangeCount(BaiDuHotSearch baiduHotSearch) {
        DBObject query = new BasicDBObject();
        query.put("kw", baiduHotSearch.getKw());
        DBObject sort = new BasicDBObject();
        sort.put("time", -1);

        DBCursor cur = null;
        try {
            // Newest record first, at most one document.
            cur = this.getReadColl().find(query).sort(sort).limit(1);
            if (cur.hasNext()) {
                DBObject doc = cur.next();
                return baiduHotSearch.getCount() - Integer.parseInt(doc.get("count").toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the cursor on the failure path too.
            closeQuietly(cur);
        }
        return 0;
    }

    /** Closes the cursor if present; a failure to close is printed, not thrown. */
    private static void closeQuietly(DBCursor cursor) {
        if (cursor == null) {
            return;
        }
        try {
            cursor.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
package com.zhiwei.searchhotcrawler.dao;
import java.util.Calendar;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.WriteConcern;
import com.zhiwei.searchhotcrawler.bean.DouyinHotSearch;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * DAO for Douyin hot-search records.
 *
 * <p>The target collection name is {@code Config.collDouyinName} followed by
 * the current year and a half-year suffix ({@code _01} for January-June,
 * {@code _06} for July-December).
 */
public class DouyinHotSearchDAO extends MongoDBTemplate{

    /** Number of insert attempts before a document is given up on. */
    private static final int MAX_INSERT_ATTEMPTS = 3;

    /**
     * Selects the database and the half-year collection for the current date.
     *
     * <p>{@code Calendar.MONTH} and {@code Calendar.YEAR} are field selectors
     * (the constants 2 and 1), not the current date. The previous code used
     * those constants directly, so the name always ended in {@code 1_01}.
     * The real values are now read from a calendar instance.
     *
     * <p>NOTE(review): data written before this fix lives in the
     * {@code ...1_01} collection; confirm whether it needs to be migrated.
     */
    public DouyinHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        Calendar now = Calendar.getInstance();
        // Calendar months are zero-based: 0..5 is January..June.
        String halfYearSuffix = now.get(Calendar.MONTH) < 6 ? "_01" : "_06";
        super.setCollName(Config.collDouyinName + now.get(Calendar.YEAR) + halfYearSuffix);
    }

    /**
     * Inserts one document with {@code WriteConcern.SAFE}, retrying up to
     * three times on failure.
     *
     * @param douyin the document to insert
     */
    @SuppressWarnings("deprecation")
    public void addDouyinHotSearch(DBObject douyin){
        for (int attempt = 0; attempt < MAX_INSERT_ATTEMPTS; attempt++) {
            try {
                this.getReadColl().insert(douyin, WriteConcern.SAFE);
                ZhiWeiTools.sleep(200);
                return;
            } catch (Exception e) {
                // Previously swallowed silently; report it the same way the
                // rest of this class reports failures, then retry.
                e.printStackTrace();
            }
        }
    }

    /**
     * Computes how much the hot value changed since the most recent stored
     * record for the same word.
     *
     * <p>NOTE(review): the Douyin run thread visible in this change stores the
     * word under {@code "name"} and the value under {@code "count"}, not
     * {@code "word"} / {@code "hot_value"}. If that is the only writer, this
     * query never matches and the result is always 0; confirm the field names
     * against the writer.
     *
     * @param douyinHotSearch the freshly crawled entry
     * @return current hot value minus the previously stored one, or 0 when no
     *         previous record exists or the lookup fails
     */
    public int getChangeCount(DouyinHotSearch douyinHotSearch){
        int result = 0;
        DBObject query = new BasicDBObject("word", douyinHotSearch.getWord());
        DBObject newestFirst = new BasicDBObject("time", -1);
        DBCursor cur = null;
        try {
            cur = this.getReadColl().find(query).sort(newestFirst).limit(1);
            if (cur.hasNext()) {
                Object previous = cur.next().get("hot_value");
                if (previous != null) {
                    result = douyinHotSearch.getHot_value() - toInt(previous);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the cursor even when the query or parsing fails.
            if (cur != null) {
                cur.close();
            }
        }
        return result;
    }

    /** Converts a stored numeric value (any Number, or its string form) to int. */
    private static int toInt(Object value) {
        if (value instanceof Number) {
            return ((Number) value).intValue();
        }
        return Integer.parseInt(value.toString().trim());
    }
}
...@@ -6,58 +6,72 @@ import java.util.Calendar; ...@@ -6,58 +6,72 @@ import java.util.Calendar;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor; import com.mongodb.DBCursor;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.WeiboHotSearch; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.cache.CacheManager; import com.zhiwei.searchhotcrawler.cache.CacheManager;
import com.zhiwei.searchhotcrawler.config.Config; import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate; import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class WeiboHotSearchDAO extends MongoDBTemplate{ public class HotSearchListDAO extends MongoDBTemplate{
private static Logger logger = LoggerFactory.getLogger(BaiDuHotSearchCrawler.class);
public WeiboHotSearchDAO() {
@SuppressWarnings("unused")
public HotSearchListDAO() {
super(); super();
super.setDbName(Config.dbName); super.setDbName(Config.dbName);
String collWeiboName; String time = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
if(Calendar.MONTH<6){ String year = time.substring(0,4);
collWeiboName = Config.collWeiboName + Calendar.YEAR +"_01"; String month = time.substring(5,7);
}else{
collWeiboName = Config.collWeiboName + Calendar.YEAR +"_06"; String collName = Config.collName + year + "_" + month;
} super.setCollName(collName);
super.setCollName(collWeiboName);
} }
/** /**
* @Title: addWeiboHotSearch * 添加数据入库
* @author hero * @param list
* @Description: TODO(这里用一句话描述这个方法的作用)
* @param @param doc 设定文件
* @return void 返回类型
*/ */
public void addWeiboHotSearch(List<DBObject> list){ public void addHotSearchList(List<DBObject> list){
for(int i=0; i<3; i++){ for(int i=0; i<3; i++){
try { try {
this.getReadColl().insert(list); this.getReadColl().insert(list);
ZhiWeiTools.sleep(200); ZhiWeiTools.sleep(200);
break; break;
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); logger.error("存储数据时出错,错误为:{}", e);
continue; }
}
}
public void addHotSearch(DBObject doc){
for(int i=0; i<3; i++){
try {
this.getReadColl().save(doc);
ZhiWeiTools.sleep(200);
break;
} catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e);
} }
} }
} }
/** /**
* 查询据上次变化量
* @Title: getChangeCount * @Title: getChangeCount
* @author hero * @author hero
* @Description: TODO(查询据上次变化量)
* @param @param weiboHotSearch * @param @param weiboHotSearch
* @param @return 设定文件 * @param @return 设定文件
* @return int 返回类型 * @return int 返回类型
*/ */
public int getChangeCount(WeiboHotSearch weiboHotSearch){ public int getChangeCount(HotSearchList weiboHotSearch){
int result = 0; int result = 0;
DBObject query = new BasicDBObject(); DBObject query = new BasicDBObject();
query.put("name", weiboHotSearch.getName()); query.put("name", weiboHotSearch.getName());
...@@ -72,7 +86,7 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{ ...@@ -72,7 +86,7 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
} }
cur.close(); cur.close();
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); logger.error("存储数据时出错,错误为:{}", e);
return result; return result;
} }
return result; return result;
...@@ -86,12 +100,13 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{ ...@@ -86,12 +100,13 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
* @param @return 设定文件 * @param @return 设定文件
* @return List<DBObject> 返回类型 * @return List<DBObject> 返回类型
*/ */
public List<DBObject> getWeiboHotOneHour(){ public List<DBObject> getHotOneHour(String type){
List<DBObject> list = new ArrayList<DBObject>(); List<DBObject> list = new ArrayList<>();
Date date = new Date((new Date().getTime()-60*60*1000)); Date date = new Date((new Date().getTime()-60*60*1000));
DBObject query = new BasicDBObject(); DBObject query = new BasicDBObject();
query.put("time", new BasicDBObject("$gte", date)); query.put("time", new BasicDBObject("$gte", date));
query.put("changeCount", 0); query.put("changeCount", 0);
query.put("type", type);
try { try {
DBCursor cur = this.getReadColl().find(query); DBCursor cur = this.getReadColl().find(query);
...@@ -105,7 +120,7 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{ ...@@ -105,7 +120,7 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
} }
cur.close(); cur.close();
} catch (Exception e) { } catch (Exception e) {
return null; logger.error("存储数据时出错,错误为:{}", e);
} }
return list; return list;
} }
......
package com.zhiwei.searchhotcrawler.dao;
import java.util.List;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * DAO for Sougo hot-search records, stored in the collection named by
 * {@code Config.collSougoName}.
 */
public class SougoHotSearchDAO extends MongoDBTemplate{

    /** Number of insert attempts before a batch is given up on. */
    private static final int MAX_INSERT_ATTEMPTS = 3;

    /** Selects the configured database and the Sougo collection. */
    public SougoHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collSougoName);
    }

    /**
     * Inserts a batch of documents, retrying up to three times on failure.
     *
     * @param list documents to insert; a {@code null} or empty list is ignored
     */
    public void addSougoSearch(List<DBObject> list){
        // Nothing to store: skip the round-trip instead of running the retry
        // loop on a batch the driver is expected to reject (confirm for the
        // driver version in use).
        if (list == null || list.isEmpty()) {
            return;
        }
        for (int attempt = 0; attempt < MAX_INSERT_ATTEMPTS; attempt++) {
            try {
                // NOTE(review): the write goes through getReadColl(); confirm
                // that this is the intended collection handle.
                this.getReadColl().insert(list);
                ZhiWeiTools.sleep(200);
                return;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
package com.zhiwei.searchhotcrawler.dao; package com.zhiwei.searchhotcrawler.dao;
import java.util.Collections;
import java.util.List; import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.config.Config; import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate; import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
public class WechatUserDao extends MongoDBTemplate{ public class WechatUserDao extends MongoDBTemplate{
private static Logger logger = LoggerFactory.getLogger(BaiDuHotSearchCrawler.class);
public WechatUserDao() { public WechatUserDao() {
super(); super();
super.setDbName(Config.dbName); super.setDbName(Config.dbName);
...@@ -31,8 +39,7 @@ public class WechatUserDao extends MongoDBTemplate{ ...@@ -31,8 +39,7 @@ public class WechatUserDao extends MongoDBTemplate{
this.getReadColl().save(doc); this.getReadColl().save(doc);
break; break;
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); logger.error("存储数据时出错,错误为:{}", e);
continue;
} }
} }
} }
...@@ -54,9 +61,9 @@ public class WechatUserDao extends MongoDBTemplate{ ...@@ -54,9 +61,9 @@ public class WechatUserDao extends MongoDBTemplate{
return (List<String>)doc.get("user"); return (List<String>)doc.get("user");
} }
} catch (Exception e) { } catch (Exception e) {
return null; logger.error("存储数据时出错,错误为:{}", e);
} }
return null; return Collections.emptyList();
} }
} }
package com.zhiwei.searchhotcrawler.dao;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.WriteConcern;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * DAO for Zhihu hot-search records, stored in the collection named by
 * {@code Config.collZhihuName}.
 */
public class ZhihuHotSearchDAO extends MongoDBTemplate{

    /** Number of insert attempts before a document is given up on. */
    private static final int MAX_INSERT_ATTEMPTS = 3;

    /** Look-back window for {@link #getZhiHuHotSearch()}: one hour. */
    private static final long ONE_HOUR_MILLIS = 60 * 60 * 1000L;

    /** Selects the configured database and the Zhihu collection. */
    public ZhihuHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collZhihuName);
    }

    /**
     * Inserts one document with {@code WriteConcern.SAFE}, retrying up to
     * three times on failure.
     *
     * @param zhihu the document to insert
     */
    @SuppressWarnings("deprecation")
    public void addZhiHuHotSearch(DBObject zhihu){
        for (int attempt = 0; attempt < MAX_INSERT_ATTEMPTS; attempt++) {
            try {
                this.getReadColl().insert(zhihu, WriteConcern.SAFE);
                ZhiWeiTools.sleep(200);
                return;
            } catch (Exception e) {
                // Previously swallowed silently; report it the same way the
                // rest of this class reports failures, then retry.
                e.printStackTrace();
            }
        }
    }

    /**
     * Fetches every document whose {@code time} field is within the last hour.
     *
     * <p>The earlier implementation issued a {@code count()} before the
     * {@code find()}; the single query below yields the same result with one
     * round-trip, and the cursor is now closed on every path.
     *
     * @return the matching documents, or {@code null} when none were read
     *         (callers rely on the {@code null} result); if the query fails
     *         part-way, the documents read so far are returned
     */
    public List<DBObject> getZhiHuHotSearch(){
        List<DBObject> list = null;
        DBCursor cur = null;
        try {
            Date oneHourAgo = new Date(System.currentTimeMillis() - ONE_HOUR_MILLIS);
            DBObject query = new BasicDBObject("time", new BasicDBObject("$gte", oneHourAgo));
            cur = this.getReadColl().find(query);
            while (cur.hasNext()) {
                // Allocate lazily so "no matches" still yields null.
                if (list == null) {
                    list = new ArrayList<DBObject>();
                }
                list.add(cur.next());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (cur != null) {
                cur.close();
            }
        }
        return list;
    }
}
...@@ -29,13 +29,13 @@ public class MongoDBTemplate { ...@@ -29,13 +29,13 @@ public class MongoDBTemplate {
ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort); ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);
if(reader==null) if(reader==null)
{ {
// reader = new MongoClient(address, Arrays.asList(credential)); reader = new MongoClient(address, Arrays.asList(credential));
reader = new MongoClient(address); // reader = new MongoClient(address);
} }
if(writer==null) if(writer==null)
{ {
// writer = new MongoClient(address, Arrays.asList(credential)); writer = new MongoClient(address, Arrays.asList(credential));
writer = new MongoClient(address); // writer = new MongoClient(address);
} }
} catch (MongoException e) { } catch (MongoException e) {
e.printStackTrace(); e.printStackTrace();
......
package com.zhiwei.searchhotcrawler.test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
import com.mongodb.MongoClient;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * One-off migration script: copies Weibo hot-search documents from the
 * {@code NetWork.weibo_hotsearch2018_10} collection into the monthly
 * {@code hot_search_list<yyyy>_<MM>} collections of the {@code hot_search_list}
 * database, tagging each copy with the Weibo hot-search type.
 */
public class HotSearchListTest{

    /** Fields copied verbatim from each source document, in insertion order. */
    private static final String[] COPIED_FIELDS = {
        "_id", "name", "url", "count", "hot", "day", "time", "changeCount", "rank"
    };

    /**
     * Runs the copy for the hard-coded window 2019-07-16, one slice at a time
     * as produced by {@code TimeParse.getTimeMap}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        MongoCredential credential = MongoCredential.createCredential(Config.userName, Config.authDB, Config.userPwd.toCharArray());
        ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);
        // SECURITY NOTE(review): this user name and password are committed in
        // plain text; move them to configuration and rotate the password.
        MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());

        Mongo mongo = new MongoClient(address, Arrays.asList(credential));
        try {
            Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew));
            try {
                DBCollection coll = mongo.getDB("NetWork").getCollection("weibo_hotsearch2018_10");
                DB dbNew = mongoNew.getDB("hot_search_list");

                Map<String,String> timLine = TimeParse.getTimeMap("2019-07-16 00:00:00", "2019-07-16 23:59:59", "HH", 1);
                timLine.forEach((start, end) -> {
                    // The target collection is chosen from the slice's end time.
                    String year = end.substring(0,4);
                    String month = end.substring(5,7);
                    Date startDate = TimeParse.stringFormartDate(start);
                    Date endDate = TimeParse.stringFormartDate(end);
                    String collName = "hot_search_list"+year+"_"+month;
                    System.out.println("collName=========="+collName);
                    DBCollection collNew = dbNew.getCollection(collName);

                    DBObject query = new BasicDBObject(new BasicDBObject("time",
                            new BasicDBObject("$gte",startDate).append("$lte", endDate)));
                    DBCursor cur = coll.find(query);
                    try {
                        System.out.println(query +"======="+ cur.count());
                        while (cur.hasNext()) {
                            DBObject doc = cur.next();
                            DBObject copy = new BasicDBObject();
                            for (String field : COPIED_FIELDS) {
                                copy.put(field, doc.get(field));
                            }
                            copy.put("type", HotSearchType.微博热搜.name());
                            // save() upserts by _id, so re-running the script
                            // overwrites rather than duplicates.
                            collNew.save(copy);
                        }
                    } finally {
                        // One cursor per slice: release it before the next one.
                        cur.close();
                    }
                });
            } finally {
                mongoNew.close();
            }
        } finally {
            mongo.close();
        }
    }
}
...@@ -10,37 +10,39 @@ import org.slf4j.LoggerFactory; ...@@ -10,37 +10,39 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.BaiduHotSearchDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
public class BaiduHotSearchRun extends Thread{ public class BaiduHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(BaiduHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(BaiduHotSearchRun.class);
private BaiduHotSearchDAO baiduHotSearchDAO = new BaiduHotSearchDAO(); private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Override @Override
public void run() { public void run() {
logger.info("百度风云榜采集开始........"); logger.info("百度风云榜采集开始........");
List<BaiDuHotSearch> list = BaiDuHotSearchCrawler.baiduHotSearch(); List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch();
logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> saveDataList = new ArrayList<>(); List<DBObject> saveDataList = new ArrayList<>();
if(Objects.nonNull(list) && !list.isEmpty()) { if(Objects.nonNull(list) && !list.isEmpty()) {
list.forEach(baiduHotSearch ->{ list.forEach(baiduHotSearch ->{
int changeCount = baiduHotSearchDAO.getChangeCount(baiduHotSearch); int changeCount = hotSearchDAO.getChangeCount(baiduHotSearch);
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
doc.put("_id", baiduHotSearch.getId()); doc.put("_id", baiduHotSearch.getId());
doc.put("name", baiduHotSearch.getKw()); doc.put("name", baiduHotSearch.getName());
doc.put("url", baiduHotSearch.getEverurl()); doc.put("url", baiduHotSearch.getUrl());
doc.put("count", baiduHotSearch.getCount()); doc.put("count", baiduHotSearch.getCount());
doc.put("day", baiduHotSearch.getDay()); doc.put("day", baiduHotSearch.getDay());
doc.put("time", baiduHotSearch.getTime()); doc.put("time", baiduHotSearch.getTime());
doc.put("changeCount", changeCount); doc.put("changeCount", changeCount);
doc.put("rank", baiduHotSearch.getRank()); doc.put("rank", baiduHotSearch.getRank());
doc.put("type", HotSearchType.百度热搜.name());
saveDataList.add(doc); saveDataList.add(doc);
}); });
} }
baiduHotSearchDAO.addBaiduSearch(saveDataList); hotSearchDAO.addHotSearchList(saveDataList);
logger.info("百度风云榜采集结束........"); logger.info("百度风云榜采集结束........");
} }
......
...@@ -9,34 +9,36 @@ import org.slf4j.LoggerFactory; ...@@ -9,34 +9,36 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.DouyinHotSearch; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.DouyinHotSearchDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
public class DouyinHotSearchRun extends Thread{ public class DouyinHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class);
private DouyinHotSearchDAO douyinHotSearchDAO = new DouyinHotSearchDAO(); private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Override @Override
public void run() { public void run() {
logger.info("抖音热搜榜采集开始........"); logger.info("抖音热搜榜采集开始........");
List<DouyinHotSearch> list = DouyinHotSearchCrawler.getMobileDouyinHotList(); List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<DBObject>(); List<DBObject> data = new ArrayList<>();
for(DouyinHotSearch douyinHotSearch : list){ for(HotSearchList douyinHotSearch : list){
int changeCount = douyinHotSearchDAO.getChangeCount(douyinHotSearch); int changeCount = hotSearchDAO.getChangeCount(douyinHotSearch);
DBObject douyin = new BasicDBObject(); DBObject douyin = new BasicDBObject();
douyin.put("_id", douyinHotSearch.getId()); douyin.put("_id", douyinHotSearch.getId());
douyin.put("name", douyinHotSearch.getWord()); douyin.put("name", douyinHotSearch.getName());
douyin.put("rank", douyinHotSearch.getPosition()); douyin.put("rank", douyinHotSearch.getRank());
douyin.put("count", douyinHotSearch.getHot_value()); douyin.put("count", douyinHotSearch.getCount());
// douyin.put("url", douyinHotSearch.getUrl());
douyin.put("day", douyinHotSearch.getDay()); douyin.put("day", douyinHotSearch.getDay());
douyin.put("time", douyinHotSearch.getTime()); douyin.put("time", douyinHotSearch.getTime());
douyin.put("changeCount", changeCount); douyin.put("changeCount", changeCount);
douyin.put("url", null);
douyin.put("type", HotSearchType.抖音热搜.name());
data.add(douyin); data.add(douyin);
douyinHotSearchDAO.addDouyinHotSearch(douyin); hotSearchDAO.addHotSearch(douyin);
} }
logger.info("抖音热搜榜采集结束........"); logger.info("抖音热搜榜采集结束........");
} }
......
...@@ -12,7 +12,8 @@ import org.slf4j.LoggerFactory; ...@@ -12,7 +12,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.dao.WechatUserDao; import com.zhiwei.searchhotcrawler.dao.WechatUserDao;
import com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.Template; import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil; import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.util.WechatConstant; import com.zhiwei.searchhotcrawler.util.WechatConstant;
...@@ -20,10 +21,9 @@ import com.zhiwei.tools.timeparse.TimeParse; ...@@ -20,10 +21,9 @@ import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class SendWeiboHotSearchRun extends Thread { public class SendWeiboHotSearchRun extends Thread {
private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO(); private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
private static WechatUserDao wechatUserDao = new WechatUserDao(); private static WechatUserDao wechatUserDao = new WechatUserDao();
private static Logger logger = LoggerFactory.getLogger(SendWeiboHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(SendWeiboHotSearchRun.class);
@Override @Override
public void run() { public void run() {
while (true) { while (true) {
...@@ -32,8 +32,8 @@ public class SendWeiboHotSearchRun extends Thread { ...@@ -32,8 +32,8 @@ public class SendWeiboHotSearchRun extends Thread {
int hour = calendar.get(Calendar.HOUR_OF_DAY); int hour = calendar.get(Calendar.HOUR_OF_DAY);
logger.info("微博推送,当前系统时间为:" + hour); logger.info("微博推送,当前系统时间为:" + hour);
if (hour > 6 && hour < 23) { if (hour > 6 && hour < 23) {
List<DBObject> list = weiboHotSearchDAO.getWeiboHotOneHour(); List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.微博热搜.name());
if (list != null && list.size() > 0) { if (list != null && !list.isEmpty()) {
for (DBObject weibo : list) { for (DBObject weibo : list) {
String title = weibo.get("name").toString(); String title = weibo.get("name").toString();
String time = TimeParse.dateFormartString((Date) weibo.get("time"), "yyyy-MM-dd HH:mm:ss"); String time = TimeParse.dateFormartString((Date) weibo.get("time"), "yyyy-MM-dd HH:mm:ss");
......
...@@ -11,8 +11,9 @@ import org.slf4j.LoggerFactory; ...@@ -11,8 +11,9 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.dao.WechatUserDao; import com.zhiwei.searchhotcrawler.dao.WechatUserDao;
import com.zhiwei.searchhotcrawler.dao.ZhihuHotSearchDAO;
import com.zhiwei.searchhotcrawler.util.Template; import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil; import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.util.WechatConstant; import com.zhiwei.searchhotcrawler.util.WechatConstant;
...@@ -20,7 +21,7 @@ import com.zhiwei.tools.timeparse.TimeParse; ...@@ -20,7 +21,7 @@ import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class SendZhihuHotSearchRun extends Thread{ public class SendZhihuHotSearchRun extends Thread{
private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO(); private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
private static WechatUserDao wechatUserDao = new WechatUserDao(); private static WechatUserDao wechatUserDao = new WechatUserDao();
private static Logger logger = LoggerFactory.getLogger(SendZhihuHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(SendZhihuHotSearchRun.class);
@Override @Override
...@@ -32,8 +33,8 @@ public class SendZhihuHotSearchRun extends Thread{ ...@@ -32,8 +33,8 @@ public class SendZhihuHotSearchRun extends Thread{
int hour = calendar.get(Calendar.HOUR_OF_DAY); int hour = calendar.get(Calendar.HOUR_OF_DAY);
logger.info("知乎推送,当前系统时间为:"+hour); logger.info("知乎推送,当前系统时间为:"+hour);
if(hour > 6 && hour <23){ if(hour > 6 && hour <23){
List<DBObject> list = zhihuHotSearchDAO.getZhiHuHotSearch(); List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.知乎热搜.name());
if(list!=null && list.size()>0){ if(list!=null && !list.isEmpty()){
for(DBObject zhihu : list){ for(DBObject zhihu : list){
String title = zhihu.get("display_query").toString(); String title = zhihu.get("display_query").toString();
String time = TimeParse.dateFormartString((Date)zhihu.get("time"), "yyyy-MM-dd HH:mm:ss"); String time = TimeParse.dateFormartString((Date)zhihu.get("time"), "yyyy-MM-dd HH:mm:ss");
...@@ -51,7 +52,6 @@ public class SendZhihuHotSearchRun extends Thread{ ...@@ -51,7 +52,6 @@ public class SendZhihuHotSearchRun extends Thread{
} catch (Exception e) { } catch (Exception e) {
logger.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace()); logger.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools.sleep(1*60*60*1000); ZhiWeiTools.sleep(1*60*60*1000);
continue;
} }
} }
} }
...@@ -66,7 +66,7 @@ public class SendZhihuHotSearchRun extends Thread{ ...@@ -66,7 +66,7 @@ public class SendZhihuHotSearchRun extends Thread{
*/ */
public static void sendTemplateByUserIds(String title,String time, String url) { public static void sendTemplateByUserIds(String title,String time, String url) {
Map<String, Object> dataMap = new HashMap<String, Object>(); Map<String, Object> dataMap = new HashMap<>();
JSONObject first = new JSONObject(); JSONObject first = new JSONObject();
first.put("value", "您好,有一条来自知乎热搜榜的预警通知。"); first.put("value", "您好,有一条来自知乎热搜榜的预警通知。");
dataMap.put("first", first); dataMap.put("first", first);
...@@ -87,7 +87,7 @@ public class SendZhihuHotSearchRun extends Thread{ ...@@ -87,7 +87,7 @@ public class SendZhihuHotSearchRun extends Thread{
dataMap.put("remark", remark); dataMap.put("remark", remark);
List<String> userList = getUserList(); List<String> userList = getUserList();
if(userList!=null && userList.size()>0) { if(userList!=null && !userList.isEmpty()) {
for (String openId : userList) { for (String openId : userList) {
Template template = new Template(); Template template = new Template();
template.setTouser(openId); template.setTouser(openId);
......
...@@ -9,32 +9,34 @@ import org.slf4j.LoggerFactory; ...@@ -9,32 +9,34 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.SougoHotSearch; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.SougoHotSearchDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
public class SougoHotSearchRun extends Thread { public class SougoHotSearchRun extends Thread {
private static Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class);
private SougoHotSearchDAO sougoHotSearchDAO = new SougoHotSearchDAO(); private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Override @Override
public void run() { public void run() {
logger.info("搜狗微信采集开始........"); logger.info("搜狗微信采集开始........");
List<SougoHotSearch> list = SougoHotSearchCrawler.sougoHotSearch(); List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch();
logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<DBObject> data = new ArrayList<>();
for(SougoHotSearch sougoHotSearch : list){ for(HotSearchList sougoHotSearch : list){
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
doc.put("_id", sougoHotSearch.getId()); doc.put("_id", sougoHotSearch.getId());
doc.put("name", sougoHotSearch.getKw()); doc.put("name", sougoHotSearch.getName());
doc.put("url", sougoHotSearch.getEverurl()); doc.put("url", sougoHotSearch.getUrl());
doc.put("day", sougoHotSearch.getDay()); doc.put("day", sougoHotSearch.getDay());
doc.put("time", sougoHotSearch.getTime()); doc.put("time", sougoHotSearch.getTime());
doc.put("rank", sougoHotSearch.getRank()); doc.put("rank", sougoHotSearch.getRank());
doc.put("type", HotSearchType.搜狗微信热搜.name());
data.add(doc); data.add(doc);
} }
sougoHotSearchDAO.addSougoSearch(data); hotSearchDAO.addHotSearchList(data);
logger.info("搜狗微信采集结束........"); logger.info("搜狗微信采集结束........");
} }
......
...@@ -9,22 +9,23 @@ import org.slf4j.LoggerFactory; ...@@ -9,22 +9,23 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.WeiboHotSearch; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
public class WeiboHotSearchRun extends Thread{ public class WeiboHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchRun.class);
private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO(); private HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO();
@Override @Override
public void run() { public void run() {
logger.info("微博话题采集开始........"); logger.info("微博话题采集开始........");
List<WeiboHotSearch> list = WeiboHotSearchCrawler.weiboHotSearch(); List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<DBObject> data = new ArrayList<>();
for(WeiboHotSearch weiboHotSearch : list){ for(HotSearchList weiboHotSearch : list){
int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch); int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch);
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
doc.put("_id", weiboHotSearch.getId()); doc.put("_id", weiboHotSearch.getId());
...@@ -36,9 +37,10 @@ public class WeiboHotSearchRun extends Thread{ ...@@ -36,9 +37,10 @@ public class WeiboHotSearchRun extends Thread{
doc.put("time", weiboHotSearch.getTime()); doc.put("time", weiboHotSearch.getTime());
doc.put("changeCount", changeCount); doc.put("changeCount", changeCount);
doc.put("rank", weiboHotSearch.getRank()); doc.put("rank", weiboHotSearch.getRank());
doc.put("type", HotSearchType.微博热搜.name());
data.add(doc); data.add(doc);
} }
weiboHotSearchDAO.addWeiboHotSearch(data); weiboHotSearchDAO.addHotSearchList(data);
logger.info("微博话题采集结束........"); logger.info("微博话题采集结束........");
} }
......
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
...@@ -9,31 +8,36 @@ import org.slf4j.LoggerFactory; ...@@ -9,31 +8,36 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.ZhihuHotSearch; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.ZhihuHotSearchDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
public class ZhihuHotSearchRun extends Thread{ public class ZhihuHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchRun.class);
private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO(); private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Override @Override
public void run() { public void run() {
logger.info("知乎话题采集开始........"); logger.info("知乎话题采集开始........");
List<ZhihuHotSearch> list = ZhihuHotSearchCrawler.getZhihuHotList(); List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList();
List<ZhihuHotSearch> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList(); List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
list.addAll(mobilelist); list.addAll(mobilelist);
logger.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<DBObject>(); for(HotSearchList zhihuHotSearch : list){
for(ZhihuHotSearch zhihuHotSearch : list){
DBObject zhihu = new BasicDBObject(); DBObject zhihu = new BasicDBObject();
zhihu.put("_id", zhihuHotSearch.getUrl()); zhihu.put("_id", zhihuHotSearch.getId());
zhihu.put("query", zhihuHotSearch.getQuery()); zhihu.put("name", zhihuHotSearch.getName());
zhihu.put("display_query", zhihuHotSearch.getDisplayQuery()); zhihu.put("url", zhihuHotSearch.getUrl());
zhihu.put("count", zhihuHotSearch.getCount());
zhihu.put("hot", zhihuHotSearch.isHot());
zhihu.put("day", zhihuHotSearch.getDay());
zhihu.put("time", zhihuHotSearch.getTime()); zhihu.put("time", zhihuHotSearch.getTime());
data.add(zhihu); zhihu.put("changeCount", 0);
zhihuHotSearchDAO.addZhiHuHotSearch(zhihu); zhihu.put("rank", zhihuHotSearch.getRank());
zhihu.put("type", HotSearchType.知乎热搜.name());
hotSearchDAO.addHotSearch(zhihu);
} }
logger.info("知乎话题采集结束........"); logger.info("知乎话题采集结束........");
} }
......
#mongoIp=202.107.192.94 #mongoIp=202.107.192.94
mongoIp=192.168.0.81 mongoIp=192.168.0.101
mongoPort=27017 mongoPort=30000
#mongoIp=192.168.0.81
#mongoPort=27017
db.username=zzwno db.username=zzwno
db.paasword=zzwno1q2w3e4r db.paasword=zzwno1q2w3e4r
db.certifiedDB=oneDB db.certifiedDB=admin
dbName=NetWork dbName=hot_search_list
collWeiboName=weibo_hotsearch collName=hot_search_list
collZhihuName=zhihu_hotsearch \ No newline at end of file
collWechatUserName=wechat_user
collBaiduName=baidu_hotsearch
collSougoName=sougo_hotsearch
collDouyinName=douyin_hotsearch
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment