Commit 41dee457 by zhiwei

添加抖音、微信、百度热搜采集

parent b528f200
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * Serializable bean holding one Baidu hot-search entry together with the
 * moment it was captured.
 */
public class BaiDuHotSearch implements Serializable {

    private static final long serialVersionUID = 2076919584659821600L;

    /** Primary key: keyword + "_" + capture time in epoch milliseconds. */
    private String id;
    /** Main link. */
    private String url;
    /** Related link. */
    private String everurl;
    /** Keyword. */
    private String kw;
    /** Search index. */
    private int count;
    /** Capture day, formatted with the pattern yyyy-MM-dd. */
    private String day;
    /** Capture time. */
    private Date time;
    /** Change compared with the previous minute. */
    private int changeCount;
    /** Rank; stays null until set. */
    private Integer rank;

    /** Creates an empty entry; every field keeps its default value. */
    public BaiDuHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param rank    rank of the keyword, may be null
     * @param kw      keyword
     * @param everurl related link
     * @param count   search index
     */
    public BaiDuHotSearch(Integer rank, String kw, String everurl, int count) {
        // A single timestamp feeds id, time and day so the three values can
        // never disagree (e.g. when the constructor runs across midnight).
        Date now = new Date();
        this.id = kw + "_" + now.getTime();
        this.rank = rank;
        this.kw = kw;
        this.count = count;
        this.everurl = everurl;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new BaiDuHotSearch["
                + "id = " + id
                + ", url = " + url
                + ", everurl = " + everurl
                + ", kw = " + kw
                + ", count = " + count
                + ", day = " + day
                + ", time = " + time
                + ", rank = " + rank
                + ", changeCount = " + changeCount
                + "]";
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getEverurl() {
        return everurl;
    }

    public void setEverurl(String everurl) {
        this.everurl = everurl;
    }

    public String getKw() {
        return kw;
    }

    public void setKw(String kw) {
        this.kw = kw;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public int getChangeCount() {
        return changeCount;
    }

    public void setChangeCount(int changeCount) {
        this.changeCount = changeCount;
    }

    /**
     * Returns the rank as a primitive.
     *
     * @return the rank
     * @throws NullPointerException if the rank has not been set, because the
     *         nullable field is auto-unboxed; the int return type is kept for
     *         existing callers
     */
    public int getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }
}
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * Serializable bean holding one Douyin hot-search entry together with the
 * moment it was captured.
 */
public class DouyinHotSearch implements Serializable {

    private static final long serialVersionUID = -7707110236217797510L;

    /** Primary key: word + "_" + capture time in epoch milliseconds. */
    private String id;
    // private String url; // message link (disabled)
    /** Rank. */
    private Integer position;
    /** Hot-search keyword. */
    private String word;
    /** Heat value. */
    private int hot_value;
    /** Capture time. */
    private Date time;
    /** Change compared with the previous minute. */
    private int changeCount;
    /** Capture day, formatted with the pattern yyyy-MM-dd. */
    private String day;

    /** Creates an empty entry; every field keeps its default value. */
    public DouyinHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param position  rank of the keyword, may be null
     * @param word      hot-search keyword
     * @param hot_value heat value; null is stored as 0 instead of failing on
     *                  auto-unboxing
     */
    public DouyinHotSearch(Integer position, String word, Integer hot_value) {
        // A single timestamp feeds id, time and day so the three values can
        // never disagree (e.g. when the constructor runs across midnight).
        Date now = new Date();
        this.id = word + "_" + now.getTime();
        this.position = position;
        this.word = word;
        // The field is a primitive; 0 matches what the no-arg constructor leaves behind.
        this.hot_value = hot_value == null ? 0 : hot_value;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new DouyinHotSearch["
                + "id = " + id
                + ", position = " + position
                + ", word = " + word
                + ", hot_value = " + hot_value
                + ", time = " + time
                + ", changeCount = " + changeCount
                + "]";
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Integer getPosition() {
        return position;
    }

    public void setPosition(Integer position) {
        this.position = position;
    }

    public String getWord() {
        return word;
    }

    public void setWord(String word) {
        this.word = word;
    }

    public int getHot_value() {
        return hot_value;
    }

    public void setHot_value(int hot_value) {
        this.hot_value = hot_value;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public int getChangeCount() {
        return changeCount;
    }

    public void setChangeCount(int changeCount) {
        this.changeCount = changeCount;
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }
}
......@@ -12,7 +12,7 @@ import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
public class WeiboHotSearch implements Serializable{
public class HotSearchList implements Serializable{
private static final long serialVersionUID = 2076919584659821600L;
......@@ -34,10 +34,13 @@ public class WeiboHotSearch implements Serializable{
private int rank; //排名
private String type; //分类
public WeiboHotSearch(){}
public WeiboHotSearch(String url, String name, int count,boolean hot,int rank){
public HotSearchList(){}
public HotSearchList(String url, String name, int count,boolean hot,int rank,String type){
this.id = name + "_" + new Date().getTime();
this.url = url;
this.name = name;
......@@ -46,12 +49,26 @@ public class WeiboHotSearch implements Serializable{
this.rank = rank;
this.time = new Date();
this.day = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
this.type = type;
}
/**
 * Creates an entry stamped with the current time and always flagged as hot.
 *
 * @param url   link of the entry, may be null
 * @param name  keyword; also used as the prefix of the generated id
 * @param count heat/search count; null is stored as 0
 * @param rank  rank of the entry
 * @param type  name of the source the entry came from
 */
public HotSearchList(String url, String name, Integer count,int rank,String type){
    // A single timestamp feeds id, time and day so the three values can
    // never disagree (e.g. when the constructor runs across midnight).
    Date now = new Date();
    this.id = name + "_" + now.getTime();
    this.url = url;
    this.name = name;
    // Crawlers in this change pass null when a source has no count; mapping it
    // to 0 keeps the assignment from failing on auto-unboxing.
    // NOTE(review): the declared type of the count field is not visible in this
    // hunk - presumably int; confirm, and confirm 0 is the wanted stand-in.
    this.count = count == null ? 0 : count;
    this.hot = true;
    this.rank = rank;
    this.time = now;
    this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    this.type = type;
}
@Override
public String toString(){
return "new WeiboHotSearch["
return "new HotSearchList["
+ "id = " + id
+ ", url = " + url
+ ", name = " + name
......@@ -61,6 +78,7 @@ public class WeiboHotSearch implements Serializable{
+ ", rank = " + rank
+ ", day = " + day
+ ", changeCount = " + changeCount
+ ", type = " + type
+ "]";
}
......@@ -140,10 +158,13 @@ public class WeiboHotSearch implements Serializable{
public void setRank(int rank) {
this.rank = rank;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
}
package com.zhiwei.searchhotcrawler.bean;
/**
 * Sources a hot-search entry can come from.
 * The crawlers in this change pass {@code name()} of a constant as the
 * {@code type} argument of {@code HotSearchList}, so renaming a constant
 * changes that string.
 */
public enum HotSearchType {
// Baidu hot search
百度热搜,
// Weibo hot search
微博热搜,
// Zhihu hot search
知乎热搜,
// Douyin hot search
抖音热搜,
// Sogou WeChat hot search
搜狗微信热搜
}
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * Serializable bean holding one Sogou WeChat hot-search keyword together with
 * the moment it was captured.
 */
public class SougoHotSearch implements Serializable {

    private static final long serialVersionUID = 2076919584659821600L;

    /** Primary key: keyword + "_" + capture time in epoch milliseconds. */
    private String id;
    /** Main link. */
    private String url;
    /** Related link. */
    private String everurl;
    /** Keyword. */
    private String kw;
    /** Capture day, formatted with the pattern yyyy-MM-dd. */
    private String day;
    /** Capture time. */
    private Date time;
    /** Rank; stays null until set. */
    private Integer rank;

    /** Creates an empty entry; every field keeps its default value. */
    public SougoHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param rank    rank of the keyword, may be null
     * @param kw      keyword
     * @param everurl related link
     */
    public SougoHotSearch(Integer rank, String kw, String everurl) {
        // A single timestamp feeds id, time and day so the three values can
        // never disagree (e.g. when the constructor runs across midnight).
        Date now = new Date();
        this.id = kw + "_" + now.getTime();
        this.rank = rank;
        this.kw = kw;
        this.everurl = everurl;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new SougoHotSearch["
                + "id = " + id
                + ", url = " + url
                + ", everurl = " + everurl
                + ", kw = " + kw
                + ", day = " + day
                + ", time = " + time
                + ", rank = " + rank
                + "]";
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getEverurl() {
        return everurl;
    }

    public void setEverurl(String everurl) {
        this.everurl = everurl;
    }

    public String getKw() {
        return kw;
    }

    public void setKw(String kw) {
        this.kw = kw;
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public Integer getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }
}
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
/**
 * Serializable value holder for one Zhihu hot-search entry: its link, the raw
 * query, the query as displayed, and a timestamp supplied by the caller.
 */
public class ZhihuHotSearch implements Serializable {

    private static final long serialVersionUID = -7707110236217797510L;

    /** Link of the entry. */
    private String url;
    /** Hot-search keyword. */
    private String query;
    /** Hot-search keyword as displayed. */
    private String displayQuery;
    /** Timestamp of the entry. */
    private Date time;

    /** Creates an entry with every field left null. */
    public ZhihuHotSearch() {
    }

    /**
     * Creates a fully populated entry; the arguments are stored as given.
     *
     * @param url          link of the entry
     * @param query        hot-search keyword
     * @param displayQuery keyword as displayed
     * @param time         timestamp of the entry
     */
    public ZhihuHotSearch(String url, String query, String displayQuery, Date time) {
        this.time = time;
        this.displayQuery = displayQuery;
        this.query = query;
        this.url = url;
    }

    // ---- accessors -------------------------------------------------------

    public String getUrl() {
        return this.url;
    }

    public String getQuery() {
        return this.query;
    }

    public String getDisplayQuery() {
        return this.displayQuery;
    }

    public Date getTime() {
        return this.time;
    }

    // ---- mutators --------------------------------------------------------

    public void setUrl(String url) {
        this.url = url;
    }

    public void setQuery(String query) {
        this.query = query;
    }

    public void setDisplayQuery(String displayQuery) {
        this.displayQuery = displayQuery;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    /** Renders all four fields in declaration order; null fields print as "null". */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("new ZhihuHotSearch[");
        text.append("url = ").append(url);
        text.append(", query = ").append(query);
        text.append(", displayQuery = ").append(displayQuery);
        text.append(", time = ").append(time);
        text.append("]");
        return text.toString();
    }
}
......@@ -18,11 +18,7 @@ public class Config {
userPwd = conf.getProperty("db.paasword");
authDB = conf.getProperty("db.certifiedDB");
dbName = conf.getProperty("dbName");
collWeiboName = conf.getProperty("collWeiboName");
collZhihuName = conf.getProperty("collZhihuName");
collBaiduName = conf.getProperty("collBaiduName");
collSougoName = conf.getProperty("collSougoName");
collDouyinName = conf.getProperty("collDouyinName");
collName = conf.getProperty("collName");
collWechatUserName = conf.getProperty("collWechatUserName");
} catch (Exception e) {
......@@ -37,10 +33,6 @@ public class Config {
public static String userPwd;
public static String authDB;
public static String dbName;
public static String collWeiboName;
public static String collBaiduName;
public static String collZhihuName;
public static String collName;
public static String collWechatUserName;
public static String collSougoName;
public static String collDouyinName;
}
......@@ -14,7 +14,8 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
/**
* @ClassName:BaiDuHotSearch
......@@ -26,7 +27,7 @@ public class BaiDuHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(BaiDuHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
* @Title: BaiDuHotSearchTest
* @author hero
......@@ -34,7 +35,7 @@ public class BaiDuHotSearchCrawler {
* @param 设定文件
* @return void 返回类型
*/
public static List<BaiDuHotSearch> baiduHotSearch() {
public static List<HotSearchList> baiduHotSearch() {
String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
......@@ -55,8 +56,8 @@ public class BaiDuHotSearchCrawler {
* @param htmlBody
* @return
*/
private static List<BaiDuHotSearch> ansysData(String htmlBody){
List<BaiDuHotSearch> list = new ArrayList<>();
private static List<HotSearchList> ansysData(String htmlBody){
List<HotSearchList> list = new ArrayList<>();
try {
Document document = Jsoup.parse(htmlBody);
Elements elements = document.select("table.list-table").select("tr");
......@@ -94,8 +95,7 @@ public class BaiDuHotSearchCrawler {
if (StringUtils.isNotBlank(hot)) {
count = Integer.valueOf(hot);
}
BaiDuHotSearch hotSearch = new BaiDuHotSearch(rank, kw, everurl, count);
HotSearchList hotSearch = new HotSearchList(everurl, kw, count, rank, HotSearchType.百度热搜.name());
if (Objects.nonNull(rank)) {
list.add(hotSearch);
}
......
......@@ -12,7 +12,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.DouyinHotSearch;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
......@@ -34,13 +35,13 @@ public class DouyinHotSearchCrawler {
* @param @return 设定文件
* @return List<ZhihuHotSearch> 返回类型
*/
public static List<DouyinHotSearch> getMobileDouyinHotList(){
List<DouyinHotSearch> list = null;
public static List<HotSearchList> getMobileDouyinHotList(){
List<HotSearchList> list = null;
String url = "https://api.amemv.com/aweme/v1/hot/search/list/";
try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("word_list")){
list = new ArrayList<DouyinHotSearch>();
list = new ArrayList<>();
JSONObject data = JSONObject.parseObject(htmlBody);
JSONArray wordList = data.getJSONObject("data").getJSONArray("word_list");
String positionStr = null;
......@@ -59,7 +60,7 @@ public class DouyinHotSearchCrawler {
Integer hotValue = null;
hotValue = Integer.valueOf(hotValueStr);
// logger.info("热度为:::{}", hot_value);
DouyinHotSearch douyin = new DouyinHotSearch(position, word, hotValue);
HotSearchList douyin = new HotSearchList(null,word, hotValue, position,HotSearchType.抖音热搜.name());
list.add(douyin);
}
}
......
package com.zhiwei.searchhotcrawler.crawler;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.SougoHotSearch;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
/**
* @ClassName:SougoHotSearch
* @Description: TODO(搜狗微信关键词采集)
* @author hero
/**
* @ClassName:SougoHotSearch
* @Description: TODO(搜狗微信关键词采集)
* @author hero
* @date 2019年7月10日 上午10:54:31
*/
public class SougoHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(SougoHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
* @Title: SougoHotSearchTest
* @author hero
* @Description: TODO(PC端搜狗微信关键词采集)
* @param 设定文件
* @Title: SougoHotSearchTest
* @author hero
* @Description: TODO(PC端搜狗微信关键词采集)
* @param 设定文件
* @return void 返回类型
*/
public static List<SougoHotSearch> sougoHotSearch(){
public static List<HotSearchList> sougoHotSearch() {
String url = "https://weixin.sogou.com";
List<SougoHotSearch> list = new ArrayList<SougoHotSearch>();
for(int i =0; i<3; i++){
List<HotSearchList> list = new ArrayList<>();
for (int i = 0; i < 3; i++) {
String htmlBody = null;
try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
if(htmlBody!=null && htmlBody.contains("topwords")){
if (htmlBody != null && htmlBody.contains("topwords")) {
try {
Document document = Jsoup.parse(htmlBody);
Elements elements = document.select("ol#topwords").select("li");
for (Element element : elements) {
try {
//获取排名rank
// 获取排名rank
String rankStr = null;
if(!element.select("li").select("i").isEmpty()) {
if (!element.select("li").select("i").isEmpty()) {
rankStr = element.select("li").select("i").text();
}
Integer rank = null;
if(StringUtils.isNoneBlank(rankStr)) {
if (StringUtils.isNoneBlank(rankStr)) {
rank = Integer.valueOf(rankStr);
}
//获取关键词(String)
// 获取关键词(String)
String kw = element.select("li").select("a").text();
logger.info("关键词:{}", kw);
//获取关键词相关链接everurl(String)
// 获取关键词相关链接everurl(String)
String everurl = element.select("li").select("a").attr("href");
SougoHotSearch hotSearch = new SougoHotSearch(rank,kw,everurl);
if(Objects.nonNull(rank)) {
HotSearchList hotSearch = new HotSearchList(everurl, kw, null, rank, HotSearchType.搜狗微信热搜.name());
if (Objects.nonNull(rank)) {
list.add(hotSearch);
}
} catch (Exception e) {
logger.error("解析搜狗微信时出现解析错误", e);
continue;
}
logger.error("解析搜狗微信时出现解析错误", e);
}
}
}catch (Exception e) {
logger.error("解析搜狗微信时出现解析错误,数据不是json结构",e.fillInStackTrace());
return null;
} catch (Exception e) {
logger.error("解析搜狗微信时出现解析错误,数据不是json结构", e.fillInStackTrace());
return Collections.emptyList();
}
}else{
} else {
logger.info("解析搜狗微信时出现解析错误,页面结构有问题");
}
}
break;
} catch (Exception e) {
logger.error("解析搜狗微信时出现解析错误,页面结构有问题", e);
}
}
}
// SLF4J only renders an argument that has a matching {} placeholder; without it the size was dropped.
logger.info("此轮采集的数据量为:{}", list.size());
return list;
}
}
......@@ -17,7 +17,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.WeiboHotSearch;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.mail.SendMailWeibo;
import com.zhiwei.tools.tools.URLCodeUtil;
......@@ -38,10 +39,10 @@ public class WeiboHotSearchCrawler {
* @param 设定文件
* @return void 返回类型
*/
public static List<WeiboHotSearch> weiboHotSearch(){
public static List<HotSearchList> weiboHotSearch(){
String url = "https://s.weibo.com/top/summary?cate=realtimehot";
List<WeiboHotSearch> list = new ArrayList<WeiboHotSearch>();
List<HotSearchList> list = new ArrayList<HotSearchList>();
for(int i =0; i<3; i++){
String htmlBody = null;
try {
......@@ -63,7 +64,7 @@ public class WeiboHotSearchCrawler {
int hotCount = Integer.valueOf(num);
int rankCount = Integer.valueOf(rank);
WeiboHotSearch hotSearch = new WeiboHotSearch(id, name, hotCount,true, rankCount);
HotSearchList hotSearch = new HotSearchList(id, name, hotCount,true, rankCount, HotSearchType.微博热搜.name());
list.add(hotSearch);
} catch (Exception e) {
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
......@@ -103,13 +104,13 @@ public class WeiboHotSearchCrawler {
* @param 设定文件
* @return void 返回类型
*/
public static List<WeiboHotSearch> weiboHotSearchByPhone(){
public static List<HotSearchList> weiboHotSearchByPhone(){
String url = "";
Map<String,String> headerMap = new HashMap<String,String>();
headerMap.put("Host", "mapi.weibo.com");
headerMap.put("User-Agent", "Weibo/8789 (iPhone; iOS 10.3.3; Scale/2.00)");
List<WeiboHotSearch> result = new ArrayList<WeiboHotSearch>();
List<HotSearchList> result = new ArrayList<HotSearchList>();
String htmlBody;
try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
......@@ -133,7 +134,7 @@ public class WeiboHotSearchCrawler {
int rankCount = cardInfo.getIntValue("desc_extr");
String id = "http://s.weibo.com/weibo/"+URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
WeiboHotSearch hotSearch = new WeiboHotSearch(id, name, hotCount, hot, rankCount);
HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rankCount, HotSearchType.微博热搜.name());
logger.info("采集到的数据:::{}", hotSearch);
result.add(hotSearch);
}
......
......@@ -2,7 +2,6 @@ package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
......@@ -13,7 +12,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.ZhihuHotSearch;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.URLCodeUtil;
......@@ -34,8 +34,8 @@ public class ZhihuHotSearchCrawler {
* @param 设定文件
* @return void 返回类型
*/
public static List<ZhihuHotSearch> getZhihuHotList(){
List<ZhihuHotSearch> list = null;
public static List<HotSearchList> getZhihuHotList(){
List<HotSearchList> list = null;
String url = "https://www.zhihu.com/api/v4/search/top_search";
String rerferer = "https://www.zhihu.com/search?type=content&q=%E5%BF%AB%E6%89%8B";
Map<String,String> headerMap = HeaderTool.getCommonHead();
......@@ -47,22 +47,20 @@ public class ZhihuHotSearchCrawler {
headerMap.put("Referer", rerferer);
try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
if(htmlBody != null){
if(htmlBody.contains("words")){
list = new ArrayList<>();
JSONObject topSearch = JSONObject.parseObject(htmlBody);
JSONArray words = topSearch.getJSONObject("top_search").getJSONArray("words");
String link = null;
String displayQuery = null;
String query = null;
for (int i = 0; i < words.size(); i++) {
JSONObject word = words.getJSONObject(i);
query = word.getString("query");
displayQuery = word.getString("display_query");
link = "https://www.zhihu.com/search?q="+URLCodeUtil.getURLEncode(query, "utf-8")+"&utm_content=search_hot&utm_medium=organic&utm_source=zhihu&type=content";
ZhihuHotSearch zhihu = new ZhihuHotSearch(link, query, displayQuery,new Date());
list.add(zhihu);
}
if(htmlBody != null && htmlBody.contains("words")){
list = new ArrayList<>();
JSONObject topSearch = JSONObject.parseObject(htmlBody);
JSONArray words = topSearch.getJSONObject("top_search").getJSONArray("words");
String link = null;
String displayQuery = null;
String query = null;
for (int i = 0; i < words.size(); i++) {
JSONObject word = words.getJSONObject(i);
query = word.getString("query");
displayQuery = word.getString("display_query");
link = "https://www.zhihu.com/search?q="+URLCodeUtil.getURLEncode(query, "utf-8")+"&utm_content=search_hot&utm_medium=organic&utm_source=zhihu&type=content";
HotSearchList zhihu = new HotSearchList(link, displayQuery, null, i, HotSearchType.知乎热搜.name());
list.add(zhihu);
}
}
} catch (IOException e) {
......@@ -80,8 +78,8 @@ public class ZhihuHotSearchCrawler {
* @param @return 设定文件
* @return List<ZhihuHotSearch> 返回类型
*/
public static List<ZhihuHotSearch> getMobileZhihuHotList(){
List<ZhihuHotSearch> list = null;
public static List<HotSearchList> getMobileZhihuHotList(){
List<HotSearchList> list = null;
String url = "https://api.zhihu.com/topstory/hot-list?limit=40&reverse_order=0";
Map<String,String> headerMap = HeaderTool.getCommonHead();
headerMap.put("Host", "api.zhihu.com");
......@@ -93,26 +91,20 @@ public class ZhihuHotSearchCrawler {
for(int j=0;j<3;j++){
try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
if(htmlBody != null){
if(htmlBody.contains("author")){
list = new ArrayList<ZhihuHotSearch>();
JSONObject top_search = JSONObject.parseObject(htmlBody);
JSONArray words = top_search.getJSONArray("data");
String link = null;
String display_query = null;
String query = null;
for (int i = 0; i < words.size(); i++) {
JSONObject word = words.getJSONObject(i).getJSONObject("target");
query = word.getString("title");
display_query = word.getString("title");
link = "https://www.zhihu.com/question/"+word.getLongValue("id");
ZhihuHotSearch zhihu = new ZhihuHotSearch(link, query, display_query,new Date());
list.add(zhihu);
}
break;
}else{
System.out.println("---------------");
if(htmlBody != null && htmlBody.contains("author")){
list = new ArrayList<>();
JSONObject topSearch = JSONObject.parseObject(htmlBody);
JSONArray words = topSearch.getJSONArray("data");
String link = null;
String displayQuery = null;
for (int i = 0; i < words.size(); i++) {
JSONObject word = words.getJSONObject(i).getJSONObject("target");
displayQuery = word.getString("title");
link = "https://www.zhihu.com/question/"+word.getLongValue("id");
HotSearchList zhihu = new HotSearchList(link, displayQuery, null, i, HotSearchType.知乎热搜.name());
list.add(zhihu);
}
break;
}
} catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace());
......
package com.zhiwei.searchhotcrawler.dao;
import java.util.Calendar;
import java.util.List;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * MongoDB access object for Baidu hot-search documents.
 */
public class BaiduHotSearchDAO extends MongoDBTemplate {

    /**
     * Selects the configured database and a collection named
     * {@code Config.collBaiduName + <current year> + "_01" | "_06"}, where the
     * suffix is "_01" for January-June and "_06" for July-December.
     */
    public BaiduHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        // Calendar.MONTH and Calendar.YEAR are field selectors (constants 2 and 1),
        // not the current date; the real values must be read from a Calendar instance.
        // NOTE(review): before this fix the name always resolved to <prefix>1_01, so
        // existing documents may live in that collection - confirm before deploying.
        Calendar now = Calendar.getInstance();
        String halfYearSuffix = now.get(Calendar.MONTH) < 6 ? "_01" : "_06";
        super.setCollName(Config.collBaiduName + now.get(Calendar.YEAR) + halfYearSuffix);
    }

    /**
     * Inserts the given documents, trying at most three times. After a
     * successful insert it calls {@code ZhiWeiTools.sleep(200)} and stops; a
     * failed attempt prints the stack trace and is retried.
     *
     * @param list documents to insert
     */
    public void addBaiduSearch(List<DBObject> list) {
        for (int attempt = 0; attempt < 3; attempt++) {
            try {
                this.getReadColl().insert(list);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Computes how much the search index changed since the most recent stored
     * document with the same keyword.
     *
     * @param baiduHotSearch entry whose keyword and count are compared
     * @return current count minus the stored count; 0 when no earlier document
     *         exists or the lookup fails
     */
    public int getChangeCount(BaiDuHotSearch baiduHotSearch) {
        int result = 0;
        DBObject query = new BasicDBObject();
        query.put("kw", baiduHotSearch.getKw());
        DBObject sort = new BasicDBObject();
        sort.put("time", -1);
        DBCursor cur = null;
        try {
            // Newest document first, and only that one is needed.
            cur = this.getReadColl().find(query).sort(sort).limit(1);
            if (cur.hasNext()) {
                DBObject doc = cur.next();
                result = baiduHotSearch.getCount() - Integer.parseInt(doc.get("count").toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the cursor even when reading or parsing failed.
            if (cur != null) {
                cur.close();
            }
        }
        return result;
    }
}
package com.zhiwei.searchhotcrawler.dao;
import java.util.Calendar;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.WriteConcern;
import com.zhiwei.searchhotcrawler.bean.DouyinHotSearch;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * MongoDB access object for Douyin hot-search documents.
 */
public class DouyinHotSearchDAO extends MongoDBTemplate {

    /**
     * Selects the configured database and a collection named
     * {@code Config.collDouyinName + <current year> + "_01" | "_06"}, where the
     * suffix is "_01" for January-June and "_06" for July-December.
     */
    public DouyinHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        // Calendar.MONTH and Calendar.YEAR are field selectors (constants 2 and 1),
        // not the current date; the real values must be read from a Calendar instance.
        // NOTE(review): before this fix the name always resolved to <prefix>1_01, so
        // existing documents may live in that collection - confirm before deploying.
        Calendar now = Calendar.getInstance();
        String halfYearSuffix = now.get(Calendar.MONTH) < 6 ? "_01" : "_06";
        super.setCollName(Config.collDouyinName + now.get(Calendar.YEAR) + halfYearSuffix);
    }

    /**
     * Inserts one document with {@code WriteConcern.SAFE}, trying at most three
     * times. After a successful insert it calls {@code ZhiWeiTools.sleep(200)}
     * and stops; a failed attempt prints the stack trace and is retried.
     *
     * @param douyin document to insert
     */
    @SuppressWarnings("deprecation")
    public void addDouyinHotSearch(DBObject douyin) {
        for (int attempt = 0; attempt < 3; attempt++) {
            try {
                this.getReadColl().insert(douyin, WriteConcern.SAFE);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                // Previously swallowed without a trace; report it like the sibling DAOs do.
                e.printStackTrace();
            }
        }
    }

    /**
     * Computes how much the heat value changed since the most recent stored
     * document with the same word.
     *
     * @param douyinHotSearch entry whose word and heat value are compared
     * @return current heat value minus the stored one; 0 when no earlier
     *         document exists or the lookup fails
     */
    public int getChangeCount(DouyinHotSearch douyinHotSearch) {
        int result = 0;
        DBObject query = new BasicDBObject();
        query.put("word", douyinHotSearch.getWord());
        DBObject sort = new BasicDBObject();
        sort.put("time", -1);
        DBCursor cur = null;
        try {
            // Newest document first, and only that one is needed.
            cur = this.getReadColl().find(query).sort(sort).limit(1);
            if (cur.hasNext()) {
                DBObject doc = cur.next();
                result = douyinHotSearch.getHot_value() - Integer.parseInt(doc.get("hot_value").toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the cursor even when reading or parsing failed.
            if (cur != null) {
                cur.close();
            }
        }
        return result;
    }
}
......@@ -6,58 +6,72 @@ import java.util.Calendar;
import java.util.Date;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.WeiboHotSearch;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.cache.CacheManager;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class WeiboHotSearchDAO extends MongoDBTemplate{
public WeiboHotSearchDAO() {
public class HotSearchListDAO extends MongoDBTemplate{
// Bind the logger to this DAO so its messages are attributed to HotSearchListDAO rather than the Baidu crawler.
private static final Logger logger = LoggerFactory.getLogger(HotSearchListDAO.class);
@SuppressWarnings("unused")
public HotSearchListDAO() {
super();
super.setDbName(Config.dbName);
String collWeiboName;
if(Calendar.MONTH<6){
collWeiboName = Config.collWeiboName + Calendar.YEAR +"_01";
}else{
collWeiboName = Config.collWeiboName + Calendar.YEAR +"_06";
}
super.setCollName(collWeiboName);
String time = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
String year = time.substring(0,4);
String month = time.substring(5,7);
String collName = Config.collName + year + "_" + month;
super.setCollName(collName);
}
/**
* @Title: addWeiboHotSearch
* @author hero
* @Description: TODO(这里用一句话描述这个方法的作用)
* @param @param doc 设定文件
* @return void 返回类型
* 添加数据入库
* @param list
*/
public void addWeiboHotSearch(List<DBObject> list){
public void addHotSearchList(List<DBObject> list){
for(int i=0; i<3; i++){
try {
this.getReadColl().insert(list);
ZhiWeiTools.sleep(200);
break;
} catch (Exception e) {
e.printStackTrace();
continue;
logger.error("存储数据时出错,错误为:{}", e);
}
}
}
public void addHotSearch(DBObject doc){
for(int i=0; i<3; i++){
try {
this.getReadColl().save(doc);
ZhiWeiTools.sleep(200);
break;
} catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e);
}
}
}
/**
* 查询据上次变化量
* @Title: getChangeCount
* @author hero
* @Description: TODO(查询据上次变化量)
* @param @param weiboHotSearch
* @param @return 设定文件
* @return int 返回类型
*/
public int getChangeCount(WeiboHotSearch weiboHotSearch){
public int getChangeCount(HotSearchList weiboHotSearch){
int result = 0;
DBObject query = new BasicDBObject();
query.put("name", weiboHotSearch.getName());
......@@ -72,7 +86,7 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
}
cur.close();
} catch (Exception e) {
e.printStackTrace();
logger.error("存储数据时出错,错误为:{}", e);
return result;
}
return result;
......@@ -86,12 +100,13 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
* @param @return 设定文件
* @return List<DBObject> 返回类型
*/
public List<DBObject> getWeiboHotOneHour(){
List<DBObject> list = new ArrayList<DBObject>();
public List<DBObject> getHotOneHour(String type){
List<DBObject> list = new ArrayList<>();
Date date = new Date((new Date().getTime()-60*60*1000));
DBObject query = new BasicDBObject();
query.put("time", new BasicDBObject("$gte", date));
query.put("changeCount", 0);
query.put("type", type);
try {
DBCursor cur = this.getReadColl().find(query);
......@@ -105,7 +120,7 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
}
cur.close();
} catch (Exception e) {
return null;
logger.error("存储数据时出错,错误为:{}", e);
}
return list;
}
......
package com.zhiwei.searchhotcrawler.dao;
import java.util.List;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * DAO bound to the database {@code Config.dbName} and the collection
 * {@code Config.collSougoName}.
 */
public class SougoHotSearchDAO extends MongoDBTemplate{

    public SougoHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collSougoName);
    }

    /**
     * Bulk-inserts the given documents, trying up to three times and stopping at the
     * first success. Failures are printed and never propagated.
     *
     * @author hero
     * @param list documents to insert; {@code null} or empty is ignored
     */
    public void addSougoSearch(List<DBObject> list){
        // Nothing to write: avoid sending an empty batch to the driver and retrying it.
        if (list == null || list.isEmpty()) {
            return;
        }
        for (int attempt = 0; attempt < 3; attempt++) {
            try {
                this.getReadColl().insert(list);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    // Disabled: getChangeCount (queried the change in count since the previous record).
    // Kept commented out exactly as it was in the original source.
    //	public int getChangeCount(SougoHotSearch sougoHotSearch){
    //		int result = 0;
    //		DBObject query = new BasicDBObject();
    //		query.put("kw", sougoHotSearch.getKw());
    //		DBObject sort = new BasicDBObject();
    //		sort.put("time", -1);
    //		try {
    //			DBCursor cur = this.getReadColl().find(query).sort(sort).limit(1);
    //			while(cur.hasNext()){
    //				DBObject doc = cur.next();
    //				result = sougoHotSearch.getCount() - Integer.valueOf(doc.get("count").toString());
    //				break;
    //			}
    //			cur.close();
    //		} catch (Exception e) {
    //			e.printStackTrace();
    //			return result;
    //		}
    //		return result;
    //	}
}
package com.zhiwei.searchhotcrawler.dao;
import java.util.Collections;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
public class WechatUserDao extends MongoDBTemplate{
// Logger is named after this DAO so its messages are attributed to the right class.
private static final Logger logger = LoggerFactory.getLogger(WechatUserDao.class);
public WechatUserDao() {
super();
super.setDbName(Config.dbName);
......@@ -31,8 +39,7 @@ public class WechatUserDao extends MongoDBTemplate{
this.getReadColl().save(doc);
break;
} catch (Exception e) {
e.printStackTrace();
continue;
logger.error("存储数据时出错,错误为:{}", e);
}
}
}
......@@ -54,9 +61,9 @@ public class WechatUserDao extends MongoDBTemplate{
return (List<String>)doc.get("user");
}
} catch (Exception e) {
return null;
logger.error("存储数据时出错,错误为:{}", e);
}
return null;
return Collections.emptyList();
}
}
package com.zhiwei.searchhotcrawler.dao;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.WriteConcern;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * DAO bound to the database {@code Config.dbName} and the collection
 * {@code Config.collZhihuName}.
 */
public class ZhihuHotSearchDAO extends MongoDBTemplate{

    /** Length of the look-back window used by {@link #getZhiHuHotSearch()}, in milliseconds. */
    private static final long ONE_HOUR_MILLIS = 60L * 60L * 1000L;

    public ZhihuHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collZhihuName);
    }

    /**
     * Inserts one document with {@code WriteConcern.SAFE}, trying up to three times and
     * stopping at the first success. Failures are printed and never propagated.
     *
     * @param zhihu document to insert; {@code null} is ignored
     */
    @SuppressWarnings("deprecation")
    public void addZhiHuHotSearch(DBObject zhihu){
        if (zhihu == null) {
            return;
        }
        for (int attempt = 0; attempt < 3; attempt++) {
            try {
                this.getReadColl().insert(zhihu,WriteConcern.SAFE);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                // Previously swallowed silently; surface the failure before retrying.
                e.printStackTrace();
            }
        }
    }

    /**
     * Returns the documents whose {@code time} field is within the last hour.
     *
     * @author hero
     * @return the matching documents, or {@code null} when nothing matched or the query
     *         failed before any document was read (callers already check for null)
     */
    public List<DBObject> getZhiHuHotSearch(){
        List<DBObject> list = null;
        DBCursor cur = null;
        try {
            Date date = new Date(System.currentTimeMillis() - ONE_HOUR_MILLIS);
            DBObject query = new BasicDBObject("time", new BasicDBObject("$gte", date));
            cur = this.getReadColl().find(query);
            while(cur.hasNext()){
                // Allocate lazily so "no match" keeps yielding null, as before.
                if (list == null) {
                    list = new ArrayList<DBObject>();
                }
                list.add(cur.next());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the cursor even when iteration fails part-way.
            if (cur != null) {
                cur.close();
            }
        }
        return list;
    }
}
......@@ -29,13 +29,13 @@ public class MongoDBTemplate {
ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);
if(reader==null)
{
// reader = new MongoClient(address, Arrays.asList(credential));
reader = new MongoClient(address);
reader = new MongoClient(address, Arrays.asList(credential));
// reader = new MongoClient(address);
}
if(writer==null)
{
// writer = new MongoClient(address, Arrays.asList(credential));
writer = new MongoClient(address);
writer = new MongoClient(address, Arrays.asList(credential));
// writer = new MongoClient(address);
}
} catch (MongoException e) {
e.printStackTrace();
......
package com.zhiwei.searchhotcrawler.test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
import com.mongodb.MongoClient;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * One-off migration utility: copies the Weibo hot-search documents of 2019-07-16 from
 * {@code NetWork.weibo_hotsearch2018_10} into the monthly {@code hot_search_list<yyyy>_<MM>}
 * collection of the {@code hot_search_list} database, tagging each copy with its type.
 */
public class HotSearchListTest{

    /** Fields copied verbatim from each source document. */
    private static final String[] COPIED_FIELDS = {
        "_id", "name", "url", "count", "hot", "day", "time", "changeCount", "rank"
    };

    public static void main(String[] args) {
        ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);

        MongoCredential credential = MongoCredential.createCredential(Config.userName, Config.authDB, Config.userPwd.toCharArray());
        Mongo mongo = new MongoClient(address, Arrays.asList(credential));

        // NOTE(review): these credentials are hard-coded in source; they should come from
        // Config or an external secret store instead.
        MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());
        Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew));

        try {
            DBCollection coll = mongo.getDB("NetWork").getCollection("weibo_hotsearch2018_10");
            DB dbNew = mongoNew.getDB("hot_search_list");
            Map<String,String> timLine = TimeParse.getTimeMap("2019-07-16 00:00:00", "2019-07-16 23:59:59", "HH", 1);
            timLine.forEach((start, end) -> copyWindow(coll, dbNew, start, end));
        } finally {
            // Close both clients; the target client was previously never closed.
            mongoNew.close();
            mongo.close();
        }
    }

    /**
     * Copies every source document whose {@code time} lies in {@code [start, end]} into the
     * target collection named after the year and month of {@code end}.
     */
    private static void copyWindow(DBCollection source, DB targetDb, String start, String end) {
        String year = end.substring(0,4);
        String month = end.substring(5,7);
        Date startDate = TimeParse.stringFormartDate(start);
        Date endDate = TimeParse.stringFormartDate(end);
        String collName = "hot_search_list"+year+"_"+month;
        System.out.println("collName=========="+collName);
        DBCollection target = targetDb.getCollection(collName);
        DBObject query = new BasicDBObject("time",
                new BasicDBObject("$gte",startDate).append("$lte", endDate));
        DBCursor cur = source.find(query);
        try {
            System.out.println(query +"======="+ cur.count());
            while(cur.hasNext()) {
                DBObject doc = cur.next();
                DBObject copy = new BasicDBObject();
                for (String field : COPIED_FIELDS) {
                    copy.put(field, doc.get(field));
                }
                copy.put("type", HotSearchType.微博热搜.name());
                target.save(copy);
            }
        } finally {
            cur.close();
        }
    }
}
......@@ -10,37 +10,39 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.BaiduHotSearchDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
public class BaiduHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(BaiduHotSearchRun.class);
private BaiduHotSearchDAO baiduHotSearchDAO = new BaiduHotSearchDAO();
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
/**
 * Runs one Baidu collection round: fetches the current list from the crawler, converts
 * each entry to a document tagged with the Baidu type and stores the batch.
 */
@Override
public void run() {
    logger.info("百度风云榜采集开始........");
    List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch();
    logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
    // Only touch the database when the crawler actually returned something;
    // an empty batch is never handed to the DAO.
    if (list != null && !list.isEmpty()) {
        List<DBObject> saveDataList = new ArrayList<>(list.size());
        for (HotSearchList baiduHotSearch : list) {
            int changeCount = hotSearchDAO.getChangeCount(baiduHotSearch);
            DBObject doc = new BasicDBObject();
            doc.put("_id", baiduHotSearch.getId());
            doc.put("name", baiduHotSearch.getName());
            doc.put("url", baiduHotSearch.getUrl());
            doc.put("count", baiduHotSearch.getCount());
            doc.put("day", baiduHotSearch.getDay());
            doc.put("time", baiduHotSearch.getTime());
            doc.put("changeCount", changeCount);
            doc.put("rank", baiduHotSearch.getRank());
            doc.put("type", HotSearchType.百度热搜.name());
            saveDataList.add(doc);
        }
        hotSearchDAO.addHotSearchList(saveDataList);
    }
    logger.info("百度风云榜采集结束........");
}
......
......@@ -9,34 +9,36 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.DouyinHotSearch;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.DouyinHotSearchDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
public class DouyinHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class);
private DouyinHotSearchDAO douyinHotSearchDAO = new DouyinHotSearchDAO();
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Override
public void run() {
logger.info("抖音热搜榜采集开始........");
List<DouyinHotSearch> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<DBObject>();
for(DouyinHotSearch douyinHotSearch : list){
int changeCount = douyinHotSearchDAO.getChangeCount(douyinHotSearch);
List<DBObject> data = new ArrayList<>();
for(HotSearchList douyinHotSearch : list){
int changeCount = hotSearchDAO.getChangeCount(douyinHotSearch);
DBObject douyin = new BasicDBObject();
douyin.put("_id", douyinHotSearch.getId());
douyin.put("name", douyinHotSearch.getWord());
douyin.put("rank", douyinHotSearch.getPosition());
douyin.put("count", douyinHotSearch.getHot_value());
// douyin.put("url", douyinHotSearch.getUrl());
douyin.put("name", douyinHotSearch.getName());
douyin.put("rank", douyinHotSearch.getRank());
douyin.put("count", douyinHotSearch.getCount());
douyin.put("day", douyinHotSearch.getDay());
douyin.put("time", douyinHotSearch.getTime());
douyin.put("changeCount", changeCount);
douyin.put("url", null);
douyin.put("type", HotSearchType.抖音热搜.name());
data.add(douyin);
douyinHotSearchDAO.addDouyinHotSearch(douyin);
hotSearchDAO.addHotSearch(douyin);
}
logger.info("抖音热搜榜采集结束........");
}
......
......@@ -12,7 +12,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.dao.WechatUserDao;
import com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.util.WechatConstant;
......@@ -20,10 +21,9 @@ import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class SendWeiboHotSearchRun extends Thread {
private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO();
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
private static WechatUserDao wechatUserDao = new WechatUserDao();
private static Logger logger = LoggerFactory.getLogger(SendWeiboHotSearchRun.class);
@Override
public void run() {
while (true) {
......@@ -32,8 +32,8 @@ public class SendWeiboHotSearchRun extends Thread {
int hour = calendar.get(Calendar.HOUR_OF_DAY);
logger.info("微博推送,当前系统时间为:" + hour);
if (hour > 6 && hour < 23) {
List<DBObject> list = weiboHotSearchDAO.getWeiboHotOneHour();
if (list != null && list.size() > 0) {
List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.微博热搜.name());
if (list != null && !list.isEmpty()) {
for (DBObject weibo : list) {
String title = weibo.get("name").toString();
String time = TimeParse.dateFormartString((Date) weibo.get("time"), "yyyy-MM-dd HH:mm:ss");
......
......@@ -11,8 +11,9 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.dao.WechatUserDao;
import com.zhiwei.searchhotcrawler.dao.ZhihuHotSearchDAO;
import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.util.WechatConstant;
......@@ -20,7 +21,7 @@ import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class SendZhihuHotSearchRun extends Thread{
private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO();
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
private static WechatUserDao wechatUserDao = new WechatUserDao();
private static Logger logger = LoggerFactory.getLogger(SendZhihuHotSearchRun.class);
@Override
......@@ -32,8 +33,8 @@ public class SendZhihuHotSearchRun extends Thread{
int hour = calendar.get(Calendar.HOUR_OF_DAY);
logger.info("知乎推送,当前系统时间为:"+hour);
if(hour > 6 && hour <23){
List<DBObject> list = zhihuHotSearchDAO.getZhiHuHotSearch();
if(list!=null && list.size()>0){
List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.知乎热搜.name());
if(list!=null && !list.isEmpty()){
for(DBObject zhihu : list){
String title = zhihu.get("display_query").toString();
String time = TimeParse.dateFormartString((Date)zhihu.get("time"), "yyyy-MM-dd HH:mm:ss");
......@@ -51,7 +52,6 @@ public class SendZhihuHotSearchRun extends Thread{
} catch (Exception e) {
logger.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools.sleep(1*60*60*1000);
continue;
}
}
}
......@@ -66,7 +66,7 @@ public class SendZhihuHotSearchRun extends Thread{
*/
public static void sendTemplateByUserIds(String title,String time, String url) {
Map<String, Object> dataMap = new HashMap<String, Object>();
Map<String, Object> dataMap = new HashMap<>();
JSONObject first = new JSONObject();
first.put("value", "您好,有一条来自知乎热搜榜的预警通知。");
dataMap.put("first", first);
......@@ -87,7 +87,7 @@ public class SendZhihuHotSearchRun extends Thread{
dataMap.put("remark", remark);
List<String> userList = getUserList();
if(userList!=null && userList.size()>0) {
if(userList!=null && !userList.isEmpty()) {
for (String openId : userList) {
Template template = new Template();
template.setTouser(openId);
......
......@@ -9,32 +9,34 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.SougoHotSearch;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.SougoHotSearchDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
public class SougoHotSearchRun extends Thread {
private static Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class);
private SougoHotSearchDAO sougoHotSearchDAO = new SougoHotSearchDAO();
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Override
public void run() {
logger.info("搜狗微信采集开始........");
List<SougoHotSearch> list = SougoHotSearchCrawler.sougoHotSearch();
List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch();
logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>();
for(SougoHotSearch sougoHotSearch : list){
for(HotSearchList sougoHotSearch : list){
DBObject doc = new BasicDBObject();
doc.put("_id", sougoHotSearch.getId());
doc.put("name", sougoHotSearch.getKw());
doc.put("url", sougoHotSearch.getEverurl());
doc.put("name", sougoHotSearch.getName());
doc.put("url", sougoHotSearch.getUrl());
doc.put("day", sougoHotSearch.getDay());
doc.put("time", sougoHotSearch.getTime());
doc.put("rank", sougoHotSearch.getRank());
doc.put("type", HotSearchType.搜狗微信热搜.name());
data.add(doc);
}
sougoHotSearchDAO.addSougoSearch(data);
hotSearchDAO.addHotSearchList(data);
logger.info("搜狗微信采集结束........");
}
......
......@@ -9,22 +9,23 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.WeiboHotSearch;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
public class WeiboHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchRun.class);
private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO();
private HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO();
@Override
public void run() {
logger.info("微博话题采集开始........");
List<WeiboHotSearch> list = WeiboHotSearchCrawler.weiboHotSearch();
List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>();
for(WeiboHotSearch weiboHotSearch : list){
for(HotSearchList weiboHotSearch : list){
int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch);
DBObject doc = new BasicDBObject();
doc.put("_id", weiboHotSearch.getId());
......@@ -36,9 +37,10 @@ public class WeiboHotSearchRun extends Thread{
doc.put("time", weiboHotSearch.getTime());
doc.put("changeCount", changeCount);
doc.put("rank", weiboHotSearch.getRank());
doc.put("type", HotSearchType.微博热搜.name());
data.add(doc);
}
weiboHotSearchDAO.addWeiboHotSearch(data);
weiboHotSearchDAO.addHotSearchList(data);
logger.info("微博话题采集结束........");
}
......
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
......@@ -9,31 +8,36 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.ZhihuHotSearch;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.ZhihuHotSearchDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
public class ZhihuHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchRun.class);
private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO();
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Override
public void run() {
logger.info("知乎话题采集开始........");
List<ZhihuHotSearch> list = ZhihuHotSearchCrawler.getZhihuHotList();
List<ZhihuHotSearch> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList();
List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
list.addAll(mobilelist);
logger.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<DBObject>();
for(ZhihuHotSearch zhihuHotSearch : list){
for(HotSearchList zhihuHotSearch : list){
DBObject zhihu = new BasicDBObject();
zhihu.put("_id", zhihuHotSearch.getUrl());
zhihu.put("query", zhihuHotSearch.getQuery());
zhihu.put("display_query", zhihuHotSearch.getDisplayQuery());
zhihu.put("_id", zhihuHotSearch.getId());
zhihu.put("name", zhihuHotSearch.getName());
zhihu.put("url", zhihuHotSearch.getUrl());
zhihu.put("count", zhihuHotSearch.getCount());
zhihu.put("hot", zhihuHotSearch.isHot());
zhihu.put("day", zhihuHotSearch.getDay());
zhihu.put("time", zhihuHotSearch.getTime());
data.add(zhihu);
zhihuHotSearchDAO.addZhiHuHotSearch(zhihu);
zhihu.put("changeCount", 0);
zhihu.put("rank", zhihuHotSearch.getRank());
zhihu.put("type", HotSearchType.知乎热搜.name());
hotSearchDAO.addHotSearch(zhihu);
}
logger.info("知乎话题采集结束........");
}
......
#mongoIp=202.107.192.94
mongoIp=192.168.0.81
mongoPort=27017
mongoIp=192.168.0.101
mongoPort=30000
#mongoIp=192.168.0.81
#mongoPort=27017
db.username=zzwno
db.paasword=zzwno1q2w3e4r
db.certifiedDB=oneDB
dbName=NetWork
collWeiboName=weibo_hotsearch
collZhihuName=zhihu_hotsearch
collWechatUserName=wechat_user
collBaiduName=baidu_hotsearch
collSougoName=sougo_hotsearch
collDouyinName=douyin_hotsearch
\ No newline at end of file
db.certifiedDB=admin
dbName=hot_search_list
collName=hot_search_list
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment