Commit b528f200 by zhiwei

添加百度热搜、微信热搜、抖音热搜榜

parent ca20b119
...@@ -38,12 +38,12 @@ ...@@ -38,12 +38,12 @@
<dependency> <dependency>
<groupId>com.zhiwei.crawler</groupId> <groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId> <artifactId>crawler-core</artifactId>
<version>0.3.0-RELEASE</version> <version>0.5.2-RELEASE</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.zhiwei.tools</groupId> <groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId> <artifactId>zhiwei-tools</artifactId>
<version>0.1.2-SNAPSHOT</version> <version>0.1.4-SNAPSHOT</version>
</dependency> </dependency>
</dependencies> </dependencies>
......
...@@ -3,6 +3,8 @@ package com.zhiwei.searchhotcrawler.bean; ...@@ -3,6 +3,8 @@ package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable; import java.io.Serializable;
import java.util.Date; import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
public class DouyinHotSearch implements Serializable { public class DouyinHotSearch implements Serializable {
...@@ -22,6 +24,8 @@ public class DouyinHotSearch implements Serializable { ...@@ -22,6 +24,8 @@ public class DouyinHotSearch implements Serializable {
private int changeCount; //据上分钟变化量 private int changeCount; //据上分钟变化量
private String day;
public DouyinHotSearch(){} public DouyinHotSearch(){}
...@@ -31,6 +35,7 @@ public class DouyinHotSearch implements Serializable { ...@@ -31,6 +35,7 @@ public class DouyinHotSearch implements Serializable {
this.word = word; this.word = word;
this.hot_value = hot_value; this.hot_value = hot_value;
this.time = new Date(); this.time = new Date();
this.day = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
} }
@Override @Override
...@@ -104,4 +109,13 @@ public class DouyinHotSearch implements Serializable { ...@@ -104,4 +109,13 @@ public class DouyinHotSearch implements Serializable {
public void setChangeCount(int changeCount) { public void setChangeCount(int changeCount) {
this.changeCount = changeCount; this.changeCount = changeCount;
} }
/**
 * Returns the day string stored on this entry.
 * NOTE(review): the constructor fills it via TimeParse.dateFormartString(new Date(), "yyyy-MM-dd"),
 * so it is presumably the creation date in yyyy-MM-dd form - confirm against TimeParse.
 *
 * @return the stored day value; may be null if it was never set
 */
public String getDay() {
return day;
}
/**
 * Replaces the day string stored on this entry. No validation or reformatting is applied.
 *
 * @param day the new day value (the constructor uses a "yyyy-MM-dd" pattern)
 */
public void setDay(String day) {
this.day = day;
}
} }
package com.zhiwei.searchhotcrawler.crawler; package com.zhiwei.searchhotcrawler.crawler;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -25,7 +25,8 @@ import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch; ...@@ -25,7 +25,8 @@ import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch;
public class BaiDuHotSearchCrawler { public class BaiDuHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(BaiDuHotSearchCrawler.class); private static Logger logger = LoggerFactory.getLogger(BaiDuHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: BaiDuHotSearchTest * @Title: BaiDuHotSearchTest
* @author hero * @author hero
...@@ -33,88 +34,80 @@ public class BaiDuHotSearchCrawler { ...@@ -33,88 +34,80 @@ public class BaiDuHotSearchCrawler {
* @param 设定文件 * @param 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<BaiDuHotSearch> baiduHotSearch(){ public static List<BaiDuHotSearch> baiduHotSearch() {
String url = "http://top.baidu.com/buzz?b=1&fr=topindex"; String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
List<BaiDuHotSearch> list = new ArrayList<BaiDuHotSearch>();
for(int i =0; i<3; i++){
String htmlBody = null;
try { try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
if(htmlBody!=null && htmlBody.contains("mainBody")){ if (htmlBody != null && htmlBody.contains("mainBody")) {
return ansysData(htmlBody);
} else {
logger.info("解析百度风云榜时出现解析错误,页面结构有问题");
}
} catch (Exception e) {
logger.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
}
return Collections.emptyList();
}
/**
* 解析数据
* @param htmlBody
* @return
*/
private static List<BaiDuHotSearch> ansysData(String htmlBody){
List<BaiDuHotSearch> list = new ArrayList<>();
try { try {
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
Elements elements = document.select("table.list-table").select("tr"); Elements elements = document.select("table.list-table").select("tr");
for (Element element : elements) { if (Objects.nonNull(elements) && !elements.isEmpty()) {
elements.forEach(element -> {
try { try {
//获取排名rank // 获取排名rank
String rankStr = null; String rankStr = null;
//根据网页标签,给rankStr做判断 // 根据网页标签,给rankStr做判断
if(!element.select("td.first").select("span.num-top").isEmpty()) { if (!element.select("td.first").select("span.num-top").isEmpty()) {
rankStr = element.select("td.first").select("span.num-top").text(); rankStr = element.select("td.first").select("span.num-top").text();
}else if(!element.select("td.first").select("span.num-normal").isEmpty()) { } else if (!element.select("td.first").select("span.num-normal").isEmpty()) {
rankStr = element.select("td.first").select("span.num-normal").text(); rankStr = element.select("td.first").select("span.num-normal").text();
} }
Integer rank = null; Integer rank = null;
//判断rankStr是否为空 // 判断rankStr是否为空
if(StringUtils.isNoneBlank(rankStr)) { if (StringUtils.isNoneBlank(rankStr)) {
rank = Integer.valueOf(rankStr); rank = Integer.valueOf(rankStr);
} }
// 获取关键词(String)
//获取id(主键String)
// String id = element.select("td.keyword").select("a").text() + "_" +
// TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss");
//获取关键词(String)
String kw = element.select("td.keyword").select("a.list-title").text(); String kw = element.select("td.keyword").select("a.list-title").text();
logger.info("关键词:{}", kw); logger.info("关键词:{}", kw);
// 获取关键词相关链接everurl(String)
//获取关键词相关链接everurl(String)
String everurl = element.select("td.keyword").select("a.list-title").attr("href"); String everurl = element.select("td.keyword").select("a.list-title").attr("href");
// 获取搜索指数count(int)
//获取搜索指数count(int)
String hot = null; String hot = null;
//判断热度值所在的规则是否为null // 判断热度值所在的规则是否为null
if(!element.select("td.last").select("span.icon-fall").isEmpty()) { if (!element.select("td.last").select("span.icon-fall").isEmpty()) {
hot = element.select("td.last").select("span.icon-fall").text(); hot = element.select("td.last").select("span.icon-fall").text();
}else if(!element.select("td.last").select("span.icon-rise").isEmpty()) { } else if (!element.select("td.last").select("span.icon-rise").isEmpty()) {
hot = element.select("td.last").select("span.icon-rise").text(); hot = element.select("td.last").select("span.icon-rise").text();
} }
int count = 0; int count = 0;
//判断hot是否为空 // 判断hot是否为空
if(StringUtils.isNotBlank(hot)) { if (StringUtils.isNotBlank(hot)) {
count = Integer.valueOf(hot); count = Integer.valueOf(hot);
} }
BaiDuHotSearch hotSearch = new BaiDuHotSearch(rank, kw, everurl, count);
BaiDuHotSearch hotSearch = new BaiDuHotSearch(rank,kw,everurl,count); if (Objects.nonNull(rank)) {
if(Objects.nonNull(rank)) {
list.add(hotSearch); list.add(hotSearch);
} }
} catch (Exception e) { } catch (Exception e) {
// SendMailWeibo.sendMail("百度风云榜采集出现问题", "859548429@qq.com");
logger.error("解析百度风云榜时出现解析错误", e); logger.error("解析百度风云榜时出现解析错误", e);
continue;
}
} }
}catch (Exception e) { });
logger.error("解析百度风云榜时出现解析错误,数据不是json结构",e.fillInStackTrace());
// SendMailWeibo.sendMail("百度风云榜采集出现问题", "859548429@qq.com");
return null;
} }
}else{
// SendMailWeibo.sendMail("百度风云榜采集出现问题", "859548429@qq.com");
logger.info("解析百度风云榜时出现解析错误,页面结构有问题");
}
break;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析百度风云榜时出现解析错误,页面结构有问题", e); logger.error("解析百度风云榜时出现解析错误,数据不是json结构", e);
} }
}
logger.info("次轮采集的数据量为:", list.size());
return list; return list;
} }
} }
\ No newline at end of file
...@@ -4,6 +4,7 @@ import java.io.IOException; ...@@ -4,6 +4,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -24,7 +25,7 @@ import com.zhiwei.searchhotcrawler.bean.DouyinHotSearch; ...@@ -24,7 +25,7 @@ import com.zhiwei.searchhotcrawler.bean.DouyinHotSearch;
public class DouyinHotSearchCrawler { public class DouyinHotSearchCrawler {
// Logger must be keyed to this class; it was previously created for ZhihuHotSearchCrawler,
// which attributed Douyin log lines to the wrong crawler.
private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: getMobileDouyinHotList * @Title: getMobileDouyinHotList
...@@ -36,53 +37,34 @@ public class DouyinHotSearchCrawler { ...@@ -36,53 +37,34 @@ public class DouyinHotSearchCrawler {
public static List<DouyinHotSearch> getMobileDouyinHotList(){ public static List<DouyinHotSearch> getMobileDouyinHotList(){
List<DouyinHotSearch> list = null; List<DouyinHotSearch> list = null;
String url = "https://api.amemv.com/aweme/v1/hot/search/list/"; String url = "https://api.amemv.com/aweme/v1/hot/search/list/";
// Map<String,String> headerMap = HeaderTool.getCommonHead();
// headerMap.put("Host", "api.zhihu.com");
// headerMap.put("Referer", url);
// headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36");
// headerMap.put("X-UDID", "AFAC3hv3vgyPTt9ZmNmqTm0yv_8NKY3S3z8=");
// headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20");
for(int j=0;j<3;j++){
try { try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
if(htmlBody != null){ if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("word_list")){
if(htmlBody.contains("word_list")){
list = new ArrayList<DouyinHotSearch>(); list = new ArrayList<DouyinHotSearch>();
JSONObject data = JSONObject.parseObject(htmlBody); JSONObject data = JSONObject.parseObject(htmlBody);
JSONArray word_list = data.getJSONObject("data").getJSONArray("word_list"); JSONArray wordList = data.getJSONObject("data").getJSONArray("word_list");
String positionStr = null; String positionStr = null;
String word = null; String word = null;
String hot_value_str = null; String hotValueStr = null;
for (int i = 0; i < word_list.size(); i++) { for (int i = 0; i < wordList.size(); i++) {
JSONObject wl = word_list.getJSONObject(i); JSONObject wl = wordList.getJSONObject(i);
//获取排名 //获取排名
positionStr = wl.getString("position"); positionStr = wl.getString("position");
Integer position = null; Integer position = null;
position = Integer.valueOf(positionStr); position = Integer.valueOf(positionStr);
//获取关键词 //获取关键词
word = wl.getString("word"); word = wl.getString("word");
//获取热度值 //获取热度值
hot_value_str =wl.getString("hot_value"); hotValueStr =wl.getString("hot_value");
Integer hot_value = null; Integer hotValue = null;
hot_value = Integer.valueOf(hot_value_str); hotValue = Integer.valueOf(hotValueStr);
// logger.info("热度为:::{}", hot_value);
logger.info("热度为:::{}", hot_value); DouyinHotSearch douyin = new DouyinHotSearch(position, word, hotValue);
DouyinHotSearch douyin = new DouyinHotSearch(position, word, hot_value);
list.add(douyin); list.add(douyin);
} }
break;
}else{
System.out.println("---------------");
}
} }
} catch (IOException e) { } catch (IOException e) {
logger.debug("获取抖音热搜榜时出现问题:{}", e.fillInStackTrace()); logger.debug("获取抖音热搜榜时出现问题:{}", e);
continue;
}
} }
return list; return list;
} }
......
...@@ -25,7 +25,7 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -25,7 +25,7 @@ package com.zhiwei.searchhotcrawler.crawler;
public class SougoHotSearchCrawler { public class SougoHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(SougoHotSearchCrawler.class); private static Logger logger = LoggerFactory.getLogger(SougoHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: SougoHotSearchTest * @Title: SougoHotSearchTest
* @author hero * @author hero
......
...@@ -30,7 +30,7 @@ import com.zhiwei.tools.tools.URLCodeUtil; ...@@ -30,7 +30,7 @@ import com.zhiwei.tools.tools.URLCodeUtil;
public class WeiboHotSearchCrawler { public class WeiboHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchCrawler.class); private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: weiboHotSearchTest * @Title: weiboHotSearchTest
* @author hero * @author hero
......
...@@ -26,7 +26,7 @@ import com.zhiwei.tools.tools.URLCodeUtil; ...@@ -26,7 +26,7 @@ import com.zhiwei.tools.tools.URLCodeUtil;
public class ZhihuHotSearchCrawler { public class ZhihuHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchCrawler.class); private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: getZhihuHotList * @Title: getZhihuHotList
* @author hero * @author hero
...@@ -45,32 +45,28 @@ public class ZhihuHotSearchCrawler { ...@@ -45,32 +45,28 @@ public class ZhihuHotSearchCrawler {
headerMap.put("accept", "application/json, text/plain, */*"); headerMap.put("accept", "application/json, text/plain, */*");
headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20"); headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20");
headerMap.put("Referer", rerferer); headerMap.put("Referer", rerferer);
for(int j=0;j<3;j++){
try { try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
if(htmlBody != null){ if(htmlBody != null){
if(htmlBody.contains("words")){ if(htmlBody.contains("words")){
list = new ArrayList<ZhihuHotSearch>(); list = new ArrayList<>();
JSONObject top_search = JSONObject.parseObject(htmlBody); JSONObject topSearch = JSONObject.parseObject(htmlBody);
JSONArray words = top_search.getJSONObject("top_search").getJSONArray("words"); JSONArray words = topSearch.getJSONObject("top_search").getJSONArray("words");
String link = null; String link = null;
String display_query = null; String displayQuery = null;
String query = null; String query = null;
for (int i = 0; i < words.size(); i++) { for (int i = 0; i < words.size(); i++) {
JSONObject word = words.getJSONObject(i); JSONObject word = words.getJSONObject(i);
query = word.getString("query"); query = word.getString("query");
display_query = word.getString("display_query"); displayQuery = word.getString("display_query");
link = "https://www.zhihu.com/search?q="+URLCodeUtil.getURLEncode(query, "utf-8")+"&utm_content=search_hot&utm_medium=organic&utm_source=zhihu&type=content"; link = "https://www.zhihu.com/search?q="+URLCodeUtil.getURLEncode(query, "utf-8")+"&utm_content=search_hot&utm_medium=organic&utm_source=zhihu&type=content";
ZhihuHotSearch zhihu = new ZhihuHotSearch(link, query, display_query,new Date()); ZhihuHotSearch zhihu = new ZhihuHotSearch(link, query, displayQuery,new Date());
list.add(zhihu); list.add(zhihu);
} }
break;
} }
} }
} catch (IOException e) { } catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace()); logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace());
continue;
}
} }
return list; return list;
} }
...@@ -120,7 +116,6 @@ public class ZhihuHotSearchCrawler { ...@@ -120,7 +116,6 @@ public class ZhihuHotSearchCrawler {
} }
} catch (IOException e) { } catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace()); logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace());
continue;
} }
} }
return list; return list;
......
package com.zhiwei.searchhotcrawler.dao; package com.zhiwei.searchhotcrawler.dao;
import java.util.ArrayList; import java.util.Calendar;
import java.util.Date;
import java.util.List; import java.util.List;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor; import com.mongodb.DBCursor;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch; import com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch;
import com.zhiwei.searchhotcrawler.cache.CacheManager;
import com.zhiwei.searchhotcrawler.config.Config; import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate; import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -18,15 +16,13 @@ public class BaiduHotSearchDAO extends MongoDBTemplate{ ...@@ -18,15 +16,13 @@ public class BaiduHotSearchDAO extends MongoDBTemplate{
public BaiduHotSearchDAO() { public BaiduHotSearchDAO() {
super(); super();
super.setDbName(Config.dbName); super.setDbName(Config.dbName);
// Date date = new Date(); String collWeiboName;
// String time = TimeParse.dateFormartString(date, "yyyy"); if(Calendar.MONTH<6){
// if(Calendar.MONTH<6){ collWeiboName = Config.collBaiduName + Calendar.YEAR +"_01";
// collWeiboName = Config.collWeiboName + time+"_01"; }else{
// }else{ collWeiboName = Config.collBaiduName + Calendar.YEAR +"_06";
// collWeiboName = Config.collWeiboName + time+"_06"; }
// } super.setCollName(collWeiboName);
// System.out.println("collWeiboName========="+collWeiboName);
super.setCollName(Config.collBaiduName);
} }
/** /**
......
package com.zhiwei.searchhotcrawler.dao; package com.zhiwei.searchhotcrawler.dao;
import java.util.Calendar;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor; import com.mongodb.DBCursor;
import com.mongodb.DBObject; import com.mongodb.DBObject;
...@@ -14,7 +16,13 @@ public class DouyinHotSearchDAO extends MongoDBTemplate{ ...@@ -14,7 +16,13 @@ public class DouyinHotSearchDAO extends MongoDBTemplate{
/**
 * Binds this DAO to the configured database and to the Douyin hot-search collection of
 * the current half-year: {@code <collDouyinName><year>_01} for January-June and
 * {@code <collDouyinName><year>_06} for July-December.
 *
 * <p>The name is computed once, at construction time.
 */
public DouyinHotSearchDAO() {
    super();
    super.setDbName(Config.dbName);
    // Calendar.YEAR and Calendar.MONTH are field indices (constants 1 and 2), not the
    // current date. Using them directly always produced "<base>1_01", so the real
    // values are read from a Calendar instance.
    Calendar now = Calendar.getInstance();
    // Calendar months are zero-based: values below JULY (6) are January..June.
    String halfSuffix = now.get(Calendar.MONTH) < Calendar.JULY ? "_01" : "_06";
    super.setCollName(Config.collDouyinName + now.get(Calendar.YEAR) + halfSuffix);
}
......
...@@ -12,14 +12,6 @@ public class SougoHotSearchDAO extends MongoDBTemplate{ ...@@ -12,14 +12,6 @@ public class SougoHotSearchDAO extends MongoDBTemplate{
public SougoHotSearchDAO() { public SougoHotSearchDAO() {
super(); super();
super.setDbName(Config.dbName); super.setDbName(Config.dbName);
// Date date = new Date();
// String time = TimeParse.dateFormartString(date, "yyyy");
// if(Calendar.MONTH<6){
// collWeiboName = Config.collWeiboName + time+"_01";
// }else{
// collWeiboName = Config.collWeiboName + time+"_06";
// }
// System.out.println("collWeiboName========="+collWeiboName);
super.setCollName(Config.collSougoName); super.setCollName(Config.collSougoName);
} }
...@@ -38,7 +30,6 @@ public class SougoHotSearchDAO extends MongoDBTemplate{ ...@@ -38,7 +30,6 @@ public class SougoHotSearchDAO extends MongoDBTemplate{
break; break;
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
continue;
} }
} }
} }
......
...@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.dao; ...@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.dao;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
...@@ -19,15 +20,13 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{ ...@@ -19,15 +20,13 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
public WeiboHotSearchDAO() { public WeiboHotSearchDAO() {
super(); super();
super.setDbName(Config.dbName); super.setDbName(Config.dbName);
// Date date = new Date(); String collWeiboName;
// String time = TimeParse.dateFormartString(date, "yyyy"); if(Calendar.MONTH<6){
// if(Calendar.MONTH<6){ collWeiboName = Config.collWeiboName + Calendar.YEAR +"_01";
// collWeiboName = Config.collWeiboName + time+"_01"; }else{
// }else{ collWeiboName = Config.collWeiboName + Calendar.YEAR +"_06";
// collWeiboName = Config.collWeiboName + time+"_06"; }
// } super.setCollName(collWeiboName);
// System.out.println("collWeiboName========="+collWeiboName);
super.setCollName(Config.collWeiboName);
} }
/** /**
......
...@@ -21,14 +21,14 @@ public class HotSearchRun { ...@@ -21,14 +21,14 @@ public class HotSearchRun {
private ScheduledExecutorService scheduExec; private ScheduledExecutorService scheduExec;
public HotSearchRun() { public HotSearchRun() {
this.scheduExec = Executors.newScheduledThreadPool(2); this.scheduExec = Executors.newScheduledThreadPool(5);
} }
public void showTimer() { public void showTimer() {
scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES); scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 1 , TimeUnit.MINUTES); scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new BaiduHotSearchRun(), 0, 5 , TimeUnit.MINUTES); scheduExec.scheduleAtFixedRate(new BaiduHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new SougoHotSearchRun(), 0, 1 , TimeUnit.MINUTES); scheduExec.scheduleAtFixedRate(new SougoHotSearchRun(), 0, 5 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new DouyinHotSearchRun(), 0, 10 , TimeUnit.MINUTES); scheduExec.scheduleAtFixedRate(new DouyinHotSearchRun(), 0, 10 , TimeUnit.MINUTES);
} }
......
...@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer; ...@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Objects;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -22,22 +23,24 @@ public class BaiduHotSearchRun extends Thread{ ...@@ -22,22 +23,24 @@ public class BaiduHotSearchRun extends Thread{
public void run() { public void run() {
logger.info("百度风云榜采集开始........"); logger.info("百度风云榜采集开始........");
List<BaiDuHotSearch> list = BaiDuHotSearchCrawler.baiduHotSearch(); List<BaiDuHotSearch> list = BaiDuHotSearchCrawler.baiduHotSearch();
logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<DBObject>(); List<DBObject> saveDataList = new ArrayList<>();
for(BaiDuHotSearch baiduHotSearch : list){ if(Objects.nonNull(list) && !list.isEmpty()) {
list.forEach(baiduHotSearch ->{
int changeCount = baiduHotSearchDAO.getChangeCount(baiduHotSearch); int changeCount = baiduHotSearchDAO.getChangeCount(baiduHotSearch);
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
doc.put("_id", baiduHotSearch.getId()); doc.put("_id", baiduHotSearch.getId());
doc.put("kw", baiduHotSearch.getKw()); doc.put("name", baiduHotSearch.getKw());
doc.put("everurl", baiduHotSearch.getEverurl()); doc.put("url", baiduHotSearch.getEverurl());
doc.put("count", baiduHotSearch.getCount()); doc.put("count", baiduHotSearch.getCount());
doc.put("day", baiduHotSearch.getDay()); doc.put("day", baiduHotSearch.getDay());
doc.put("time", baiduHotSearch.getTime()); doc.put("time", baiduHotSearch.getTime());
doc.put("changeCount", changeCount); doc.put("changeCount", changeCount);
doc.put("rank", baiduHotSearch.getRank()); doc.put("rank", baiduHotSearch.getRank());
data.add(doc); saveDataList.add(doc);
});
} }
baiduHotSearchDAO.addBaiduSearch(data); baiduHotSearchDAO.addBaiduSearch(saveDataList);
logger.info("百度风云榜采集结束........"); logger.info("百度风云榜采集结束........");
} }
......
...@@ -28,10 +28,11 @@ public class DouyinHotSearchRun extends Thread{ ...@@ -28,10 +28,11 @@ public class DouyinHotSearchRun extends Thread{
int changeCount = douyinHotSearchDAO.getChangeCount(douyinHotSearch); int changeCount = douyinHotSearchDAO.getChangeCount(douyinHotSearch);
DBObject douyin = new BasicDBObject(); DBObject douyin = new BasicDBObject();
douyin.put("_id", douyinHotSearch.getId()); douyin.put("_id", douyinHotSearch.getId());
douyin.put("word", douyinHotSearch.getWord()); douyin.put("name", douyinHotSearch.getWord());
douyin.put("position", douyinHotSearch.getPosition()); douyin.put("rank", douyinHotSearch.getPosition());
douyin.put("hot_value", douyinHotSearch.getHot_value()); douyin.put("count", douyinHotSearch.getHot_value());
// douyin.put("url", douyinHotSearch.getUrl()); // douyin.put("url", douyinHotSearch.getUrl());
douyin.put("day", douyinHotSearch.getDay());
douyin.put("time", douyinHotSearch.getTime()); douyin.put("time", douyinHotSearch.getTime());
douyin.put("changeCount", changeCount); douyin.put("changeCount", changeCount);
data.add(douyin); data.add(douyin);
......
...@@ -23,17 +23,14 @@ public class SougoHotSearchRun extends Thread { ...@@ -23,17 +23,14 @@ public class SougoHotSearchRun extends Thread {
logger.info("搜狗微信采集开始........"); logger.info("搜狗微信采集开始........");
List<SougoHotSearch> list = SougoHotSearchCrawler.sougoHotSearch(); List<SougoHotSearch> list = SougoHotSearchCrawler.sougoHotSearch();
logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<DBObject>(); List<DBObject> data = new ArrayList<>();
for(SougoHotSearch sougoHotSearch : list){ for(SougoHotSearch sougoHotSearch : list){
// int changeCount = baiduHotSearchDAO.getChangeCount(sougoHotSearch);
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
doc.put("_id", sougoHotSearch.getId()); doc.put("_id", sougoHotSearch.getId());
doc.put("kw", sougoHotSearch.getKw()); doc.put("name", sougoHotSearch.getKw());
doc.put("everurl", sougoHotSearch.getEverurl()); doc.put("url", sougoHotSearch.getEverurl());
// doc.put("count", baiduHotSearch.getCount());
doc.put("day", sougoHotSearch.getDay()); doc.put("day", sougoHotSearch.getDay());
doc.put("time", sougoHotSearch.getTime()); doc.put("time", sougoHotSearch.getTime());
// doc.put("changeCount", changeCount);
doc.put("rank", sougoHotSearch.getRank()); doc.put("rank", sougoHotSearch.getRank());
data.add(doc); data.add(doc);
} }
......
...@@ -23,7 +23,7 @@ public class WeiboHotSearchRun extends Thread{ ...@@ -23,7 +23,7 @@ public class WeiboHotSearchRun extends Thread{
logger.info("微博话题采集开始........"); logger.info("微博话题采集开始........");
List<WeiboHotSearch> list = WeiboHotSearchCrawler.weiboHotSearch(); List<WeiboHotSearch> list = WeiboHotSearchCrawler.weiboHotSearch();
logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<DBObject>(); List<DBObject> data = new ArrayList<>();
for(WeiboHotSearch weiboHotSearch : list){ for(WeiboHotSearch weiboHotSearch : list){
int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch); int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch);
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
......
...@@ -9,17 +9,18 @@ import org.apache.commons.lang3.StringUtils; ...@@ -9,17 +9,18 @@ import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.dubbo.rpc.protocol.rest.support.ContentType;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.utils.RequestUtils.HttpMethod;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import okhttp3.MediaType;
import okhttp3.RequestBody;
public class WechatCodeUtil { public class WechatCodeUtil {
private static Logger logger = LoggerFactory.getLogger(WechatCodeUtil.class); private static Logger logger = LoggerFactory.getLogger(WechatCodeUtil.class);
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: getToken * @Title: getToken
* @author hero * @author hero
...@@ -65,7 +66,8 @@ public class WechatCodeUtil { ...@@ -65,7 +66,8 @@ public class WechatCodeUtil {
int msgid = 0; int msgid = 0;
String url = WechatConstant.WECHAT_TEMPLET_SEND_URL.replace("ACCESS_TOKEN", getToken()); String url = WechatConstant.WECHAT_TEMPLET_SEND_URL.replace("ACCESS_TOKEN", getToken());
try { try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapPost(url,"application/json",templateJson.toJSONString())).body().string(); RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), templateJson.toJSONString());
String htmlBody = httpBoot.syncCall(RequestUtils.wrapPost(url,requestBody)).body().string();
if(StringUtils.isNotBlank(htmlBody)) { if(StringUtils.isNotBlank(htmlBody)) {
JSONObject jsonObject = JSONObject.parseObject(htmlBody); JSONObject jsonObject = JSONObject.parseObject(htmlBody);
if (null != jsonObject) { if (null != jsonObject) {
...@@ -103,8 +105,8 @@ public class WechatCodeUtil { ...@@ -103,8 +105,8 @@ public class WechatCodeUtil {
JSONObject postData = new JSONObject(); JSONObject postData = new JSONObject();
postData.put("tagid", getGroupIp(groupName)); postData.put("tagid", getGroupIp(groupName));
postData.put("next_openid", ""); postData.put("next_openid", "");
RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), postData.toJSONString());
String htmlBody = httpBoot.syncCall(RequestUtils.wrapPost(url,"application/json",postData.toJSONString())).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapPost(url,requestBody)).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) { if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
JSONObject jsonObject = JSONObject.parseObject(htmlBody); JSONObject jsonObject = JSONObject.parseObject(htmlBody);
if (null != jsonObject) { if (null != jsonObject) {
...@@ -136,7 +138,8 @@ public class WechatCodeUtil { ...@@ -136,7 +138,8 @@ public class WechatCodeUtil {
JSONObject postData = new JSONObject(); JSONObject postData = new JSONObject();
postData.put("tagid", groupId); postData.put("tagid", groupId);
postData.put("next_openid", ""); postData.put("next_openid", "");
String htmlBody = httpBoot.syncCall(RequestUtils.wrapPost(url,"application/json",postData.toJSONString())).body().string(); RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), postData.toJSONString());
String htmlBody = httpBoot.syncCall(RequestUtils.wrapPost(url,requestBody)).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) { if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
JSONObject jsonObject = JSONObject.parseObject(htmlBody); JSONObject jsonObject = JSONObject.parseObject(htmlBody);
if (null != jsonObject) { if (null != jsonObject) {
......
#mongoIp=202.107.192.94 #mongoIp=202.107.192.94
mongoIp=192.168.0.247 mongoIp=192.168.0.81
mongoPort=27017 mongoPort=27017
db.username=zzwno db.username=zzwno
db.paasword=zzwno1q2w3e4r db.paasword=zzwno1q2w3e4r
db.certifiedDB=oneDB db.certifiedDB=oneDB
dbName=NetWork dbName=NetWork
collWeiboName=weibo_hotsearch2018_10 collWeiboName=weibo_hotsearch
collZhihuName=zhihu_hotsearch2018_10 collZhihuName=zhihu_hotsearch
collWechatUserName=wechat_user collWechatUserName=wechat_user
collBaiduName=baidu_hotsearch2019_07 collBaiduName=baidu_hotsearch
collSougoName=sougo_hotsearch2019_07 collSougoName=sougo_hotsearch
collDouyinName=douyin_hotsearch2019_07 collDouyinName=douyin_hotsearch
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment