Commit 9ce337bb by zhiwei

添加总热搜表缓存+mongo修改为多节点读取+微博话题类型错误修复

parent dd95f27b
package com.zhiwei.searchhotcrawler.bean;
import lombok.Data;
import lombok.ToString;
import java.util.Date;
@ToString
@Data
public class HotSearchCache {
/**
* 主键
*/
private String id;
/**
* 消息链接
*/
private String url;
/**
* 热搜关键词,且为消息主键
*/
private String name;
/**
* 热搜或话题导语
*/
private String topicLead;
/**
* 最高热搜值
*/
private Integer highestCount;
/**
* 最新热搜热度值
*/
private Integer lastCount;
/**
* 状态(true 为热搜; false为时时上升)
*/
private Boolean hot;
/**
* 话题开始时间
*/
private Date startTime;
/**
* 话题结束时间
*/
private Date endTime;
/**
* 最高排名
*/
private Integer highestRank;
/**
* 最新排名
*/
private Integer lastRank;
/**
* 热搜分类
*/
private String type;
/**
* 热搜持续时长
*/
private Integer duration;
public HotSearchCache(String url, String name, String topicLead, Integer highestCount, Integer lastCount, Boolean hot,
Date startTime, Date endTime, Integer highestRank, Integer lastRank, String type, Integer duration){
this.id = name + "_" + type;
this.url = url;
this.name = name;
this.topicLead = topicLead;
this.hot = hot;
this.highestCount = highestCount;
this.lastCount = lastCount;
this.hot = hot;
this.startTime = startTime;
this.endTime = endTime;
this.highestRank = highestRank;
this.lastRank = lastRank;
this.type = type;
this.duration = duration;
}
}
...@@ -40,7 +40,7 @@ public class HotSearchList implements Serializable{ ...@@ -40,7 +40,7 @@ public class HotSearchList implements Serializable{
private String topicLead; private String topicLead;
/** /**
* 时时热搜量 * 热搜量
*/ */
private Integer count; private Integer count;
...@@ -60,11 +60,6 @@ public class HotSearchList implements Serializable{ ...@@ -60,11 +60,6 @@ public class HotSearchList implements Serializable{
private Date time; private Date time;
/** /**
* 据上分钟变化量
*/
private Integer changeCount;
/**
* 排名 * 排名
*/ */
private Integer rank; private Integer rank;
......
...@@ -3,7 +3,7 @@ package com.zhiwei.searchhotcrawler.config; ...@@ -3,7 +3,7 @@ package com.zhiwei.searchhotcrawler.config;
import java.io.InputStream; import java.io.InputStream;
import java.util.Properties; import java.util.Properties;
public class Config { public class DBConfig {
static { static {
Properties conf = null; Properties conf = null;
try { try {
...@@ -12,29 +12,22 @@ public class Config { ...@@ -12,29 +12,22 @@ public class Config {
conf = new Properties(); conf = new Properties();
conf.load(is); conf.load(is);
is.close(); is.close();
mongoIp = conf.getProperty("mongoIp"); mongoUri = conf.getProperty("mongoUri");
mongoPort = Integer.valueOf(conf.getProperty("mongoPort"));
userName = conf.getProperty("db.username");
userPwd = conf.getProperty("db.paasword");
authDB = conf.getProperty("db.certifiedDB");
dbName = conf.getProperty("dbName"); dbName = conf.getProperty("dbName");
searchCollName = conf.getProperty("searchCollName"); searchCollName = conf.getProperty("searchCollName");
searchCacheCollName = conf.getProperty("searchCacheCollName");
topicCollName = conf.getProperty("topicCollName"); topicCollName = conf.getProperty("topicCollName");
collWechatUserName = conf.getProperty("collWechatUserName"); collWechatUserName = conf.getProperty("collWechatUserName");
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
} }
public static String mongoIp; public static String mongoUri;
public static int mongoPort;
public static String userName;
public static String userPwd;
public static String authDB;
public static String dbName; public static String dbName;
public static String searchCollName; public static String searchCollName;
public static String searchCacheCollName;
public static String topicCollName; public static String topicCollName;
public static String collWechatUserName; public static String collWechatUserName;
} }
package com.zhiwei.searchhotcrawler.crawler; package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.*;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
...@@ -107,6 +103,8 @@ public class WeiboHotSearchCrawler { ...@@ -107,6 +103,8 @@ public class WeiboHotSearchCrawler {
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<HotSearchList> weiboHotSearchByPhone(){ public static List<HotSearchList> weiboHotSearchByPhone(){
for(int count =0; count<=5; count++){
String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"; String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
Map<String,String> headerMap = new HashMap<>(); Map<String,String> headerMap = new HashMap<>();
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"); headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
...@@ -123,9 +121,10 @@ public class WeiboHotSearchCrawler { ...@@ -123,9 +121,10 @@ public class WeiboHotSearchCrawler {
try { try {
JSONObject card = cards.getJSONObject(i); JSONObject card = cards.getJSONObject(i);
JSONArray cardGroup = card.getJSONArray("card_group"); JSONArray cardGroup = card.getJSONArray("card_group");
if(Objects.nonNull(cardGroup) && !cardGroup.isEmpty()){
String title = card.getString("title"); String title = card.getString("title");
boolean hot = true; boolean hot = true;
if(title.contains("实时上升热点")){ if(Objects.nonNull(title) && title.contains("实时上升热点")){
hot = false; hot = false;
rank = 50; rank = 50;
} }
...@@ -142,6 +141,9 @@ public class WeiboHotSearchCrawler { ...@@ -142,6 +141,9 @@ public class WeiboHotSearchCrawler {
result.add(hotSearch); result.add(hotSearch);
rank++; rank++;
} }
}else{
log.info("card 数据结构为:{}", card);
}
} catch (Exception e) { } catch (Exception e) {
log.error("解析微博时时热搜时出现解析错误",e); log.error("解析微博时时热搜时出现解析错误",e);
continue; continue;
...@@ -150,14 +152,13 @@ public class WeiboHotSearchCrawler { ...@@ -150,14 +152,13 @@ public class WeiboHotSearchCrawler {
return result; return result;
} catch (Exception e) { } catch (Exception e) {
log.error("解析微博时时热搜时出现解析错误,数据不是json结构",e); log.error("解析微博时时热搜时出现解析错误,数据不是json结构",e);
return Collections.emptyList();
} }
}else{ }else{
log.info("解析微博时时热搜时出现解析错误,页面结构有问题"); log.info("解析微博时时热搜时出现解析错误,页面结构有问题");
} }
} catch (IOException e1) { } catch (IOException e1) {
log.error("解析微博时时热搜时出现连接失败",e1); log.error("解析微博时时热搜时出现连接失败",e1);
return Collections.emptyList(); }
} }
return Collections.emptyList(); return Collections.emptyList();
} }
......
...@@ -61,7 +61,7 @@ public class WeiboSuperTopicCrawler { ...@@ -61,7 +61,7 @@ public class WeiboSuperTopicCrawler {
//重试三次 //重试三次
for(int retryTimes = 1; retryTimes<=3; retryTimes++) { for(int retryTimes = 1; retryTimes<=3; retryTimes++) {
try { try {
System.out.println("pageUrl=========="+pageUrl); // System.out.println("pageUrl=========="+pageUrl);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl, headMap), ProxyHolder.NAT_HEAVY_PROXY).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl, headMap), ProxyHolder.NAT_HEAVY_PROXY).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("desc1")) { if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("desc1")) {
topicList.addAll(parseTopicRankHtml(page, htmlBody, type)); topicList.addAll(parseTopicRankHtml(page, htmlBody, type));
......
...@@ -134,7 +134,7 @@ public class WeiboTopicCrawler { ...@@ -134,7 +134,7 @@ public class WeiboTopicCrawler {
//重试三次 //重试三次
for(int retryTimes = 1; retryTimes<=5; retryTimes++) { for(int retryTimes = 1; retryTimes<=5; retryTimes++) {
try { try {
log.info("pageUrl::{}", pageUrl); // log.info("pageUrl::{}", pageUrl);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl), ProxyHolder.NAT_HEAVY_PROXY).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl), ProxyHolder.NAT_HEAVY_PROXY).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("top_mark_text")) { if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("top_mark_text")) {
topicList.addAll(parseTopicHtml(htmlBody)); topicList.addAll(parseTopicHtml(htmlBody));
...@@ -202,7 +202,7 @@ public class WeiboTopicCrawler { ...@@ -202,7 +202,7 @@ public class WeiboTopicCrawler {
} }
return topicList; return topicList;
}else{ }else{
log.info("html:{}",htmlBody); // log.info("html:{}",htmlBody);
} }
} catch (Exception e) { } catch (Exception e) {
log.error("解析榜单列表页面时出现错误,错误为:{}", e); log.error("解析榜单列表页面时出现错误,错误为:{}", e);
......
package com.zhiwei.searchhotcrawler.dao;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.client.MongoCollection;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.config.DBConfig;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import lombok.extern.log4j.Log4j2;
import org.bson.Document;
import java.util.Date;
import java.util.List;
import java.util.Objects;
/**
* 热搜基础结果表
*/
@Log4j2
public class HotSearchCacheDAO {
private static MongoCollection collection = MongoDBTemplate.getCollection(DBConfig.dbName, DBConfig.searchCacheCollName);
/**
* 添加及更新相应数据表中的数据
* @param document
*/
public void addAndUpdateData(Document document){
try {
String name = document.getString("name");
String type = document.getString("type");
int lastRank = document.getInteger("rank")!=null?document.getInteger("rank"): -1;
int lastCount = document.getInteger("count")!=null?document.getInteger("count"): -1;
Date startTime = document.getDate("time");
Date endTime = new Date(startTime.getTime() + (60 * 1000));
String topicLead = document.getString("topic_lead")!=null?document.getString("topic_lead"):null;
boolean hot = document.getBoolean("hot")!=null?document.getBoolean("hot"):true;
String url = document.getString("url")!=null?document.getString("url"):null;
String id = name + "_" + type;
Document query = new Document("_id", id);
Document nowDoc = (Document) collection.find(query).first();
if (Objects.nonNull(nowDoc)) {
int highestRank = nowDoc.getInteger("highestRank");
int highestCount = nowDoc.getInteger("highestCount");
//判断最大热度值
if (lastCount > highestCount) {
highestCount = lastCount;
}
//判断最高排名
if (lastRank < highestRank) {
highestRank = lastRank;
}
//计算热搜时长
int duration = nowDoc.getInteger("duration");
int durationNow = getDuration(type, duration);
endTime = new Date(new Date().getTime() + (60 * 1000));
//更新相应信息
nowDoc.put("endTime", endTime);
nowDoc.put("lastRank", lastRank);
nowDoc.put("lastCount", lastCount);
nowDoc.put("highestRank", highestRank);
nowDoc.put("highestCount", highestCount);
nowDoc.put("duration", durationNow);
collection.replaceOne(query, nowDoc);
} else {
nowDoc = new Document();
int durationNow = getDuration(type, 0);
nowDoc.put("_id", id);
nowDoc.put("url", url);
nowDoc.put("name", name);
nowDoc.put("hot", hot);
nowDoc.put("topicLead", topicLead);
nowDoc.put("type", type);
nowDoc.put("lastRank", lastRank);
nowDoc.put("highestRank", lastRank);
nowDoc.put("lastCount", lastCount);
nowDoc.put("highestCount", lastCount);
nowDoc.put("startTime", startTime);
nowDoc.put("endTime", endTime);
nowDoc.put("duration", durationNow);
collection.insertOne(nowDoc);
}
}catch (Exception e){
log.info("数据存储时出错:{}", e);
}
}
/**
* 计算热搜时长
* @param type
* @param duration
* @return
*/
private int getDuration(String type, int duration){
switch (type){
case "微博热搜" :
duration = duration + 1;
break;
case "百度热搜" :
duration = duration + 5;
break;
case "知乎热搜" :
duration = duration + 10;
break;
case "抖音热搜" :
duration = duration + 10;
break;
case "搜狗微信热搜" :
duration = duration + 5;
break;
case "微博话题" :
duration = duration + 3;
break;
default :
duration = duration + 1;
}
return duration;
}
}
...@@ -6,7 +6,12 @@ import java.util.Date; ...@@ -6,7 +6,12 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import com.zhiwei.searchhotcrawler.config.Config; import com.mongodb.client.ListIndexesIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.IndexOptions;
import com.zhiwei.searchhotcrawler.config.DBConfig;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
...@@ -14,138 +19,50 @@ import com.mongodb.DBCursor; ...@@ -14,138 +19,50 @@ import com.mongodb.DBCursor;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.cache.CacheManager; import com.zhiwei.searchhotcrawler.cache.CacheManager;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import org.bson.BsonDocument;
import org.bson.Document;
import org.bson.conversions.Bson;
@Log4j2 @Log4j2
public class HotSearchListDAO extends MongoDBTemplate{ public class HotSearchListDAO{
public static MongoDatabase mongoDatabase = MongoDBTemplate.getDB(DBConfig.dbName);
public static MongoCollection mongoCollection;
public HotSearchListDAO() { public HotSearchListDAO() {
super();
super.setDbName(Config.dbName);
String time = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd"); String time = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
String year = time.substring(0,4); String year = time.substring(0,4);
String month = time.substring(5,7); String month = time.substring(5,7);
String collName = Config.searchCollName + year + "_" + month; String collName = DBConfig.searchCollName + year + "_" + month;
super.setCollName(collName); mongoCollection = mongoDatabase.getCollection(collName);
//给数据表创建索引 //给数据表创建索引
createIndex(); MongoDBTemplate.createIndex(DBConfig.dbName, collName);
} }
/**
* 初次创建表及创建相应的索引
*/
private void createIndex(){
List<DBObject> indexList = this.getReadColl().getIndexInfo();
if(Objects.isNull(indexList) && indexList.isEmpty()){
DBObject countIndexDoc = new BasicDBObject();
countIndexDoc.put("count", -1);
DBObject timeIndexDoc = new BasicDBObject();
timeIndexDoc.put("time", -1);
DBObject rankIndexDoc = new BasicDBObject();
rankIndexDoc.put("rank", -1);
DBObject nameIndexDoc = new BasicDBObject();
nameIndexDoc.put("name", -1);
DBObject typeIndexDoc = new BasicDBObject();
typeIndexDoc.put("type", -1);
try {
super.getReadColl().createIndex(countIndexDoc, new BasicDBObject("name", "count_desc"));
super.getReadColl().createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
super.getReadColl().createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
super.getReadColl().createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
super.getReadColl().createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
} catch (Exception e) {
e.printStackTrace();
}
}
}
/** /**
* 添加数据入库 * 添加数据入库
* @param list * @param list
*/ */
public void addHotSearchList(List<DBObject> list){ public void addHotSearchList(List<Document> list){
try {
this.getReadColl().insert(list);
} catch (Exception e) {
log.error("存储数据时出错,错误为:{}", e);
}
}
public void addHotSearch(DBObject doc){
try {
this.getReadColl().insert(doc);
} catch (Exception e) {
log.error("存储数据时出错,错误为:{}", e);
}
}
/**
* 查询据上次变化量
* @Title: getChangeCount
* @author hero
* @param @param weiboHotSearch
* @param @return 设定文件
* @return int 返回类型
*/
public int getChangeCount(HotSearchList weiboHotSearch){
int result = 0;
DBObject query = new BasicDBObject();
query.put("name", weiboHotSearch.getName());
DBObject sort = new BasicDBObject();
sort.put("time", -1);
try { try {
DBCursor cur = this.getReadColl().find(query).sort(sort).limit(1); mongoCollection.insertMany(list);
while(cur.hasNext()){
DBObject doc = cur.next();
if(doc.get("count")!=null) {
result = weiboHotSearch.getCount() - Integer.valueOf(doc.get("count").toString());
break;
}
}
cur.close();
} catch (Exception e) { } catch (Exception e) {
log.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
return result;
} }
return result;
} }
public void addHotSearch(Document doc){
/**
* @Title: getWeiboHotOneHour
* @author hero
* @Description: 查询最近1小时内新增的微博热搜
* @param @return 设定文件
* @return List<DBObject> 返回类型
*/
public List<DBObject> getHotOneHour(String type){
List<DBObject> list = new ArrayList<>();
Date date = new Date((new Date().getTime()-60*60*1000));
DBObject query = new BasicDBObject();
query.put("time", new BasicDBObject("$gte", date));
query.put("changeCount", 0);
query.put("type", type);
try { try {
DBCursor cur = this.getReadColl().find(query); mongoCollection.insertOne(doc);
while(cur.hasNext()){
DBObject doc = cur.next();
String name = doc.get("name").toString();
if(CacheManager.getCacheByKey(name)==null){
CacheManager.putCache(name, doc, 48*60*60*1000);
list.add(doc);
}
}
cur.close();
} catch (Exception e) { } catch (Exception e) {
log.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
return list;
} }
} }
package com.zhiwei.searchhotcrawler.dao;
import java.util.Collections;
import java.util.List;
import com.zhiwei.searchhotcrawler.config.Config;
import lombok.extern.log4j.Log4j2;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
@Log4j2
public class WechatUserDao extends MongoDBTemplate{
public WechatUserDao() {
super();
super.setDbName(Config.dbName);
super.setCollName(Config.collWechatUserName);
}
/**
* 添加分组用户
* @param userlist
* @param groupName
* @param groupId
*/
public void addWechatUser(List<String> userlist, String groupName, Integer groupId){
for(int i=0; i<3; i++){
try {
DBObject doc = new BasicDBObject();
doc.put("_id", groupId+"-"+groupName);
doc.put("groupId", groupId);
doc.put("groupName", groupName);
doc.put("user", userlist);
this.getReadColl().save(doc);
break;
} catch (Exception e) {
log.error("存储数据时出错,错误为:{}", e);
}
}
}
/**
* 根据分组名称查询分组用户
* @param group
* @return
*/
@SuppressWarnings("unchecked")
public List<String> getWechatUserByGroup(String group){
try {
DBObject query = new BasicDBObject();
query.put("groupName", group);
DBObject doc = this.getReadColl().findOne(query);
if(doc != null){
return (List<String>)doc.get("user");
}
} catch (Exception e) {
log.error("存储数据时出错,错误为:{}", e);
}
return Collections.emptyList();
}
}
...@@ -3,76 +3,46 @@ package com.zhiwei.searchhotcrawler.dao; ...@@ -3,76 +3,46 @@ package com.zhiwei.searchhotcrawler.dao;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Objects; import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.zhiwei.searchhotcrawler.config.Config; import com.zhiwei.searchhotcrawler.config.DBConfig;
import lombok.extern.log4j.Log4j2;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate; import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import lombok.extern.log4j.Log4j2;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import org.bson.Document;
@Log4j2 @Log4j2
public class WeiboSuperTopicDAO extends MongoDBTemplate{ public class WeiboSuperTopicDAO{
public static MongoDatabase mongoDatabase = MongoDBTemplate.getDB(DBConfig.dbName);
public static MongoCollection mongoCollection;
public WeiboSuperTopicDAO() { public WeiboSuperTopicDAO() {
super();
super.setDbName(Config.dbName);
String time = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd"); String time = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
String year = time.substring(0,4); String year = time.substring(0,4);
String month = time.substring(5,7); String month = time.substring(5,7);
String collName = Config.topicCollName + year + "_" + month; String collName = DBConfig.searchCollName + year + "_" + month;
super.setCollName(collName); mongoCollection = mongoDatabase.getCollection(collName);
createIndex(); //给数据表创建索引
MongoDBTemplate.createIndex(DBConfig.dbName, collName);
} }
/**
* 初次创建表及创建相应的索引
*/
private void createIndex(){
List<DBObject> indexList = this.getReadColl().getIndexInfo();
if(Objects.isNull(indexList) && indexList.isEmpty()){
DBObject countIndexDoc = new BasicDBObject();
countIndexDoc.put("score_num", -1);
DBObject timeIndexDoc = new BasicDBObject();
timeIndexDoc.put("time", -1);
DBObject rankIndexDoc = new BasicDBObject();
rankIndexDoc.put("rank", -1);
DBObject nameIndexDoc = new BasicDBObject();
nameIndexDoc.put("name", -1);
DBObject typeIndexDoc = new BasicDBObject();
typeIndexDoc.put("type", -1);
try {
super.getReadColl().createIndex(countIndexDoc, new BasicDBObject("name", "score_desc"));
super.getReadColl().createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
super.getReadColl().createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
super.getReadColl().createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
super.getReadColl().createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
} catch (Exception e) {
e.printStackTrace();
}
}
}
/** /**
* 添加数据入库 * 添加数据入库
* @param list * @param list
*/ */
public void addTopicList(List<DBObject> list){ public void addTopicList(List<Document> list){
try { try {
this.getReadColl().insert(list); mongoCollection.insertMany(list);
} catch (Exception e) { } catch (Exception e) {
log.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
} }
public void addTopic(DBObject doc){ public void addTopic(Document doc){
try { try {
this.getReadColl().insert(doc); mongoCollection.insertOne(doc);
} catch (Exception e) { } catch (Exception e) {
log.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
......
package com.zhiwei.searchhotcrawler.dbtemplate; package com.zhiwei.searchhotcrawler.dbtemplate;
import java.util.Arrays;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.Mongo;
import com.mongodb.MongoClient; import com.mongodb.MongoClient;
import com.mongodb.MongoCredential; import com.mongodb.MongoClientOptions;
import com.mongodb.MongoException; import com.mongodb.MongoClientURI;
import com.mongodb.ServerAddress; import com.mongodb.WriteConcern;
import com.zhiwei.searchhotcrawler.config.Config; import com.mongodb.client.ListIndexesIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.IndexOptions;
import com.zhiwei.searchhotcrawler.config.DBConfig;
import lombok.extern.log4j.Log4j2;
import org.bson.Document;
import java.util.Objects;
@Log4j2
public enum MongoDBTemplate {
instance;
private MongoClient mongoClient;
static {
MongoClientOptions options = new MongoClientOptions.Builder()
.connectionsPerHost(300) //连接池设置为300个连接,默认为100
.connectTimeout(15000) //连接超时,推荐>3000毫秒
.maxWaitTime(5000)
.socketTimeout(0) // 套接字超时时间,0无限制
.threadsAllowedToBlockForConnectionMultiplier(5000) // 线程队列数,如果连接线程排满了队列就会抛出“Out of semaphores to get db”错误。
.writeConcern(WriteConcern.W1) //
.build();
log.info("MongoDBTemplate.static initializer : {}", DBConfig.mongoUri);
MongoClientURI mongoClientURI = new MongoClientURI(DBConfig.mongoUri);
instance.mongoClient = new MongoClient(mongoClientURI);
}
/** /**
* 获取DB实例 - 指定DB
* *
* @Description: MongoDB模板类 * @param databaseName
* @author Tou Tang * @return
* @date 2014-11-14 下午3:24:40
*/ */
public class MongoDBTemplate { public static MongoDatabase getDB(String databaseName) {
protected static Mongo reader; return instance.mongoClient.getDatabase(databaseName);
protected static Mongo writer;
protected String collName;
protected String dbName;
@SuppressWarnings("deprecation")
public MongoDBTemplate() {
try {
MongoCredential credential = MongoCredential.createCredential(Config.userName, Config.authDB, Config.userPwd.toCharArray());
ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);
if(reader==null)
{
reader = new MongoClient(address, Arrays.asList(credential));
// reader = new MongoClient(address);
}
if(writer==null)
{
writer = new MongoClient(address, Arrays.asList(credential));
// writer = new MongoClient(address);
}
} catch (MongoException e) {
e.printStackTrace();
}
}
public DBCollection getReadColl() {
@SuppressWarnings("deprecation")
final DB db = getReader().getDB(dbName);
final DBCollection coll = db.getCollection(collName);
return coll;
}
protected Mongo getReader() {
return reader;
} }
public DBCollection getWriteColl() { /**
@SuppressWarnings("deprecation") * 获取collection对象 - 指定Collection
final DB db = getWriter().getDB(dbName); *
final DBCollection coll = db.getCollection(collName); * @param databaseName
return coll; * @param collectionName
* @return
} */
public static MongoCollection<Document> getCollection(String databaseName, String collectionName) {
protected Mongo getWriter() { MongoDatabase db = instance.mongoClient.getDatabase(databaseName);
return writer; return db.getCollection(collectionName);
} }
protected void setCollName(final String collName) {
this.collName = collName;
}
protected void setDbName(final String dbName) { /**
this.dbName = dbName; * 创建索引
} * @param databaseName
* @param collectionName
@SuppressWarnings("static-access") */
protected void setReader(final Mongo reader) { public static void createIndex(String databaseName, String collectionName){
this.reader = reader; MongoDatabase db = instance.mongoClient.getDatabase(databaseName);
MongoCollection mongoCollection = db.getCollection(collectionName);
ListIndexesIterable<Document> indexList = mongoCollection.listIndexes();
if(Objects.isNull(indexList)){
Document countIndexDoc = new Document();
countIndexDoc.put("score_num", -1);
Document timeIndexDoc = new Document();
timeIndexDoc.put("time", -1);
Document rankIndexDoc = new Document();
rankIndexDoc.put("rank", -1);
Document nameIndexDoc = new Document();
nameIndexDoc.put("name", -1);
Document typeIndexDoc = new Document();
typeIndexDoc.put("type", -1);
try {
mongoCollection.createIndex(countIndexDoc, new IndexOptions().name("count_desc"));
mongoCollection.createIndex(timeIndexDoc, new IndexOptions().name("time_desc"));
mongoCollection.createIndex(rankIndexDoc, new IndexOptions().name("rank_desc"));
mongoCollection.createIndex(nameIndexDoc, new IndexOptions().name( "name_desc"));
mongoCollection.createIndex(typeIndexDoc, new IndexOptions().name( "type_desc"));
} catch (Exception e) {
e.printStackTrace();
} }
@SuppressWarnings("static-access")
protected void setWriter(final Mongo writer) {
this.writer = writer;
} }
public static void main(String[] args) {
} }
} }
...@@ -20,9 +20,12 @@ public class HotSearchRun { ...@@ -20,9 +20,12 @@ public class HotSearchRun {
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build(); .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig); ProxyFactory.init(simpleConfig);
new UpdateWechatUserRun().start(); // new UpdateWechatUserRun().start();
ZhiWeiTools.sleep(10000); // ZhiWeiTools.sleep(10000);
new CacheListener().startListen(); // new CacheListener().startListen();
//推送程序启动
// new SendWeiboHotSearchRun().start();
// new SendZhihuHotSearchRun().start();
// ScheduledExecutorService scheduledThreadPool = Executors.newScheduledThreadPool(6); // ScheduledExecutorService scheduledThreadPool = Executors.newScheduledThreadPool(6);
// //
...@@ -47,8 +50,6 @@ public class HotSearchRun { ...@@ -47,8 +50,6 @@ public class HotSearchRun {
new ZhihuHotSearchRun().start(); new ZhihuHotSearchRun().start();
new WeiboSuperTopicRun().start(); new WeiboSuperTopicRun().start();
new WeiboTopicRun().start(); new WeiboTopicRun().start();
//推送程序启动
new SendWeiboHotSearchRun().start();
new SendZhihuHotSearchRun().start();
} }
} }
package com.zhiwei.searchhotcrawler.test;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class HotSearchListTest{
public static void main(String[] args) {
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("zzw").build();
ProxyFactory.init(simpleConfig);
String url = "http://app.myzaker.com/news/app.php?f=";
HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
try{
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_HEAVY_PROXY).body().string();
Elements elements = Jsoup.parse(htmlBody).select("div.titlebar>a");
for(Element element : elements){
String lableUrl = "http://app.myzaker.com/news/app.php" + element.attr("href");
System.out.println("lableUrl========="+lableUrl);
String htmlBodyLable = httpBoot.syncCall(RequestUtils.wrapGet(lableUrl), ProxyHolder.NAT_HEAVY_PROXY).body().string();
Elements elementsLable = Jsoup.parse(htmlBodyLable).select("div#infinite_scroll>a");
for(Element elementLable : elementsLable){
System.out.println(elementLable.attr("href") + "=============" + elementLable.text());
}
}
}catch (Exception e){
e.printStackTrace();
}
// MongoCredential credential = MongoCredential.createCredential(Config.userName, Config.authDB, Config.userPwd.toCharArray());
// ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);
// Mongo mongo = new MongoClient(address, Arrays.asList(credential));
//
// DB db = mongo.getDB("hot_search_list");
// DBCollection coll = db.getCollection("hot_search_list2019_09");
//
//// MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());
//// ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort);
//// Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew));
//// DB dbNew = mongoNew.getDB("hot_search_list");
//
// Map<String,String> timLine = TimeParse.getTimeMap("2019-10-01 00:00:00", "2019-10-09 23:59:59", "dd", 1);
//
// timLine.forEach((start, end) ->{
//
// String year = end.substring(0,4);
// String month = end.substring(5,7);
// Date startDate = TimeParse.stringFormartDate(start);
// Date endDate = TimeParse.stringFormartDate(end);
//
// String collName = "hot_search_list"+year+"_"+month;
// System.out.println("collName=========="+collName);
//// DBCollection collNew = dbNew.getCollection(collName);
//// DBObject countIndexDoc = new BasicDBObject();
//// countIndexDoc.put("count", -1);
//// DBObject timeIndexDoc = new BasicDBObject();
//// timeIndexDoc.put("time", -1);
//// DBObject rankIndexDoc = new BasicDBObject();
//// rankIndexDoc.put("rank", -1);
//// DBObject nameIndexDoc = new BasicDBObject();
//// nameIndexDoc.put("name", -1);
//// DBObject typeIndexDoc = new BasicDBObject();
//// typeIndexDoc.put("type", -1);
//// try {
//// collNew.createIndex(countIndexDoc, new BasicDBObject("name", "count_desc"));
//// collNew.createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
//// collNew.createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
//// collNew.createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
//// collNew.createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
//// } catch (Exception e) {
//// e.printStackTrace();
//// }
//
// DBObject query = new BasicDBObject(new BasicDBObject("time",
// new BasicDBObject("$gte",startDate).append("$lte", endDate)));
// System.out.println(query);
// WriteResult wr = coll.remove(query);
// System.out.println("========"+wr.getN());
//// int i = 0;
//// DBCursor cur = coll.remove(query);
//// System.out.println(query +"======="+ cur.count());
//// List<DBObject> dataList = new ArrayList<>();
//// while(cur.hasNext()) {
//// DBObject doc = cur.next();
//// try {
////// collNew.save(doc);
//// i++;
//// coll.remove(doc);
//// } catch (Exception e2) {
//// e2.printStackTrace();
//// }
//// dataList.add(doc);
//// }
//// System.out.println(collName +"数据量大小" +dataList.size());
//// cur.close();
//// if(!dataList.isEmpty()) {
//// try {
//// collNew.insert(dataList);
//// } catch (Exception e) {
//// e.printStackTrace();
//// }
//// }
// });
// mongo.close();
}
}
...@@ -6,7 +6,9 @@ import java.util.List; ...@@ -6,7 +6,9 @@ import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -39,23 +41,24 @@ public class BaiduHotSearchRun extends Thread{ ...@@ -39,23 +41,24 @@ public class BaiduHotSearchRun extends Thread{
private void getHotList() { private void getHotList() {
log.info("百度风云榜采集开始........"); log.info("百度风云榜采集开始........");
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch(); List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch();
log.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> saveDataList = new ArrayList<>(); List<Document> saveDataList = new ArrayList<>();
if(Objects.nonNull(list) && !list.isEmpty()) { if(Objects.nonNull(list) && !list.isEmpty()) {
list.forEach(baiduHotSearch ->{ list.forEach(baiduHotSearch ->{
int changeCount = hotSearchDAO.getChangeCount(baiduHotSearch); Document doc = new Document();
DBObject doc = new BasicDBObject();
doc.put("_id", baiduHotSearch.getId()); doc.put("_id", baiduHotSearch.getId());
doc.put("name", baiduHotSearch.getName()); doc.put("name", baiduHotSearch.getName());
doc.put("url", baiduHotSearch.getUrl()); doc.put("url", baiduHotSearch.getUrl());
doc.put("count", baiduHotSearch.getCount()); doc.put("count", baiduHotSearch.getCount());
doc.put("day", baiduHotSearch.getDay()); doc.put("day", baiduHotSearch.getDay());
doc.put("time", baiduHotSearch.getTime()); doc.put("time", baiduHotSearch.getTime());
doc.put("changeCount", changeCount);
doc.put("rank", baiduHotSearch.getRank()); doc.put("rank", baiduHotSearch.getRank());
doc.put("type", baiduHotSearch.getType()); doc.put("type", baiduHotSearch.getType());
saveDataList.add(doc); saveDataList.add(doc);
hotSearchCacheDAO.addAndUpdateData(doc);
}); });
} }
hotSearchDAO.addHotSearchList(saveDataList); hotSearchDAO.addHotSearchList(saveDataList);
......
...@@ -5,7 +5,9 @@ import java.util.Date; ...@@ -5,7 +5,9 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -42,23 +44,24 @@ public class DouyinHotSearchRun extends Thread{ ...@@ -42,23 +44,24 @@ public class DouyinHotSearchRun extends Thread{
private void getHotList() { private void getHotList() {
log.info("抖音热搜榜采集开始........"); log.info("抖音热搜榜采集开始........");
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList(); List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
log.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<Document> data = new ArrayList<>();
for(HotSearchList douyinHotSearch : list){ for(HotSearchList douyinHotSearch : list){
int changeCount = hotSearchDAO.getChangeCount(douyinHotSearch); Document douyin = new Document();
DBObject douyin = new BasicDBObject();
douyin.put("_id", douyinHotSearch.getId()); douyin.put("_id", douyinHotSearch.getId());
douyin.put("name", douyinHotSearch.getName()); douyin.put("name", douyinHotSearch.getName());
douyin.put("rank", douyinHotSearch.getRank()); douyin.put("rank", douyinHotSearch.getRank());
douyin.put("count", douyinHotSearch.getCount()); douyin.put("count", douyinHotSearch.getCount());
douyin.put("hot", douyinHotSearch.getHot());
douyin.put("day", douyinHotSearch.getDay()); douyin.put("day", douyinHotSearch.getDay());
douyin.put("time", douyinHotSearch.getTime()); douyin.put("time", douyinHotSearch.getTime());
douyin.put("changeCount", changeCount);
douyin.put("url", null); douyin.put("url", null);
douyin.put("type", douyinHotSearch.getType()); douyin.put("type", douyinHotSearch.getType());
data.add(douyin); data.add(douyin);
hotSearchDAO.addHotSearch(douyin); hotSearchDAO.addHotSearch(douyin);
hotSearchCacheDAO.addAndUpdateData(douyin);
} }
log.info("抖音热搜榜采集结束........"); log.info("抖音热搜榜采集结束........");
} }
......
package com.zhiwei.searchhotcrawler.timer; //package com.zhiwei.searchhotcrawler.timer;
//
import java.util.Calendar; //import java.util.Calendar;
import java.util.Date; //import java.util.Date;
import java.util.HashMap; //import java.util.HashMap;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import lombok.extern.log4j.Log4j2; //import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; //
import org.slf4j.LoggerFactory; //import com.alibaba.fastjson.JSONObject;
//import com.mongodb.DBObject;
import com.alibaba.fastjson.JSONObject; //import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.mongodb.DBObject; //import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.dao.WechatUserDao; //import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; //import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; //import com.zhiwei.searchhotcrawler.util.WechatConstant;
import com.zhiwei.searchhotcrawler.util.Template; //import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil; //import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.searchhotcrawler.util.WechatConstant; //
import com.zhiwei.tools.timeparse.TimeParse; //@Log4j2
import com.zhiwei.tools.tools.ZhiWeiTools; //public class SendWeiboHotSearchRun extends Thread {
// private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Log4j2 // private static WechatUserDao wechatUserDao = new WechatUserDao();
public class SendWeiboHotSearchRun extends Thread { // @Override
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); // public void run() {
private static WechatUserDao wechatUserDao = new WechatUserDao(); // while (true) {
@Override // try {
public void run() { // Calendar calendar = Calendar.getInstance();
while (true) { // int hour = calendar.get(Calendar.HOUR_OF_DAY);
try { // log.info("微博推送,当前系统时间为:" + hour);
Calendar calendar = Calendar.getInstance(); // if (hour > 6 && hour < 23) {
int hour = calendar.get(Calendar.HOUR_OF_DAY); // List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.微博热搜.name());
log.info("微博推送,当前系统时间为:" + hour); // if (list != null && !list.isEmpty()) {
if (hour > 6 && hour < 23) { // for (DBObject weibo : list) {
List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.微博热搜.name()); // String title = weibo.get("name").toString();
if (list != null && !list.isEmpty()) { // String time = TimeParse.dateFormartString((Date) weibo.get("time"), "yyyy-MM-dd HH:mm:ss");
for (DBObject weibo : list) { // String url = weibo.get("url").toString();
String title = weibo.get("name").toString(); // sendTemplateByUserIds(title, time, url);
String time = TimeParse.dateFormartString((Date) weibo.get("time"), "yyyy-MM-dd HH:mm:ss"); // }
String url = weibo.get("url").toString(); // } else {
sendTemplateByUserIds(title, time, url); // log.info("微博最近一小时无数据");
} // sendTemplateByUserIds("最近一小时无数据",
} else { // TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
log.info("微博最近一小时无数据"); // }
sendTemplateByUserIds("最近一小时无数据", // }
TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null); // ZhiWeiTools.sleep(1 * 60 * 60 * 1000);
} // } catch (Exception e) {
} // log.debug("微博热搜推送出现问题,问题为:::{}", e.fillInStackTrace());
ZhiWeiTools.sleep(1 * 60 * 60 * 1000); // ZhiWeiTools.sleep(1 * 60 * 60 * 1000);
} catch (Exception e) { // continue;
log.debug("微博热搜推送出现问题,问题为:::{}", e.fillInStackTrace()); // }
ZhiWeiTools.sleep(1 * 60 * 60 * 1000); // }
continue; // }
} //
} // /**
} // * @Title: sendTemplateByUserIds
// * @author hero
/** // * @Description: 发送模版消息
* @Title: sendTemplateByUserIds // * @param @param
* @author hero // * microTouTiao
* @Description: 发送模版消息 // * @param @param
* @param @param // * userList 设定文件
* microTouTiao // * @return void 返回类型
* @param @param // */
* userList 设定文件 // public static void sendTemplateByUserIds(String title, String time, String url) {
* @return void 返回类型 // Map<String, Object> dataMap = new HashMap<String, Object>();
*/ // JSONObject first = new JSONObject();
public static void sendTemplateByUserIds(String title, String time, String url) { // first.put("value", "您好,有一条来自微博热搜榜的预警通知。");
Map<String, Object> dataMap = new HashMap<String, Object>(); // dataMap.put("first", first);
JSONObject first = new JSONObject(); // JSONObject keyword1 = new JSONObject();
first.put("value", "您好,有一条来自微博热搜榜的预警通知。"); // keyword1.put("value", title);
dataMap.put("first", first); // keyword1.put("color", "#173177");
JSONObject keyword1 = new JSONObject(); // dataMap.put("keyword1", keyword1);
keyword1.put("value", title); // JSONObject keyword2 = new JSONObject();
keyword1.put("color", "#173177"); // keyword2.put("value", "微博热搜榜");
dataMap.put("keyword1", keyword1); // keyword2.put("color", "#173177");
JSONObject keyword2 = new JSONObject(); // dataMap.put("keyword2", keyword2);
keyword2.put("value", "微博热搜榜"); // JSONObject keyword3 = new JSONObject();
keyword2.put("color", "#173177"); // keyword3.put("value", time);
dataMap.put("keyword2", keyword2); // keyword3.put("color", "#173177");
JSONObject keyword3 = new JSONObject(); // dataMap.put("keyword3", keyword3);
keyword3.put("value", time); // JSONObject remark = new JSONObject();
keyword3.put("color", "#173177"); // remark.put("value", "知微情报监测服务");
dataMap.put("keyword3", keyword3); // dataMap.put("remark", remark);
JSONObject remark = new JSONObject(); // List<String> userList = getUserList();
remark.put("value", "知微情报监测服务"); // if (userList != null && userList.size() > 0) {
dataMap.put("remark", remark); // for (String openId : userList) {
List<String> userList = getUserList(); // Template template = new Template();
if (userList != null && userList.size() > 0) { // template.setTouser(openId);
for (String openId : userList) { // if (url != null) {
Template template = new Template(); // template.setUrl(url);
template.setTouser(openId); // }
if (url != null) { // template.setTemplate_id(WechatConstant.WECHAT_TEMPLATEID_EARLY_IT);
template.setUrl(url); // template.setData(dataMap);
} //
template.setTemplate_id(WechatConstant.WECHAT_TEMPLATEID_EARLY_IT); // JSONObject templateJson = (JSONObject) JSONObject.toJSON(template);
template.setData(dataMap); // WechatCodeUtil.sendDataJson(templateJson);
// }
JSONObject templateJson = (JSONObject) JSONObject.toJSON(template); // } else {
WechatCodeUtil.sendDataJson(templateJson); // log.info("拉取微博用户列表失败");
} // }
} else { // }
log.info("拉取微博用户列表失败"); //
} // /**
} // * @Title: getUserList
// * @author hero
/** // * @Description: 用户列表
* @Title: getUserList // * @param @param
* @author hero // * projectName
* @Description: 用户列表 // * @param @return
* @param @param // * 设定文件
* projectName // * @return List<String> 返回类型
* @param @return // */
* 设定文件 // public static List<String> getUserList() {
* @return List<String> 返回类型 // List<String> userList = wechatUserDao.getWechatUserByGroup("weibohot");
*/ // if(userList==null){
public static List<String> getUserList() { // userList = WechatCodeUtil.getUserListByGroupName("weibohot");
List<String> userList = wechatUserDao.getWechatUserByGroup("weibohot"); // }
if(userList==null){ // return userList;
userList = WechatCodeUtil.getUserListByGroupName("weibohot"); // }
} //}
return userList;
}
}
package com.zhiwei.searchhotcrawler.timer; //package com.zhiwei.searchhotcrawler.timer;
//
import java.util.Calendar; //import java.util.Calendar;
import java.util.Date; //import java.util.Date;
import java.util.HashMap; //import java.util.HashMap;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import lombok.extern.log4j.Log4j2; //import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; //
import org.slf4j.LoggerFactory; //import com.alibaba.fastjson.JSONObject;
//import com.mongodb.DBObject;
import com.alibaba.fastjson.JSONObject; //import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.mongodb.DBObject; //import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; //import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; //import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.dao.WechatUserDao; //import com.zhiwei.searchhotcrawler.util.WechatConstant;
import com.zhiwei.searchhotcrawler.util.Template; //import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil; //import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.searchhotcrawler.util.WechatConstant; //
import com.zhiwei.tools.timeparse.TimeParse; //@Log4j2
import com.zhiwei.tools.tools.ZhiWeiTools; //public class SendZhihuHotSearchRun extends Thread{
// private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Log4j2 // private static WechatUserDao wechatUserDao = new WechatUserDao();
public class SendZhihuHotSearchRun extends Thread{ // @Override
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); // public void run() {
private static WechatUserDao wechatUserDao = new WechatUserDao(); //
@Override // while(true) {
public void run() { // try {
// Calendar calendar = Calendar.getInstance();
while(true) { // int hour = calendar.get(Calendar.HOUR_OF_DAY);
try { // log.info("知乎推送,当前系统时间为:"+hour);
Calendar calendar = Calendar.getInstance(); // if(hour > 6 && hour <23){
int hour = calendar.get(Calendar.HOUR_OF_DAY); // List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.知乎热搜.name());
log.info("知乎推送,当前系统时间为:"+hour); // if(list!=null && !list.isEmpty()){
if(hour > 6 && hour <23){ // for(DBObject zhihu : list){
List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.知乎热搜.name()); // String title = zhihu.get("display_query").toString();
if(list!=null && !list.isEmpty()){ // String time = TimeParse.dateFormartString((Date)zhihu.get("time"), "yyyy-MM-dd HH:mm:ss");
for(DBObject zhihu : list){ // String url = zhihu.get("_id").toString();
String title = zhihu.get("display_query").toString(); // if(calendar.get(Calendar.HOUR_OF_DAY) > 6 && calendar.get(Calendar.HOUR_OF_DAY) < 23){
String time = TimeParse.dateFormartString((Date)zhihu.get("time"), "yyyy-MM-dd HH:mm:ss"); // sendTemplateByUserIds(title, time, url);
String url = zhihu.get("_id").toString(); // }
if(calendar.get(Calendar.HOUR_OF_DAY) > 6 && calendar.get(Calendar.HOUR_OF_DAY) < 23){ // }
sendTemplateByUserIds(title, time, url); // }else{
} // log.info("知乎最近一小时无数据");
} // sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
}else{ // }
log.info("知乎最近一小时无数据"); // }
sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null); // ZhiWeiTools.sleep(1*60*60*1000);
} // } catch (Exception e) {
} // log.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools.sleep(1*60*60*1000); // ZhiWeiTools.sleep(1*60*60*1000);
} catch (Exception e) { // }
log.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace()); // }
ZhiWeiTools.sleep(1*60*60*1000); // }
} //
} // /**
} // * @Title: sendTemplateByUserIds
// * @author hero
/** // * @Description: 发送模版消息
* @Title: sendTemplateByUserIds // * @param @param microTouTiao
* @author hero // * @param @param userList 设定文件
* @Description: 发送模版消息 // * @return void 返回类型
* @param @param microTouTiao // */
* @param @param userList 设定文件 // public static void sendTemplateByUserIds(String title,String time, String url) {
* @return void 返回类型 //
*/ // Map<String, Object> dataMap = new HashMap<>();
public static void sendTemplateByUserIds(String title,String time, String url) { // JSONObject first = new JSONObject();
// first.put("value", "您好,有一条来自知乎热搜榜的预警通知。");
Map<String, Object> dataMap = new HashMap<>(); // dataMap.put("first", first);
JSONObject first = new JSONObject(); // JSONObject keyword1 = new JSONObject();
first.put("value", "您好,有一条来自知乎热搜榜的预警通知。"); // keyword1.put("value", title);
dataMap.put("first", first); // keyword1.put("color", "#173177");
JSONObject keyword1 = new JSONObject(); // dataMap.put("keyword1", keyword1);
keyword1.put("value", title); // JSONObject keyword2 = new JSONObject();
keyword1.put("color", "#173177"); // keyword2.put("value", "知乎热搜榜");
dataMap.put("keyword1", keyword1); // keyword2.put("color", "#173177");
JSONObject keyword2 = new JSONObject(); // dataMap.put("keyword2", keyword2);
keyword2.put("value", "知乎热搜榜"); // JSONObject keyword3 = new JSONObject();
keyword2.put("color", "#173177"); // keyword3.put("value", time);
dataMap.put("keyword2", keyword2); // keyword3.put("color", "#173177");
JSONObject keyword3 = new JSONObject(); // dataMap.put("keyword3", keyword3);
keyword3.put("value", time); // JSONObject remark = new JSONObject();
keyword3.put("color", "#173177"); // remark.put("value", "知微情报监测服务");
dataMap.put("keyword3", keyword3); // dataMap.put("remark", remark);
JSONObject remark = new JSONObject(); //
remark.put("value", "知微情报监测服务"); // List<String> userList = getUserList();
dataMap.put("remark", remark); // if(userList!=null && !userList.isEmpty()) {
// for (String openId : userList) {
List<String> userList = getUserList(); // Template template = new Template();
if(userList!=null && !userList.isEmpty()) { // template.setTouser(openId);
for (String openId : userList) { // if(url!=null){
Template template = new Template(); // template.setUrl(url);
template.setTouser(openId); // }
if(url!=null){ // template.setTemplate_id(WechatConstant.WECHAT_TEMPLATEID_EARLY_IT);
template.setUrl(url); // template.setData(dataMap);
} //
template.setTemplate_id(WechatConstant.WECHAT_TEMPLATEID_EARLY_IT); // JSONObject templateJson = (JSONObject)JSONObject.toJSON(template);
template.setData(dataMap); // WechatCodeUtil.sendDataJson(templateJson);
// }
JSONObject templateJson = (JSONObject)JSONObject.toJSON(template); // }else {
WechatCodeUtil.sendDataJson(templateJson); // log.info("知乎推送拉取用户列表失败");
} // }
}else { //
log.info("知乎推送拉取用户列表失败"); // }
} //
// /**
} // * @Title: getUserList
// * @author hero
/** // * @Description: 用户列表
* @Title: getUserList // * @param @param projectName
* @author hero // * @param @return 设定文件
* @Description: 用户列表 // * @return List<String> 返回类型
* @param @param projectName // */
* @param @return 设定文件 //// private static List<String> getUserList()
* @return List<String> 返回类型 //// {
*/ //// List<String> userList = wechatUserDao.getWechatUserByGroup("LP组");
private static List<String> getUserList() //// if(userList==null){
{ //// userList = WechatCodeUtil.getUserListByGroupName("LP组");
List<String> userList = wechatUserDao.getWechatUserByGroup("LP组"); //// }
if(userList==null){ //// return userList;
userList = WechatCodeUtil.getUserListByGroupName("LP组"); //// }
} //
return userList; //}
}
}
...@@ -5,7 +5,9 @@ import java.util.Date; ...@@ -5,7 +5,9 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -37,12 +39,13 @@ public class SougoHotSearchRun extends Thread { ...@@ -37,12 +39,13 @@ public class SougoHotSearchRun extends Thread {
private void getHotList() { private void getHotList() {
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
log.info("搜狗微信采集开始........"); log.info("搜狗微信采集开始........");
List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch(); List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch();
log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<Document> data = new ArrayList<>();
for(HotSearchList sougoHotSearch : list){ for(HotSearchList sougoHotSearch : list){
DBObject doc = new BasicDBObject(); Document doc = new Document();
doc.put("_id", sougoHotSearch.getId()); doc.put("_id", sougoHotSearch.getId());
doc.put("name", sougoHotSearch.getName()); doc.put("name", sougoHotSearch.getName());
doc.put("url", sougoHotSearch.getUrl()); doc.put("url", sougoHotSearch.getUrl());
...@@ -51,6 +54,7 @@ public class SougoHotSearchRun extends Thread { ...@@ -51,6 +54,7 @@ public class SougoHotSearchRun extends Thread {
doc.put("rank", sougoHotSearch.getRank()); doc.put("rank", sougoHotSearch.getRank());
doc.put("type", sougoHotSearch.getType()); doc.put("type", sougoHotSearch.getType());
data.add(doc); data.add(doc);
hotSearchCacheDAO.addAndUpdateData(doc);
} }
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
log.info("搜狗微信采集结束........"); log.info("搜狗微信采集结束........");
......
package com.zhiwei.searchhotcrawler.timer; //package com.zhiwei.searchhotcrawler.timer;
//
import java.util.Calendar; //import java.util.Calendar;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
import java.util.Map.Entry; //import java.util.Map.Entry;
//
import lombok.extern.log4j.Log4j2; //import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; //
import org.slf4j.LoggerFactory; //import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
//import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.searchhotcrawler.dao.WechatUserDao; //
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil; //@Log4j2
import com.zhiwei.tools.tools.ZhiWeiTools; //public class UpdateWechatUserRun extends Thread{
// private WechatUserDao wechatUserDao = new WechatUserDao();
@Log4j2 // @Override
public class UpdateWechatUserRun extends Thread{ // public void run() {
private WechatUserDao wechatUserDao = new WechatUserDao(); // log.info("开始更新用户数据");
@Override // while(true) {
public void run() { // try {
log.info("开始更新用户数据"); // Calendar calendar = Calendar.getInstance();
while(true) { // int hour = calendar.get(Calendar.HOUR_OF_DAY);
try { // if(hour > 6 ){
Calendar calendar = Calendar.getInstance(); // Map<String,Integer> groupMap = WechatCodeUtil.getAllGroupIp();
int hour = calendar.get(Calendar.HOUR_OF_DAY); // log.info("此公众号的分组数量为:::{}", groupMap.size());
if(hour > 6 ){ // if(!groupMap.isEmpty() && groupMap!=null){
Map<String,Integer> groupMap = WechatCodeUtil.getAllGroupIp(); // for(Entry<String,Integer> group : groupMap.entrySet()){
log.info("此公众号的分组数量为:::{}", groupMap.size()); // log.info("此公众号的分组名称及IP为:::{},{}", group.getKey(), group.getValue());
if(!groupMap.isEmpty() && groupMap!=null){ // List<String> userList = WechatCodeUtil.getUserListByGroupId(group.getValue());
for(Entry<String,Integer> group : groupMap.entrySet()){ // log.info("{},此分组下的用户数量为::{}", group.getKey(), userList.size());
log.info("此公众号的分组名称及IP为:::{},{}", group.getKey(), group.getValue()); // if(userList!=null && !userList.isEmpty()){
List<String> userList = WechatCodeUtil.getUserListByGroupId(group.getValue()); // wechatUserDao.addWechatUser(userList, group.getKey(), group.getValue());
log.info("{},此分组下的用户数量为::{}", group.getKey(), userList.size()); // }
if(userList!=null && !userList.isEmpty()){ // }
wechatUserDao.addWechatUser(userList, group.getKey(), group.getValue()); // }
} // }
} // ZhiWeiTools.sleep(1*60*60*1000);
} // } catch (Exception e) {
} // log.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools.sleep(1*60*60*1000); // ZhiWeiTools.sleep(1*60*60*1000);
} catch (Exception e) { // continue;
log.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace()); // }
ZhiWeiTools.sleep(1*60*60*1000); // }
continue; // }
} //
} //
} //}
}
...@@ -5,16 +5,13 @@ import java.util.Date; ...@@ -5,16 +5,13 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import org.bson.Document;
@Log4j2 @Log4j2
public class WeiboHotSearchRun extends Thread{ public class WeiboHotSearchRun extends Thread{
...@@ -38,13 +35,13 @@ public class WeiboHotSearchRun extends Thread{ ...@@ -38,13 +35,13 @@ public class WeiboHotSearchRun extends Thread{
private void getHotList() { private void getHotList() {
log.info("微博话题采集开始........"); log.info("微博话题采集开始........");
HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO(); HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO();
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch(); // List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone(); List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone();
log.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<Document> data = new ArrayList<>();
for(HotSearchList weiboHotSearch : list){ for(HotSearchList weiboHotSearch : list){
int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch); Document doc = new Document();
DBObject doc = new BasicDBObject();
doc.put("_id", weiboHotSearch.getId()); doc.put("_id", weiboHotSearch.getId());
doc.put("name", weiboHotSearch.getName()); doc.put("name", weiboHotSearch.getName());
doc.put("url", weiboHotSearch.getUrl()); doc.put("url", weiboHotSearch.getUrl());
...@@ -52,11 +49,11 @@ public class WeiboHotSearchRun extends Thread{ ...@@ -52,11 +49,11 @@ public class WeiboHotSearchRun extends Thread{
doc.put("hot", weiboHotSearch.getHot()); doc.put("hot", weiboHotSearch.getHot());
doc.put("day", weiboHotSearch.getDay()); doc.put("day", weiboHotSearch.getDay());
doc.put("time", weiboHotSearch.getTime()); doc.put("time", weiboHotSearch.getTime());
doc.put("changeCount", changeCount);
doc.put("rank", weiboHotSearch.getRank()); doc.put("rank", weiboHotSearch.getRank());
doc.put("type", weiboHotSearch.getType()); doc.put("type", weiboHotSearch.getType());
doc.put("icon", weiboHotSearch.getIcon()); doc.put("icon", weiboHotSearch.getIcon());
data.add(doc); data.add(doc);
hotSearchCacheDAO.addAndUpdateData(doc);
} }
weiboHotSearchDAO.addHotSearchList(data); weiboHotSearchDAO.addHotSearchList(data);
log.info("微博话题采集结束........"); log.info("微博话题采集结束........");
......
...@@ -9,11 +9,7 @@ import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic; ...@@ -9,11 +9,7 @@ import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
import com.zhiwei.searchhotcrawler.crawler.WeiboSuperTopicCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboSuperTopicCrawler;
import com.zhiwei.searchhotcrawler.dao.WeiboSuperTopicDAO; import com.zhiwei.searchhotcrawler.dao.WeiboSuperTopicDAO;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.bson.Document;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2 @Log4j2
...@@ -40,10 +36,10 @@ public class WeiboSuperTopicRun extends Thread{ ...@@ -40,10 +36,10 @@ public class WeiboSuperTopicRun extends Thread{
log.info("微博超话采集开始........"); log.info("微博超话采集开始........");
List<WeiboSuperTopic> list = WeiboSuperTopicCrawler.startCrawler(); List<WeiboSuperTopic> list = WeiboSuperTopicCrawler.startCrawler();
log.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<Document> data = new ArrayList<>();
for(WeiboSuperTopic topic : list){ for(WeiboSuperTopic topic : list){
log.info("topic::::{}", topic); log.info("topic::::{}", topic);
DBObject doc = new BasicDBObject(); Document doc = new Document();
doc.put("_id", topic.getId()); doc.put("_id", topic.getId());
doc.put("name", topic.getTopicName()); doc.put("name", topic.getTopicName());
doc.put("rank", topic.getRank()); doc.put("rank", topic.getRank());
......
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.WeiboTopicCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboTopicCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
...@@ -32,12 +33,13 @@ public class WeiboTopicRun extends Thread{ ...@@ -32,12 +33,13 @@ public class WeiboTopicRun extends Thread{
private void getTopicList() { private void getTopicList() {
HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO(); HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO();
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
log.info("微博话题采集开始........"); log.info("微博话题采集开始........");
List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone(); List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone();
log.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<Document> data = new ArrayList<>();
for(HotSearchList topic : list){ for(HotSearchList topic : list){
DBObject doc = new BasicDBObject(); Document doc = new Document();
doc.put("_id", topic.getId()); doc.put("_id", topic.getId());
doc.put("name", topic.getName()); doc.put("name", topic.getName());
doc.put("url", topic.getUrl()); doc.put("url", topic.getUrl());
...@@ -50,6 +52,7 @@ public class WeiboTopicRun extends Thread{ ...@@ -50,6 +52,7 @@ public class WeiboTopicRun extends Thread{
doc.put("topic_lead", topic.getTopicLead()); doc.put("topic_lead", topic.getTopicLead());
doc.put("comment_count", topic.getCommentCount()); doc.put("comment_count", topic.getCommentCount());
data.add(doc); data.add(doc);
hotSearchCacheDAO.addAndUpdateData(doc);
} }
weiboHotSearchDAO.addHotSearchList(data); weiboHotSearchDAO.addHotSearchList(data);
log.info("微博话题采集结束........"); log.info("微博话题采集结束........");
......
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -36,13 +39,15 @@ public class ZhihuHotSearchRun extends Thread{ ...@@ -36,13 +39,15 @@ public class ZhihuHotSearchRun extends Thread{
private void getHotList() { private void getHotList() {
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
log.info("知乎话题采集开始...,当前线程名字:{}", Thread.currentThread().getName()); log.info("知乎话题采集开始...,当前线程名字:{}", Thread.currentThread().getName());
List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList(); List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList();
List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList(); List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
list.addAll(mobilelist); list.addAll(mobilelist);
log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<Document> dataList = new ArrayList<>();
for(HotSearchList zhihuHotSearch : list){ for(HotSearchList zhihuHotSearch : list){
DBObject zhihu = new BasicDBObject(); Document zhihu = new Document();
zhihu.put("_id", zhihuHotSearch.getId()); zhihu.put("_id", zhihuHotSearch.getId());
zhihu.put("name", zhihuHotSearch.getName()); zhihu.put("name", zhihuHotSearch.getName());
zhihu.put("url", zhihuHotSearch.getUrl()); zhihu.put("url", zhihuHotSearch.getUrl());
...@@ -50,11 +55,12 @@ public class ZhihuHotSearchRun extends Thread{ ...@@ -50,11 +55,12 @@ public class ZhihuHotSearchRun extends Thread{
zhihu.put("hot", zhihuHotSearch.getHot()); zhihu.put("hot", zhihuHotSearch.getHot());
zhihu.put("day", zhihuHotSearch.getDay()); zhihu.put("day", zhihuHotSearch.getDay());
zhihu.put("time", zhihuHotSearch.getTime()); zhihu.put("time", zhihuHotSearch.getTime());
zhihu.put("changeCount", 0);
zhihu.put("rank", zhihuHotSearch.getRank()); zhihu.put("rank", zhihuHotSearch.getRank());
zhihu.put("type", zhihuHotSearch.getType()); zhihu.put("type", zhihuHotSearch.getType());
hotSearchDAO.addHotSearch(zhihu); dataList.add(zhihu);
hotSearchCacheDAO.addAndUpdateData(zhihu);
} }
hotSearchDAO.addHotSearchList(dataList);
log.info("知乎话题采集结束........"); log.info("知乎话题采集结束........");
} }
......
#mongoIp=202.107.192.94 #local service
mongoIp=192.168.0.101 #mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@202.107.192.94:30000/istarshine_data?authSource=admin&authMechanism=SCRAM-SHA-1
mongoPort=30000 #local
#mongoIp=192.168.0.81 #mongoUri=mongodb://192.168.0.81:27017/istarshine_data
#mongoPort=27017 #service
db.username=searchhotcrawleruser mongoUri=mongodb://istarshineuser:istarshine1q2w3e4r@192.168.0.101:30000,192.168.0.106:30000,192.168.0.108:30000/istarshine_data?authSource=admin&authMechanism=SCRAM-SHA-1
db.paasword=searchhotcrawler1q2w3e4r
db.certifiedDB=admin
dbName=hot_search_list dbName=hot_search_list
searchCollName=hot_search_list searchCollName=hot_search_list
searchCacheCollName=hot_search_cache
topicCollName=topic_list topicCollName=topic_list
collWechatUserName=wechat_user collWechatUserName=wechat_user
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment