Commit cb6bcd76 by zhiwei

添加微博话题采集,并添加lombok

parent a9966f9d
...@@ -38,12 +38,17 @@ ...@@ -38,12 +38,17 @@
<dependency> <dependency>
<groupId>com.zhiwei.tools</groupId> <groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId> <artifactId>zhiwei-tools</artifactId>
<version>0.1.5-SNAPSHOT</version> <version>0.1.6-SNAPSHOT</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.zhiwei.crawler</groupId> <groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId> <artifactId>crawler-core</artifactId>
<version>0.5.5.6-SNAPSHOT</version> <version>0.6.0.4-RELEASE</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.8</version>
</dependency> </dependency>
</dependencies> </dependencies>
......
...@@ -10,35 +10,79 @@ import java.io.Serializable; ...@@ -10,35 +10,79 @@ import java.io.Serializable;
import java.util.Date; import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import lombok.Data;
import lombok.ToString;
@Data
@ToString
public class HotSearchList implements Serializable{ public class HotSearchList implements Serializable{
private static final long serialVersionUID = 2076919584659821600L; private static final long serialVersionUID = 2076919584659821600L;
private String id; //主键 /**
* 主键
*/
private String id;
private String url; //消息链接 /**
* 消息链接
*/
private String url;
private String name; //热搜关键词 /**
* 热搜关键词
*/
private String name;
private Integer count; //时时热搜量 /**
* 热搜或话题导语
*/
private String topicLead;
private Boolean hot; //状态(true 为热搜; false为时时上升) /**
* 时时热搜量
*/
private Integer count;
private String day; //天 /**
* 状态(true 为热搜; false为时时上升)
*/
private Boolean hot;
private Date time; //时间 /**
* 天
*/
private String day;
private Integer changeCount; //据上分钟变化量 /**
* 时间
*/
private Date time;
private Integer rank; //排名 /**
* 据上分钟变化量
*/
private Integer changeCount;
private String type; //分类 /**
* 排名
*/
private Integer rank;
private String icon; //热搜类型 /**
* 分类
*/
private String type;
/**
* 热搜类型
*/
private String icon;
/**
* 话题讨论量
*/
private Integer commentCount;
public HotSearchList(){} public HotSearchList(){}
...@@ -69,120 +113,18 @@ public class HotSearchList implements Serializable{ ...@@ -69,120 +113,18 @@ public class HotSearchList implements Serializable{
} }
@Override public HotSearchList(String url, String name, Integer count,Integer rank,String type, Integer commentCount, String topicLead){
public String toString(){ this.id = name + "_" + new Date().getTime();
return "new HotSearchList["
+ "id = " + id
+ ", url = " + url
+ ", name = " + name
+ ", count = " + count
+ ", time = " + time
+ ", hot = " + hot
+ ", rank = " + rank
+ ", day = " + day
+ ", changeCount = " + changeCount
+ ", type = " + type
+ ", icon = " + icon
+ "]";
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getUrl() {
return url;
}
public void setUrl(String url) {
this.url = url; this.url = url;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name; this.name = name;
}
public Integer getCount() {
return count;
}
public void setCount(Integer count) {
this.count = count; this.count = count;
} this.hot = true;
public Date getTime() {
return time;
}
public void setTime(Date time) {
this.time = time;
}
public Integer getChangeCount() {
return changeCount;
}
public void setChangeCount(Integer changeCount) {
this.changeCount = changeCount;
}
public static long getSerialversionuid() {
return serialVersionUID;
}
public Boolean isHot() {
return hot;
}
public void setHot(Boolean hot) {
this.hot = hot;
}
public Boolean getHot() {
return hot;
}
public String getIcon() {
return icon;
}
public void setIcon(String icon) {
this.icon = icon;
}
public String getDay() {
return day;
}
public void setDay(String day) {
this.day = day;
}
public Integer getRank() {
return rank;
}
public void setRank(Integer rank) {
this.rank = rank; this.rank = rank;
} this.time = new Date();
this.day = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
public String getType() {
return type;
}
public void setType(String type) {
this.type = type; this.type = type;
this.commentCount = commentCount;
this.topicLead = topicLead;
} }
} }
...@@ -5,5 +5,6 @@ public enum HotSearchType { ...@@ -5,5 +5,6 @@ public enum HotSearchType {
微博热搜, 微博热搜,
知乎热搜, 知乎热搜,
抖音热搜, 抖音热搜,
搜狗微信热搜 搜狗微信热搜,
微博话题
} }
package com.zhiwei.searchhotcrawler.bean;
import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
import lombok.Data;
import lombok.ToString;
/**
 * Data bean for one entry of a Weibo topic ranking list.
 *
 * <p>Getters, setters, {@code equals}/{@code hashCode} and {@code toString} are
 * generated by Lombok ({@code @Data}); the explicit {@code @ToString} is kept
 * from the original declaration.
 *
 * @ClassName: WeiboSuperTopic
 * @Description: Weibo topic
 * @author Bewilder ZW
 * @date 2019-09-27 15:33:08
 */
@Data
@ToString
public class WeiboSuperTopic {

    /**
     * Primary key, built by the six-argument constructor as
     * {@code topicName + "_" + type + "_" + day}.
     */
    private String id;

    /**
     * Topic link.
     */
    public String url;

    /**
     * Topic name.
     */
    public String topicName;

    /**
     * Topic rank.
     */
    public Integer rank;

    /**
     * Topic influence score.
     */
    public String score;

    /**
     * Topic follower count.
     */
    public String fensi;

    /**
     * Topic read count (not set by any constructor).
     */
    public String readNum;

    /**
     * Topic post count (not set by any constructor).
     */
    public String postNum;

    /**
     * Ranking list type.
     */
    public String type;

    /**
     * Day of capture, formatted with the pattern {@code yyyy-MM-dd}.
     */
    private String day;

    /**
     * Time of capture.
     */
    private Date time;

    /**
     * Creates an empty bean; every field is left {@code null}.
     */
    public WeiboSuperTopic() {}

    /**
     * Creates a ranking entry stamped with the current time.
     *
     * @param url       topic link
     * @param topicName topic name
     * @param rank      topic rank
     * @param score     topic influence score
     * @param fensi     topic follower count
     * @param type      ranking list type
     */
    public WeiboSuperTopic(String url, String topicName, Integer rank, String score,
            String fensi, String type) {
        // Take one timestamp so that time, day and id always describe the same
        // instant, even when construction happens right at a day boundary.
        Date now = new Date();

        this.url = url;
        this.topicName = topicName;
        this.rank = rank;
        this.score = score;
        this.fensi = fensi;
        this.type = type;
        this.time = now;
        // NOTE(review): TimeParse.dateFormartString presumably formats the date
        // with the given pattern - confirm against zhiwei-tools.
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
        this.id = topicName + "_" + type + "_" + day;
    }
}
package com.zhiwei.searchhotcrawler.bean;
import java.util.Date;
import com.zhiwei.tools.timeparse.TimeParse;
/**
 * Data bean for one entry of a Weibo topic ranking list, with hand-written
 * accessors.
 *
 * @ClassName: WeiboTopic
 * @Description: Weibo topic
 * @author Bewilder ZW
 * @date 2019-09-27 15:33:08
 */
public class WeiboTopic {

    /** Primary key, built by the six-argument constructor as topicName_type_day. */
    private String id;
    /** Topic link. */
    public String url;
    /** Topic name. */
    public String topicName;
    /** Topic rank. */
    public Integer rank;
    /** Topic influence score. */
    public String score;
    /** Topic follower count. */
    public String fensi;
    /** Topic read count (not set by any constructor). */
    public String readNum;
    /** Topic post count (not set by any constructor). */
    public String postNum;
    /** Ranking list type. */
    public String type;
    /** Day of capture, formatted with the pattern yyyy-MM-dd. */
    private String day;
    /** Time of capture. */
    private Date time;

    /**
     * Creates an empty bean; every field is left {@code null}.
     */
    public WeiboTopic() {}

    /**
     * Creates a ranking entry stamped with the current time.
     *
     * @param url       topic link
     * @param topicName topic name
     * @param rank      topic rank
     * @param score     topic influence score
     * @param fensi     topic follower count
     * @param type      ranking list type
     */
    public WeiboTopic(String url, String topicName, Integer rank, String score,
            String fensi, String type) {
        // Take one timestamp so that time, day and id always describe the same
        // instant, even when construction happens right at a day boundary.
        Date now = new Date();

        this.url = url;
        this.topicName = topicName;
        this.rank = rank;
        this.score = score;
        this.fensi = fensi;
        this.type = type;
        this.time = now;
        // NOTE(review): TimeParse.dateFormartString presumably formats the date
        // with the given pattern - confirm against zhiwei-tools.
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
        this.id = topicName + "_" + type + "_" + day;
    }

    /**
     * Renders the ranking-related fields; {@code id}, {@code day} and
     * {@code time} are not part of the output.
     */
    @Override
    public String toString() {
        return "new WeiboTopic["
                + "topicName = " + topicName
                + ", rank = " + rank
                + ", score = " + score
                + ", fensi = " + fensi
                + ", type = " + type
                + ", readNum = " + readNum
                + ", postNum = " + postNum
                + ", url = " + url
                + "]";
    }

    // ---- getters for the ranking fields ----

    public String getUrl() {
        return url;
    }

    public String getTopicName() {
        return topicName;
    }

    public Integer getRank() {
        return rank;
    }

    public String getScore() {
        return score;
    }

    public String getFensi() {
        return fensi;
    }

    public String getReadNum() {
        return readNum;
    }

    public String getPostNum() {
        return postNum;
    }

    public String getType() {
        return type;
    }

    // ---- setters for the ranking fields ----

    public void setUrl(String url) {
        this.url = url;
    }

    public void setTopicName(String topicName) {
        this.topicName = topicName;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }

    public void setScore(String score) {
        this.score = score;
    }

    public void setFensi(String fensi) {
        this.fensi = fensi;
    }

    public void setReadNum(String readNum) {
        this.readNum = readNum;
    }

    public void setPostNum(String postNum) {
        this.postNum = postNum;
    }

    public void setType(String type) {
        this.type = type;
    }

    // ---- accessors for the key and timestamp fields ----

    public String getId() {
        return id;
    }

    public String getDay() {
        return day;
    }

    public Date getTime() {
        return time;
    }

    public void setId(String id) {
        this.id = id;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public void setTime(Date time) {
        this.time = time;
    }
}
package com.zhiwei.searchhotcrawler.cache; package com.zhiwei.searchhotcrawler.cache;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public class CacheListener { public class CacheListener {
Logger logger = LoggerFactory.getLogger(CacheListener.class);
/**
* 开启缓存监听
*/
public void startListen() { public void startListen() {
new Thread(){ new Thread(){
public void run() { public void run() {
...@@ -17,7 +20,7 @@ public class CacheListener { ...@@ -17,7 +20,7 @@ public class CacheListener {
for(String key : CacheManager.getAllKeys()) { for(String key : CacheManager.getAllKeys()) {
if (CacheManager.isTimeOut(key)) { if (CacheManager.isTimeOut(key)) {
CacheManager.clearByKey(key); CacheManager.clearByKey(key);
logger.info(key + "缓存被清除"); log.info(key + "缓存被清除");
} }
} }
} }
......
...@@ -5,6 +5,7 @@ import java.util.Collections; ...@@ -5,6 +5,7 @@ import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
...@@ -24,16 +25,15 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType; ...@@ -24,16 +25,15 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
* @author hero * @author hero
* @date 2019年7月10日 上午10:54:31 * @date 2019年7月10日 上午10:54:31
*/ */
@Log4j2
public class BaiDuHotSearchCrawler { public class BaiDuHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(BaiDuHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: BaiDuHotSearchTest * @Title: BaiDuHotSearchTest
* @author hero * @author hero
* @Description: TODO(PC端百度风云榜采集) * @Description: PC端百度风云榜采集
* @param 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<HotSearchList> baiduHotSearch() { public static List<HotSearchList> baiduHotSearch() {
...@@ -43,10 +43,10 @@ public class BaiDuHotSearchCrawler { ...@@ -43,10 +43,10 @@ public class BaiDuHotSearchCrawler {
if (htmlBody != null && htmlBody.contains("mainBody")) { if (htmlBody != null && htmlBody.contains("mainBody")) {
return ansysData(htmlBody); return ansysData(htmlBody);
} else { } else {
logger.info("解析百度风云榜时出现解析错误,页面结构有问题"); log.info("解析百度风云榜时出现解析错误,页面结构有问题");
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析百度风云榜时出现解析错误,页面结构有问题", e); log.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
} }
return Collections.emptyList(); return Collections.emptyList();
} }
...@@ -101,12 +101,12 @@ public class BaiDuHotSearchCrawler { ...@@ -101,12 +101,12 @@ public class BaiDuHotSearchCrawler {
list.add(hotSearch); list.add(hotSearch);
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析百度风云榜时出现解析错误", e); log.error("解析百度风云榜时出现解析错误", e);
} }
}); });
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析百度风云榜时出现解析错误,数据不是json结构", e); log.error("解析百度风云榜时出现解析错误,数据不是json结构", e);
} }
return list; return list;
} }
......
...@@ -4,6 +4,7 @@ import java.io.IOException; ...@@ -4,6 +4,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -24,9 +25,9 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType; ...@@ -24,9 +25,9 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
* @author win 10 * @author win 10
* @date:2019年07月11日 上午10:26:21 * @date:2019年07月11日 上午10:26:21
*/ */
@Log4j2
public class DouyinHotSearchCrawler { public class DouyinHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
...@@ -66,7 +67,7 @@ public class DouyinHotSearchCrawler { ...@@ -66,7 +67,7 @@ public class DouyinHotSearchCrawler {
} }
} }
} catch (IOException e) { } catch (IOException e) {
logger.debug("获取抖音热搜榜时出现问题:{}", e); log.debug("获取抖音热搜榜时出现问题:{}", e);
} }
return list; return list;
} }
......
...@@ -6,6 +6,7 @@ import java.util.List; ...@@ -6,6 +6,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
...@@ -27,16 +28,15 @@ import com.zhiwei.tools.httpclient.HeaderTool; ...@@ -27,16 +28,15 @@ import com.zhiwei.tools.httpclient.HeaderTool;
* @author hero * @author hero
* @date 2019年7月10日 上午10:54:31 * @date 2019年7月10日 上午10:54:31
*/ */
@Log4j2
public class SougoHotSearchCrawler { public class SougoHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(SougoHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: SougoHotSearchTest * @Title: SougoHotSearchTest
* @author hero * @author hero
* @Description: TODO(PC端搜狗微信关键词采集) * @Description: TODO(PC端搜狗微信关键词采集)
* @param 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<HotSearchList> sougoHotSearch() { public static List<HotSearchList> sougoHotSearch() {
...@@ -76,19 +76,19 @@ public class SougoHotSearchCrawler { ...@@ -76,19 +76,19 @@ public class SougoHotSearchCrawler {
list.add(hotSearch); list.add(hotSearch);
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析搜狗微信时出现解析错误", e); log.error("解析搜狗微信时出现解析错误", e);
} }
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析搜狗微信时出现解析错误,数据不是json结构", e.fillInStackTrace()); log.error("解析搜狗微信时出现解析错误,数据不是json结构", e.fillInStackTrace());
return Collections.emptyList(); return Collections.emptyList();
} }
} else { } else {
logger.info("解析搜狗微信时出现解析错误,页面结构有问题"); log.info("解析搜狗微信时出现解析错误,页面结构有问题");
} }
break; break;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析搜狗微信时出现解析错误,页面结构有问题", e); log.error("解析搜狗微信时出现解析错误,页面结构有问题", e);
} }
} }
return list; return list;
......
...@@ -7,6 +7,7 @@ import java.util.HashMap; ...@@ -7,6 +7,7 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
...@@ -27,13 +28,13 @@ import com.zhiwei.tools.tools.URLCodeUtil; ...@@ -27,13 +28,13 @@ import com.zhiwei.tools.tools.URLCodeUtil;
/** /**
* @ClassName: WeiboHotSearch * @ClassName: WeiboHotSearch
* @Description: TODO(微博实时热搜采集) * @Description: 微博实时热搜采集
* @author hero * @author hero
* @date 2017年9月15日 上午10:54:31 * @date 2017年9月15日 上午10:54:31
*/ */
@Log4j2
public class WeiboHotSearchCrawler { public class WeiboHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: weiboHotSearchTest * @Title: weiboHotSearchTest
...@@ -70,18 +71,18 @@ public class WeiboHotSearchCrawler { ...@@ -70,18 +71,18 @@ public class WeiboHotSearchCrawler {
list.add(hotSearch); list.add(hotSearch);
} catch (Exception e) { } catch (Exception e) {
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
logger.error("解析微博时时热搜时出现解析错误", e); log.error("解析微博时时热搜时出现解析错误", e);
continue; continue;
} }
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析微博时时热搜时出现解析错误,数据不是json结构",e.fillInStackTrace()); log.error("解析微博时时热搜时出现解析错误,数据不是json结构",e.fillInStackTrace());
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
return null; return null;
} }
}else{ }else{
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
logger.info("解析微博时时热搜时出现解析错误,页面结构有问题"); log.info("解析微博时时热搜时出现解析错误,页面结构有问题");
} }
break; break;
} catch (Exception e) { } catch (Exception e) {
...@@ -138,25 +139,25 @@ public class WeiboHotSearchCrawler { ...@@ -138,25 +139,25 @@ public class WeiboHotSearchCrawler {
} }
String id = "http://s.weibo.com/weibo/"+URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top"; String id = "http://s.weibo.com/weibo/"+URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name(), icon); HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name(), icon);
logger.info("采集到的数据:::{}", hotSearch); log.info("采集到的数据:::{}", hotSearch);
result.add(hotSearch); result.add(hotSearch);
rank++; rank++;
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析微博时时热搜时出现解析错误",e); log.error("解析微博时时热搜时出现解析错误",e);
continue; continue;
} }
} }
return result; return result;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析微博时时热搜时出现解析错误,数据不是json结构",e); log.error("解析微博时时热搜时出现解析错误,数据不是json结构",e);
return Collections.emptyList(); return Collections.emptyList();
} }
}else{ }else{
logger.info("解析微博时时热搜时出现解析错误,页面结构有问题"); log.info("解析微博时时热搜时出现解析错误,页面结构有问题");
} }
} catch (IOException e1) { } catch (IOException e1) {
logger.error("解析微博时时热搜时出现连接失败",e1); log.error("解析微博时时热搜时出现连接失败",e1);
return Collections.emptyList(); return Collections.emptyList();
} }
return Collections.emptyList(); return Collections.emptyList();
......
...@@ -8,6 +8,8 @@ import java.util.Map; ...@@ -8,6 +8,8 @@ import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Objects; import java.util.Objects;
import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -17,18 +19,17 @@ import com.alibaba.fastjson.JSONObject; ...@@ -17,18 +19,17 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.WeiboTopic;
/** /**
* *
* @ClassName: WeiboHuatiCrawler * @ClassName: WeiboSuperTopicCrawler
* @Description: 微博话题榜单采集(明星) * @Description: 微博超话榜单采集(明星)
* @author Bewilder ZW * @author Bewilder ZW
* @date 2019年9月27日 下午3:01:34 * @date 2019年9月27日 下午3:01:34
*/ */
public class WeiboHuatiCrawler { @Log4j2
public class WeiboSuperTopicCrawler {
private static Logger logger = LoggerFactory.getLogger(WeiboHuatiCrawler.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static Map<String,String> headMap = new HashMap<>(); private static Map<String,String> headMap = new HashMap<>();
...@@ -44,13 +45,13 @@ public class WeiboHuatiCrawler { ...@@ -44,13 +45,13 @@ public class WeiboHuatiCrawler {
* 开始采集明星话题 * 开始采集明星话题
* @return void * @return void
*/ */
public static List<WeiboTopic> startCrawler() { public static List<WeiboSuperTopic> startCrawler() {
Map<String,String> urlMap = new HashMap<>(); Map<String,String> urlMap = new HashMap<>();
urlMap.put("明星", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=star&from=&wm="); urlMap.put("明星", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=star&from=&wm=");
urlMap.put("明星潜力", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=potential&from=&wm="); urlMap.put("明星潜力", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=potential&from=&wm=");
urlMap.put("明星上升", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=up&from=&wm="); urlMap.put("明星上升", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=up&from=&wm=");
List<WeiboTopic> topicList = new ArrayList<>(); List<WeiboSuperTopic> topicList = new ArrayList<>();
for(Entry<String,String> entry : urlMap.entrySet()) { for(Entry<String,String> entry : urlMap.entrySet()) {
String url = entry.getValue(); String url = entry.getValue();
...@@ -66,10 +67,10 @@ public class WeiboHuatiCrawler { ...@@ -66,10 +67,10 @@ public class WeiboHuatiCrawler {
topicList.addAll(parseTopicRankHtml(page, htmlBody, type)); topicList.addAll(parseTopicRankHtml(page, htmlBody, type));
break; break;
}else { }else {
logger.error("获取榜单列表页面时数据格式错误,页面为:{}", htmlBody); log.error("获取榜单列表页面时数据格式错误,页面为:{}", htmlBody);
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("获取榜单列表页面时出现错误,错误为:{}", e); log.error("获取榜单列表页面时出现错误,错误为:{}", e);
continue; continue;
} }
} }
...@@ -87,13 +88,13 @@ public class WeiboHuatiCrawler { ...@@ -87,13 +88,13 @@ public class WeiboHuatiCrawler {
* @param type * @param type
* @return void * @return void
*/ */
private static List<WeiboTopic> parseTopicRankHtml(int page,String htmlBody, String type) { private static List<WeiboSuperTopic> parseTopicRankHtml(int page,String htmlBody, String type) {
try { try {
JSONArray list = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONArray("list"); JSONArray list = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONArray("list");
if(Objects.nonNull(list) && !list.isEmpty()) { if(Objects.nonNull(list) && !list.isEmpty()) {
page = (page-1)*20; page = (page-1)*20;
List<WeiboTopic> topicList = new ArrayList<>(); List<WeiboSuperTopic> topicList = new ArrayList<>();
Integer toprank = null; Integer toprank = null;
String topicName = null; String topicName = null;
String id = null; String id = null;
...@@ -111,7 +112,7 @@ public class WeiboHuatiCrawler { ...@@ -111,7 +112,7 @@ public class WeiboHuatiCrawler {
fensi = desc1.replaceAll(".*影响力|粉丝", "").trim(); fensi = desc1.replaceAll(".*影响力|粉丝", "").trim();
url = data.getString("link"); url = data.getString("link");
WeiboTopic topic = new WeiboTopic(url, topicName, toprank, score, fensi, type); WeiboSuperTopic topic = new WeiboSuperTopic(url, topicName, toprank, score, fensi, type);
topic = getTopicInfo(id, topic); topic = getTopicInfo(id, topic);
System.out.println("topic====="+topic); System.out.println("topic====="+topic);
topicList.add(topic); topicList.add(topic);
...@@ -119,7 +120,7 @@ public class WeiboHuatiCrawler { ...@@ -119,7 +120,7 @@ public class WeiboHuatiCrawler {
return topicList; return topicList;
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析榜单列表页面时出现错误,错误为:{}", e); log.error("解析榜单列表页面时出现错误,错误为:{}", e);
} }
return Collections.emptyList(); return Collections.emptyList();
} }
...@@ -134,7 +135,7 @@ public class WeiboHuatiCrawler { ...@@ -134,7 +135,7 @@ public class WeiboHuatiCrawler {
* @return * @return
* @return WeiboTopic * @return WeiboTopic
*/ */
private static WeiboTopic getTopicInfo(String id, WeiboTopic topic) { private static WeiboSuperTopic getTopicInfo(String id, WeiboSuperTopic topic) {
for(int retryTimes=1; retryTimes<=3; retryTimes++) { for(int retryTimes=1; retryTimes<=3; retryTimes++) {
try { try {
String url = "https://m.weibo.cn/api/container/getIndex?containerid="+ id; String url = "https://m.weibo.cn/api/container/getIndex?containerid="+ id;
...@@ -151,7 +152,7 @@ public class WeiboHuatiCrawler { ...@@ -151,7 +152,7 @@ public class WeiboHuatiCrawler {
} }
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析榜单详情页面时出现错误,错误为:{}", e); log.error("解析榜单详情页面时出现错误,错误为:{}", e);
} }
} }
return topic; return topic;
......
...@@ -5,6 +5,7 @@ import java.util.ArrayList; ...@@ -5,6 +5,7 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -24,15 +25,14 @@ import com.zhiwei.tools.tools.URLCodeUtil; ...@@ -24,15 +25,14 @@ import com.zhiwei.tools.tools.URLCodeUtil;
* @author hero * @author hero
* @date 2017年9月15日 上午10:54:31 * @date 2017年9月15日 上午10:54:31
*/ */
@Log4j2
public class ZhihuHotSearchCrawler { public class ZhihuHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchCrawler.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: getZhihuHotList * @Title: getZhihuHotList
* @author hero * @author hero
* @Description: 知乎热搜采集程序 * @Description: 知乎热搜采集程序
* @param 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<HotSearchList> getZhihuHotList(){ public static List<HotSearchList> getZhihuHotList(){
...@@ -65,7 +65,7 @@ public class ZhihuHotSearchCrawler { ...@@ -65,7 +65,7 @@ public class ZhihuHotSearchCrawler {
} }
} }
} catch (IOException e) { } catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e); log.debug("获取知乎热搜时出现问题:{}", e);
return list; return list;
} }
return list; return list;
...@@ -106,7 +106,7 @@ public class ZhihuHotSearchCrawler { ...@@ -106,7 +106,7 @@ public class ZhihuHotSearchCrawler {
} }
} }
} catch (IOException e) { } catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e); log.debug("获取知乎热搜时出现问题:{}", e);
return list; return list;
} }
return list; return list;
......
...@@ -4,7 +4,9 @@ package com.zhiwei.searchhotcrawler.dao; ...@@ -4,7 +4,9 @@ package com.zhiwei.searchhotcrawler.dao;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Objects;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -17,8 +19,8 @@ import com.zhiwei.searchhotcrawler.config.Config; ...@@ -17,8 +19,8 @@ import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate; import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
@Log4j2
public class HotSearchListDAO extends MongoDBTemplate{ public class HotSearchListDAO extends MongoDBTemplate{
private static Logger logger = LoggerFactory.getLogger(HotSearchListDAO.class);
public HotSearchListDAO() { public HotSearchListDAO() {
super(); super();
...@@ -28,6 +30,19 @@ public class HotSearchListDAO extends MongoDBTemplate{ ...@@ -28,6 +30,19 @@ public class HotSearchListDAO extends MongoDBTemplate{
String month = time.substring(5,7); String month = time.substring(5,7);
String collName = Config.searchCollName + year + "_" + month; String collName = Config.searchCollName + year + "_" + month;
super.setCollName(collName); super.setCollName(collName);
//给数据表创建索引
createIndex();
}
/**
* 初次创建表及创建相应的索引
*/
private void createIndex(){
List<DBObject> indexList = this.getReadColl().getIndexInfo();
if(Objects.isNull(indexList) && indexList.isEmpty()){
DBObject countIndexDoc = new BasicDBObject(); DBObject countIndexDoc = new BasicDBObject();
countIndexDoc.put("count", -1); countIndexDoc.put("count", -1);
DBObject timeIndexDoc = new BasicDBObject(); DBObject timeIndexDoc = new BasicDBObject();
...@@ -48,6 +63,7 @@ public class HotSearchListDAO extends MongoDBTemplate{ ...@@ -48,6 +63,7 @@ public class HotSearchListDAO extends MongoDBTemplate{
e.printStackTrace(); e.printStackTrace();
} }
} }
}
/** /**
* 添加数据入库 * 添加数据入库
...@@ -57,7 +73,7 @@ public class HotSearchListDAO extends MongoDBTemplate{ ...@@ -57,7 +73,7 @@ public class HotSearchListDAO extends MongoDBTemplate{
try { try {
this.getReadColl().insert(list); this.getReadColl().insert(list);
} catch (Exception e) { } catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
} }
...@@ -65,7 +81,7 @@ public class HotSearchListDAO extends MongoDBTemplate{ ...@@ -65,7 +81,7 @@ public class HotSearchListDAO extends MongoDBTemplate{
try { try {
this.getReadColl().insert(doc); this.getReadColl().insert(doc);
} catch (Exception e) { } catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
} }
...@@ -94,7 +110,7 @@ public class HotSearchListDAO extends MongoDBTemplate{ ...@@ -94,7 +110,7 @@ public class HotSearchListDAO extends MongoDBTemplate{
} }
cur.close(); cur.close();
} catch (Exception e) { } catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
return result; return result;
} }
return result; return result;
...@@ -128,7 +144,7 @@ public class HotSearchListDAO extends MongoDBTemplate{ ...@@ -128,7 +144,7 @@ public class HotSearchListDAO extends MongoDBTemplate{
} }
cur.close(); cur.close();
} catch (Exception e) { } catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
return list; return list;
} }
......
...@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.dao; ...@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.dao;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -12,8 +13,8 @@ import com.zhiwei.searchhotcrawler.config.Config; ...@@ -12,8 +13,8 @@ import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate; import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
@Log4j2
public class WechatUserDao extends MongoDBTemplate{ public class WechatUserDao extends MongoDBTemplate{
private static Logger logger = LoggerFactory.getLogger(BaiDuHotSearchCrawler.class);
public WechatUserDao() { public WechatUserDao() {
super(); super();
...@@ -39,7 +40,7 @@ public class WechatUserDao extends MongoDBTemplate{ ...@@ -39,7 +40,7 @@ public class WechatUserDao extends MongoDBTemplate{
this.getReadColl().save(doc); this.getReadColl().save(doc);
break; break;
} catch (Exception e) { } catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
} }
} }
...@@ -61,7 +62,7 @@ public class WechatUserDao extends MongoDBTemplate{ ...@@ -61,7 +62,7 @@ public class WechatUserDao extends MongoDBTemplate{
return (List<String>)doc.get("user"); return (List<String>)doc.get("user");
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
return Collections.emptyList(); return Collections.emptyList();
} }
......
...@@ -3,7 +3,9 @@ package com.zhiwei.searchhotcrawler.dao; ...@@ -3,7 +3,9 @@ package com.zhiwei.searchhotcrawler.dao;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Objects;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -13,10 +15,10 @@ import com.zhiwei.searchhotcrawler.config.Config; ...@@ -13,10 +15,10 @@ import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate; import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
public class WeiboTopicDAO extends MongoDBTemplate{ @Log4j2
private static Logger logger = LoggerFactory.getLogger(WeiboTopicDAO.class); public class WeiboSuperTopicDAO extends MongoDBTemplate{
public WeiboTopicDAO() { public WeiboSuperTopicDAO() {
super(); super();
super.setDbName(Config.dbName); super.setDbName(Config.dbName);
String time = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd"); String time = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
...@@ -25,6 +27,16 @@ public class WeiboTopicDAO extends MongoDBTemplate{ ...@@ -25,6 +27,16 @@ public class WeiboTopicDAO extends MongoDBTemplate{
String collName = Config.topicCollName + year + "_" + month; String collName = Config.topicCollName + year + "_" + month;
super.setCollName(collName); super.setCollName(collName);
createIndex();
}
/**
* 初次创建表及创建相应的索引
*/
private void createIndex(){
List<DBObject> indexList = this.getReadColl().getIndexInfo();
if(Objects.isNull(indexList) && indexList.isEmpty()){
DBObject countIndexDoc = new BasicDBObject(); DBObject countIndexDoc = new BasicDBObject();
countIndexDoc.put("score_num", -1); countIndexDoc.put("score_num", -1);
DBObject timeIndexDoc = new BasicDBObject(); DBObject timeIndexDoc = new BasicDBObject();
...@@ -45,6 +57,8 @@ public class WeiboTopicDAO extends MongoDBTemplate{ ...@@ -45,6 +57,8 @@ public class WeiboTopicDAO extends MongoDBTemplate{
e.printStackTrace(); e.printStackTrace();
} }
} }
}
/** /**
* 添加数据入库 * 添加数据入库
...@@ -54,7 +68,7 @@ public class WeiboTopicDAO extends MongoDBTemplate{ ...@@ -54,7 +68,7 @@ public class WeiboTopicDAO extends MongoDBTemplate{
try { try {
this.getReadColl().insert(list); this.getReadColl().insert(list);
} catch (Exception e) { } catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
} }
...@@ -62,7 +76,7 @@ public class WeiboTopicDAO extends MongoDBTemplate{ ...@@ -62,7 +76,7 @@ public class WeiboTopicDAO extends MongoDBTemplate{
try { try {
this.getReadColl().insert(doc); this.getReadColl().insert(doc);
} catch (Exception e) { } catch (Exception e) {
logger.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
} }
......
package com.zhiwei.searchhotcrawler.run; package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.cache.CacheListener; import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.config.ProxyConfig; import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun; import com.zhiwei.searchhotcrawler.timer.*;
import com.zhiwei.searchhotcrawler.timer.DouyinHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SougoHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun;
import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.WeiboTopicRun;
import com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
...@@ -24,7 +16,9 @@ public class HotSearchRun { ...@@ -24,7 +16,9 @@ public class HotSearchRun {
public static void main(String[] args) { public static void main(String[] args) {
ProxyFactory.init(ProxyConfig.registry, ProxyConfig.group, GroupType.PROVIDER, 10000013); SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
new UpdateWechatUserRun().start(); new UpdateWechatUserRun().start();
ZhiWeiTools.sleep(10000); ZhiWeiTools.sleep(10000);
...@@ -51,6 +45,7 @@ public class HotSearchRun { ...@@ -51,6 +45,7 @@ public class HotSearchRun {
new SougoHotSearchRun().start(); new SougoHotSearchRun().start();
new DouyinHotSearchRun().start(); new DouyinHotSearchRun().start();
new ZhihuHotSearchRun().start(); new ZhihuHotSearchRun().start();
new WeiboSuperTopicRun().start();
new WeiboTopicRun().start(); new WeiboTopicRun().start();
//推送程序启动 //推送程序启动
new SendWeiboHotSearchRun().start(); new SendWeiboHotSearchRun().start();
......
...@@ -16,90 +16,125 @@ import com.mongodb.MongoClient; ...@@ -16,90 +16,125 @@ import com.mongodb.MongoClient;
import com.mongodb.MongoCredential; import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress; import com.mongodb.ServerAddress;
import com.mongodb.WriteResult; import com.mongodb.WriteResult;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.config.Config; import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class HotSearchListTest{ public class HotSearchListTest{
public static void main(String[] args) { public static void main(String[] args) {
MongoCredential credential = MongoCredential.createCredential(Config.userName, Config.authDB, Config.userPwd.toCharArray()); SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort); .group(ProxyConfig.group).appId(10000013).appName("zzw").build();
Mongo mongo = new MongoClient(address, Arrays.asList(credential)); ProxyFactory.init(simpleConfig);
DB db = mongo.getDB("hot_search_list"); String url = "http://app.myzaker.com/news/app.php?f=";
DBCollection coll = db.getCollection("hot_search_list2019_09"); HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
try{
// MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray()); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_HEAVY_PROXY).body().string();
// ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort); Elements elements = Jsoup.parse(htmlBody).select("div.titlebar>a");
// Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew)); for(Element element : elements){
// DB dbNew = mongoNew.getDB("hot_search_list"); String lableUrl = "http://app.myzaker.com/news/app.php" + element.attr("href");
System.out.println("lableUrl========="+lableUrl);
Map<String,String> timLine = TimeParse.getTimeMap("2019-10-01 00:00:00", "2019-10-09 23:59:59", "dd", 1); String htmlBodyLable = httpBoot.syncCall(RequestUtils.wrapGet(lableUrl), ProxyHolder.NAT_HEAVY_PROXY).body().string();
Elements elementsLable = Jsoup.parse(htmlBodyLable).select("div#infinite_scroll>a");
for(Element elementLable : elementsLable){
System.out.println(elementLable.attr("href") + "=============" + elementLable.text());
}
}
timLine.forEach((start, end) ->{ }catch (Exception e){
e.printStackTrace();
}
String year = end.substring(0,4);
String month = end.substring(5,7);
Date startDate = TimeParse.stringFormartDate(start);
Date endDate = TimeParse.stringFormartDate(end);
String collName = "hot_search_list"+year+"_"+month;
System.out.println("collName=========="+collName);
// DBCollection collNew = dbNew.getCollection(collName);
// DBObject countIndexDoc = new BasicDBObject();
// countIndexDoc.put("count", -1);
// DBObject timeIndexDoc = new BasicDBObject();
// timeIndexDoc.put("time", -1);
// DBObject rankIndexDoc = new BasicDBObject();
// rankIndexDoc.put("rank", -1);
// DBObject nameIndexDoc = new BasicDBObject();
// nameIndexDoc.put("name", -1);
// DBObject typeIndexDoc = new BasicDBObject();
// typeIndexDoc.put("type", -1);
// try {
// collNew.createIndex(countIndexDoc, new BasicDBObject("name", "count_desc"));
// collNew.createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
// collNew.createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
// collNew.createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
// collNew.createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
// } catch (Exception e) {
// e.printStackTrace();
// }
DBObject query = new BasicDBObject(new BasicDBObject("time", // MongoCredential credential = MongoCredential.createCredential(Config.userName, Config.authDB, Config.userPwd.toCharArray());
new BasicDBObject("$gte",startDate).append("$lte", endDate))); // ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);
System.out.println(query); // Mongo mongo = new MongoClient(address, Arrays.asList(credential));
WriteResult wr = coll.remove(query); //
System.out.println("========"+wr.getN()); // DB db = mongo.getDB("hot_search_list");
// int i = 0; // DBCollection coll = db.getCollection("hot_search_list2019_09");
// DBCursor cur = coll.remove(query); //
// System.out.println(query +"======="+ cur.count()); //// MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());
// List<DBObject> dataList = new ArrayList<>(); //// ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort);
// while(cur.hasNext()) { //// Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew));
// DBObject doc = cur.next(); //// DB dbNew = mongoNew.getDB("hot_search_list");
// try { //
//// collNew.save(doc); // Map<String,String> timLine = TimeParse.getTimeMap("2019-10-01 00:00:00", "2019-10-09 23:59:59", "dd", 1);
// i++; //
// coll.remove(doc); // timLine.forEach((start, end) ->{
// } catch (Exception e2) { //
// e2.printStackTrace(); // String year = end.substring(0,4);
// } // String month = end.substring(5,7);
// dataList.add(doc); // Date startDate = TimeParse.stringFormartDate(start);
// } // Date endDate = TimeParse.stringFormartDate(end);
// System.out.println(collName +"数据量大小" +dataList.size()); //
// cur.close(); // String collName = "hot_search_list"+year+"_"+month;
// if(!dataList.isEmpty()) { // System.out.println("collName=========="+collName);
// try { //// DBCollection collNew = dbNew.getCollection(collName);
// collNew.insert(dataList); //// DBObject countIndexDoc = new BasicDBObject();
// } catch (Exception e) { //// countIndexDoc.put("count", -1);
// e.printStackTrace(); //// DBObject timeIndexDoc = new BasicDBObject();
// } //// timeIndexDoc.put("time", -1);
// } //// DBObject rankIndexDoc = new BasicDBObject();
}); //// rankIndexDoc.put("rank", -1);
mongo.close(); //// DBObject nameIndexDoc = new BasicDBObject();
//// nameIndexDoc.put("name", -1);
//// DBObject typeIndexDoc = new BasicDBObject();
//// typeIndexDoc.put("type", -1);
//// try {
//// collNew.createIndex(countIndexDoc, new BasicDBObject("name", "count_desc"));
//// collNew.createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
//// collNew.createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
//// collNew.createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
//// collNew.createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
//// } catch (Exception e) {
//// e.printStackTrace();
//// }
//
// DBObject query = new BasicDBObject(new BasicDBObject("time",
// new BasicDBObject("$gte",startDate).append("$lte", endDate)));
// System.out.println(query);
// WriteResult wr = coll.remove(query);
// System.out.println("========"+wr.getN());
//// int i = 0;
//// DBCursor cur = coll.remove(query);
//// System.out.println(query +"======="+ cur.count());
//// List<DBObject> dataList = new ArrayList<>();
//// while(cur.hasNext()) {
//// DBObject doc = cur.next();
//// try {
////// collNew.save(doc);
//// i++;
//// coll.remove(doc);
//// } catch (Exception e2) {
//// e2.printStackTrace();
//// }
//// dataList.add(doc);
//// }
//// System.out.println(collName +"数据量大小" +dataList.size());
//// cur.close();
//// if(!dataList.isEmpty()) {
//// try {
//// collNew.insert(dataList);
//// } catch (Exception e) {
//// e.printStackTrace();
//// }
//// }
// });
// mongo.close();
} }
......
...@@ -6,6 +6,7 @@ import java.util.List; ...@@ -6,6 +6,7 @@ import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -16,10 +17,9 @@ import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler; ...@@ -16,10 +17,9 @@ import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public class BaiduHotSearchRun extends Thread{ public class BaiduHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(BaiduHotSearchRun.class);
@Override @Override
public void run() { public void run() {
boolean f = true; boolean f = true;
...@@ -37,10 +37,10 @@ public class BaiduHotSearchRun extends Thread{ ...@@ -37,10 +37,10 @@ public class BaiduHotSearchRun extends Thread{
private void getHotList() { private void getHotList() {
logger.info("百度风云榜采集开始........"); log.info("百度风云榜采集开始........");
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch(); List<HotSearchList> list = BaiDuHotSearchCrawler.baiduHotSearch();
logger.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 此轮百度风云榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> saveDataList = new ArrayList<>(); List<DBObject> saveDataList = new ArrayList<>();
if(Objects.nonNull(list) && !list.isEmpty()) { if(Objects.nonNull(list) && !list.isEmpty()) {
list.forEach(baiduHotSearch ->{ list.forEach(baiduHotSearch ->{
...@@ -59,7 +59,7 @@ public class BaiduHotSearchRun extends Thread{ ...@@ -59,7 +59,7 @@ public class BaiduHotSearchRun extends Thread{
}); });
} }
hotSearchDAO.addHotSearchList(saveDataList); hotSearchDAO.addHotSearchList(saveDataList);
logger.info("百度风云榜采集结束........"); log.info("百度风云榜采集结束........");
} }
} }
\ No newline at end of file
...@@ -5,6 +5,7 @@ import java.util.Date; ...@@ -5,6 +5,7 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -15,10 +16,9 @@ import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler; ...@@ -15,10 +16,9 @@ import com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public class DouyinHotSearchRun extends Thread{ public class DouyinHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class);
@Override @Override
public void run() { public void run() {
boolean f = true; boolean f = true;
...@@ -40,10 +40,10 @@ public class DouyinHotSearchRun extends Thread{ ...@@ -40,10 +40,10 @@ public class DouyinHotSearchRun extends Thread{
* @return void * @return void
*/ */
private void getHotList() { private void getHotList() {
logger.info("抖音热搜榜采集开始........"); log.info("抖音热搜榜采集开始........");
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList(); List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<DBObject> data = new ArrayList<>();
for(HotSearchList douyinHotSearch : list){ for(HotSearchList douyinHotSearch : list){
int changeCount = hotSearchDAO.getChangeCount(douyinHotSearch); int changeCount = hotSearchDAO.getChangeCount(douyinHotSearch);
...@@ -60,7 +60,7 @@ public class DouyinHotSearchRun extends Thread{ ...@@ -60,7 +60,7 @@ public class DouyinHotSearchRun extends Thread{
data.add(douyin); data.add(douyin);
hotSearchDAO.addHotSearch(douyin); hotSearchDAO.addHotSearch(douyin);
} }
logger.info("抖音热搜榜采集结束........"); log.info("抖音热搜榜采集结束........");
} }
} }
...@@ -6,6 +6,7 @@ import java.util.HashMap; ...@@ -6,6 +6,7 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -20,17 +21,17 @@ import com.zhiwei.searchhotcrawler.util.WechatConstant; ...@@ -20,17 +21,17 @@ import com.zhiwei.searchhotcrawler.util.WechatConstant;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public class SendWeiboHotSearchRun extends Thread { public class SendWeiboHotSearchRun extends Thread {
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
private static WechatUserDao wechatUserDao = new WechatUserDao(); private static WechatUserDao wechatUserDao = new WechatUserDao();
private static Logger logger = LoggerFactory.getLogger(SendWeiboHotSearchRun.class);
@Override @Override
public void run() { public void run() {
while (true) { while (true) {
try { try {
Calendar calendar = Calendar.getInstance(); Calendar calendar = Calendar.getInstance();
int hour = calendar.get(Calendar.HOUR_OF_DAY); int hour = calendar.get(Calendar.HOUR_OF_DAY);
logger.info("微博推送,当前系统时间为:" + hour); log.info("微博推送,当前系统时间为:" + hour);
if (hour > 6 && hour < 23) { if (hour > 6 && hour < 23) {
List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.微博热搜.name()); List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.微博热搜.name());
if (list != null && !list.isEmpty()) { if (list != null && !list.isEmpty()) {
...@@ -41,14 +42,14 @@ public class SendWeiboHotSearchRun extends Thread { ...@@ -41,14 +42,14 @@ public class SendWeiboHotSearchRun extends Thread {
sendTemplateByUserIds(title, time, url); sendTemplateByUserIds(title, time, url);
} }
} else { } else {
logger.info("微博最近一小时无数据"); log.info("微博最近一小时无数据");
sendTemplateByUserIds("最近一小时无数据", sendTemplateByUserIds("最近一小时无数据",
TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null); TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
} }
} }
ZhiWeiTools.sleep(1 * 60 * 60 * 1000); ZhiWeiTools.sleep(1 * 60 * 60 * 1000);
} catch (Exception e) { } catch (Exception e) {
logger.debug("微博热搜推送出现问题,问题为:::{}", e.fillInStackTrace()); log.debug("微博热搜推送出现问题,问题为:::{}", e.fillInStackTrace());
ZhiWeiTools.sleep(1 * 60 * 60 * 1000); ZhiWeiTools.sleep(1 * 60 * 60 * 1000);
continue; continue;
} }
...@@ -100,7 +101,7 @@ public class SendWeiboHotSearchRun extends Thread { ...@@ -100,7 +101,7 @@ public class SendWeiboHotSearchRun extends Thread {
WechatCodeUtil.sendDataJson(templateJson); WechatCodeUtil.sendDataJson(templateJson);
} }
} else { } else {
logger.info("拉取微博用户列表失败"); log.info("拉取微博用户列表失败");
} }
} }
......
...@@ -6,6 +6,7 @@ import java.util.HashMap; ...@@ -6,6 +6,7 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -20,10 +21,10 @@ import com.zhiwei.searchhotcrawler.util.WechatConstant; ...@@ -20,10 +21,10 @@ import com.zhiwei.searchhotcrawler.util.WechatConstant;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public class SendZhihuHotSearchRun extends Thread{ public class SendZhihuHotSearchRun extends Thread{
private HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
private static WechatUserDao wechatUserDao = new WechatUserDao(); private static WechatUserDao wechatUserDao = new WechatUserDao();
private static Logger logger = LoggerFactory.getLogger(SendZhihuHotSearchRun.class);
@Override @Override
public void run() { public void run() {
...@@ -31,7 +32,7 @@ public class SendZhihuHotSearchRun extends Thread{ ...@@ -31,7 +32,7 @@ public class SendZhihuHotSearchRun extends Thread{
try { try {
Calendar calendar = Calendar.getInstance(); Calendar calendar = Calendar.getInstance();
int hour = calendar.get(Calendar.HOUR_OF_DAY); int hour = calendar.get(Calendar.HOUR_OF_DAY);
logger.info("知乎推送,当前系统时间为:"+hour); log.info("知乎推送,当前系统时间为:"+hour);
if(hour > 6 && hour <23){ if(hour > 6 && hour <23){
List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.知乎热搜.name()); List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.知乎热搜.name());
if(list!=null && !list.isEmpty()){ if(list!=null && !list.isEmpty()){
...@@ -44,13 +45,13 @@ public class SendZhihuHotSearchRun extends Thread{ ...@@ -44,13 +45,13 @@ public class SendZhihuHotSearchRun extends Thread{
} }
} }
}else{ }else{
logger.info("知乎最近一小时无数据"); log.info("知乎最近一小时无数据");
sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null); sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
} }
} }
ZhiWeiTools.sleep(1*60*60*1000); ZhiWeiTools.sleep(1*60*60*1000);
} catch (Exception e) { } catch (Exception e) {
logger.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace()); log.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools.sleep(1*60*60*1000); ZhiWeiTools.sleep(1*60*60*1000);
} }
} }
...@@ -101,7 +102,7 @@ public class SendZhihuHotSearchRun extends Thread{ ...@@ -101,7 +102,7 @@ public class SendZhihuHotSearchRun extends Thread{
WechatCodeUtil.sendDataJson(templateJson); WechatCodeUtil.sendDataJson(templateJson);
} }
}else { }else {
logger.info("知乎推送拉取用户列表失败"); log.info("知乎推送拉取用户列表失败");
} }
} }
......
...@@ -5,6 +5,7 @@ import java.util.Date; ...@@ -5,6 +5,7 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -15,8 +16,8 @@ import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler; ...@@ -15,8 +16,8 @@ import com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public class SougoHotSearchRun extends Thread { public class SougoHotSearchRun extends Thread {
private static Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class);
@Override @Override
public void run() { public void run() {
...@@ -36,9 +37,9 @@ public class SougoHotSearchRun extends Thread { ...@@ -36,9 +37,9 @@ public class SougoHotSearchRun extends Thread {
private void getHotList() { private void getHotList() {
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
logger.info("搜狗微信采集开始........"); log.info("搜狗微信采集开始........");
List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch(); List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch();
logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<DBObject> data = new ArrayList<>();
for(HotSearchList sougoHotSearch : list){ for(HotSearchList sougoHotSearch : list){
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
...@@ -52,7 +53,7 @@ public class SougoHotSearchRun extends Thread { ...@@ -52,7 +53,7 @@ public class SougoHotSearchRun extends Thread {
data.add(doc); data.add(doc);
} }
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
logger.info("搜狗微信采集结束........"); log.info("搜狗微信采集结束........");
} }
} }
...@@ -5,6 +5,7 @@ import java.util.List; ...@@ -5,6 +5,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -12,24 +13,24 @@ import com.zhiwei.searchhotcrawler.dao.WechatUserDao; ...@@ -12,24 +13,24 @@ import com.zhiwei.searchhotcrawler.dao.WechatUserDao;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil; import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public class UpdateWechatUserRun extends Thread{ public class UpdateWechatUserRun extends Thread{
private WechatUserDao wechatUserDao = new WechatUserDao(); private WechatUserDao wechatUserDao = new WechatUserDao();
private static Logger logger = LoggerFactory.getLogger(UpdateWechatUserRun.class);
@Override @Override
public void run() { public void run() {
logger.info("开始更新用户数据"); log.info("开始更新用户数据");
while(true) { while(true) {
try { try {
Calendar calendar = Calendar.getInstance(); Calendar calendar = Calendar.getInstance();
int hour = calendar.get(Calendar.HOUR_OF_DAY); int hour = calendar.get(Calendar.HOUR_OF_DAY);
if(hour > 6 ){ if(hour > 6 ){
Map<String,Integer> groupMap = WechatCodeUtil.getAllGroupIp(); Map<String,Integer> groupMap = WechatCodeUtil.getAllGroupIp();
logger.info("此公众号的分组数量为:::{}", groupMap.size()); log.info("此公众号的分组数量为:::{}", groupMap.size());
if(!groupMap.isEmpty() && groupMap!=null){ if(!groupMap.isEmpty() && groupMap!=null){
for(Entry<String,Integer> group : groupMap.entrySet()){ for(Entry<String,Integer> group : groupMap.entrySet()){
logger.info("此公众号的分组名称及IP为:::{},{}", group.getKey(), group.getValue()); log.info("此公众号的分组名称及IP为:::{},{}", group.getKey(), group.getValue());
List<String> userList = WechatCodeUtil.getUserListByGroupId(group.getValue()); List<String> userList = WechatCodeUtil.getUserListByGroupId(group.getValue());
logger.info("{},此分组下的用户数量为::{}", group.getKey(), userList.size()); log.info("{},此分组下的用户数量为::{}", group.getKey(), userList.size());
if(userList!=null && !userList.isEmpty()){ if(userList!=null && !userList.isEmpty()){
wechatUserDao.addWechatUser(userList, group.getKey(), group.getValue()); wechatUserDao.addWechatUser(userList, group.getKey(), group.getValue());
} }
...@@ -38,7 +39,7 @@ public class UpdateWechatUserRun extends Thread{ ...@@ -38,7 +39,7 @@ public class UpdateWechatUserRun extends Thread{
} }
ZhiWeiTools.sleep(1*60*60*1000); ZhiWeiTools.sleep(1*60*60*1000);
} catch (Exception e) { } catch (Exception e) {
logger.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace()); log.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools.sleep(1*60*60*1000); ZhiWeiTools.sleep(1*60*60*1000);
continue; continue;
} }
......
...@@ -5,6 +5,7 @@ import java.util.Date; ...@@ -5,6 +5,7 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -15,10 +16,9 @@ import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler; ...@@ -15,10 +16,9 @@ import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public class WeiboHotSearchRun extends Thread{ public class WeiboHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchRun.class);
@Override @Override
public void run() { public void run() {
boolean f = true; boolean f = true;
...@@ -36,11 +36,11 @@ public class WeiboHotSearchRun extends Thread{ ...@@ -36,11 +36,11 @@ public class WeiboHotSearchRun extends Thread{
private void getHotList() { private void getHotList() {
logger.info("微博话题采集开始........"); log.info("微博话题采集开始........");
HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO(); HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO();
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch(); // List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone(); List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone();
logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<DBObject> data = new ArrayList<>();
for(HotSearchList weiboHotSearch : list){ for(HotSearchList weiboHotSearch : list){
int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch); int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch);
...@@ -49,7 +49,7 @@ public class WeiboHotSearchRun extends Thread{ ...@@ -49,7 +49,7 @@ public class WeiboHotSearchRun extends Thread{
doc.put("name", weiboHotSearch.getName()); doc.put("name", weiboHotSearch.getName());
doc.put("url", weiboHotSearch.getUrl()); doc.put("url", weiboHotSearch.getUrl());
doc.put("count", weiboHotSearch.getCount()); doc.put("count", weiboHotSearch.getCount());
doc.put("hot", weiboHotSearch.isHot()); doc.put("hot", weiboHotSearch.getHot());
doc.put("day", weiboHotSearch.getDay()); doc.put("day", weiboHotSearch.getDay());
doc.put("time", weiboHotSearch.getTime()); doc.put("time", weiboHotSearch.getTime());
doc.put("changeCount", changeCount); doc.put("changeCount", changeCount);
...@@ -59,7 +59,7 @@ public class WeiboHotSearchRun extends Thread{ ...@@ -59,7 +59,7 @@ public class WeiboHotSearchRun extends Thread{
data.add(doc); data.add(doc);
} }
weiboHotSearchDAO.addHotSearchList(data); weiboHotSearchDAO.addHotSearchList(data);
logger.info("微博话题采集结束........"); log.info("微博话题采集结束........");
} }
} }
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
import com.zhiwei.searchhotcrawler.crawler.WeiboSuperTopicCrawler;
import com.zhiwei.searchhotcrawler.dao.WeiboSuperTopicDAO;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public class WeiboSuperTopicRun extends Thread{
@Override
public void run() {
boolean f = true;
while(f) {
try {
getTopicList();
TimeUnit.HOURS.sleep(3);
} catch (Exception e) {
e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
}
ZhiWeiTools.sleep(50);
}
}
private void getTopicList() {
WeiboSuperTopicDAO weiboTopicDAO = new WeiboSuperTopicDAO();
log.info("微博超话采集开始........");
List<WeiboSuperTopic> list = WeiboSuperTopicCrawler.startCrawler();
log.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>();
for(WeiboSuperTopic topic : list){
log.info("topic::::{}", topic);
DBObject doc = new BasicDBObject();
doc.put("_id", topic.getId());
doc.put("name", topic.getTopicName());
doc.put("rank", topic.getRank());
doc.put("score_num", topic.getScore());
doc.put("fensi_num", topic.getFensi());
doc.put("post_num", topic.getPostNum());
doc.put("type", topic.getType());
doc.put("day", topic.getDay());
doc.put("time", topic.getTime());
doc.put("url", topic.getUrl());
data.add(doc);
}
weiboTopicDAO.addTopicList(data);
log.info("微博话题采集结束........");
}
}
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.WeiboTopicCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.slf4j.Logger; @Log4j2
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.WeiboTopic;
import com.zhiwei.searchhotcrawler.crawler.WeiboHuatiCrawler;
import com.zhiwei.searchhotcrawler.dao.WeiboTopicDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class WeiboTopicRun extends Thread{ public class WeiboTopicRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(WeiboTopicRun.class);
@Override @Override
public void run() { public void run() {
boolean f = true; boolean f = true;
while(f) { while(f) {
try { try {
getTopicList(); getTopicList();
TimeUnit.HOURS.sleep(3); TimeUnit.MINUTES.sleep(3);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000); ZhiWeiTools.sleep(60*60*1000);
...@@ -36,28 +31,29 @@ public class WeiboTopicRun extends Thread{ ...@@ -36,28 +31,29 @@ public class WeiboTopicRun extends Thread{
private void getTopicList() { private void getTopicList() {
WeiboTopicDAO weiboTopicDAO = new WeiboTopicDAO(); HotSearchListDAO weiboHotSearchDAO = new HotSearchListDAO();
logger.info("微博超话采集开始........"); log.info("微博话题采集开始........");
List<WeiboTopic> list = WeiboHuatiCrawler.startCrawler(); List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone();
logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<DBObject> data = new ArrayList<>();
for(WeiboTopic topic : list){ for(HotSearchList topic : list){
logger.info("topic::::{}", topic); log.info("topic::::{}", topic);
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
doc.put("_id", topic.getId()); doc.put("_id", topic.getId());
doc.put("name", topic.getTopicName()); doc.put("name", topic.getName());
doc.put("rank", topic.getRank()); doc.put("url", topic.getUrl());
doc.put("score_num", topic.getScore()); doc.put("count", topic.getCount());
doc.put("fensi_num", topic.getFensi()); doc.put("hot", topic.getHot());
doc.put("post_num", topic.getPostNum());
doc.put("type", topic.getType());
doc.put("day", topic.getDay()); doc.put("day", topic.getDay());
doc.put("time", topic.getTime()); doc.put("time", topic.getTime());
doc.put("url", topic.getUrl()); doc.put("rank", topic.getRank());
doc.put("type", topic.getType());
doc.put("topic_lead", topic.getTopicLead());
doc.put("comment_count", topic.getCommentCount());
data.add(doc); data.add(doc);
} }
weiboTopicDAO.addTopicList(data); weiboHotSearchDAO.addHotSearchList(data);
logger.info("微博话题采集结束........"); log.info("微博话题采集结束........");
} }
} }
...@@ -4,6 +4,7 @@ import java.util.Date; ...@@ -4,6 +4,7 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import lombok.extern.log4j.Log4j2;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -14,10 +15,9 @@ import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler; ...@@ -14,10 +15,9 @@ import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public class ZhihuHotSearchRun extends Thread{ public class ZhihuHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchRun.class);
@Override @Override
public void run() { public void run() {
boolean f = true; boolean f = true;
...@@ -34,22 +34,20 @@ public class ZhihuHotSearchRun extends Thread{ ...@@ -34,22 +34,20 @@ public class ZhihuHotSearchRun extends Thread{
} }
private void getHotList() { private void getHotList() {
HotSearchListDAO hotSearchDAO = new HotSearchListDAO(); HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
logger.info("知乎话题采集开始...,当前线程名字:{}", Thread.currentThread().getName()); log.info("知乎话题采集开始...,当前线程名字:{}", Thread.currentThread().getName());
List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList(); List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList();
List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList(); List<HotSearchList> mobilelist = ZhihuHotSearchCrawler.getMobileZhihuHotList();
list.addAll(mobilelist); list.addAll(mobilelist);
logger.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
for(HotSearchList zhihuHotSearch : list){ for(HotSearchList zhihuHotSearch : list){
DBObject zhihu = new BasicDBObject(); DBObject zhihu = new BasicDBObject();
zhihu.put("_id", zhihuHotSearch.getId()); zhihu.put("_id", zhihuHotSearch.getId());
zhihu.put("name", zhihuHotSearch.getName()); zhihu.put("name", zhihuHotSearch.getName());
zhihu.put("url", zhihuHotSearch.getUrl()); zhihu.put("url", zhihuHotSearch.getUrl());
zhihu.put("count", zhihuHotSearch.getCount()); zhihu.put("count", zhihuHotSearch.getCount());
zhihu.put("hot", zhihuHotSearch.isHot()); zhihu.put("hot", zhihuHotSearch.getHot());
zhihu.put("day", zhihuHotSearch.getDay()); zhihu.put("day", zhihuHotSearch.getDay());
zhihu.put("time", zhihuHotSearch.getTime()); zhihu.put("time", zhihuHotSearch.getTime());
zhihu.put("changeCount", 0); zhihu.put("changeCount", 0);
...@@ -57,7 +55,7 @@ public class ZhihuHotSearchRun extends Thread{ ...@@ -57,7 +55,7 @@ public class ZhihuHotSearchRun extends Thread{
zhihu.put("type", zhihuHotSearch.getType()); zhihu.put("type", zhihuHotSearch.getType());
hotSearchDAO.addHotSearch(zhihu); hotSearchDAO.addHotSearch(zhihu);
} }
logger.info("知乎话题采集结束........"); log.info("知乎话题采集结束........");
} }
} }
...@@ -3,8 +3,8 @@ mongoIp=192.168.0.101 ...@@ -3,8 +3,8 @@ mongoIp=192.168.0.101
mongoPort=30000 mongoPort=30000
#mongoIp=192.168.0.81 #mongoIp=192.168.0.81
#mongoPort=27017 #mongoPort=27017
db.username=datapush db.username=searchhotcrawleruser
db.paasword=4d8ce5c42073c db.paasword=searchhotcrawler1q2w3e4r
db.certifiedDB=admin db.certifiedDB=admin
dbName=hot_search_list dbName=hot_search_list
searchCollName=hot_search_list searchCollName=hot_search_list
......
registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182 registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182
group=hangzhou group=hangzhou
######################################################## ########################################################
#registry=zookeeper://192.168.0.36:2181 #registry=zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181
#group=local #group=local
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment