Commit a1ef24dc by zhiwei

第一次提交热搜采集项目

parents
# 榜单爬虫
#### 版本
- [2018年04月24日16:00:00]
#### 爬虫说明
1. 每1分钟抓取一次微博热搜榜数据,每1小时推送一次
2. 每1分钟抓取一次知乎热搜榜数据,每1小时推送一次
#### 运行说明
1. 目前运行地址:nbzhiwei@192.168.0.104:/home/nbzhiwei/crawler/weibohot_topic/
2. 运行命令:cd $path && nohup java -jar searchhotcrawler-0.0.3-SNAPSHOT.jar &
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>searchhotcrawler</artifactId>
<name>各平台热搜榜单采集程序</name>
<version>0.0.3-SNAPSHOT</version>
<description>各平台热搜榜单采集程序
目前包含:1.微博时时热搜采集程序、2.知乎热搜采集程序</description>
<developers>
<developer>
<id>Bewilder</id>
<name>zhiwei zhang</name>
<email>zhangzhiwei@zhiweidata.com</email>
</developer>
</developers>
<build>
<plugins>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.2</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer>
<mainClass>com.zhiwei.searchhotcrawler.run.HotSearchRun</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-source-plugin</artifactId>
<version>2.4</version>
<executions>
<execution>
<phase>compile</phase>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
<configuration>
<attach>true</attach>
</configuration>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version>
<configuration>
<forkMode>once</forkMode>
<argLine>-Dfile.encoding=UTF-8</argLine>
<skipTests>true</skipTests>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
</project>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>searchhotcrawler</artifactId>
<version>0.0.3-SNAPSHOT</version>
<name>各平台热搜榜单采集程序</name>
<description>各平台热搜榜单采集程序
目前包含:1.微博时时热搜采集程序、2.知乎热搜采集程序</description>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
<developers>
<developer>
<id>Bewilder</id>
<name>zhiwei zhang</name>
<email>zhangzhiwei@zhiweidata.com</email>
</developer>
</developers>
<dependencies>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>zhiweiTools</artifactId>
<version>0.0.6-SNAPSHOT</version>
</dependency>
<!-- 数据解析jar -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.1</version>
</dependency>
<dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongo-java-driver</artifactId>
<version>3.6.3</version>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>sendmail</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.2</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.zhiwei.searchhotcrawler.run.HotSearchRun</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-source-plugin</artifactId>
<version>2.4</version>
<configuration>
<attach>true</attach>
</configuration>
<executions>
<execution>
<phase>compile</phase>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- 解决maven test命令时console出现中文乱码 -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version>
<configuration>
<forkMode>once</forkMode>
<argLine>-Dfile.encoding=UTF-8</argLine>
<skipTests>true</skipTests>
</configuration>
</plugin>
</plugins>
</build>
</project>
\ No newline at end of file
package com.zhiwei.searchhotcrawler.bean;
/**
* @ClassName: WeiboHotSearch
* @Description: TODO(微博时时热搜)
* @author hero
* @date 2017年9月26日 下午5:41:11
*/
import java.io.Serializable;
import java.util.Date;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
public class WeiboHotSearch implements Serializable{
private static final long serialVersionUID = 2076919584659821600L;
private String id; //主键
private String url; //消息链接
private String name; //热搜关键词
private int count; //时时热搜量
private boolean hot; //状态(true 为热搜; false为时时上升)
private String day; //天
private Date time; //时间
private int changeCount; //据上分钟变化量
public WeiboHotSearch(){}
public WeiboHotSearch(String url, String name, int count,boolean hot){
this.id = name + "_" + new Date().getTime();
this.url = url;
this.name = name;
this.count = count;
this.hot = hot;
this.time = new Date();
this.day = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
}
@Override
public String toString(){
return "new WeiboHotSearch["
+ "id = " + id
+ ", url = " + url
+ ", name = " + name
+ ", count = " + count
+ ", time = " + time
+ ", hot = " + hot
+ ", day = " + day
+ ", changeCount = " + changeCount
+ "]";
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getUrl() {
return url;
}
public void setUrl(String url) {
this.url = url;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public int getCount() {
return count;
}
public void setCount(int count) {
this.count = count;
}
public Date getTime() {
return time;
}
public void setTime(Date time) {
this.time = time;
}
public int getChangeCount() {
return changeCount;
}
public void setChangeCount(int changeCount) {
this.changeCount = changeCount;
}
public static long getSerialversionuid() {
return serialVersionUID;
}
public boolean isHot() {
return hot;
}
public void setHot(boolean hot) {
this.hot = hot;
}
public String getDay() {
return day;
}
public void setDay(String day) {
this.day = day;
}
}
package com.zhiwei.searchhotcrawler.bean;
import java.io.Serializable;
import java.util.Date;
public class ZhihuHotSearch implements Serializable{
private static final long serialVersionUID = -7707110236217797510L;
private String url; //消息链接
private String query; //热搜关键词
private String displayQuery; //热搜关键词
private Date time; //时间
public ZhihuHotSearch(){}
public ZhihuHotSearch(String url, String query, String displayQuery, Date time){
this.url = url;
this.query = query;
this.displayQuery = displayQuery;
this.time = time;
}
@Override
public String toString(){
return "new ZhihuHotSearch["
+ "url = " + url
+ ", query = " + query
+ ", displayQuery = " + displayQuery
+ ", time = " + time
+ "]";
}
public String getUrl() {
return url;
}
public void setUrl(String url) {
this.url = url;
}
public String getQuery() {
return query;
}
public void setQuery(String query) {
this.query = query;
}
public String getDisplayQuery() {
return displayQuery;
}
public void setDisplayQuery(String displayQuery) {
this.displayQuery = displayQuery;
}
public Date getTime() {
return time;
}
public void setTime(Date time) {
this.time = time;
}
}
package com.zhiwei.searchhotcrawler.config;
import java.io.InputStream;
import java.util.Properties;
public class Config {
static {
Properties conf = null;
try {
InputStream is = Thread.currentThread().getContextClassLoader()
.getResourceAsStream("db.properties");
conf = new Properties();
conf.load(is);
is.close();
mongoIp = conf.getProperty("mongoIp");
mongoPort = Integer.valueOf(conf.getProperty("mongoPort"));
userName = conf.getProperty("db.username");
userPwd = conf.getProperty("db.paasword");
authDB = conf.getProperty("db.certifiedDB");
dbName = conf.getProperty("dbName");
collWeiboName = conf.getProperty("collWeiboName");
collZhihuName = conf.getProperty("collZhihuName");
} catch (Exception e) {
e.printStackTrace();
}
}
public static String mongoIp;
public static int mongoPort;
public static String userName;
public static String userPwd;
public static String authDB;
public static String dbName;
public static String collWeiboName;
public static String collZhihuName;
}
package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.searchhotcrawler.bean.WeiboHotSearch;
import com.zhiwei.searchhotcrawler.mail.SendMailWeibo;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
import com.zhiwei.zhiweiTools.tools.URLCodeUtil;
/**
* @ClassName: WeiboHotSearch
* @Description: TODO(微博实时热搜采集)
* @author hero
* @date 2017年9月15日 上午10:54:31
*/
public class WeiboHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchCrawler.class);
/**
* @Title: weiboHotSearchTest
* @author hero
* @Description: TODO(PC端微博热搜采集)
* @param 设定文件
* @return void 返回类型
*/
public static List<WeiboHotSearch> weiboHotSearch(){
String url = "http://s.weibo.com/top/summary?cate=realtimehot";
Map<String,String> headerMap = new HashMap<String,String>();
headerMap.put("Referer", "http://s.weibo.com/top/summary?cate=realtimehot");
headerMap.put("Host", "s.weibo.com");
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36");
headerMap.put("Upgrade-Insecure-Requests", "1");
List<WeiboHotSearch> list = new ArrayList<WeiboHotSearch>();
for(int i =0; i<3; i++){
String htmlBody = null;
try {
htmlBody = HttpClientTemplateOK.get(url, null, headerMap);
if(htmlBody!=null && htmlBody.contains("pl_top_realtimehot")){
try {
String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0];
script = script.replace("(", "").replace(")", "");
JSONObject json = JSONObject.parseObject(script);
String html = json.getString("html");
Document document = Jsoup.parse(html);
Elements elements = document.select("tbody").select("tr");
for(Element element : elements){
try {
String id = "http://s.weibo.com"+element.select("p.star_name").select("a").attr("href");
String name = element.select("p.star_name").select("a").text();
String num = !element.select("p.star_num").text().equals("")?element.select("p.star_num").text():"0";
int hotCount = Integer.valueOf(num);
WeiboHotSearch hotSearch = new WeiboHotSearch(id, name, hotCount, true);
list.add(hotSearch);
} catch (Exception e) {
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
logger.error("解析微博时时热搜时出现解析错误",e.fillInStackTrace());
continue;
}
}
} catch (Exception e) {
logger.error("解析微博时时热搜时出现解析错误,数据不是json结构",e.fillInStackTrace());
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
return null;
}
}else{
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
logger.info("解析微博时时热搜时出现解析错误,页面结构有问题");
}
break;
} catch (Exception e) {
if(i==2){
return list;
}else{
continue;
}
}
}
return list;
}
/**
* @Title: weiboHotSearchByPhoneTest
* @author hero
* @Description: TODO(手机端Iphone 微博热搜采集)
* @param 设定文件
* @return void 返回类型
*/
public static List<WeiboHotSearch> weiboHotSearchByPhone(){
String url = "";
Map<String,String> headerMap = new HashMap<String,String>();
headerMap.put("Host", "mapi.weibo.com");
headerMap.put("User-Agent", "Weibo/8789 (iPhone; iOS 10.3.3; Scale/2.00)");
List<WeiboHotSearch> result = new ArrayList<WeiboHotSearch>();
String htmlBody;
try {
htmlBody = HttpClientTemplateOK.get(url, null, headerMap);
if(htmlBody!=null){
try {
JSONObject json = JSONObject.parseObject(htmlBody);
JSONArray cards = json.getJSONArray("cards");
for(int i=0;i<cards.size();i++){
try {
JSONObject card = cards.getJSONObject(i);
JSONArray card_group = card.getJSONArray("card_group");
String title = card.getString("title");
boolean hot = true;
if(title.contains("实时上升热点")){
hot = false;
}
for(int j=0; j<card_group.size(); j++){
JSONObject cardInfo = card_group.getJSONObject(j);
String name = cardInfo.getString("desc");
int hotCount = cardInfo.getIntValue("desc_extr");
String id = "http://s.weibo.com/weibo/"+URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
WeiboHotSearch hotSearch = new WeiboHotSearch(id, name, hotCount, hot);
logger.info("采集到的数据:::{}", hotSearch);
result.add(hotSearch);
}
} catch (Exception e) {
logger.error("解析微博时时热搜时出现解析错误",e.fillInStackTrace());
continue;
}
}
} catch (Exception e) {
logger.error("解析微博时时热搜时出现解析错误,数据不是json结构",e.fillInStackTrace());
return null;
}
}else{
logger.info("解析微博时时热搜时出现解析错误,页面结构有问题");
}
} catch (IOException e1) {
logger.error("解析微博时时热搜时出现连接失败",e1.fillInStackTrace());
}
return result;
}
}
package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.searchhotcrawler.bean.ZhihuHotSearch;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
import com.zhiwei.zhiweiTools.tools.URLCodeUtil;
/**
* @ClassName: ZhihuHotCrawler
* @Description: TODO(知乎热搜采集程序)
* @author hero
* @date 2017年9月15日 上午10:54:31
*/
public class ZhihuHotSearchCrawler {
private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchCrawler.class);
/**
* @Title: getZhihuHotList
* @author hero
* @Description: 知乎热搜采集程序
* @param 设定文件
* @return void 返回类型
*/
public static List<ZhihuHotSearch> getZhihuHotList(){
List<ZhihuHotSearch> list = null;
String url = "https://www.zhihu.com/api/v4/search/top_search";
String rerferer = "https://www.zhihu.com/search?type=content&q=%E5%BF%AB%E6%89%8B";
Map<String,String> headerMap = HeaderTool.getCommonHead();
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36");
headerMap.put("Host", "www.zhihu.com");
headerMap.put("X-UDID", "AFAC3hv3vgyPTt9ZmNmqTm0yv_8NKY3S3z8=");
headerMap.put("accept", "application/json, text/plain, */*");
headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20");
headerMap.put("Referer", rerferer);
for(int j=0;j<3;j++){
try {
String htmlBody = HttpClientTemplateOK.get(url, null, headerMap);
if(htmlBody != null){
if(htmlBody.contains("words")){
list = new ArrayList<ZhihuHotSearch>();
JSONObject top_search = JSONObject.parseObject(htmlBody);
JSONArray words = top_search.getJSONObject("top_search").getJSONArray("words");
String link = null;
String display_query = null;
String query = null;
for (int i = 0; i < words.size(); i++) {
JSONObject word = words.getJSONObject(i);
query = word.getString("query");
display_query = word.getString("display_query");
link = "https://www.zhihu.com/search?q="+URLCodeUtil.getURLEncode(query, "utf-8")+"&utm_content=search_hot&utm_medium=organic&utm_source=zhihu&type=content";
ZhihuHotSearch zhihu = new ZhihuHotSearch(link, query, display_query,new Date());
list.add(zhihu);
}
break;
}
}
} catch (IOException e) {
logger.debug("获取知乎热搜时出现问题:{}", e.fillInStackTrace());
continue;
}
}
return list;
}
}
package com.zhiwei.searchhotcrawler.dao;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.WriteConcern;
import com.zhiwei.searchhotcrawler.bean.WeiboHotSearch;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class WeiboHotSearchDAO extends MongoDBTemplate{
public WeiboHotSearchDAO() {
super();
super.setDbName(Config.dbName);
super.setCollName(Config.collWeiboName);
}
/**
* @Title: addWeiboHotSearch
* @author hero
* @Description: TODO(这里用一句话描述这个方法的作用)
* @param @param doc 设定文件
* @return void 返回类型
*/
@SuppressWarnings("deprecation")
public void addWeiboHotSearch(List<DBObject> list){
for(int i=0; i<3; i++){
try {
this.getReadColl().insert(list, WriteConcern.SAFE);
ZhiWeiTools.sleep(200);
break;
} catch (Exception e) {
e.printStackTrace();
continue;
}
}
}
/**
* @Title: getChangeCount
* @author hero
* @Description: TODO(查询据上次变化量)
* @param @param weiboHotSearch
* @param @return 设定文件
* @return int 返回类型
*/
public int getChangeCount(WeiboHotSearch weiboHotSearch){
int result = 0;
DBObject query = new BasicDBObject();
query.put("name", weiboHotSearch.getName());
DBObject sort = new BasicDBObject();
sort.put("time", -1);
try {
DBCursor cur = this.getReadColl().find(query).sort(sort).limit(1);
while(cur.hasNext()){
DBObject doc = cur.next();
result = weiboHotSearch.getCount() - Integer.valueOf(doc.get("count").toString());
break;
}
cur.close();
} catch (Exception e) {
e.printStackTrace();
return result;
}
return result;
}
/**
* @Title: getWeiboHotOneHour
* @author hero
* @Description: 查询最近1小时内新增的微博热搜
* @param @return 设定文件
* @return List<DBObject> 返回类型
*/
public List<DBObject> getWeiboHotOneHour(){
List<DBObject> list = new ArrayList<DBObject>();
Date date = new Date((new Date().getTime()-60*60*1000));
DBObject query = new BasicDBObject();
query.put("time", new BasicDBObject("$gte", date));
query.put("changeCount", 0);
List<String> nameList = new ArrayList<String>();
try {
DBCursor cur = this.getReadColl().find(query);
while(cur.hasNext()){
DBObject doc = cur.next();
String name = doc.get("name").toString();
if(!nameList.contains(name)){
nameList.add(name);
list.add(doc);
}
}
nameList.clear();
cur.close();
} catch (Exception e) {
return null;
}
return list;
}
}
package com.zhiwei.searchhotcrawler.dao;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.WriteConcern;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class ZhihuHotSearchDAO extends MongoDBTemplate{
public ZhihuHotSearchDAO() {
super();
super.setDbName(Config.dbName);
super.setCollName(Config.collZhihuName);
}
@SuppressWarnings("deprecation")
public void addZhiHuHotSearch(DBObject zhihu){
for(int i=0; i<3; i++){
try {
this.getReadColl().insert(zhihu,WriteConcern.SAFE);
ZhiWeiTools.sleep(200);
break;
} catch (Exception e) {
continue;
}
}
}
/**
* @Title: getZhiHuHotSearch
* @author hero
* @Description: TODO(这里用一句话描述这个方法的作用)
* @param @return 设定文件
* @return List<DBObject> 返回类型
*/
public List<DBObject> getZhiHuHotSearch(){
List<DBObject> list = null;
try {
Date date = new Date((new Date().getTime()-60*60*1000));
DBObject query = new BasicDBObject();
query.put("time", new BasicDBObject("$gte", date));
long count = this.getReadColl().count(query);
if(count>0){
list = new ArrayList<DBObject>();
DBCursor cur = this.getReadColl().find(query);
while(cur.hasNext()){
DBObject doc = cur.next();
list.add(doc);
}
cur.close();
}
return list;
} catch (Exception e) {
e.printStackTrace();
return list;
}
}
}
package com.zhiwei.searchhotcrawler.dbtemplate;
import java.util.Arrays;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.Mongo;
import com.mongodb.MongoClient;
import com.mongodb.MongoCredential;
import com.mongodb.MongoException;
import com.mongodb.ServerAddress;
import com.zhiwei.searchhotcrawler.config.Config;
/**
*
* @Description: MongoDB模板类
* @author Tou Tang
* @date 2014-11-14 下午3:24:40
*/
public class MongoDBTemplate {
protected static Mongo reader;
protected static Mongo writer;
protected String collName;
protected String dbName;
@SuppressWarnings("deprecation")
public MongoDBTemplate() {
try {
MongoCredential credential = MongoCredential.createCredential(Config.userName, Config.authDB, Config.userPwd.toCharArray());
ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);
if(reader==null)
{
reader = new MongoClient(address, Arrays.asList(credential));
}
if(writer==null)
{
writer = new MongoClient(address, Arrays.asList(credential));
}
} catch (MongoException e) {
e.printStackTrace();
}
}
public DBCollection getReadColl() {
@SuppressWarnings("deprecation")
final DB db = getReader().getDB(dbName);
final DBCollection coll = db.getCollection(collName);
return coll;
}
protected Mongo getReader() {
return reader;
}
public DBCollection getWriteColl() {
@SuppressWarnings("deprecation")
final DB db = getWriter().getDB(dbName);
final DBCollection coll = db.getCollection(collName);
return coll;
}
protected Mongo getWriter() {
return writer;
}
protected void setCollName(final String collName) {
this.collName = collName;
}
protected void setDbName(final String dbName) {
this.dbName = dbName;
}
@SuppressWarnings("static-access")
protected void setReader(final Mongo reader) {
this.reader = reader;
}
@SuppressWarnings("static-access")
protected void setWriter(final Mongo writer) {
this.writer = writer;
}
public static void main(String[] args) {
}
}
package com.zhiwei.searchhotcrawler.mail;
import com.zhiwei.sendmail.SendMail;
import com.zhiwei.sendmail.bean.MailInfo;
public class SendMailWeibo {
/**
* @Title: sendMail
* @Description: TODO(发送邮件)
* @param @param mailContent
* @param @param email
* @param @return 设定文件
* @return boolean 返回类型
*/
public static boolean sendMail(String mailContent,String email){
//这个类主要是设置邮件
String mailServerHost = "smtp.mxhichina.com";
String mailServerPort = "25";
String fromAddress = "zhangzhiwei@bewilderhk.com";
String toAddress = email;
String userName = "zhangzhiwei@bewilderhk.com";
String password = "OLP1437z..";
boolean validate = true;
String subject = "微博实时热搜榜采集程序";
String content = mailContent;
MailInfo mailInfo = new MailInfo(mailServerHost, mailServerPort, fromAddress, toAddress, userName, password, validate, subject, content,null);
boolean f=false;
//这个类主要来发送邮件
f = SendMail.sendMailByHtml(mailInfo);//发送文体格式
return f;
}
/**
* 测试发送邮件
*/
// public static void main(String[] args) {
// SendMailDaoImpl sendMail=new SendMailDaoImpl();
// String mailSubject="国家核电重点规则追踪提醒";
// String mailContent="消息内容:考或不考,成功的路就在那里,冥冥之中就会有一双手帮你。但如果只是为了一纸文凭,奉劝大家不要将有限的生命投入到无限的为考试而考之中。如果真的发自内心想要学英语,一定是会说会用,才算会英语!<br/><br/>地址:www.baidu.com<br/><br/> 本消息由国家核电舆情系统发送(请勿回复)";
// String[] email={"859548429@qq.com"};
// sendMail.SendMail(mailContent, email);
//
// }
}
package com.zhiwei.searchhotcrawler.run;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun;
public class HotSearchRun {
private ScheduledExecutorService scheduExec;
public HotSearchRun() {
this.scheduExec = Executors.newScheduledThreadPool(4);
}
public void showTimer() {
scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 1000, 60 * 1000, TimeUnit.MILLISECONDS);
scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 1000, 60 * 1000 , TimeUnit.MILLISECONDS);
scheduExec.scheduleAtFixedRate(new SendZhihuHotSearchRun(), 1000, 60 * 60 * 1000 , TimeUnit.MILLISECONDS);
scheduExec.scheduleAtFixedRate(new SendWeiboHotSearchRun(), 1000, 60 * 60 * 1000 , TimeUnit.MILLISECONDS);
}
public static void main(String[] args) {
new HotSearchRun().showTimer();
}
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO;
import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.util.WechatConstant;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
public class SendWeiboHotSearchRun extends Thread{
private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO();
@Override
public void run() {
List<DBObject> list = weiboHotSearchDAO.getWeiboHotOneHour();
System.out.println(list.size());
if(list!=null && list.size()>0){
for(DBObject weibo : list){
String title = weibo.get("name").toString();
String time = TimeParse.dateFormartString((Date)weibo.get("time"), "yyyy-MM-dd HH:mm:ss");
String url = weibo.get("url").toString();
sendTemplateByUserIds(title, time, url);
}
}else{
sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
}
}
/**
* @Title: sendTemplateByUserIds
* @author hero
* @Description: 发送模版消息
* @param @param microTouTiao
* @param @param userList 设定文件
* @return void 返回类型
*/
public static void sendTemplateByUserIds(String title,String time, String url) {
Map<String, Object> dataMap = new HashMap<>();
JSONObject first = new JSONObject();
first.put("value", "您好,有一条来自微博热搜榜的预警通知。");
dataMap.put("first", first);
JSONObject keyword1 = new JSONObject();
keyword1.put("value", title);
keyword1.put("color", "#173177");
dataMap.put("keyword1", keyword1);
JSONObject keyword2 = new JSONObject();
keyword2.put("value", "微博热搜榜");
keyword2.put("color", "#173177");
dataMap.put("keyword2", keyword2);
JSONObject keyword3 = new JSONObject();
keyword3.put("value", time);
keyword3.put("color", "#173177");
dataMap.put("keyword3", keyword3);
JSONObject remark = new JSONObject();
remark.put("value", "知微情报监测服务");
dataMap.put("remark", remark);
List<String> userList = getUserList();
for (String openId : userList) {
Template template = new Template();
template.setTouser(openId);
if(url!=null){
template.setUrl(url);
}
template.setTemplate_id(WechatConstant.WECHAT_TEMPLATEID_EARLY_IT);
template.setData(dataMap);
JSONObject templateJson = (JSONObject)JSONObject.toJSON(template);
WechatCodeUtil.sendDataJson(templateJson);
}
}
/**
* @Title: getUserList
* @author hero
* @Description: 用户列表
* @param @param projectName
* @param @return 设定文件
* @return List<String> 返回类型
*/
public static List<String> getUserList(){
List<String> userList = new ArrayList<String>();
userList.add("o_J5m0ZXyC5MBb2hiwFzyUYpo1Fw");
return userList;
}
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.dao.ZhihuHotSearchDAO;
import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.util.WechatConstant;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
public class SendZhihuHotSearchRun extends Thread{
private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO();
@Override
public void run() {
List<DBObject> list = zhihuHotSearchDAO.getZhiHuHotSearch();
if(list!=null && list.size()>0){
for(DBObject zhihu : list){
String title = zhihu.get("display_query").toString();
String time = TimeParse.dateFormartString((Date)zhihu.get("time"), "yyyy-MM-dd HH:mm:ss");
String url = zhihu.get("_id").toString();
sendTemplateByUserIds(title, time, url);
}
}else{
sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
}
}
/**
* @Title: sendTemplateByUserIds
* @author hero
* @Description: 发送模版消息
* @param @param microTouTiao
* @param @param userList 设定文件
* @return void 返回类型
*/
public static void sendTemplateByUserIds(String title,String time, String url) {
Map<String, Object> dataMap = new HashMap<>();
JSONObject first = new JSONObject();
first.put("value", "您好,有一条来自知乎热搜榜的预警通知。");
dataMap.put("first", first);
JSONObject keyword1 = new JSONObject();
keyword1.put("value", title);
keyword1.put("color", "#173177");
dataMap.put("keyword1", keyword1);
JSONObject keyword2 = new JSONObject();
keyword2.put("value", "知乎热搜榜");
keyword2.put("color", "#173177");
dataMap.put("keyword2", keyword2);
JSONObject keyword3 = new JSONObject();
keyword3.put("value", time);
keyword3.put("color", "#173177");
dataMap.put("keyword3", keyword3);
JSONObject remark = new JSONObject();
remark.put("value", "知微情报监测服务");
dataMap.put("remark", remark);
List<String> userList = getUserList();
for (String openId : userList) {
Template template = new Template();
template.setTouser(openId);
if(url!=null){
template.setUrl(url);
}
template.setTemplate_id(WechatConstant.WECHAT_TEMPLATEID_EARLY_IT);
template.setData(dataMap);
JSONObject templateJson = (JSONObject)JSONObject.toJSON(template);
WechatCodeUtil.sendDataJson(templateJson);
}
}
/**
* @Title: getUserList
* @author hero
* @Description: 用户列表
* @param @param projectName
* @param @return 设定文件
* @return List<String> 返回类型
*/
public static List<String> getUserList(){
List<String> userList = new ArrayList<String>();
userList.add("o_J5m0ZXyC5MBb2hiwFzyUYpo1Fw"); //mine
//JD组
userList.add("o_J5m0Ypn3DtKCum3tViKqDNFSN8"); //孙熠
userList.add("o_J5m0YgHuKvrbhxut4oukyi6nAE"); //demon波仔
userList.add("o_J5m0e5KQVI3D4InxWgJizW8LHY"); //R
userList.add("o_J5m0QslOcghBKzodrixE2yXbnk"); //尘埃眠于光年
//LP组
userList.add("o_J5m0USUBdmXsq-z_sgMwVpYvE4"); //胡芸莹
userList.add("o_J5m0a7SRz9C4wnzuZZqlY84s4A"); //汪宏帅
userList.add("o_J5m0ejlbqnO3nUNQPec_gooB_w"); //兢兢จุ๊บ
userList.add("o_J5m0Y8-cAjslRAz7HQXMg1vn3I"); //荠萸
userList.add("o_J5m0SwoXFU9itzq3s6WTDYD-rE"); //mogu
userList.add("o_J5m0UDO0qIVvo7EN7AM5oldycM"); //Judy
userList.add("o_J5m0XDzVVM4Lb_CT8utIazdVRo"); //Booming
userList.add("o_J5m0cy46a49Ta1h5z-jTGLfxg4"); //拾壹.
userList.add("o_J5m0XomZ0MtCHNPfaiVY-CJCpY"); //以陌
return userList;
}
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.WeiboHotSearch;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO;
public class WeiboHotSearchRun extends Thread{
private static Logger logger = LoggerFactory.getLogger(WeiboHotSearchRun.class);
private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO();
@Override
public void run() {
logger.info("微博话题采集开始........");
List<WeiboHotSearch> list = WeiboHotSearchCrawler.weiboHotSearch();
List<DBObject> data = new ArrayList<DBObject>();
for(WeiboHotSearch weiboHotSearch : list){
int changeCount = weiboHotSearchDAO.getChangeCount(weiboHotSearch);
DBObject doc = new BasicDBObject();
doc.put("_id", weiboHotSearch.getId());
doc.put("name", weiboHotSearch.getName());
doc.put("url", weiboHotSearch.getUrl());
doc.put("count", weiboHotSearch.getCount());
doc.put("hot", weiboHotSearch.isHot());
doc.put("day", weiboHotSearch.getDay());
doc.put("time", weiboHotSearch.getTime());
doc.put("changeCount", changeCount);
logger.info("{}, 话题名字:{}",new Date(), weiboHotSearch.getName());
data.add(doc);
}
weiboHotSearchDAO.addWeiboHotSearch(data);
logger.info("微博话题采集结束........");
}
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.ZhihuHotSearch;
import com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.ZhihuHotSearchDAO;
/**
 * Thread task that performs one Zhihu hot-search collection pass: it fetches
 * the current entries from {@link ZhihuHotSearchCrawler} and stores each one
 * through {@link ZhihuHotSearchDAO}.
 */
public class ZhihuHotSearchRun extends Thread{
    private static Logger logger = LoggerFactory.getLogger(ZhihuHotSearchRun.class);
    private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO();

    /**
     * Fetches the hot list, logs every query and writes one document per
     * entry. The entry URL is used as the document {@code _id}.
     */
    @Override
    public void run() {
        logger.info("知乎话题采集开始........");
        List<ZhihuHotSearch> list = ZhihuHotSearchCrawler.getZhihuHotList();
        for (ZhihuHotSearch zhihuHotSearch : list) {
            DBObject zhihu = new BasicDBObject();
            zhihu.put("_id", zhihuHotSearch.getUrl());
            zhihu.put("query", zhihuHotSearch.getQuery());
            zhihu.put("display_query", zhihuHotSearch.getDisplayQuery());
            zhihu.put("time", zhihuHotSearch.getTime());
            logger.info("{}, 知乎话题名字:{}", new Date(), zhihuHotSearch.getQuery());
            // Documents are written one at a time; no batch list is kept
            // because nothing ever consumed it.
            zhihuHotSearchDAO.addZhiHuHotSearch(zhihu);
        }
        logger.info("知乎话题采集结束........");
    }
}
package com.zhiwei.searchhotcrawler.util;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import javax.crypto.Cipher;
import javax.crypto.spec.IvParameterSpec;
import javax.crypto.spec.SecretKeySpec;
/**
 * AES string encryption helper.
 *
 * <p>Uses {@code AES/CBC/PKCS5Padding}. The key is the UTF-8 encoding of the
 * caller-supplied secret, truncated or zero-padded to 16 bytes, and the IV is
 * the fixed string {@code 0102030405060708}. Ciphertext is exchanged as
 * upper-case hexadecimal text.
 *
 * <p>NOTE(review): with a constant IV, equal plaintexts under the same secret
 * always produce equal ciphertexts. The scheme is kept as is because changing
 * it would change the ciphertext produced for a given input.
 *
 * @version V1.0
 * @date 2017-12-28 14:26
 */
public class AESUtils {
    private static final String TRANSFORMATION = "AES/CBC/PKCS5Padding";
    private static final String FIXED_IV = "0102030405060708";
    private static final int KEY_LENGTH_BYTES = 16;
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    private AESUtils() {
    }

    /**
     * Encrypts a string.
     *
     * @param secret the secret the key is derived from
     * @param value  the plain text to encrypt
     * @return the ciphertext as upper-case hexadecimal text
     * @throws RuntimeException wrapping any failure raised while encrypting
     */
    public static String encrypt(String secret, String value) {
        SecretKeySpec keySpec = getKey(secret);
        IvParameterSpec iv = fixedIv();
        try {
            Cipher cipher = Cipher.getInstance(TRANSFORMATION);
            cipher.init(Cipher.ENCRYPT_MODE, keySpec, iv);
            byte[] encrypted = cipher.doFinal(value.getBytes(StandardCharsets.UTF_8));
            return parseByte2HexStr(encrypted);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Manual smoke test: encrypts a sample value, prints the ciphertext, then
     * decrypts it again and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String jsm = AESUtils.encrypt("wechat", "shenjinzhu");
        System.out.println(jsm);
        String jm = AESUtils.decrypt("wechat", jsm);
        System.out.println(jm);
    }

    /**
     * Decrypts a string produced by {@link #encrypt(String, String)}.
     *
     * @param secret the secret the key is derived from
     * @param value  the ciphertext as hexadecimal text
     * @return the decrypted plain text
     * @throws RuntimeException wrapping any failure raised while decoding or
     *         decrypting (malformed hex, wrong key, bad padding, ...)
     */
    public static String decrypt(String secret, String value) {
        SecretKeySpec keySpec = getKey(secret);
        IvParameterSpec iv = fixedIv();
        try {
            Cipher cipher = Cipher.getInstance(TRANSFORMATION);
            cipher.init(Cipher.DECRYPT_MODE, keySpec, iv);
            byte[] original = cipher.doFinal(parseHexStr2Byte(value));
            return new String(original, StandardCharsets.UTF_8);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Derives the AES key: the secret's UTF-8 bytes, truncated or zero-padded
     * to exactly 16 bytes.
     *
     * @param secret the caller's secret
     * @return the derived key; never {@code null}
     */
    private static SecretKeySpec getKey(String secret) {
        byte[] bytes = secret.getBytes(StandardCharsets.UTF_8);
        return new SecretKeySpec(Arrays.copyOf(bytes, KEY_LENGTH_BYTES), "AES");
    }

    /** Builds the constant IV with an explicit charset instead of the platform default. */
    private static IvParameterSpec fixedIv() {
        return new IvParameterSpec(FIXED_IV.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Converts bytes to upper-case hexadecimal text, two characters per byte.
     *
     * @param buf the bytes to encode
     * @return the hexadecimal representation; empty for an empty array
     */
    public static String parseByte2HexStr(byte buf[]) {
        StringBuilder sb = new StringBuilder(buf.length * 2);
        for (byte b : buf) {
            sb.append(HEX_DIGITS[(b >> 4) & 0x0F]).append(HEX_DIGITS[b & 0x0F]);
        }
        return sb.toString();
    }

    /**
     * Converts hexadecimal text (either case) to bytes.
     *
     * @param hexStr the hexadecimal text; a trailing unpaired character is ignored
     * @return the decoded bytes, or {@code null} when {@code hexStr} is empty
     * @throws NumberFormatException if a character is not a hexadecimal digit
     */
    public static byte[] parseHexStr2Byte(String hexStr) {
        if (hexStr.length() < 1) {
            return null;
        }
        byte[] result = new byte[hexStr.length() / 2];
        for (int i = 0; i < result.length; i++) {
            int high = hexNibble(hexStr, i * 2);
            int low = hexNibble(hexStr, i * 2 + 1);
            result[i] = (byte) (high * 16 + low);
        }
        return result;
    }

    /** Returns the value of the hex digit at {@code index}, rejecting anything else. */
    private static int hexNibble(String hexStr, int index) {
        char c = hexStr.charAt(index);
        int digit = Character.digit(c, 16);
        if (digit < 0) {
            throw new NumberFormatException("For input string: \"" + c + "\"");
        }
        return digit;
    }
}
package com.zhiwei.searchhotcrawler.util;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.ConnectException;
import java.net.URL;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
/**
 * HTTPS request helper that sends a request and parses the response body as
 * JSON.
 *
 * @author liufeng
 * @date 2013-08-08
 */
public class HttpRequest {
    private static Logger log = LoggerFactory.getLogger(HttpRequest.class);

    /**
     * Sends an HTTPS request and returns the response parsed as JSON.
     *
     * <p>NOTE(review): the connection uses {@link MyX509TrustManager}, which
     * performs no certificate validation, and sets no connect or read
     * timeout. Both are left unchanged here; confirm they are intended.
     *
     * @param requestUrl
     *            the URL to request
     * @param requestMethod
     *            the HTTP method (GET or POST)
     * @param outputStr
     *            the request body to submit, or {@code null} for none
     * @return the parsed response (read properties with
     *         {@code JSONObject.get(key)}), or {@code null} when the request
     *         or the parsing failed
     */
    public static JSONObject httpRequest(String requestUrl,
            String requestMethod, String outputStr) {
        JSONObject jsonObject = null;
        HttpsURLConnection httpUrlConn = null;
        try {
            // Create the SSLContext and initialise it with our trust manager.
            TrustManager[] tm = { new MyX509TrustManager() };
            SSLContext sslContext = SSLContext.getInstance("SSL", "SunJSSE");
            sslContext.init(null, tm, new java.security.SecureRandom());
            SSLSocketFactory ssf = sslContext.getSocketFactory();

            URL url = new URL(requestUrl);
            httpUrlConn = (HttpsURLConnection) url.openConnection();
            httpUrlConn.setSSLSocketFactory(ssf);
            httpUrlConn.setDoOutput(true);
            httpUrlConn.setDoInput(true);
            httpUrlConn.setUseCaches(false);
            httpUrlConn.setRequestMethod(requestMethod);
            if ("GET".equalsIgnoreCase(requestMethod)) {
                httpUrlConn.connect();
            }

            // Submit the body when there is one; UTF-8 keeps Chinese text intact.
            if (null != outputStr) {
                try (OutputStream outputStream = httpUrlConn.getOutputStream()) {
                    outputStream.write(outputStr.getBytes("UTF-8"));
                }
            }

            // Read the whole response; the reader is closed even when reading fails.
            StringBuilder buffer = new StringBuilder();
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(httpUrlConn.getInputStream(), "utf-8"))) {
                String str;
                while ((str = bufferedReader.readLine()) != null) {
                    buffer.append(str);
                }
            }
            jsonObject = JSONObject.parseObject(buffer.toString());
        } catch (ConnectException ce) {
            log.error("Weixin server connection timed out.", ce);
        } catch (Exception e) {
            // Pass the message for the placeholder and the exception for the stack trace.
            log.error("https request error:{}", e.getMessage(), e);
        } finally {
            // Release the connection on every path, not only on success.
            if (httpUrlConn != null) {
                httpUrlConn.disconnect();
            }
        }
        return jsonObject;
    }
}
package com.zhiwei.searchhotcrawler.util;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import javax.net.ssl.X509TrustManager;
/**
 * Certificate trust manager used for HTTPS requests.
 *
 * <p>Both check methods are empty, so every client and server certificate
 * chain is accepted without validation.
 *
 * @author liufeng
 * @date 2013-08-08
 */
public class MyX509TrustManager implements X509TrustManager {
    /**
     * Accepts any client certificate chain.
     *
     * @param chain    the peer certificate chain (not inspected)
     * @param authType the authentication type (not inspected)
     */
    @Override
    public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
        // Intentionally empty: no validation is performed.
    }

    /**
     * Accepts any server certificate chain.
     *
     * @param chain    the peer certificate chain (not inspected)
     * @param authType the key exchange algorithm (not inspected)
     */
    @Override
    public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
        // Intentionally empty: no validation is performed.
    }

    /**
     * Returns the accepted issuers.
     *
     * @return an empty array; the interface contract requires a non-null
     *         (possibly empty) array, so {@code null} must not be returned
     */
    @Override
    public X509Certificate[] getAcceptedIssuers() {
        return new X509Certificate[0];
    }
}
\ No newline at end of file
/**
 * @Title: Template.java
 * @Package com.zhiwei.searchhotcrawler.util
 * @Description: Template-message payload class.
 * @author hero
 * @date 2016-01-27 11:40:25
 * @version V1.0
 */
package com.zhiwei.searchhotcrawler.util;
import java.util.Map;
/**
 * Template message bean: recipient, template id, click-through URL and the
 * message content.
 *
 * @ClassName: Template
 * @author 陈炜涛
 * @date 2017-11-04 11:31:49
 */
public class Template {
    /** User the message is sent to. */
    private String touser;
    /** Long id of the message template. */
    private String template_id;
    /** Address opened when the notification is clicked. */
    private String url;
    /** Content of the template message to send. */
    private Map<String,Object> data;

    /** @return the user the message is sent to */
    public String getTouser() {
        return this.touser;
    }

    /** @param touser the user the message is sent to */
    public void setTouser(String touser) {
        this.touser = touser;
    }

    /** @return the long id of the message template */
    public String getTemplate_id() {
        return this.template_id;
    }

    /** @param template_id the long id of the message template */
    public void setTemplate_id(String template_id) {
        this.template_id = template_id;
    }

    /** @return the address opened when the notification is clicked */
    public String getUrl() {
        return this.url;
    }

    /** @param url the address opened when the notification is clicked */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the template message content (the stored map itself, not a copy) */
    public Map<String,Object> getData() {
        return this.data;
    }

    /** @param data the template message content (stored as is, not copied) */
    public void setData(Map<String,Object> data) {
        this.data = data;
    }

    /** @return all four fields in {@code Template [name=value, ...]} form */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Template [");
        text.append("touser=").append(touser);
        text.append(", template_id=").append(template_id);
        text.append(", url=").append(url);
        text.append(", data=").append(data);
        return text.append("]").toString();
    }
}
package com.zhiwei.searchhotcrawler.util;
import java.util.Date;
/**
 * Data carried by a message template.
 *
 * @ClassName: TemplateData
 * @author 陈炜涛
 * @date 2017-11-04 11:02:13
 */
public class TemplateData {
    /** Address. */
    private String url;
    /** Title: the question on Zhihu, the post name on a forum/Tieba. */
    private String title;
    /** Content: the answer text on Zhihu, the user's reply on a forum/Tieba. */
    private String content;
    /** Source: the answering user on Zhihu, the user on a forum/Tieba. */
    private String source;
    /** Time. */
    private Date time;
    /** Platform. */
    private String pt;
    /** Only present for the Weibo platform. */
    private int fensi;
    /** Keyword rule. */
    private String wordRule;
    /** Channel rule. */
    private String channelRule;

    /** Creates an instance with every field at its default value. */
    public TemplateData() {
    }

    /**
     * Creates an instance with the six main fields set; {@code fensi},
     * {@code wordRule} and {@code channelRule} keep their defaults.
     *
     * @param url     address
     * @param title   title
     * @param content content
     * @param source  source
     * @param time    time (stored as is, not copied)
     * @param pt      platform
     */
    public TemplateData(String url, String title, String content, String source, Date time, String pt) {
        this.url = url;
        this.title = title;
        this.content = content;
        this.source = source;
        this.time = time;
        this.pt = pt;
    }

    /** @return the address */
    public String getUrl() {
        return this.url;
    }

    /** @param url the address */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the content */
    public String getContent() {
        return this.content;
    }

    /** @param content the content */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the source */
    public String getSource() {
        return this.source;
    }

    /** @param source the source */
    public void setSource(String source) {
        this.source = source;
    }

    /** @return the time (the stored instance, not a copy) */
    public Date getTime() {
        return this.time;
    }

    /** @param time the time (stored as is, not copied) */
    public void setTime(Date time) {
        this.time = time;
    }

    /** @return the platform */
    public String getPt() {
        return this.pt;
    }

    /** @param pt the platform */
    public void setPt(String pt) {
        this.pt = pt;
    }

    /** @return the Weibo-only value */
    public int getFensi() {
        return this.fensi;
    }

    /** @param fensi the Weibo-only value */
    public void setFensi(int fensi) {
        this.fensi = fensi;
    }

    /** @return the keyword rule */
    public String getWordRule() {
        return this.wordRule;
    }

    /** @param wordRule the keyword rule */
    public void setWordRule(String wordRule) {
        this.wordRule = wordRule;
    }

    /** @return the channel rule */
    public String getChannelRule() {
        return this.channelRule;
    }

    /** @param channelRule the channel rule */
    public void setChannelRule(String channelRule) {
        this.channelRule = channelRule;
    }

    /** @return every field in {@code TemplateData [name=value, ...]} form */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("TemplateData [");
        text.append("url=").append(url);
        text.append(", title=").append(title);
        text.append(", content=").append(content);
        text.append(", source=").append(source);
        text.append(", time=").append(time);
        text.append(", pt=").append(pt);
        text.append(", fensi=").append(fensi);
        text.append(", wordRule=").append(wordRule);
        text.append(", channelRule=").append(channelRule);
        return text.append("]").toString();
    }
}
package com.zhiwei.searchhotcrawler.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import com.alibaba.fastjson.JSONObject;
/**
 * Sends WeChat template messages, using an access token obtained from the
 * token service.
 */
public class WechatCodeUtil {
    /**
     * Fetches the WeChat access token from the token service.
     *
     * <p>The app id is AES-encrypted and passed as the {@code appId} query
     * parameter; the token is read from {@code data.accessToken} in the JSON
     * response.
     *
     * <p>NOTE(review): the token service is called over plain HTTP and no
     * read timeout is set; confirm both are acceptable.
     *
     * @return the access token, or an empty string when the request fails or
     *         the response carries no token
     */
    private static String getToken() {
        String jmAppId = AESUtils.encrypt("wechat", WechatConstant.WECHAT_APPID);
        String path = "http://yuqing.zhiweidata.com/WechatPublic/common/getToken?appId=" + jmAppId;
        try {
            URL url = new URL(path);
            URLConnection connection = url.openConnection();
            connection.setConnectTimeout(3000);
            connection.connect();

            // try-with-resources closes the reader on every path and never
            // touches it when opening the stream itself failed.
            StringBuilder result = new StringBuilder();
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), WechatConstant.DEFAULT_CHARSET))) {
                String line;
                while ((line = in.readLine()) != null) {
                    result.append(line);
                }
            }

            JSONObject jsonObject = JSONObject.parseObject(result.toString());
            if (jsonObject == null) {
                return "";
            }
            JSONObject inJson = JSONObject.parseObject(jsonObject.getString("data"));
            if (inJson == null) {
                return "";
            }
            // The token is a credential, so it is deliberately not printed.
            String token = inJson.getString("accessToken");
            return token == null ? "" : token;
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Sends a template message.
     *
     * <p>NOTE(review): the message id is read with {@code getIntValue}; verify
     * that ids returned by the API always fit in an {@code int}.
     *
     * @param templateJson the template message payload
     * @return the {@code msgid} from the response when {@code errmsg} is
     *         {@code "ok"}; otherwise 0, including when the token lookup or
     *         the request throws
     */
    public static int sendDataJson(JSONObject templateJson) {
        int msgid = 0;
        try {
            // Token lookup happens inside the try so its failures also yield 0.
            String url = WechatConstant.WECHAT_TEMPLET_SEND_URL.replace("ACCESS_TOKEN", getToken());
            JSONObject jsonObject = HttpRequest.httpRequest(url, "POST", templateJson.toString());
            if (null != jsonObject && "ok".equals(jsonObject.getString("errmsg"))) {
                msgid = jsonObject.getIntValue("msgid");
            }
        } catch (Exception e) {
            e.printStackTrace();
            msgid = 0;
        }
        return msgid;
    }
}
package com.zhiwei.searchhotcrawler.util;
/**
 * Constants used when talking to WeChat.
 *
 * <p>This class only holds constants and cannot be instantiated.
 *
 * @ClassName: WechatConstant
 * @author 陈炜涛
 * @date 2017-11-04 10:22:39
 */
public final class WechatConstant {
    /** Not instantiable: this class only holds constants. */
    private WechatConstant() {
    }

    /** WeChat account (app id) in use. */
    public static final String WECHAT_APPID = "wx2f555218d66e5948";
    /** Early-warning template id (IT). */
    public static final String WECHAT_TEMPLATEID_EARLY_IT = "trBtKi7YyE_xbeH_xMtWYNGqAVgOZe4NlmPrH6mO-aw";

    /** URL for fetching an access token. */
    public static final String ACCESS_TOKEN_FETCH_URL = "https://api.weixin.qq.com/cgi-bin/token?grant_type=client_credential&appid=APPID&secret=APPSECRET";
    /** URL for fetching a user's basic information. */
    public static final String WECHAT_USER_FETCH_URL = "https://api.weixin.qq.com/cgi-bin/user/info?access_token=ACCESS_TOKEN&openid=OPENID&lang=LANG";
    /** URL for fetching basic information of several users at once. */
    public static final String WECHAT_USER_BATCH_FETCH_URL = "https://api.weixin.qq.com/cgi-bin/user/info/batchget?access_token=ACCESS_TOKEN";
    /** URL for creating a tag. */
    public static final String WECHAT_CREATE_TAG_URL = "https://api.weixin.qq.com/cgi-bin/tags/create?access_token=ACCESS_TOKEN";
    /** URL for listing the tags already created. */
    public static final String WECHAT_GET_TAG_URL = "https://api.weixin.qq.com/cgi-bin/tags/get?access_token=ACCESS_TOKEN";
    /** URL for editing a tag. */
    public static final String WECHAT_EDIT_TAG_URL = "https://api.weixin.qq.com/cgi-bin/tags/update?access_token=ACCESS_TOKEN";
    /** URL for deleting a tag. */
    public static final String WECHAT_DELETE_TAG_URL = "https://api.weixin.qq.com/cgi-bin/tags/delete?access_token=ACCESS_TOKEN";
    /** URL for listing the users that carry a tag. */
    public static final String WECHAT_GET_USER_OF_TAG_URL = "https://api.weixin.qq.com/cgi-bin/user/tag/get?access_token=ACCESS_TOKEN";
    /** URL for tagging several users at once. */
    public static final String WECHAT_BATCH_TAG_USER_URL = "https://api.weixin.qq.com/cgi-bin/tags/members/batchtagging?access_token=ACCESS_TOKEN";
    /** URL for removing a tag from several users at once. */
    public static final String WECHAT_BATCH_UNTAG_USER_URL = "https://api.weixin.qq.com/cgi-bin/tags/members/batchuntagging?access_token=ACCESS_TOKEN";
    /** URL for listing the tags attached to a user. */
    public static final String WECHAT_GET_TAGS_OF_USER_URL = "https://api.weixin.qq.com/cgi-bin/tags/getidlist?access_token=ACCESS_TOKEN";
    /** URL for setting a remark on a user. */
    public static final String WECHAT_REMARK_USER_URL = "https://api.weixin.qq.com/cgi-bin/user/info/updateremark?access_token=ACCESS_TOKEN";
    /** URL for fetching the official account's blacklist. */
    public static final String WECHAT_GET_BLACK_LIST_URL = "https://api.weixin.qq.com/cgi-bin/tags/members/getblacklist?access_token=ACCESS_TOKEN";
    /** URL for blacklisting users. */
    public static final String WECHAT_BLACK_USER_URL = "https://api.weixin.qq.com/cgi-bin/tags/members/batchblacklist?access_token=ACCESS_TOKEN";
    /** URL for removing users from the blacklist. */
    public static final String WECHAT_UNBLACK_USER_URL = "https://api.weixin.qq.com/cgi-bin/tags/members/batchunblacklist?access_token=ACCESS_TOKEN";
    /** URL for sending a template message. */
    public static final String WECHAT_TEMPLET_SEND_URL = "https://api.weixin.qq.com/cgi-bin/message/template/send?access_token=ACCESS_TOKEN";

    /** Access token expiry time (presumably in seconds, like the delta below; confirm). */
    public static final long ACCESS_TOKEN_EXPIRED_TIME = 7200L;
    /** Access token expiry lead: the token is treated as expired 200 seconds early. */
    public static final long ACCESS_TOKEN_EXPIRED_delta = 200L;
    /** Default charset. */
    public static final String DEFAULT_CHARSET = "UTF-8";
    /** Maximum number of users that can be blacklisted in one call. */
    public static final int WECHAT_BLACK_USER_MAX_SIZE = 20;
    /** Maximum length of a user remark. */
    public static final int WECHAT_USER_REMARK_MAX_LENGTH = 30;
    /** Key of the daily-joke menu. */
    public static final String MENU_JOKE_CLICK_KEY = "joke";
    /** Maximum length of a user tag. */
    public static final int WECHAT_USER_TAG_LENGTH = 30;
    /** Placeholder for the access token inside the URLs above. */
    public static final String ACCESS_TOKEN = "ACCESS_TOKEN";

    /** Event key of the "get verification code" menu. */
    public static final String MENU_VER_CODE = "ver_code";
    /** Event key of the "my information" menu. */
    public static final String MENU_MY_CLICK_KEY = "my";
    /** Event key of the "send location" menu. */
    public static final String MENU_LOCATION_SELECT_KEY = "location_select";
    /** Event key of the "take photo with system camera" menu. */
    public static final String MENU_PIC_SYS_PHOTO = "pic_sysphoto";
    /** Event key of the "take photo or choose from album" menu. */
    public static final String MENU_PIC_PHOTO_OR_ALBUM = "pic_photo_or_album";
    /** Event key of the "send picture from WeChat album" menu. */
    public static final String MENU_PIC_WEIXIN = "pic_weixin";
    /** Value {@code scancode_waitmsg}; presumably the event key of a scan-code menu (confirm). */
    public static final String MENU_SCAN_CODE_WAIT_MSG = "scancode_waitmsg";
}
#mongoIp=115.236.59.91
mongoIp=192.168.0.101
mongoPort=27017
db.username=zzwno
db.paasword=zzwno1q2w3e4r
db.certifiedDB=admin
dbName=NetWork
collWeiboName=weibo_hotsearch
collZhihuName=zhihu_hotsearch
\ No newline at end of file
log4j.rootLogger=INFO,stdout,ROLLING_FILE
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=<%d>[%5p] %c - %m%n
log4j.appender.ROLLING_FILE=org.apache.log4j.DailyRollingFileAppender
log4j.appender.ROLLING_FILE.Threshold=INFO
log4j.appender.ROLLING_FILE.File=./Log/hotweibo.log
log4j.appender.ROLLING_FILE.Append=true
log4j.appender.ROLLING_FILE.layout=org.apache.log4j.PatternLayout
log4j.appender.ROLLING_FILE.layout.ConversionPattern=<%d>[%5p] %c - %m%n
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment