Commit a78bc0f3 by [zhangzhiwei]

修正热搜采集

parent f87165c0
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>searchhotcrawler</artifactId> <artifactId>searchhotcrawler</artifactId>
<name>各平台热搜榜单采集程序</name> <name>各平台热搜榜单采集程序</name>
<version>0.0.3-SNAPSHOT</version> <version>0.0.6-SNAPSHOT</version>
<description>各平台热搜榜单采集程序 <description>各平台热搜榜单采集程序
目前包含:1.微博时时热搜采集程序、2.知乎热搜采集程序</description> 目前包含:1.微博时时热搜采集程序、2.知乎热搜采集程序</description>
<developers> <developers>
......
...@@ -20,6 +20,7 @@ public class Config { ...@@ -20,6 +20,7 @@ public class Config {
dbName = conf.getProperty("dbName"); dbName = conf.getProperty("dbName");
collWeiboName = conf.getProperty("collWeiboName"); collWeiboName = conf.getProperty("collWeiboName");
collZhihuName = conf.getProperty("collZhihuName"); collZhihuName = conf.getProperty("collZhihuName");
collWechatUserName = conf.getProperty("collWechatUserName");
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
...@@ -35,5 +36,6 @@ public class Config { ...@@ -35,5 +36,6 @@ public class Config {
public static String dbName; public static String dbName;
public static String collWeiboName; public static String collWeiboName;
public static String collZhihuName; public static String collZhihuName;
public static String collWechatUserName;
} }
...@@ -64,13 +64,13 @@ public class WeiboHotSearchCrawler { ...@@ -64,13 +64,13 @@ public class WeiboHotSearchCrawler {
try { try {
String id = "http://s.weibo.com"+element.select("td.td-02").select("a").attr("href"); String id = "http://s.weibo.com"+element.select("td.td-02").select("a").attr("href");
String name = element.select("td.td-02").select("a").text(); String name = element.select("td.td-02").select("a").text();
String num = !element.select("td.td-03").text().equals("")?element.select("td.td-03").text():"0"; String num = !element.select("td.td-02").select("span").text().equals("")?element.select("td.td-02").select("span").text():"0";
int hotCount = Integer.valueOf(num); int hotCount = Integer.valueOf(num);
WeiboHotSearch hotSearch = new WeiboHotSearch(id, name, hotCount, true); WeiboHotSearch hotSearch = new WeiboHotSearch(id, name, hotCount, true);
list.add(hotSearch); list.add(hotSearch);
} catch (Exception e) { } catch (Exception e) {
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
logger.error("解析微博时时热搜时出现解析错误",e.fillInStackTrace()); logger.error("解析微博时时热搜时出现解析错误", e);
continue; continue;
} }
} }
......
...@@ -30,11 +30,10 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{ ...@@ -30,11 +30,10 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
* @param @param doc 设定文件 * @param @param doc 设定文件
* @return void 返回类型 * @return void 返回类型
*/ */
@SuppressWarnings("deprecation")
public void addWeiboHotSearch(List<DBObject> list){ public void addWeiboHotSearch(List<DBObject> list){
for(int i=0; i<3; i++){ for(int i=0; i<3; i++){
try { try {
this.getReadColl().insert(list, WriteConcern.SAFE); this.getReadColl().insert(list);
ZhiWeiTools.sleep(200); ZhiWeiTools.sleep(200);
break; break;
} catch (Exception e) { } catch (Exception e) {
...@@ -44,7 +43,6 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{ ...@@ -44,7 +43,6 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
} }
} }
/** /**
* @Title: getChangeCount * @Title: getChangeCount
* @author hero * @author hero
......
...@@ -7,8 +7,10 @@ import java.util.concurrent.TimeUnit; ...@@ -7,8 +7,10 @@ import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.cache.CacheListener; import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun; import com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun; import com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun;
import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun; import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun; import com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class HotSearchRun { public class HotSearchRun {
...@@ -25,9 +27,11 @@ public class HotSearchRun { ...@@ -25,9 +27,11 @@ public class HotSearchRun {
} }
public static void main(String[] args) { public static void main(String[] args) {
// new HotSearchRun().showTimer(); new UpdateWechatUserRun().start();
// new CacheListener().startListen(); ZhiWeiTools.sleep(10000);
new HotSearchRun().showTimer();
new CacheListener().startListen();
new SendWeiboHotSearchRun().start(); new SendWeiboHotSearchRun().start();
// new SendZhihuHotSearchRun().start(); new SendZhihuHotSearchRun().start();
} }
} }
...@@ -9,6 +9,7 @@ import org.slf4j.Logger; ...@@ -9,6 +9,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.dao.WechatUserDao;
import com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO; import com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO;
import com.zhiwei.searchhotcrawler.util.Template; import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil; import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
...@@ -18,6 +19,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; ...@@ -18,6 +19,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class SendWeiboHotSearchRun extends Thread { public class SendWeiboHotSearchRun extends Thread {
private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO(); private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO();
private static WechatUserDao wechatUserDao = new WechatUserDao();
private static Logger logger = LoggerFactory.getLogger(SendWeiboHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(SendWeiboHotSearchRun.class);
@Override @Override
...@@ -111,14 +113,10 @@ public class SendWeiboHotSearchRun extends Thread { ...@@ -111,14 +113,10 @@ public class SendWeiboHotSearchRun extends Thread {
* @return List<String> 返回类型 * @return List<String> 返回类型
*/ */
public static List<String> getUserList() { public static List<String> getUserList() {
for (int i = 0; i < 3; i++) { List<String> userList = wechatUserDao.getWechatUserByGroup("weibohot");
List<String> userList = WechatCodeUtil.getUserList("weibohot"); if(userList==null){
if (userList != null) { userList = WechatCodeUtil.getUserListByGroupName("weibohot");
return userList;
} else {
continue;
}
} }
return null; return userList;
} }
} }
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Calendar; import java.util.Calendar;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
...@@ -12,6 +11,7 @@ import org.slf4j.LoggerFactory; ...@@ -12,6 +11,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.dao.WechatUserDao;
import com.zhiwei.searchhotcrawler.dao.ZhihuHotSearchDAO; import com.zhiwei.searchhotcrawler.dao.ZhihuHotSearchDAO;
import com.zhiwei.searchhotcrawler.util.Template; import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil; import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
...@@ -21,6 +21,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; ...@@ -21,6 +21,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class SendZhihuHotSearchRun extends Thread{ public class SendZhihuHotSearchRun extends Thread{
private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO(); private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO();
private static WechatUserDao wechatUserDao = new WechatUserDao();
private static Logger logger = LoggerFactory.getLogger(SendZhihuHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(SendZhihuHotSearchRun.class);
@Override @Override
public void run() { public void run() {
...@@ -115,16 +116,10 @@ public class SendZhihuHotSearchRun extends Thread{ ...@@ -115,16 +116,10 @@ public class SendZhihuHotSearchRun extends Thread{
*/ */
private static List<String> getUserList() private static List<String> getUserList()
{ {
List<String> userList = new ArrayList<String>(); List<String> userList = wechatUserDao.getWechatUserByGroup("LP组");
for(int i=0;i<3;i++){ if(userList==null){
List<String> lpUserList = WechatCodeUtil.getUserList("LP组"); userList = WechatCodeUtil.getUserListByGroupName("LP组");
if(lpUserList!=null){ }
userList.addAll(lpUserList);
break;
}else{
continue;
}
}
return userList; return userList;
} }
......
package com.zhiwei.searchhotcrawler.util; package com.zhiwei.searchhotcrawler.util;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -87,7 +88,7 @@ public class WechatCodeUtil { ...@@ -87,7 +88,7 @@ public class WechatCodeUtil {
* @return List<String> 返回类型 * @return List<String> 返回类型
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<String> getUserList(String groupName) { public static List<String> getUserListByGroupName(String groupName) {
try { try {
String token = getToken(); String token = getToken();
if(token!=null){ if(token!=null){
...@@ -99,6 +100,37 @@ public class WechatCodeUtil { ...@@ -99,6 +100,37 @@ public class WechatCodeUtil {
if (null != jsonObject) { if (null != jsonObject) {
if(jsonObject.containsKey("data")) { if(jsonObject.containsKey("data")) {
return (List<String>) jsonObject.getJSONObject("data").getObject("openid", List.class); return (List<String>) jsonObject.getJSONObject("data").getObject("openid", List.class);
}else{
logger.info("拉取用户列表时,出现问题{}", jsonObject);
}
}
}else{
logger.info("token 获取失败");
}
} catch (Exception e) {
e.printStackTrace();
return null;
}
return null;
}
public static List<String> getUserListByGroupId(Integer groupId) {
try {
String token = getToken();
if(token!=null){
String url = "https://api.weixin.qq.com/cgi-bin/user/tag/get?access_token="+token;
JSONObject postData = new JSONObject();
postData.put("tagid", groupId);
postData.put("next_openid", "");
JSONObject jsonObject = HttpRequest.httpRequest(url, "GET", postData.toString());
if (null != jsonObject) {
if(jsonObject.containsKey("data")) {
return (List<String>) jsonObject.getJSONObject("data").getObject("openid", List.class);
}else{
logger.info("拉取用户列表时,出现问题{}", jsonObject);
} }
} }
}else{ }else{
...@@ -149,5 +181,35 @@ public class WechatCodeUtil { ...@@ -149,5 +181,35 @@ public class WechatCodeUtil {
} }
return groupId; return groupId;
} }
/**
 * Queries all tags (groups) defined under the official account.
 *
 * <p>Requests the WeChat {@code cgi-bin/tags/get} endpoint and maps every returned
 * tag name to its numeric id.
 *
 * @return a map of tag name to tag id; an empty map when no access token could be
 *         obtained, when the response body is {@code null}, or when the response
 *         carries no "tags" array; {@code null} when the HTTP request fails with
 *         an {@link IOException}
 */
public static Map<String,Integer> getAllGroupIp() {
    Map<String,Integer> resultMap = new HashMap<String,Integer>();
    // Same guard the sibling user-list methods apply: without a token the request
    // would be sent with "access_token=null" and can only fail.
    String token = getToken();
    if (token == null) {
        logger.info("token 获取失败");
        return resultMap;
    }
    String url = "https://api.weixin.qq.com/cgi-bin/tags/get?access_token=" + token;
    Map<String, String> headerMap = HeaderTool.getCommonHead();
    try {
        String htmlBody = HttpClientTemplateOK.get(url, null, headerMap);
        if (htmlBody != null) {
            // The substring check alone does not guarantee a top-level "tags" key,
            // so the parsed array is null-checked as well.
            JSONArray jsonArry = htmlBody.contains("tags")
                    ? JSONObject.parseObject(htmlBody).getJSONArray("tags")
                    : null;
            if (jsonArry != null) {
                for (int i = 0; i < jsonArry.size(); i++) {
                    JSONObject data = jsonArry.getJSONObject(i);
                    Integer id = data.getInteger("id");
                    String name = data.getString("name");
                    resultMap.put(name, id);
                }
            } else {
                logger.info("获取分组id时出现错误,数据为:::{}", htmlBody);
            }
        }
    } catch (IOException e) {
        // Pass the exception itself: fillInStackTrace() would overwrite the original
        // stack trace with the location of this catch block.
        logger.error("获取分组id时出现错误", e);
        return null;
    }
    return resultMap;
}
} }
#mongoIp=202.107.192.94 mongoIp=202.107.192.94
mongoIp=192.168.0.101 #mongoIp=192.168.0.101
mongoPort=30000 mongoPort=30000
db.username=zzwno db.username=zzwno
db.paasword=zzwno1q2w3e4r db.paasword=zzwno1q2w3e4r
db.certifiedDB=admin db.certifiedDB=admin
dbName=NetWork dbName=NetWork
collWeiboName=weibo_hotsearch2018_10 collWeiboName=weibo_hotsearch2018_10
collZhihuName=zhihu_hotsearch2018_10 collZhihuName=zhihu_hotsearch2018_10
\ No newline at end of file collWechatUserName=wechat_user
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment