Commit a78bc0f3 by [zhangzhiwei]

修正热搜采集

parent f87165c0
......@@ -4,7 +4,7 @@
<groupId>com.zhiwei</groupId>
<artifactId>searchhotcrawler</artifactId>
<name>各平台热搜榜单采集程序</name>
<version>0.0.3-SNAPSHOT</version>
<version>0.0.6-SNAPSHOT</version>
<description>各平台热搜榜单采集程序
目前包含:1.微博实时热搜采集程序、2.知乎热搜采集程序</description>
<developers>
......
......@@ -20,6 +20,7 @@ public class Config {
dbName = conf.getProperty("dbName");
collWeiboName = conf.getProperty("collWeiboName");
collZhihuName = conf.getProperty("collZhihuName");
collWechatUserName = conf.getProperty("collWechatUserName");
} catch (Exception e) {
e.printStackTrace();
......@@ -35,5 +36,6 @@ public class Config {
public static String dbName;
public static String collWeiboName;
public static String collZhihuName;
public static String collWechatUserName;
}
......@@ -64,13 +64,13 @@ public class WeiboHotSearchCrawler {
try {
String id = "http://s.weibo.com"+element.select("td.td-02").select("a").attr("href");
String name = element.select("td.td-02").select("a").text();
String num = !element.select("td.td-03").text().equals("")?element.select("td.td-03").text():"0";
String num = !element.select("td.td-02").select("span").text().equals("")?element.select("td.td-02").select("span").text():"0";
int hotCount = Integer.valueOf(num);
WeiboHotSearch hotSearch = new WeiboHotSearch(id, name, hotCount, true);
list.add(hotSearch);
} catch (Exception e) {
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
logger.error("解析微博时时热搜时出现解析错误",e.fillInStackTrace());
logger.error("解析微博时时热搜时出现解析错误", e);
continue;
}
}
......
......@@ -30,11 +30,10 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
* @param @param doc 设定文件
* @return void 返回类型
*/
@SuppressWarnings("deprecation")
public void addWeiboHotSearch(List<DBObject> list){
for(int i=0; i<3; i++){
try {
this.getReadColl().insert(list, WriteConcern.SAFE);
this.getReadColl().insert(list);
ZhiWeiTools.sleep(200);
break;
} catch (Exception e) {
......@@ -44,7 +43,6 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
}
}
/**
* @Title: getChangeCount
* @author hero
......
......@@ -7,8 +7,10 @@ import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun;
import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class HotSearchRun {
......@@ -25,9 +27,11 @@ public class HotSearchRun {
}
public static void main(String[] args) {
// new HotSearchRun().showTimer();
// new CacheListener().startListen();
new UpdateWechatUserRun().start();
ZhiWeiTools.sleep(10000);
new HotSearchRun().showTimer();
new CacheListener().startListen();
new SendWeiboHotSearchRun().start();
// new SendZhihuHotSearchRun().start();
new SendZhihuHotSearchRun().start();
}
}
......@@ -9,6 +9,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.dao.WechatUserDao;
import com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO;
import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
......@@ -18,6 +19,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class SendWeiboHotSearchRun extends Thread {
private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO();
private static WechatUserDao wechatUserDao = new WechatUserDao();
private static Logger logger = LoggerFactory.getLogger(SendWeiboHotSearchRun.class);
@Override
......@@ -111,14 +113,10 @@ public class SendWeiboHotSearchRun extends Thread {
* @return List<String> 返回类型
*/
public static List<String> getUserList() {
for (int i = 0; i < 3; i++) {
List<String> userList = WechatCodeUtil.getUserList("weibohot");
if (userList != null) {
return userList;
} else {
continue;
List<String> userList = wechatUserDao.getWechatUserByGroup("weibohot");
if(userList==null){
userList = WechatCodeUtil.getUserListByGroupName("weibohot");
}
}
return null;
return userList;
}
}
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
......@@ -12,6 +11,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.dao.WechatUserDao;
import com.zhiwei.searchhotcrawler.dao.ZhihuHotSearchDAO;
import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
......@@ -21,6 +21,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class SendZhihuHotSearchRun extends Thread{
private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO();
private static WechatUserDao wechatUserDao = new WechatUserDao();
private static Logger logger = LoggerFactory.getLogger(SendZhihuHotSearchRun.class);
@Override
public void run() {
......@@ -115,15 +116,9 @@ public class SendZhihuHotSearchRun extends Thread{
*/
private static List<String> getUserList()
{
List<String> userList = new ArrayList<String>();
for(int i=0;i<3;i++){
List<String> lpUserList = WechatCodeUtil.getUserList("LP组");
if(lpUserList!=null){
userList.addAll(lpUserList);
break;
}else{
continue;
}
List<String> userList = wechatUserDao.getWechatUserByGroup("LP组");
if(userList==null){
userList = WechatCodeUtil.getUserListByGroupName("LP组");
}
return userList;
}
......
package com.zhiwei.searchhotcrawler.util;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
......@@ -87,7 +88,7 @@ public class WechatCodeUtil {
* @return List<String> 返回类型
*/
@SuppressWarnings("unchecked")
public static List<String> getUserList(String groupName) {
public static List<String> getUserListByGroupName(String groupName) {
try {
String token = getToken();
if(token!=null){
......@@ -99,6 +100,37 @@ public class WechatCodeUtil {
if (null != jsonObject) {
if(jsonObject.containsKey("data")) {
return (List<String>) jsonObject.getJSONObject("data").getObject("openid", List.class);
}else{
logger.info("拉取用户列表时,出现问题{}", jsonObject);
}
}
}else{
logger.info("token 获取失败");
}
} catch (Exception e) {
e.printStackTrace();
return null;
}
return null;
}
public static List<String> getUserListByGroupId(Integer groupId) {
try {
String token = getToken();
if(token!=null){
String url = "https://api.weixin.qq.com/cgi-bin/user/tag/get?access_token="+token;
JSONObject postData = new JSONObject();
postData.put("tagid", groupId);
postData.put("next_openid", "");
JSONObject jsonObject = HttpRequest.httpRequest(url, "GET", postData.toString());
if (null != jsonObject) {
if(jsonObject.containsKey("data")) {
return (List<String>) jsonObject.getJSONObject("data").getObject("openid", List.class);
}else{
logger.info("拉取用户列表时,出现问题{}", jsonObject);
}
}
}else{
......@@ -150,4 +182,34 @@ public class WechatCodeUtil {
return groupId;
}
/**
 * Queries every tag (user group) defined under the official account.
 *
 * <p>Calls the WeChat {@code cgi-bin/tags/get} endpoint with the current access
 * token and collects each entry of the returned {@code tags} array.
 *
 * @return a map from tag name to tag id; an empty map when no access token is
 *         available or the response carries no {@code tags} array;
 *         {@code null} when the HTTP request fails with an {@link IOException}
 */
public static Map<String,Integer> getAllGroupIp() {
    Map<String,Integer> resultMap = new HashMap<String,Integer>();
    String token = getToken();
    if (token == null) {
        // Same guard as getUserListByGroupName: never send "access_token=null".
        logger.info("token 获取失败");
        return resultMap;
    }
    String url = "https://api.weixin.qq.com/cgi-bin/tags/get?access_token=" + token;
    Map<String, String> headerMap = HeaderTool.getCommonHead();
    try {
        String htmlBody = HttpClientTemplateOK.get(url, null, headerMap);
        if (htmlBody == null) {
            return resultMap;
        }
        // The substring test is only a cheap pre-filter; the key may still be
        // absent at the top level, in which case getJSONArray yields null.
        JSONArray tags = htmlBody.contains("tags")
                ? JSONObject.parseObject(htmlBody).getJSONArray("tags")
                : null;
        if (tags == null) {
            logger.info("获取分组id时出现错误,数据为:::{}", htmlBody);
            return resultMap;
        }
        for (int i = 0; i < tags.size(); i++) {
            JSONObject tag = tags.getJSONObject(i);
            resultMap.put(tag.getString("name"), tag.getInteger("id"));
        }
    } catch (IOException e) {
        // Log the exception itself: fillInStackTrace() would replace the
        // original trace with this catch site and hide the real failure point.
        logger.error("获取分组id时出现错误", e);
        return null;
    }
    return resultMap;
}
}
#mongoIp=202.107.192.94
mongoIp=192.168.0.101
mongoIp=202.107.192.94
#mongoIp=192.168.0.101
mongoPort=30000
db.username=zzwno
db.paasword=zzwno1q2w3e4r
......@@ -7,3 +7,4 @@ db.certifiedDB=admin
dbName=NetWork
collWeiboName=weibo_hotsearch2018_10
collZhihuName=zhihu_hotsearch2018_10
collWechatUserName=wechat_user
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment