Commit 718abf10 by yangchen

微信 关键词采集增加cookie

parent 4287aea9
...@@ -22,6 +22,7 @@ public class WechatAccountInfo { ...@@ -22,6 +22,7 @@ public class WechatAccountInfo {
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot();
private static Logger logger = LoggerFactory.getLogger(WechatAccountInfo.class); private static Logger logger = LoggerFactory.getLogger(WechatAccountInfo.class);
/*** /***
* @Title: getWechatAccount * @Title: getWechatAccount
* @Description: TODO(根据帐号id查询帐号信息) * @Description: TODO(根据帐号id查询帐号信息)
......
...@@ -8,6 +8,7 @@ import java.util.ArrayList; ...@@ -8,6 +8,7 @@ import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
...@@ -59,10 +60,13 @@ public class WechatAritcleSearch { ...@@ -59,10 +60,13 @@ public class WechatAritcleSearch {
* @return List<Wechat> 返回类型 * @return List<Wechat> 返回类型
*/ */
public static List<WechatAricle> wechatKeywordSearch(String word, int tsn, String startTime, String endTime, public static List<WechatAricle> wechatKeywordSearch(String word, int tsn, String startTime, String endTime,
Proxy proxy) throws Exception, UnsupportedEncodingException { Proxy proxy,String cookie) throws Exception, UnsupportedEncodingException {
List<WechatAricle> result = new ArrayList<WechatAricle>(); List<WechatAricle> result = new ArrayList<WechatAricle>();
Map<String, String> headerMap = HeaderTool.getCommonHead(); Map<String, String> headerMap = HeaderTool.getCommonHead();
headerMap.put("Host", "weixin.sogou.com"); headerMap.put("Host", "weixin.sogou.com");
if(Objects.nonNull(cookie)) {
headerMap.put("cookie", cookie);
}
boolean f = true; boolean f = true;
int page = 1; int page = 1;
...@@ -119,13 +123,13 @@ public class WechatAritcleSearch { ...@@ -119,13 +123,13 @@ public class WechatAritcleSearch {
wechat = new WechatAricle(link, title, source, content, date, readNum, 0, openid, "unknow"); wechat = new WechatAricle(link, title, source, content, date, readNum, 0, openid, "unknow");
result.add(wechat); result.add(wechat);
} catch (Exception e) { } catch (Exception e) {
logger.debug("解析数据出现错误:{}", e.getMessage()); logger.debug("解析数据出现错误:{}", e);
continue;
} }
} }
// 解析最大可寻页码 // 解析最大可寻页码
String pageNext = document.select("[id=pagebar_container]>a").text(); String pageNext = document.select("[id=pagebar_container]>a").text();
if (pageNext.contains("下一页")) { if (pageNext.contains("下一页")) {
// logger.info("采集到 {} 页" , page);
page++; page++;
} else { } else {
f = false; f = false;
...@@ -133,7 +137,7 @@ public class WechatAritcleSearch { ...@@ -133,7 +137,7 @@ public class WechatAritcleSearch {
// logger.info("数据总页数为:{}", page); // logger.info("数据总页数为:{}", page);
} catch (Exception e) { } catch (Exception e) {
logger.debug("获取数据出现问题:{}", e.getMessage()); logger.debug("获取数据出现问题:{}", e.getMessage());
return null; return result;
} }
} else { } else {
logger.info("根据关键词获取微信文章失败,返回的数据结果集: {}", htmlBody); logger.info("根据关键词获取微信文章失败,返回的数据结果集: {}", htmlBody);
...@@ -424,16 +428,21 @@ public class WechatAritcleSearch { ...@@ -424,16 +428,21 @@ public class WechatAritcleSearch {
String openId = null; String openId = null;
String url = "https://weixin.sogou.com/weixin?zhnss=1&type=1&ie=utf8&query=" + URLCodeUtil.getURLEncode(idOrName, "utf-8"); String url = "https://weixin.sogou.com/weixin?zhnss=1&type=1&ie=utf8&query=" + URLCodeUtil.getURLEncode(idOrName, "utf-8");
String htmlBody; String htmlBody;
for(int i = 1;i < 3;i++) {
try { try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), proxyHolder,true).body().string(); htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), proxyHolder).body().string();
System.out.println(htmlBody); System.out.println(htmlBody);
if (htmlBody != null) { if (htmlBody != null) {
JSONObject json = JSONObject.parseObject(htmlBody); JSONObject json = JSONObject.parseObject(htmlBody);
openId = json.getString("openid"); openId = json.getString("openid");
return openId;
} }
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace();
openId = null; openId = null;
} }
}
return openId; return openId;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment