Commit 718abf10 by yangchen

微信 关键词采集增加cookie

parent 4287aea9
......@@ -22,6 +22,7 @@ public class WechatAccountInfo {
private static HttpBoot httpBoot = new HttpBoot();
private static Logger logger = LoggerFactory.getLogger(WechatAccountInfo.class);
/***
* @Title: getWechatAccount
* @Description: TODO(根据帐号id查询帐号信息)
......
......@@ -8,6 +8,7 @@ import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
......@@ -59,10 +60,13 @@ public class WechatAritcleSearch {
* @return List<Wechat> 返回类型
*/
public static List<WechatAricle> wechatKeywordSearch(String word, int tsn, String startTime, String endTime,
Proxy proxy) throws Exception, UnsupportedEncodingException {
Proxy proxy,String cookie) throws Exception, UnsupportedEncodingException {
List<WechatAricle> result = new ArrayList<WechatAricle>();
Map<String, String> headerMap = HeaderTool.getCommonHead();
headerMap.put("Host", "weixin.sogou.com");
if(Objects.nonNull(cookie)) {
headerMap.put("cookie", cookie);
}
boolean f = true;
int page = 1;
......@@ -119,13 +123,13 @@ public class WechatAritcleSearch {
wechat = new WechatAricle(link, title, source, content, date, readNum, 0, openid, "unknow");
result.add(wechat);
} catch (Exception e) {
logger.debug("解析数据出现错误:{}", e.getMessage());
continue;
logger.debug("解析数据出现错误:{}", e);
}
}
// 解析最大可寻页码
String pageNext = document.select("[id=pagebar_container]>a").text();
if (pageNext.contains("下一页")) {
// logger.info("采集到 {} 页" , page);
page++;
} else {
f = false;
......@@ -133,7 +137,7 @@ public class WechatAritcleSearch {
// logger.info("数据总页数为:{}", page);
} catch (Exception e) {
logger.debug("获取数据出现问题:{}", e.getMessage());
return null;
return result;
}
} else {
logger.info("根据关键词获取微信文章失败,返回的数据结果集: {}", htmlBody);
......@@ -424,16 +428,21 @@ public class WechatAritcleSearch {
String openId = null;
String url = "https://weixin.sogou.com/weixin?zhnss=1&type=1&ie=utf8&query=" + URLCodeUtil.getURLEncode(idOrName, "utf-8");
String htmlBody;
for(int i = 1;i < 3;i++) {
try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), proxyHolder,true).body().string();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), proxyHolder).body().string();
System.out.println(htmlBody);
if (htmlBody != null) {
JSONObject json = JSONObject.parseObject(htmlBody);
openId = json.getString("openid");
return openId;
}
} catch (Exception e) {
e.printStackTrace();
openId = null;
}
}
return openId;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment