Commit 65b7efe4 by 马黎滨

微博话题采集修改

parent 2e6f9c72
...@@ -129,14 +129,14 @@ public class WeiboTopicCrawler { ...@@ -129,14 +129,14 @@ public class WeiboTopicCrawler {
*/ */
public static List<HotSearchList> startCrawlerByPhone(){ public static List<HotSearchList> startCrawlerByPhone(){
List<HotSearchList> topicList = new ArrayList<>(); List<HotSearchList> topicList = new ArrayList<>();
for(int page=1; page<=7; page++){ for(int page=1; page<=6; page++){
String pageUrl = "https://api.weibo.cn/2/page?gsid=_2A25zJX_EDeRxGedH71YS8CzKzzmIHXVuc_QMrDV6PUJbkdANLXPbkWpNUK3OyitGCJsX8exvua-vfubUqCiaA4lb&from=10A1193010&c=iphone&s=2827eebe&count=20&containerid=106003type%253D25%2526t%253D3%2526disable_hot%253D1%2526filter_type%253Dtopicscene&page=" + page; String pageUrl = "https://api.weibo.cn/2/page?gsid=_2A25zJX_EDeRxGedH71YS8CzKzzmIHXVuc_QMrDV6PUJbkdANLXPbkWpNUK3OyitGCJsX8exvua-vfubUqCiaA4lb&from=10A1193010&c=iphone&s=2827eebe&count=20&containerid=106003type%253D25%2526t%253D3%2526disable_hot%253D1%2526filter_type%253Dtopicscene&page=" + page;
//重试三次 //重试三次
for(int retryTimes = 1; retryTimes<=5; retryTimes++) { for(int retryTimes = 1; retryTimes<=5; retryTimes++) {
try { try {
// log.info("pageUrl::{}", pageUrl); // log.info("pageUrl::{}", pageUrl);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl), ProxyHolder.NAT_HEAVY_PROXY).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl), ProxyHolder.NAT_HEAVY_PROXY).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("top_mark_text")) { if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
topicList.addAll(parseTopicHtml(htmlBody)); topicList.addAll(parseTopicHtml(htmlBody));
break; break;
}else { }else {
...@@ -170,8 +170,11 @@ public class WeiboTopicCrawler { ...@@ -170,8 +170,11 @@ public class WeiboTopicCrawler {
rank = cardGroup.getInteger("top_mark_text"); rank = cardGroup.getInteger("top_mark_text");
topicName = cardGroup.getString("title_sub"); topicName = cardGroup.getString("title_sub");
url = "https://s.weibo.com/weibo?q="+ URLCodeUtil.getURLEncode(topicName, "utf-8"); url = "https://s.weibo.com/weibo?q="+ URLCodeUtil.getURLEncode(topicName, "utf-8");
description = cardGroup.getString("desc1"); description = null;
desc2 = cardGroup.getString("desc2"); if(cardGroup.containsKey("card_expand")){
description = cardGroup.getJSONObject("card_expand").getString("content");
}
desc2 = cardGroup.getString("desc");
String commentNumStr = desc2.replaceAll("讨论.*", "").trim(); String commentNumStr = desc2.replaceAll("讨论.*", "").trim();
String readNumStr = desc2.replaceAll(".*讨论|阅读", "").trim(); String readNumStr = desc2.replaceAll(".*讨论|阅读", "").trim();
try { try {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment