You need to sign in or sign up before continuing.
Commit 65b7efe4 by 马黎滨

微博话题采集修改

parent 2e6f9c72
......@@ -129,14 +129,14 @@ public class WeiboTopicCrawler {
*/
public static List<HotSearchList> startCrawlerByPhone(){
List<HotSearchList> topicList = new ArrayList<>();
for(int page=1; page<=7; page++){
for(int page=1; page<=6; page++){
String pageUrl = "https://api.weibo.cn/2/page?gsid=_2A25zJX_EDeRxGedH71YS8CzKzzmIHXVuc_QMrDV6PUJbkdANLXPbkWpNUK3OyitGCJsX8exvua-vfubUqCiaA4lb&from=10A1193010&c=iphone&s=2827eebe&count=20&containerid=106003type%253D25%2526t%253D3%2526disable_hot%253D1%2526filter_type%253Dtopicscene&page=" + page;
// Retry up to 5 times
for(int retryTimes = 1; retryTimes<=5; retryTimes++) {
try {
// log.info("pageUrl::{}", pageUrl);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl), ProxyHolder.NAT_HEAVY_PROXY).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("top_mark_text")) {
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
topicList.addAll(parseTopicHtml(htmlBody));
break;
}else {
......@@ -170,8 +170,11 @@ public class WeiboTopicCrawler {
rank = cardGroup.getInteger("top_mark_text");
topicName = cardGroup.getString("title_sub");
url = "https://s.weibo.com/weibo?q="+ URLCodeUtil.getURLEncode(topicName, "utf-8");
description = cardGroup.getString("desc1");
desc2 = cardGroup.getString("desc2");
description = null;
if(cardGroup.containsKey("card_expand")){
description = cardGroup.getJSONObject("card_expand").getString("content");
}
desc2 = cardGroup.getString("desc");
String commentNumStr = desc2.replaceAll("讨论.*", "").trim();
String readNumStr = desc2.replaceAll(".*讨论|阅读", "").trim();
try {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment