Commit 9635dc66 by 马黎滨

Merge branch 'mlbWork' into 'master'

微博话题采集接口更换

See merge request !14
parents 8b912371 3cda663a
......@@ -9,6 +9,7 @@ import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.URLCodeUtil;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
......@@ -132,7 +133,7 @@ public class WeiboTopicCrawler {
public static List<HotSearchList> startCrawlerByPhone(){
List<HotSearchList> topicList = new ArrayList<>();
for(int page=1; page<=6; page++){
String pageUrl = "https://api.weibo.cn/2/page?gsid=_2A25zJX_EDeRxGedH71YS8CzKzzmIHXVuc_QMrDV6PUJbkdANLXPbkWpNUK3OyitGCJsX8exvua-vfubUqCiaA4lb&from=10A1193010&c=iphone&s=2827eebe&count=20&containerid=106003type%253D25%2526t%253D3%2526disable_hot%253D1%2526filter_type%253Dtopicscene&page=" + page;
String pageUrl = "https://m.weibo.cn/api/container/getIndex?containerid=231648_-_2&page=" + page;
Request request = RequestUtils.wrapGet(pageUrl);
String htmlBody = null;
//重试三次
......@@ -144,7 +145,7 @@ public class WeiboTopicCrawler {
log.error("下载榜单列表页面时出现错误,错误为:{}", e);
continue;
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
topicList.addAll(parseTopicHtml(htmlBody));
break;
} else {
......@@ -158,7 +159,7 @@ public class WeiboTopicCrawler {
private static List<HotSearchList> parseTopicHtml(String htmlBody) {
try {
JSONArray cards = JSONObject.parseObject(htmlBody).getJSONArray("cards");
JSONArray cards = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONArray("cards");
if(Objects.nonNull(cards) && !cards.isEmpty()) {
List<HotSearchList> topicList = new ArrayList<>();
Integer rank = null;
......@@ -182,25 +183,27 @@ public class WeiboTopicCrawler {
String commentNumStr = desc2.replaceAll("讨论.*", "").trim();
String readNumStr = desc2.replaceAll(".*讨论|阅读", "").trim();
try {
if(commentNumStr.contains("万")){
commentNumStr = commentNumStr.replaceAll("万", "");
commentNum = (int)(Double.parseDouble(commentNumStr)*10000);
}else if(commentNumStr.contains("亿")){
commentNumStr = commentNumStr.replaceAll("亿", "");
commentNum = (int)(Double.parseDouble(commentNumStr)*10000000);
}else{
commentNum = Integer.getInteger(commentNumStr);
}
if(readNumStr.contains("万")){
readNumStr = readNumStr.replaceAll("万", "");
readNum = (int)(Double.parseDouble(readNumStr)*10000);
}else if(readNumStr.contains("亿")){
readNumStr = readNumStr.replaceAll("亿", "");
readNum = (int)(Double.parseDouble(readNumStr)*10000000);
}else{
readNum = Integer.getInteger(readNumStr);
}
commentNum = TipsUtils.getHotCount(commentNumStr);
readNum = TipsUtils.getHotCount(readNumStr);
// if(commentNumStr.contains("万")){
// commentNumStr = commentNumStr.replaceAll("万", "");
// commentNum = (int)(Double.parseDouble(commentNumStr)*10000);
// }else if(commentNumStr.contains("亿")){
// commentNumStr = commentNumStr.replaceAll("亿", "");
// commentNum = (int)(Double.parseDouble(commentNumStr)*10000000);
// }else{
// commentNum = Integer.getInteger(commentNumStr);
// }
//
// if(readNumStr.contains("万")){
// readNumStr = readNumStr.replaceAll("万", "");
// readNum = (int)(Double.parseDouble(readNumStr)*10000);
// }else if(readNumStr.contains("亿")){
// readNumStr = readNumStr.replaceAll("亿", "");
// readNum = (int)(Double.parseDouble(readNumStr)*10000000);
// }else{
// readNum = Integer.getInteger(readNumStr);
// }
}catch (Exception e){
e.printStackTrace();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment