Commit 9635dc66 by 马黎滨

Merge branch 'mlbWork' into 'master'

微博话题采集接口更换

See merge request !14
parents 8b912371 3cda663a
...@@ -9,6 +9,7 @@ import com.zhiwei.crawler.utils.RequestUtils; ...@@ -9,6 +9,7 @@ import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic; import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request; import okhttp3.Request;
...@@ -132,7 +133,7 @@ public class WeiboTopicCrawler { ...@@ -132,7 +133,7 @@ public class WeiboTopicCrawler {
public static List<HotSearchList> startCrawlerByPhone(){ public static List<HotSearchList> startCrawlerByPhone(){
List<HotSearchList> topicList = new ArrayList<>(); List<HotSearchList> topicList = new ArrayList<>();
for(int page=1; page<=6; page++){ for(int page=1; page<=6; page++){
String pageUrl = "https://api.weibo.cn/2/page?gsid=_2A25zJX_EDeRxGedH71YS8CzKzzmIHXVuc_QMrDV6PUJbkdANLXPbkWpNUK3OyitGCJsX8exvua-vfubUqCiaA4lb&from=10A1193010&c=iphone&s=2827eebe&count=20&containerid=106003type%253D25%2526t%253D3%2526disable_hot%253D1%2526filter_type%253Dtopicscene&page=" + page; String pageUrl = "https://m.weibo.cn/api/container/getIndex?containerid=231648_-_2&page=" + page;
Request request = RequestUtils.wrapGet(pageUrl); Request request = RequestUtils.wrapGet(pageUrl);
String htmlBody = null; String htmlBody = null;
//重试三次 //重试三次
...@@ -144,7 +145,7 @@ public class WeiboTopicCrawler { ...@@ -144,7 +145,7 @@ public class WeiboTopicCrawler {
log.error("下载榜单列表页面时出现错误,错误为:{}", e); log.error("下载榜单列表页面时出现错误,错误为:{}", e);
continue; continue;
} }
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) { if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
topicList.addAll(parseTopicHtml(htmlBody)); topicList.addAll(parseTopicHtml(htmlBody));
break; break;
} else { } else {
...@@ -158,7 +159,7 @@ public class WeiboTopicCrawler { ...@@ -158,7 +159,7 @@ public class WeiboTopicCrawler {
private static List<HotSearchList> parseTopicHtml(String htmlBody) { private static List<HotSearchList> parseTopicHtml(String htmlBody) {
try { try {
JSONArray cards = JSONObject.parseObject(htmlBody).getJSONArray("cards"); JSONArray cards = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONArray("cards");
if(Objects.nonNull(cards) && !cards.isEmpty()) { if(Objects.nonNull(cards) && !cards.isEmpty()) {
List<HotSearchList> topicList = new ArrayList<>(); List<HotSearchList> topicList = new ArrayList<>();
Integer rank = null; Integer rank = null;
...@@ -182,25 +183,27 @@ public class WeiboTopicCrawler { ...@@ -182,25 +183,27 @@ public class WeiboTopicCrawler {
String commentNumStr = desc2.replaceAll("讨论.*", "").trim(); String commentNumStr = desc2.replaceAll("讨论.*", "").trim();
String readNumStr = desc2.replaceAll(".*讨论|阅读", "").trim(); String readNumStr = desc2.replaceAll(".*讨论|阅读", "").trim();
try { try {
if(commentNumStr.contains("万")){ commentNum = TipsUtils.getHotCount(commentNumStr);
commentNumStr = commentNumStr.replaceAll("万", ""); readNum = TipsUtils.getHotCount(readNumStr);
commentNum = (int)(Double.parseDouble(commentNumStr)*10000); // if(commentNumStr.contains("万")){
}else if(commentNumStr.contains("亿")){ // commentNumStr = commentNumStr.replaceAll("万", "");
commentNumStr = commentNumStr.replaceAll("亿", ""); // commentNum = (int)(Double.parseDouble(commentNumStr)*10000);
commentNum = (int)(Double.parseDouble(commentNumStr)*10000000); // }else if(commentNumStr.contains("亿")){
}else{ // commentNumStr = commentNumStr.replaceAll("亿", "");
commentNum = Integer.getInteger(commentNumStr); // commentNum = (int)(Double.parseDouble(commentNumStr)*10000000);
} // }else{
// commentNum = Integer.getInteger(commentNumStr);
if(readNumStr.contains("万")){ // }
readNumStr = readNumStr.replaceAll("万", ""); //
readNum = (int)(Double.parseDouble(readNumStr)*10000); // if(readNumStr.contains("万")){
}else if(readNumStr.contains("亿")){ // readNumStr = readNumStr.replaceAll("万", "");
readNumStr = readNumStr.replaceAll("亿", ""); // readNum = (int)(Double.parseDouble(readNumStr)*10000);
readNum = (int)(Double.parseDouble(readNumStr)*10000000); // }else if(readNumStr.contains("亿")){
}else{ // readNumStr = readNumStr.replaceAll("亿", "");
readNum = Integer.getInteger(readNumStr); // readNum = (int)(Double.parseDouble(readNumStr)*10000000);
} // }else{
// readNum = Integer.getInteger(readNumStr);
// }
}catch (Exception e){ }catch (Exception e){
e.printStackTrace(); e.printStackTrace();
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment