Commit 50407ed7 by leiliangliang

更新微博话题采集程序

parent 2eda193a
...@@ -222,8 +222,8 @@ public class WeiboTopicCrawler { ...@@ -222,8 +222,8 @@ public class WeiboTopicCrawler {
*/ */
public static List<HotSearchList> startCrawlerByPc(Date date){ public static List<HotSearchList> startCrawlerByPc(Date date){
List<HotSearchList> topicList = new ArrayList<>(); List<HotSearchList> topicList = new ArrayList<>();
for(int page=1; page<=2; page++){ for(int page=1; page<=6; page++){
String pageUrl = "https://weibo.com/ajax/statuses/topic_band?sid=v_weibopro&category=all&page="+page+"&count=50"; String pageUrl = "https://weibo.com/ajax/statuses/topic_band?sid=v_weibopro&category=all&page="+page+"&count=10";
Request request = RequestUtils.wrapGet(pageUrl); Request request = RequestUtils.wrapGet(pageUrl);
String htmlBody = null; String htmlBody = null;
//重试三次 //重试三次
...@@ -236,8 +236,12 @@ public class WeiboTopicCrawler { ...@@ -236,8 +236,12 @@ public class WeiboTopicCrawler {
}else { }else {
htmlBody = response.bodyString(); htmlBody = response.bodyString();
} }
if (StringUtils.isNotBlank(htmlBody)) { if (htmlBody.contains("data") && Objects.nonNull(JSONObject.parseObject(htmlBody).get("data"))) {
topicList.addAll(parseTopicPcHtml(htmlBody,date)); JSONObject data = JSONObject.parseObject(htmlBody).getJSONObject("data");
JSONArray statuses = data.getJSONArray("statuses");
if (statuses.size()>0) {
topicList.addAll(parseTopicPcHtml(statuses, date));
}
break; break;
} else { } else {
log.info("下载榜单列表页面时数据格式错误,页面为:{}", htmlBody); log.info("下载榜单列表页面时数据格式错误,页面为:{}", htmlBody);
...@@ -247,10 +251,8 @@ public class WeiboTopicCrawler { ...@@ -247,10 +251,8 @@ public class WeiboTopicCrawler {
return topicList; return topicList;
} }
private static List<HotSearchList> parseTopicPcHtml(String htmlBody,Date date) { private static List<HotSearchList> parseTopicPcHtml(JSONArray jsonArray,Date date) {
try { try {
JSONObject data = JSONObject.parseObject(htmlBody).getJSONObject("data");
JSONArray jsonArray = data.getJSONArray("statuses");
List<HotSearchList> topicList = new ArrayList<>(); List<HotSearchList> topicList = new ArrayList<>();
for (int j=0; j< jsonArray.size(); j++){ for (int j=0; j< jsonArray.size(); j++){
JSONObject card = jsonArray.getJSONObject(j); JSONObject card = jsonArray.getJSONObject(j);
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment