Commit db96247a by zhiwei

修复排名问题

parent 67b48e23
......@@ -57,15 +57,24 @@ public class WeiboHuatiCrawler {
String type = entry.getKey();
for(int page= 1; page<=5; page++) {
String pageUrl = url + "&page=" + page;
//重试三次
for(int retryTimes = 1; retryTimes<=3; retryTimes++) {
try {
System.out.println("pageUrl=========="+pageUrl);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl, headMap), ProxyHolder.NAT_HEAVY_PROXY).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("desc1")) {
topicList.addAll(parseTopicRankHtml(htmlBody, type));
topicList.addAll(parseTopicRankHtml(page, htmlBody, type));
break;
}else {
logger.error("获取榜单列表页面时数据格式错误,页面为:{}", htmlBody);
}
} catch (Exception e) {
logger.error("获取榜单列表页面时出现错误,错误为:{}", e);
continue;
}
}
}
}
return topicList;
}
......@@ -78,10 +87,12 @@ public class WeiboHuatiCrawler {
* @param type
* @return void
*/
private static List<WeiboTopic> parseTopicRankHtml(String htmlBody, String type) {
private static List<WeiboTopic> parseTopicRankHtml(int page,String htmlBody, String type) {
try {
JSONArray list = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONArray("list");
if(Objects.nonNull(list) && !list.isEmpty()) {
page = (page-1)*20;
List<WeiboTopic> topicList = new ArrayList<>();
Integer toprank = null;
String topicName = null;
......@@ -92,7 +103,7 @@ public class WeiboHuatiCrawler {
String url = null;
for(int i=0;i<list.size();i++) {
JSONObject data = list.getJSONObject(i);
toprank = data.getInteger("toprank");
toprank = page + data.getInteger("toprank");
topicName = data.getString("display_name");
id = data.getString("page_id");
score = data.getString("score");
......@@ -102,6 +113,7 @@ public class WeiboHuatiCrawler {
WeiboTopic topic = new WeiboTopic(url, topicName, toprank, score, fensi, type);
topic = getTopicInfo(id, topic);
System.out.println("topic====="+topic);
topicList.add(topic);
}
return topicList;
......@@ -123,7 +135,7 @@ public class WeiboHuatiCrawler {
* @return WeiboTopic
*/
private static WeiboTopic getTopicInfo(String id, WeiboTopic topic) {
for(int i=1;i<=3;i++) {
for(int retryTimes=1; retryTimes<=3; retryTimes++) {
try {
String url = "https://m.weibo.cn/api/container/getIndex?containerid="+ id;
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_HEAVY_PROXY).body().string();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment