Commit 88b59f64 by 马黎滨

百度采集热搜指数

parent 806be52f
...@@ -91,6 +91,9 @@ public class BaiDuHotSearchCrawler { ...@@ -91,6 +91,9 @@ public class BaiDuHotSearchCrawler {
} else if (!element.select("td.last").select("span.icon-rise").isEmpty()) { } else if (!element.select("td.last").select("span.icon-rise").isEmpty()) {
hot = element.select("td.last").select("span.icon-rise").text(); hot = element.select("td.last").select("span.icon-rise").text();
} }
else if (!element.select("td.last").select("span.icon-fair").isEmpty()) {
hot = element.select("td.last").select("span.icon-fair").text();
}
int count = 0; int count = 0;
// 判断hot是否为空 // 判断hot是否为空
if (StringUtils.isNotBlank(hot)) { if (StringUtils.isNotBlank(hot)) {
......
...@@ -36,7 +36,7 @@ public class ZhihuTopicSearchCrawler { ...@@ -36,7 +36,7 @@ public class ZhihuTopicSearchCrawler {
// ZhiWeiTools.sleep(10000L); // ZhiWeiTools.sleep(10000L);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url),
ProxyHolder.NAT_HEAVY_PROXY).body().string(); ProxyHolder.NAT_HEAVY_PROXY).body().string();
log.info("页面内容获取:{}",htmlBody); // log.info("页面内容获取:{}",htmlBody);
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
String html = document.getElementsByTag("script").select("#js-initialData").html(); String html = document.getElementsByTag("script").select("#js-initialData").html();
jsonObject = JSONObject.parseObject(html); jsonObject = JSONObject.parseObject(html);
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment