Commit 806be52f by 马黎滨

知乎热搜榜单采集404页面解决

parent 141137f3
......@@ -29,14 +29,14 @@ public class ZhihuTopicSearchCrawler {
public static List<HotSearchList> getZhihuTopicSearch(){
List<HotSearchList> list = new ArrayList<>();
String url = "https://www.zhihu.com/topsearch";
Map<String, String> headerMap = HeaderTool.getCommonHead();
JSONObject jsonObject = null;
try {
for(int t=0 ;t<3&&jsonObject== null;t++)
for(int t=0 ;t<3 && jsonObject== null;t++)
{
ZhiWeiTools.sleep(500L);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap),
// ZhiWeiTools.sleep(10000L);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url),
ProxyHolder.NAT_HEAVY_PROXY).body().string();
log.info("页面内容获取:{}",htmlBody);
Document document = Jsoup.parse(htmlBody);
String html = document.getElementsByTag("script").select("#js-initialData").html();
jsonObject = JSONObject.parseObject(html);
......@@ -55,6 +55,7 @@ public class ZhihuTopicSearchCrawler {
return list;
}else{
log.error("知乎热搜榜单页面获取异常,404");
log.error(jsonObject);
}
} catch (IOException e) {
log.error("知乎热搜获取异常", e);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment