Commit 3be7dcdd by leiliangliang

增加知乎标签异常捕获

parent 63e222b6
...@@ -164,34 +164,38 @@ public class ZhihuHotSearchCrawler { ...@@ -164,34 +164,38 @@ public class ZhihuHotSearchCrawler {
Map<String,String> Map = HeaderTool.getCommonHead(); Map<String,String> Map = HeaderTool.getCommonHead();
Map.put("cookie", "_xsrf=7NFWM5qBcOutfs8MaW7bhQQH65t3Xia4"); Map.put("cookie", "_xsrf=7NFWM5qBcOutfs8MaW7bhQQH65t3Xia4");
Request request = RequestUtils.wrapGet(url,Map); Request request = RequestUtils.wrapGet(url,Map);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY); try {
if (response.hasCause()){ Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Throwable cause = response.cause(); if (response.hasCause()){
log.error("单条知乎热搜数据页面连接失败",cause); Throwable cause = response.cause();
return doc; log.error("单条知乎热搜数据页面连接失败",cause);
}else {
String htmlBody = response.bodyString();
if (htmlBody != null && htmlBody.contains("QuestionHeader")) {
Document document = Jsoup.parse(htmlBody);
//获取标签
String label="";
Elements select = document.select("div.Tag");
for (Element element : select) {
String text = "`"+element.select("div.Popover").text()+";";
label=label+text;
}
doc.put("tag",label.trim());
String strong = document.select("div.NumberBoard-itemInner").select("strong").text();
String[] count = strong.split(" ");
//获取关注数
doc.put("fans",Long.valueOf(count[0].replaceAll(",","").trim()));
//获取浏览量
doc.put("view",Long.valueOf(count[1].replaceAll(",","").trim()));
return doc; return doc;
}else { }else {
return doc; String htmlBody = response.bodyString();
if (htmlBody != null && htmlBody.contains("QuestionHeader")) {
Document document = Jsoup.parse(htmlBody);
//获取标签
String label="";
Elements select = document.select("div.Tag");
for (Element element : select) {
String text = "`"+element.select("div.Popover").text()+";";
label=label+text;
}
doc.put("tag",label.trim());
String strong = document.select("div.NumberBoard-itemInner").select("strong").text();
String[] count = strong.split(" ");
//获取关注数
doc.put("fans",Long.valueOf(count[0].replaceAll(",","").trim()));
//获取浏览量
doc.put("view",Long.valueOf(count[1].replaceAll(",","").trim()));
return doc;
}else {
return doc;
}
} }
} catch (Exception e) {
log.info("知乎热搜标签解析异常",e);
} }
return doc;
} }
} }
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment