Commit 3be7dcdd by leiliangliang

增加知乎标签异常捕获

parent 63e222b6
......@@ -164,34 +164,38 @@ public class ZhihuHotSearchCrawler {
Map<String,String> Map = HeaderTool.getCommonHead();
Map.put("cookie", "_xsrf=7NFWM5qBcOutfs8MaW7bhQQH65t3Xia4");
Request request = RequestUtils.wrapGet(url,Map);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("单条知乎热搜数据页面连接失败",cause);
return doc;
}else {
String htmlBody = response.bodyString();
if (htmlBody != null && htmlBody.contains("QuestionHeader")) {
Document document = Jsoup.parse(htmlBody);
//获取标签
String label="";
Elements select = document.select("div.Tag");
for (Element element : select) {
String text = "`"+element.select("div.Popover").text()+";";
label=label+text;
}
doc.put("tag",label.trim());
String strong = document.select("div.NumberBoard-itemInner").select("strong").text();
String[] count = strong.split(" ");
//获取关注数
doc.put("fans",Long.valueOf(count[0].replaceAll(",","").trim()));
//获取浏览量
doc.put("view",Long.valueOf(count[1].replaceAll(",","").trim()));
try {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("单条知乎热搜数据页面连接失败",cause);
return doc;
}else {
return doc;
String htmlBody = response.bodyString();
if (htmlBody != null && htmlBody.contains("QuestionHeader")) {
Document document = Jsoup.parse(htmlBody);
//获取标签
String label="";
Elements select = document.select("div.Tag");
for (Element element : select) {
String text = "`"+element.select("div.Popover").text()+";";
label=label+text;
}
doc.put("tag",label.trim());
String strong = document.select("div.NumberBoard-itemInner").select("strong").text();
String[] count = strong.split(" ");
//获取关注数
doc.put("fans",Long.valueOf(count[0].replaceAll(",","").trim()));
//获取浏览量
doc.put("view",Long.valueOf(count[1].replaceAll(",","").trim()));
return doc;
}else {
return doc;
}
}
} catch (Exception e) {
log.info("知乎热搜标签解析异常",e);
}
return doc;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment