Commit 1325c572 by zhiwei

修复搜狗知乎采集时回答数处理问题

parent 2ca74931
......@@ -178,10 +178,19 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK {
ZhiHuData zhihu = null;
if(type.contains("文章")){
String source = element.select("p.about-answer").select("cite").text();
Integer attitudes_count = Integer.valueOf(element.select("p.about-answer").select("span.count").text().replaceAll("个赞", ""));
String attitudesCount = element.select("p.about-answer").select("span.count").text().replaceAll("个赞", "");
if(attitudesCount.contains("k")){
attitudesCount = attitudesCount.split("k")[0]+"000";
}
Integer attitudes_count = Integer.valueOf(attitudesCount);
Integer comment_count = 0;
if(!"".equals(answerText.replace("个评论", "").trim())){
comment_count = Integer.valueOf(answerText.replace("个评论", "").trim());
String commentCount = answerText.replace("个评论", "").trim();
if(commentCount.contains("k")){
commentCount = commentCount.split("k")[0]+"000";
}
comment_count = Integer.valueOf(commentCount);
}
zhihu = new ZhiHuData(link, title, pt, type, null, source, null, attitudes_count, null, comment_count, word);
zhihu = analysisZhihuArticle(link, proxy, zhihu);
......@@ -189,7 +198,10 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK {
Integer answer_count = 0;
answerText = answerText.replace("个回答", "").trim();
if(answerText!=null && !"".equals(answerText)){
answer_count = Integer.valueOf(answer_count);
if(answerText.contains("k")){
answerText = answerText.split("k")[0]+"000";
}
answer_count = Integer.valueOf(answerText);
}
zhihu = new ZhiHuData(link, title, pt, type, null, null, null, null, answer_count, null, word);
zhihu = analysisZhihuAnswer(link, proxy, zhihu);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment