Commit 253b3b7f by chenweitao

Merge branch 'working' into 'master'

重启知乎标签采集程序

See merge request !169
parents 26d43f81 de854d36
...@@ -132,14 +132,14 @@ public class ZhihuHotSearchCrawler { ...@@ -132,14 +132,14 @@ public class ZhihuHotSearchCrawler {
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
//org.bson.Document doc = getTag(link); org.bson.Document doc = getTag(link);
// String tog = nonNull(doc.get("tag")) ? doc.getString("tag") : null; String tog = nonNull(doc.get("tag")) ? doc.getString("tag") : null;
// Long view = nonNull(doc.get("view")) ? Long.valueOf(doc.get("view").toString()) : null; Long view = nonNull(doc.get("view")) ? Long.valueOf(doc.get("view").toString()) : null;
// Long fans = nonNull(doc.get("fans")) ? Long.valueOf(doc.get("fans").toString()) : null; Long fans = nonNull(doc.get("fans")) ? Long.valueOf(doc.get("fans").toString()) : null;
HotSearchList zhihu = new HotSearchList(link, displayQuery, hotCount, i + 1, HotSearchType.知乎热搜.name(),date); HotSearchList zhihu = new HotSearchList(link, displayQuery, hotCount, i + 1, HotSearchType.知乎热搜.name(),date);
// zhihu.setFans(fans); zhihu.setFans(fans);
// zhihu.setView(view); zhihu.setView(view);
// zhihu.setTag(tog); zhihu.setTag(tog);
list.add(zhihu); list.add(zhihu);
} }
} }
...@@ -165,8 +165,12 @@ public class ZhihuHotSearchCrawler { ...@@ -165,8 +165,12 @@ public class ZhihuHotSearchCrawler {
if (htmlBody != null && htmlBody.contains("QuestionHeader")) { if (htmlBody != null && htmlBody.contains("QuestionHeader")) {
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
//获取标签 //获取标签
String content = "`"+document.select("div.Tag").text()+";"; String label="";
String label = content.replaceAll(" ", ";`"); Elements select = document.select("div.Tag");
for (Element element : select) {
String text = "`"+element.select("div.Popover").text()+";";
label=label+text;
}
doc.put("tag",label.trim()); doc.put("tag",label.trim());
String strong = document.select("div.NumberBoard-itemInner").select("strong").text(); String strong = document.select("div.NumberBoard-itemInner").select("strong").text();
String[] count = strong.split(" "); String[] count = strong.split(" ");
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment