Commit 1312177e by leiliangliang

更新知乎热搜标签采集程序

parent 2acb8a7e
......@@ -53,7 +53,7 @@ public class XinLangHotSearchCrawler {
Elements elements = document.getElementsByTag("script");
for (Element element : elements) {
String html = element.html();
-log.info(html);
+//log.info(html);
if (html.contains("SM =")) {
jsonObject = JSONObject.parseObject(html.substring(html.indexOf("{"), html.length() - 1));
JSONArray jsonArray = jsonObject.getJSONObject("data").getJSONObject("data").getJSONArray("result");
......
......@@ -183,13 +183,13 @@ public class ZhihuHotSearchCrawler {
Document document = Jsoup.parse(htmlBody);
//获取标签
String label = "";
-Elements select = document.select("div.Tag");
+Elements select = document.select("div.QuestionHeader-topics").select("div.css-1gomreu");
for (Element element : select) {
-String text = "`" + element.select("div.Popover").text() + ";";
+String text = "`" + element.select("div.css-1gomreu").text() + ";";
label = label + text;
}
doc.put("tag", label.trim());
-String strong = document.select("div.NumberBoard-itemInner").select("strong").text();
+String strong = document.select("strong.NumberBoard-itemValue").text();
String[] count = strong.split(" ");
//获取关注数
doc.put("fans", Long.valueOf(count[0].replaceAll(",", "").trim()));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment