Commit 06e7cd6e by chenweitao

Merge branch 'working' into 'master'

更新知乎热搜标签采集程序

See merge request !216
parents 311fa5af 1312177e
...@@ -53,7 +53,7 @@ public class XinLangHotSearchCrawler { ...@@ -53,7 +53,7 @@ public class XinLangHotSearchCrawler {
Elements elements = document.getElementsByTag("script"); Elements elements = document.getElementsByTag("script");
for (Element element : elements) { for (Element element : elements) {
String html = element.html(); String html = element.html();
log.info(html); //log.info(html);
if (html.contains("SM =")) { if (html.contains("SM =")) {
jsonObject = JSONObject.parseObject(html.substring(html.indexOf("{"), html.length() - 1)); jsonObject = JSONObject.parseObject(html.substring(html.indexOf("{"), html.length() - 1));
JSONArray jsonArray = jsonObject.getJSONObject("data").getJSONObject("data").getJSONArray("result"); JSONArray jsonArray = jsonObject.getJSONObject("data").getJSONObject("data").getJSONArray("result");
......
...@@ -183,13 +183,13 @@ public class ZhihuHotSearchCrawler { ...@@ -183,13 +183,13 @@ public class ZhihuHotSearchCrawler {
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
//获取标签 //获取标签
String label = ""; String label = "";
Elements select = document.select("div.Tag"); Elements select = document.select("div.QuestionHeader-topics").select("div.css-1gomreu");
for (Element element : select) { for (Element element : select) {
String text = "`" + element.select("div.Popover").text() + ";"; String text = "`" + element.select("div.css-1gomreu").text() + ";";
label = label + text; label = label + text;
} }
doc.put("tag", label.trim()); doc.put("tag", label.trim());
String strong = document.select("div.NumberBoard-itemInner").select("strong").text(); String strong = document.select("strong.NumberBoard-itemValue").text();
String[] count = strong.split(" "); String[] count = strong.split(" ");
//获取关注数 //获取关注数
doc.put("fans", Long.valueOf(count[0].replaceAll(",", "").trim())); doc.put("fans", Long.valueOf(count[0].replaceAll(",", "").trim()));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment