Commit 3fdd0d2c by cwy
parents 831957e0 610fbdb7
...@@ -5,6 +5,7 @@ import java.util.Date; ...@@ -5,6 +5,7 @@ import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
...@@ -53,38 +54,43 @@ public class Huxiu { ...@@ -53,38 +54,43 @@ public class Huxiu {
String uname = document.select("div.user-name").text(); String uname = document.select("div.user-name").text();
Elements elements = document.select("div.message-box > div.mod-b.mod-art"); Elements elements = document.select("div.message-box > div.mod-b.mod-art");
for(Element e:elements) { if(Objects.nonNull(elements) && !elements.isEmpty()) {
String title = e.select("div.mob-ctt > h3 > a").text(); for(Element e:elements) {
String title = e.select("div.mob-ctt > h3 > a").text();
//列表显示的时间(补充文章获取失败后的时间)
String artTime = e.select("div.mob-author > span.time").text(); String artTime = e.select("div.mob-author > span.time").text();
Date time = TimeParse.stringFormartDate(artTime); Date time = TimeParse.stringFormartDate(artTime);
//跳进文章获取具体时间 String artUrl = "https://www.huxiu.com" + e.select("div.mob-ctt > h3 > a").attr("href");
String artUrl = "https://www.huxiu.com" + e.select("div.mob-ctt > h3 > a").attr("href");
//超出时间则不获取
//超出时间则不获取 if(haveNext.contains("Next") || (time.getTime() < TimeParse.stringFormartDate(endTime).getTime()) || page > 500) {
if(haveNext.contains("Next") || (time.getTime() < TimeParse.stringFormartDate(endTime).getTime()) || page > 500) { next = false;
next = false; break;
break; }
Map<String, Object> map = new HashMap<>();
map.put("title", title);
map.put("url", artUrl);
map.put("time", time);
map.put("uid", uid);
map.put("uname", uname);
map.put("source", "虎嗅");
bodyList.add(map);
} }
}else {
Map<String, Object> map = new HashMap<>(); next = false;
map.put("title", title);
map.put("url", artUrl);
map.put("time", time);
map.put("uid", uid);
map.put("uname", uname);
map.put("source", "虎嗅");
bodyList.add(map);
} }
page ++; page ++;
break; break;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析数据失败", e); logger.error("解析数据失败", e);
if(i == 4) {
next = false;
}
} }
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment