Commit 3fdd0d2c by cwy
parents 831957e0 610fbdb7
...@@ -5,6 +5,7 @@ import java.util.Date; ...@@ -5,6 +5,7 @@ import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
...@@ -53,14 +54,13 @@ public class Huxiu { ...@@ -53,14 +54,13 @@ public class Huxiu {
String uname = document.select("div.user-name").text(); String uname = document.select("div.user-name").text();
Elements elements = document.select("div.message-box > div.mod-b.mod-art"); Elements elements = document.select("div.message-box > div.mod-b.mod-art");
if(Objects.nonNull(elements) && !elements.isEmpty()) {
for(Element e:elements) { for(Element e:elements) {
String title = e.select("div.mob-ctt > h3 > a").text(); String title = e.select("div.mob-ctt > h3 > a").text();
//列表显示的时间(补充文章获取失败后的时间)
String artTime = e.select("div.mob-author > span.time").text(); String artTime = e.select("div.mob-author > span.time").text();
Date time = TimeParse.stringFormartDate(artTime); Date time = TimeParse.stringFormartDate(artTime);
//跳进文章获取具体时间
String artUrl = "https://www.huxiu.com" + e.select("div.mob-ctt > h3 > a").attr("href"); String artUrl = "https://www.huxiu.com" + e.select("div.mob-ctt > h3 > a").attr("href");
//超出时间则不获取 //超出时间则不获取
...@@ -79,12 +79,18 @@ public class Huxiu { ...@@ -79,12 +79,18 @@ public class Huxiu {
bodyList.add(map); bodyList.add(map);
} }
}else {
next = false;
}
page ++; page ++;
break; break;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析数据失败", e); logger.error("解析数据失败", e);
if(i == 4) {
next = false;
}
} }
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment