Commit 7decb33f by zhiwei

添加错误判断及处理

parent a0e652f6
...@@ -512,14 +512,20 @@ public class TouTiaoArticleParse { ...@@ -512,14 +512,20 @@ public class TouTiaoArticleParse {
* @return * @return
*/ */
public static String getContent(String url,Proxy proxy) { public static String getContent(String url,Proxy proxy) {
String htmlBody = downloadHtml(url, proxy, null); try {
if(!StringUtils.isBlank(htmlBody)) { String htmlBody = downloadHtml(url, proxy, null);
if(htmlBody.contains("content:")) { if(!StringUtils.isBlank(htmlBody)) {
String content = htmlBody.split(" content: '")[1].split("',")[0]; if(htmlBody.contains("content:")) {
return ZhiWeiTools.delHTMLTag(content); String content = htmlBody.split(" content: '")[1].split("',")[0];
return ZhiWeiTools.delHTMLTag(content);
}
} }
return null;
} catch (Exception e) {
logger.error("跟据链接采集全文出现错误", e);
return null;
} }
return null;
} }
......
...@@ -37,38 +37,41 @@ public class TouTiaoExample { ...@@ -37,38 +37,41 @@ public class TouTiaoExample {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
ProxyFactory.init(registry, group, GroupType.PROVIDER); ProxyFactory.init(registry, group, GroupType.PROVIDER);
String url = "https://www.toutiao.com/a6659244827009352196/";
String content = TouTiaoArticleParse.getContent(url, null);
System.out.println(content);
List<String> urlList = new ArrayList<String>(); // List<String> urlList = new ArrayList<String>();
urlList.add("1920576965"); // urlList.add("1920576965");
Date endTime = TimeParse.stringFormartDate("2018-10-01"); // Date endTime = TimeParse.stringFormartDate("2018-10-01");
//
for (String url : urlList) { // for (String url : urlList) {
long a = System.currentTimeMillis(); // long a = System.currentTimeMillis();
String mid = url; // String mid = url;
Long max_behot_time = 0L; // Long max_behot_time = 0L;
List<TouTiaoArticle> list = new ArrayList<>(); // List<TouTiaoArticle> list = new ArrayList<>();
boolean f = true; // boolean f = true;
while (f) { // while (f) {
Map<String, Object> dataMap = null; // Map<String, Object> dataMap = null;
dataMap = TouTiaoArticleParse.getTouTiaoHistory(mid, max_behot_time+"", endTime, ProxyHolder.NAT_PROXY); // dataMap = TouTiaoArticleParse.getTouTiaoHistory(mid, max_behot_time+"", endTime, ProxyHolder.NAT_PROXY);
if (dataMap != null && !dataMap.isEmpty()) { // if (dataMap != null && !dataMap.isEmpty()) {
List<TouTiaoArticle> ttlist = (List<TouTiaoArticle>) dataMap.get("data"); // List<TouTiaoArticle> ttlist = (List<TouTiaoArticle>) dataMap.get("data");
max_behot_time = (Long)dataMap.get("max_behot_time"); // max_behot_time = (Long)dataMap.get("max_behot_time");
System.out.println(max_behot_time + "=======" + ttlist.size()); // System.out.println(max_behot_time + "=======" + ttlist.size());
if (null == max_behot_time || ttlist.isEmpty()) { // if (null == max_behot_time || ttlist.isEmpty()) {
f = false; // f = false;
} else { // } else {
if (ttlist.size() > 0) { // if (ttlist.size() > 0) {
list.addAll(ttlist); // list.addAll(ttlist);
} // }
} // }
}else{ // }else{
f = false; // f = false;
} // }
} // }
long b = System.currentTimeMillis(); // long b = System.currentTimeMillis();
System.out.println("一轮的采集时间为:" + (b - a) / 1000+" 数据量为" + list.size()); // System.out.println("一轮的采集时间为:" + (b - a) / 1000+" 数据量为" + list.size());
} // }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment