Commit 7decb33f by zhiwei

添加错误判断及处理

parent a0e652f6
......@@ -512,6 +512,7 @@ public class TouTiaoArticleParse {
* @return
*/
public static String getContent(String url,Proxy proxy) {
try {
String htmlBody = downloadHtml(url, proxy, null);
if(!StringUtils.isBlank(htmlBody)) {
if(htmlBody.contains("content:")) {
......@@ -520,6 +521,11 @@ public class TouTiaoArticleParse {
}
}
return null;
} catch (Exception e) {
logger.error("跟据链接采集全文出现错误", e);
return null;
}
}
......
......@@ -37,38 +37,41 @@ public class TouTiaoExample {
public static void main(String[] args) throws Exception {
ProxyFactory.init(registry, group, GroupType.PROVIDER);
String url = "https://www.toutiao.com/a6659244827009352196/";
String content = TouTiaoArticleParse.getContent(url, null);
System.out.println(content);
List<String> urlList = new ArrayList<String>();
urlList.add("1920576965");
Date endTime = TimeParse.stringFormartDate("2018-10-01");
for (String url : urlList) {
long a = System.currentTimeMillis();
String mid = url;
Long max_behot_time = 0L;
List<TouTiaoArticle> list = new ArrayList<>();
boolean f = true;
while (f) {
Map<String, Object> dataMap = null;
dataMap = TouTiaoArticleParse.getTouTiaoHistory(mid, max_behot_time+"", endTime, ProxyHolder.NAT_PROXY);
if (dataMap != null && !dataMap.isEmpty()) {
List<TouTiaoArticle> ttlist = (List<TouTiaoArticle>) dataMap.get("data");
max_behot_time = (Long)dataMap.get("max_behot_time");
System.out.println(max_behot_time + "=======" + ttlist.size());
if (null == max_behot_time || ttlist.isEmpty()) {
f = false;
} else {
if (ttlist.size() > 0) {
list.addAll(ttlist);
}
}
}else{
f = false;
}
}
long b = System.currentTimeMillis();
System.out.println("一轮的采集时间为:" + (b - a) / 1000+" 数据量为" + list.size());
}
// List<String> urlList = new ArrayList<String>();
// urlList.add("1920576965");
// Date endTime = TimeParse.stringFormartDate("2018-10-01");
//
// for (String url : urlList) {
// long a = System.currentTimeMillis();
// String mid = url;
// Long max_behot_time = 0L;
// List<TouTiaoArticle> list = new ArrayList<>();
// boolean f = true;
// while (f) {
// Map<String, Object> dataMap = null;
// dataMap = TouTiaoArticleParse.getTouTiaoHistory(mid, max_behot_time+"", endTime, ProxyHolder.NAT_PROXY);
// if (dataMap != null && !dataMap.isEmpty()) {
// List<TouTiaoArticle> ttlist = (List<TouTiaoArticle>) dataMap.get("data");
// max_behot_time = (Long)dataMap.get("max_behot_time");
// System.out.println(max_behot_time + "=======" + ttlist.size());
// if (null == max_behot_time || ttlist.isEmpty()) {
// f = false;
// } else {
// if (ttlist.size() > 0) {
// list.addAll(ttlist);
// }
// }
// }else{
// f = false;
// }
// }
// long b = System.currentTimeMillis();
// System.out.println("一轮的采集时间为:" + (b - a) / 1000+" 数据量为" + list.size());
// }
}
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment