Commit 364aa66a by chenweiyang

是否删除判断调整

parent 6e20a1f6
......@@ -271,6 +271,10 @@ public class UrlLiveCrawler {
}else {
title = String.valueOf(JSONPath.read(result, "$..title"));
}
}else if(url.contains("page.om.qq.com")) {
if(result.contains("内容被删除")) {
title = "网页已删除";
}
}
//若title 未拿到 用 此方法
......@@ -320,12 +324,12 @@ public class UrlLiveCrawler {
,"百度新闻——全球最大的中文新闻平台","以上文章由以下机构判定为不实信息","该公众号已迁移"
,"财经网-CAIJING.COM.CN","蚂蚁资讯","参数错误","时尚头条_YOKA时尚网","该文章已经被删除"
,"网易","链接已过期","找不到页面","今晚网","该文章已被删除", "该回答已被删除-知乎", "资源不存在","文章未找到"
, "UC头条");
, "UC头条", "该内容暂无法显示", "手机搜狐网");
List<String> cList = Arrays.asList("提示信息-","此内容因违规无法查看","微信公众号不存在"
,"此内容被投诉且经审核涉嫌侵权,无法查看","thepageyourequestedwasnotfound","未知错误"
,"Objectmoved","404","页面没有找到","页面未找到","301MovedPermanently","加载异常",
"此帐号已被屏蔽, 内容无法查看","链接不存在", "新闻已删除");
"此帐号已被屏蔽, 内容无法查看","链接不存在", "新闻已删除", "视频去哪了呢");
return cList.stream().anyMatch(title::contains) || eList.stream().anyMatch(title::equals);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment