Commit d705de1f by chenweiyang

是否删除调整

parent 364aa66a
......@@ -216,6 +216,9 @@ public class UrlLiveCrawler {
if(Objects.isNull(title) || title.isEmpty()) {
title = doc.select("h2").text();
}
if(Objects.isNull(title) || title.isEmpty()) {
title = doc.select("div.weui-msg__text-area > h3").text();
}
// 获取title
Matcher ma5 = Pattern.compile("var msg_title = \'(.*)\'")
.matcher(result);
......@@ -224,7 +227,7 @@ public class UrlLiveCrawler {
}
if(Objects.isNull(title) || title.isEmpty()) {
if(result.contains("此帐号已被屏蔽, 内容无法查看") || result.contains("该公众号已迁移") || result.contains("此帐号已自主注销,内容无法查看")
|| result.contains("此帐号处于帐号迁移流程中") || result.contains("该内容已被发布者删除")) {
|| result.contains("此帐号处于帐号迁移流程中") || result.contains("该内容已被发布者删除") || result.contains("此内容被投诉且经审核涉嫌侵权")) {
title = "网页已删除";
}
}
......@@ -324,7 +327,7 @@ public class UrlLiveCrawler {
,"百度新闻——全球最大的中文新闻平台","以上文章由以下机构判定为不实信息","该公众号已迁移"
,"财经网-CAIJING.COM.CN","蚂蚁资讯","参数错误","时尚头条_YOKA时尚网","该文章已经被删除"
,"网易","链接已过期","找不到页面","今晚网","该文章已被删除", "该回答已被删除-知乎", "资源不存在","文章未找到"
, "UC头条", "该内容暂无法显示", "手机搜狐网");
, "UC头条", "该内容暂无法显示", "手机搜狐网", "此内容被投诉且经审核涉嫌侵权,无法查看。");
List<String> cList = Arrays.asList("提示信息-","此内容因违规无法查看","微信公众号不存在"
,"此内容被投诉且经审核涉嫌侵权,无法查看","thepageyourequestedwasnotfound","未知错误"
......
......@@ -72,7 +72,7 @@ public class URLLive {
public static void main(String[] args) {
ProxyInit.initProxy();
List<String> urlList = new ArrayList<>();
urlList.add("https://www.toutiao.com/a6982350814614405670/");
urlList.add("https://mp.weixin.qq.com/s/YLlXGwlSugJpXTIqrLgPPw");
// urlList.add("http://www.yidianzixun.com/article/0PYO4Gbh");
List<UrlLiveBean> u = URLLive.verificationURLLive(urlList);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment