Commit 6e20a1f6 by chenweiyang

是否删除错误处理

parent 72bdcd09
......@@ -222,9 +222,11 @@ public class UrlLiveCrawler {
if (ma5.find()) {
title = ma5.group(1).replaceAll(" ", " ").trim();
}
if(result.contains("此帐号已被屏蔽, 内容无法查看") || result.contains("该公众号已迁移") || result.contains("此帐号已自主注销,内容无法查看")
|| result.contains("此帐号处于帐号迁移流程中") || result.contains("该内容已被发布者删除")) {
title = "网页已删除";
if(Objects.isNull(title) || title.isEmpty()) {
if(result.contains("此帐号已被屏蔽, 内容无法查看") || result.contains("该公众号已迁移") || result.contains("此帐号已自主注销,内容无法查看")
|| result.contains("此帐号处于帐号迁移流程中") || result.contains("该内容已被发布者删除")) {
title = "网页已删除";
}
}
}else if(url.contains("kuaibao")){
title = doc.select("p.title").text().replaceAll(" ", "");
......@@ -263,6 +265,12 @@ public class UrlLiveCrawler {
} catch (Exception e) {
logger.error(" uc 数据 json 转换失败", e);
}
}else if(attr.getAttr().toString().contains("toutiao.com")) {
if(result.contains("\"success\":false")) {
title = "网页已删除";
}else {
title = String.valueOf(JSONPath.read(result, "$..title"));
}
}
//若title 为拿到 用 此方法
......@@ -280,11 +288,6 @@ public class UrlLiveCrawler {
title = doc.select("h1").text().replaceAll(" ", "");
}
if(result.contains("\"success\":false") && attr.getAttr().toString().contains("toutiao.com")) {
title = "网页已删除";
}else {
title = String.valueOf(JSONPath.read(result, "$..title"));
}
//若title 为拿到 用 此方法 无法获取标题不进行程序迷惑性判断
// if(Objects.isNull(title) || title.length() < 1 || result.length() < 200) {
// title = "网页已删除";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment