Commit 6e20a1f6 by chenweiyang

是否删除错误处理

parent 72bdcd09
...@@ -222,10 +222,12 @@ public class UrlLiveCrawler { ...@@ -222,10 +222,12 @@ public class UrlLiveCrawler {
if (ma5.find()) { if (ma5.find()) {
title = ma5.group(1).replaceAll(" ", " ").trim(); title = ma5.group(1).replaceAll(" ", " ").trim();
} }
if(Objects.isNull(title) || title.isEmpty()) {
if(result.contains("此帐号已被屏蔽, 内容无法查看") || result.contains("该公众号已迁移") || result.contains("此帐号已自主注销,内容无法查看") if(result.contains("此帐号已被屏蔽, 内容无法查看") || result.contains("该公众号已迁移") || result.contains("此帐号已自主注销,内容无法查看")
|| result.contains("此帐号处于帐号迁移流程中") || result.contains("该内容已被发布者删除")) { || result.contains("此帐号处于帐号迁移流程中") || result.contains("该内容已被发布者删除")) {
title = "网页已删除"; title = "网页已删除";
} }
}
}else if(url.contains("kuaibao")){ }else if(url.contains("kuaibao")){
title = doc.select("p.title").text().replaceAll(" ", ""); title = doc.select("p.title").text().replaceAll(" ", "");
}else if(url.contains("chinadaily.com.cn")){ }else if(url.contains("chinadaily.com.cn")){
...@@ -263,6 +265,12 @@ public class UrlLiveCrawler { ...@@ -263,6 +265,12 @@ public class UrlLiveCrawler {
} catch (Exception e) { } catch (Exception e) {
logger.error(" uc 数据 json 转换失败", e); logger.error(" uc 数据 json 转换失败", e);
} }
}else if(attr.getAttr().toString().contains("toutiao.com")) {
if(result.contains("\"success\":false")) {
title = "网页已删除";
}else {
title = String.valueOf(JSONPath.read(result, "$..title"));
}
} }
//若title 为拿到 用 此方法 //若title 为拿到 用 此方法
...@@ -280,11 +288,6 @@ public class UrlLiveCrawler { ...@@ -280,11 +288,6 @@ public class UrlLiveCrawler {
title = doc.select("h1").text().replaceAll(" ", ""); title = doc.select("h1").text().replaceAll(" ", "");
} }
if(result.contains("\"success\":false") && attr.getAttr().toString().contains("toutiao.com")) {
title = "网页已删除";
}else {
title = String.valueOf(JSONPath.read(result, "$..title"));
}
//若title 为拿到 用 此方法 无法获取标题不进行程序迷惑性判断 //若title 为拿到 用 此方法 无法获取标题不进行程序迷惑性判断
// if(Objects.isNull(title) || title.length() < 1 || result.length() < 200) { // if(Objects.isNull(title) || title.length() < 1 || result.length() < 200) {
// title = "网页已删除"; // title = "网页已删除";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment