Commit 4f5be1c7 by zhiwei

添加验证来源中的渠道验证

parent 0f93a339
...@@ -33,10 +33,14 @@ public class SourceForwardPageProcessor implements PageProcessor { ...@@ -33,10 +33,14 @@ public class SourceForwardPageProcessor implements PageProcessor {
String source = null; String source = null;
String channel = "新闻"; String channel = "新闻";
try { try {
if(page.getStatusCode()!=404){
List<Node> nodeList = page.getHtml().getDocument().head().childNodes(); channel = verifyChannel(page.getUrl().get());
source = TreateData.matchSource(page.getUrl().get(),page.getHtml().toString(), sourceList); if(channel==null){
channel = TreateData.matchChannel(nodeList); if(page.getStatusCode()!=404){
List<Node> nodeList = page.getHtml().getDocument().head().childNodes();
source = TreateData.matchSource(page.getUrl().get(),page.getHtml().toString(), sourceList);
channel = TreateData.matchChannel(nodeList);
}
} }
} catch (Exception e) { } catch (Exception e) {
source = null; source = null;
...@@ -49,6 +53,46 @@ public class SourceForwardPageProcessor implements PageProcessor { ...@@ -49,6 +53,46 @@ public class SourceForwardPageProcessor implements PageProcessor {
page.putField("data", data); page.putField("data", data);
} }
/**
 * Infers the article channel from the host name of a link.
 *
 * <p>Only the host part of the URL is inspected (scheme, user info, port,
 * path, query and fragment are ignored). A channel is chosen when one of its
 * keywords is a complete dot-separated host label that is followed by at
 * least one more label, e.g. {@code sports.example.com}. Matching whole
 * labels avoids false hits such as {@code it.} inside {@code edit.html} or
 * {@code ent.} inside {@code content.html}.
 *
 * <p>Channels are tried in a fixed priority order (news first, games last);
 * the first channel with a matching keyword wins.
 *
 * @param url the link to classify; may be {@code null}
 * @return the channel name, or {@code null} when {@code url} is
 *         {@code null} or no keyword matches, so the caller can fall back
 *         to another detection strategy
 */
private static String verifyChannel(String url){
    if(url == null){
        return null;
    }

    // Row order is the match priority. Element 0 is the channel name,
    // the remaining elements are the host labels that select it.
    final String[][] rules = {
        {"新闻", "news"},
        {"财经", "finance", "business", "money"},
        {"科技", "tech", "it"},
        {"体育", "sports"},
        {"娱乐", "ent", "yule"},
        {"汽车", "auto"},
        {"时尚", "fashion"},
        {"教育", "learning", "edu"},
        {"母婴", "baobao"},
        {"房产", "house", "leju", "focus"},
        {"游戏", "games"}
    };

    String host = url.trim();

    // Locate the first path/query/fragment delimiter.
    int firstDelimiter = -1;
    for(int i = 0; i < host.length(); i++){
        char c = host.charAt(i);
        if(c == '/' || c == '?' || c == '#'){
            firstDelimiter = i;
            break;
        }
    }

    // Drop a leading "scheme://". The "://" only counts as a scheme separator
    // when its first slash is the first delimiter of the whole string, so a
    // URL embedded in a query string is not mistaken for the host.
    int schemeEnd = host.indexOf("://");
    if(schemeEnd >= 0 && firstDelimiter == schemeEnd + 1){
        host = host.substring(schemeEnd + 3);
    }

    // Cut everything after the authority component.
    for(int i = 0; i < host.length(); i++){
        char c = host.charAt(i);
        if(c == '/' || c == '?' || c == '#'){
            host = host.substring(0, i);
            break;
        }
    }

    // Strip optional user info and port.
    int at = host.lastIndexOf('@');
    if(at >= 0){
        host = host.substring(at + 1);
    }
    int colon = host.indexOf(':');
    if(colon >= 0){
        host = host.substring(0, colon);
    }

    // Host names are case-insensitive.
    String[] labels = host.toLowerCase(java.util.Locale.ROOT).split("\\.");

    for(String[] rule : rules){
        for(int k = 1; k < rule.length; k++){
            // The last label is skipped: a keyword must be followed by a dot.
            for(int j = 0; j < labels.length - 1; j++){
                if(labels[j].equals(rule[k])){
                    return rule[0];
                }
            }
        }
    }
    return null;
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment