Commit 4dac5870 by zhiwei

1.添加自媒体号中的帐号来源采集中的一点资讯匹配规则

parent cd456869
package com.zhiwei.source_forward.crawler;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.nodes.Node;
import com.zhiwei.source_forward.util.SourceData;
import com.zhiwei.source_forward.util.TreateData;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
......
......@@ -123,7 +123,7 @@ public class TreateData {
if(url.contains("toutiao.com")){
//今日头条帐号匹配
if(html.contains(" source: '")){
source = "今日头条-" + html.split(" source: '")[1].split("',")[0];
source = "今日头条-" + html.split("source: '")[1].split("',")[0];
}
}else if(url.contains("sohu.com")){
//搜狐自媒体号
......@@ -144,6 +144,13 @@ public class TreateData {
}else if(url.contains("baijia.baidu.com")){
//百度百家
source = "百家号-" + document.select("section.info").select("span.author").text();
}else if(url.contains("yidianzixun.com")){
//一点资讯
if(html.contains("related_wemedia")){
source = "一点号-" + html.split("media_name\":\"")[1].split("\",\"")[0];
}else{
source = html.split("source\":\"")[1].split("\",\"")[0];
}
}
return source;
} catch (Exception e) {
......@@ -152,10 +159,6 @@ public class TreateData {
}
/**
* @Title: matchChannel
* @author hero
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment