Commit 4dac5870 by zhiwei

1.添加自媒体号中的帐号来源采集中的一点资讯匹配规则

parent cd456869
package com.zhiwei.source_forward.crawler; package com.zhiwei.source_forward.crawler;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import org.jsoup.nodes.Node;
import com.zhiwei.source_forward.util.SourceData;
import com.zhiwei.source_forward.util.TreateData; import com.zhiwei.source_forward.util.TreateData;
import us.codecraft.webmagic.Page; import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site; import us.codecraft.webmagic.Site;
......
...@@ -123,7 +123,7 @@ public class TreateData { ...@@ -123,7 +123,7 @@ public class TreateData {
if(url.contains("toutiao.com")){ if(url.contains("toutiao.com")){
//今日头条帐号匹配 //今日头条帐号匹配
if(html.contains(" source: '")){ if(html.contains(" source: '")){
source = "今日头条-" + html.split(" source: '")[1].split("',")[0]; source = "今日头条-" + html.split("source: '")[1].split("',")[0];
} }
}else if(url.contains("sohu.com")){ }else if(url.contains("sohu.com")){
//搜狐自媒体号 //搜狐自媒体号
...@@ -144,6 +144,13 @@ public class TreateData { ...@@ -144,6 +144,13 @@ public class TreateData {
}else if(url.contains("baijia.baidu.com")){ }else if(url.contains("baijia.baidu.com")){
//百度百家 //百度百家
source = "百家号-" + document.select("section.info").select("span.author").text(); source = "百家号-" + document.select("section.info").select("span.author").text();
}else if(url.contains("yidianzixun.com")){
//一点资讯
if(html.contains("related_wemedia")){
source = "一点号-" + html.split("media_name\":\"")[1].split("\",\"")[0];
}else{
source = html.split("source\":\"")[1].split("\",\"")[0];
}
} }
return source; return source;
} catch (Exception e) { } catch (Exception e) {
...@@ -151,10 +158,6 @@ public class TreateData { ...@@ -151,10 +158,6 @@ public class TreateData {
} }
} }
/** /**
* @Title: matchChannel * @Title: matchChannel
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment