Commit 3342069b by win 10

新增了QQ看点、文汇APP、博客中国三个自媒体的来源

parent 9557316d
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -141,7 +141,7 @@ public class MediaSelfSourceCrawler {
logger.error("exception ",e);
source = null;
}
logger.info(attr.get()+"================="+source);
logger.info(attr.get()+"=================来源" + source);
MediaSelfSourceBean msfb = new MediaSelfSourceBean(attr.get().toString(), source, channel);
if (callback == null) {
logger.warn("DataCallback 对象为 null,无法保存数据");
......
......@@ -25,7 +25,7 @@ public class MediaSelfSource {
public static void main(String[] args) {
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
List<String> urlList = new ArrayList<>();
urlList.add("http://sh.qihoo.com/pc/9dcfa48989d33df34?cota=1&sign=360_e39369d1&refer_scene=so_3");
urlList.add("http://yugang.blogchina.com/713055888.html");
List<MediaSelfSourceBean> u = MediaSelfSource.getMediaSelfSource(urlList);
for(MediaSelfSourceBean b : u) {
System.out.println(b.toString());
......
......@@ -419,17 +419,20 @@ public class MatchSource {
if(source!=null && source.length()>1){
source = "汽车之家-" + source;
}
}
else if(url.contains("item.btime.com")){
}else if(url.contains("item.btime.com")){
//北京时间
source = document.select("a.author").text();
if(source!=null && source.length()>1){
source = "北京时间-" + source;
}
}
else if(url.contains("item.btime.com")){
}else if(url.contains("item.btime.com")){
//北京时间
source = document.select("span.col cite").text();
}else if(url.contains("mp.qq.com")){
source = document.select("div#account_top > div.puin_text > div.pname").text();
if(source!=null && !source.equals("")){
source = "QQ看点-" + source;
}
}else if(url.contains("qq.com/")){
//腾讯网-企鹅号
source = html.split("media\": \"")[1].split("\",")[0];
......@@ -514,6 +517,21 @@ public class MatchSource {
if(source!=null && !source.equals("")){
source = "连线家-" + source;
}
}else if(url.contains("itouchtv.cn")){
source = document.select("div.index__article-media-20Tg_ > span:nth-child(1)").text();
if(source!=null && !source.equals("")){
source = "触电新闻-" + source;
}
}else if(url.contains("whb.cn")){
source = document.select("div.yidian-info > span:nth-child(1)").text();
if(source!=null && !source.equals("")){
source = "文汇APP-" + source;
}
}else if(url.contains("blogchina.com")){
source = document.select("div.meta-top > label.lm_name > span > a").text();
if(source!=null && !source.equals("")){
source = "博客中国-" + source;
}
}
return source;
} catch (Exception e) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment