Commit 3342069b by win 10

新增了QQ看点、触电新闻、文汇APP、博客中国四个自媒体的来源

parent 9557316d
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -141,7 +141,7 @@ public class MediaSelfSourceCrawler { ...@@ -141,7 +141,7 @@ public class MediaSelfSourceCrawler {
logger.error("exception ",e); logger.error("exception ",e);
source = null; source = null;
} }
logger.info(attr.get()+"================="+source); logger.info(attr.get()+"=================来源" + source);
MediaSelfSourceBean msfb = new MediaSelfSourceBean(attr.get().toString(), source, channel); MediaSelfSourceBean msfb = new MediaSelfSourceBean(attr.get().toString(), source, channel);
if (callback == null) { if (callback == null) {
logger.warn("DataCallback 对象为 null,无法保存数据"); logger.warn("DataCallback 对象为 null,无法保存数据");
......
...@@ -25,7 +25,7 @@ public class MediaSelfSource { ...@@ -25,7 +25,7 @@ public class MediaSelfSource {
public static void main(String[] args) { public static void main(String[] args) {
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER); ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
List<String> urlList = new ArrayList<>(); List<String> urlList = new ArrayList<>();
urlList.add("http://sh.qihoo.com/pc/9dcfa48989d33df34?cota=1&sign=360_e39369d1&refer_scene=so_3"); urlList.add("http://yugang.blogchina.com/713055888.html");
List<MediaSelfSourceBean> u = MediaSelfSource.getMediaSelfSource(urlList); List<MediaSelfSourceBean> u = MediaSelfSource.getMediaSelfSource(urlList);
for(MediaSelfSourceBean b : u) { for(MediaSelfSourceBean b : u) {
System.out.println(b.toString()); System.out.println(b.toString());
......
...@@ -419,18 +419,21 @@ public class MatchSource { ...@@ -419,18 +419,21 @@ public class MatchSource {
if(source!=null && source.length()>1){ if(source!=null && source.length()>1){
source = "汽车之家-" + source; source = "汽车之家-" + source;
} }
} }else if(url.contains("item.btime.com")){
else if(url.contains("item.btime.com")){
//北京时间 //北京时间
source = document.select("a.author").text(); source = document.select("a.author").text();
if(source!=null && source.length()>1){ if(source!=null && source.length()>1){
source = "北京时间-" + source; source = "北京时间-" + source;
} }
} }else if(url.contains("item.btime.com")){
else if(url.contains("item.btime.com")){
//北京时间 //北京时间
source = document.select("span.col cite").text(); source = document.select("span.col cite").text();
}else if(url.contains("qq.com/")){ }else if(url.contains("mp.qq.com")){
source = document.select("div#account_top > div.puin_text > div.pname").text();
if(source!=null && !source.equals("")){
source = "QQ看点-" + source;
}
}else if(url.contains("qq.com/")){
//腾讯网-企鹅号 //腾讯网-企鹅号
source = html.split("media\": \"")[1].split("\",")[0]; source = html.split("media\": \"")[1].split("\",")[0];
if(source!=null && source.length()>1){ if(source!=null && source.length()>1){
...@@ -514,6 +517,21 @@ public class MatchSource { ...@@ -514,6 +517,21 @@ public class MatchSource {
if(source!=null && !source.equals("")){ if(source!=null && !source.equals("")){
source = "连线家-" + source; source = "连线家-" + source;
} }
}else if(url.contains("itouchtv.cn")){
source = document.select("div.index__article-media-20Tg_ > span:nth-child(1)").text();
if(source!=null && !source.equals("")){
source = "触电新闻-" + source;
}
}else if(url.contains("whb.cn")){
source = document.select("div.yidian-info > span:nth-child(1)").text();
if(source!=null && !source.equals("")){
source = "文汇APP-" + source;
}
}else if(url.contains("blogchina.com")){
source = document.select("div.meta-top > label.lm_name > span > a").text();
if(source!=null && !source.equals("")){
source = "博客中国-" + source;
}
} }
return source; return source;
} catch (Exception e) { } catch (Exception e) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment