Commit 29320d28 by yangchen

提升版本 增加匹配来源

parent ab587b25
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>source-forward</artifactId> <artifactId>source-forward</artifactId>
<version>0.1.8-SNAPSHOT</version> <version>0.1.9-SNAPSHOT</version>
<name>source-forward</name> <name>source-forward</name>
<description>验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)</description> <description>验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)</description>
......
...@@ -66,7 +66,9 @@ public class SourceForwardCrawler { ...@@ -66,7 +66,9 @@ public class SourceForwardCrawler {
if(url.contains("www.toutiao.com")){ if(url.contains("www.toutiao.com")){
headers.put("referer", url); headers.put("referer", url);
} }
if(url.contains("china.prcfe.com")) {
url = "http://china.prcfe.com/e/extend/ShowSource/?id=" + url.split("/")[url.split("/").length-1].split("\\.")[0];
}
Request request = RequestUtils.wrapGet(url, headers); Request request = RequestUtils.wrapGet(url, headers);
counter.add(); counter.add();
httpBoot.asyncCall(request, ProxyHolder.NAT_HEAVY_PROXY, true).whenComplete((rs,ex) -> { httpBoot.asyncCall(request, ProxyHolder.NAT_HEAVY_PROXY, true).whenComplete((rs,ex) -> {
......
...@@ -81,7 +81,7 @@ public class SourceForward { ...@@ -81,7 +81,7 @@ public class SourceForward {
public static void main(String[] args) { public static void main(String[] args) {
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER); ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
List<String> urlList = new ArrayList<>(); List<String> urlList = new ArrayList<>();
urlList.add("http://industry.caijing.com.cn/20190423/4582310.shtml"); urlList.add("http://stock.10jqka.com.cn/usstock/20190621/c612094454.shtml");
List<SourceForwardBean> da = SourceForward.getSourceForward(urlList); List<SourceForwardBean> da = SourceForward.getSourceForward(urlList);
for(SourceForwardBean sfb : da) { for(SourceForwardBean sfb : da) {
System.out.println(sfb.toString()); System.out.println(sfb.toString());
......
...@@ -184,6 +184,9 @@ public class MatchSource { ...@@ -184,6 +184,9 @@ public class MatchSource {
}else if(url.contains("finance.ifeng.com")){ }else if(url.contains("finance.ifeng.com")){
//单独处理凤凰网 //单独处理凤凰网
source = document.select("p.p_time").select("span").select("span").select("a").text(); source = document.select("p.p_time").select("span").select("span").select("a").text();
if(Objects.isNull(source) || source.length() < 1) {
source = html.split("source\":\"")[1].split("\"")[0];
}
}else if(url.contains("iphone.265g.com")){ }else if(url.contains("iphone.265g.com")){
//单独处理265G网 //单独处理265G网
source = document.select("div.article_info").select("span").text().replaceAll(".*来源:|QQ群号.*", ""); source = document.select("div.article_info").select("span").text().replaceAll(".*来源:|QQ群号.*", "");
...@@ -299,6 +302,14 @@ public class MatchSource { ...@@ -299,6 +302,14 @@ public class MatchSource {
}else if(url.contains("finance.youth.cn")){ }else if(url.contains("finance.youth.cn")){
//单独处理中国青年网 //单独处理中国青年网
source = document.select("span#source_baidu").text().replaceAll("来源:|作者.*", ""); source = document.select("span#source_baidu").text().replaceAll("来源:|作者.*", "");
}else if(url.contains("china.com")) {
//中国金融商报
source = document.select("#chan_newsInfo > a").text();
}else if(url.contains("xw.qq.com")) {
//腾讯网客户端
source = document.select("div.tpl_header_author").text();
}else if(url.contains("china.prcfe.com")) {
source = html.split("\"")[1];
} }
if(Objects.nonNull(source) && source.length() != 0) { if(Objects.nonNull(source) && source.length() != 0) {
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment