Commit 37ac4e23 by yangchen

提交修改后版本

parent bde825dd
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>source-forward</artifactId> <artifactId>source-forward</artifactId>
<version>0.0.7-SNAPSHOT</version> <version>0.0.8-SNAPSHOT</version>
<name>source-forward</name> <name>source-forward</name>
<description>验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)</description> <description>验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)</description>
......
...@@ -45,7 +45,7 @@ public class MediaSelfSourceCrawler { ...@@ -45,7 +45,7 @@ public class MediaSelfSourceCrawler {
* @throws Exception * @throws Exception
*/ */
public MultiThreadingCounter submitTask(MediaSelfSourceDataCallBack callback,String... urls) throws Exception { public MultiThreadingCounter submitTask(MediaSelfSourceDataCallBack callback,String... urls) throws Exception {
MultiThreadingCounter counter = new MultiThreadingCounter("任务======= ", 15,TimeUnit.SECONDS,true); MultiThreadingCounter counter = new MultiThreadingCounter("任务======= ", 15,TimeUnit.MINUTES,true);
start(counter, callback, urls); start(counter, callback, urls);
return counter; return counter;
} }
......
...@@ -100,10 +100,8 @@ public class SourceForwardCrawler { ...@@ -100,10 +100,8 @@ public class SourceForwardCrawler {
isforward = "未知"; isforward = "未知";
} }
}else if(attr.get().toString().contains("www.toutiao.com")){ }else if(attr.get().toString().contains("www.toutiao.com")){
if(body.contains("isOriginal")){ if(body.contains("isOriginal") && body.contains("isOriginal: true")){
if(body.contains("isOriginal: true")){ isforward = "原创";
isforward = "原创";
}
} }
}else{ }else{
channel = MatchChannel.verifyChannel(attr.get().toString()); channel = MatchChannel.verifyChannel(attr.get().toString());
......
...@@ -7,8 +7,6 @@ import java.util.List; ...@@ -7,8 +7,6 @@ import java.util.List;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean; import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution; import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution;
import com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler; import com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler;
...@@ -19,18 +17,17 @@ public class MediaSelfSource { ...@@ -19,18 +17,17 @@ public class MediaSelfSource {
private static Logger logger = LogManager.getLogger(MediaSelfSource.class); private static Logger logger = LogManager.getLogger(MediaSelfSource.class);
public static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList) { public static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList) {
List<MediaSelfSourceBean> list = MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList); return MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList);
return list;
} }
public static void main(String[] args) { public static void main(String[] args) {
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER); // ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
List<String> urlList = new ArrayList<>(); // List<String> urlList = new ArrayList<>();
urlList.add("http://sh.qihoo.com/pc/91d1d565fe552fa1e?sign=360_e39369d1"); // urlList.add("http://sh.qihoo.com/pc/91d1d565fe552fa1e?sign=360_e39369d1");
List<MediaSelfSourceBean> u = MediaSelfSource.getMediaSelfSource(urlList); // List<MediaSelfSourceBean> u = MediaSelfSource.getMediaSelfSource(urlList);
for(MediaSelfSourceBean b : u) { // for(MediaSelfSourceBean b : u) {
System.out.println(b.toString()); // System.out.println(b.toString());
} // }
} }
static class MediaSelfSourceCrawlerThread extends Thread{ static class MediaSelfSourceCrawlerThread extends Thread{
......
...@@ -2,7 +2,6 @@ package com.zhiwei.source_forward.run; ...@@ -2,7 +2,6 @@ package com.zhiwei.source_forward.run;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
...@@ -10,11 +9,9 @@ import java.util.Map.Entry; ...@@ -10,11 +9,9 @@ import java.util.Map.Entry;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
import com.zhiwei.source_forward.bean.SourceForwardBean; import com.zhiwei.source_forward.bean.SourceForwardBean;
import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution; import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution;
import com.zhiwei.source_forward.crawler.SourceForwardCrawler; import com.zhiwei.source_forward.crawler.SourceForwardCrawler;
import com.zhiwei.source_forward.run.MediaSelfSource.MediaSelfSourceCrawlerThread;
import com.zhiwei.source_forward.util.SourceForwardDataCallBack; import com.zhiwei.source_forward.util.SourceForwardDataCallBack;
/** /**
...@@ -28,82 +25,6 @@ public class SourceForward { ...@@ -28,82 +25,6 @@ public class SourceForward {
private static Logger logger = LogManager.getLogger(SourceForward.class); private static Logger logger = LogManager.getLogger(SourceForward.class);
/** /**
* @Title: getMediaSelfSource
* @author hero
* @Description: 根据链接匹配自媒体号名称
* @param @param dataMap
* @param @return 设定文件
* @return Map<String,Map<String,Object>> 返回类型
*/
public static Map<String,Map<String,Object>> getMediaSelfSource(Map<String,Map<String,Object>> dataMap){
    // Enriches dataMap in place: every key is treated as a link, the links are handed to
    // MediaSelfSourceCrawlerThread.getMediaSelfSource, and for each returned bean whose URL
    // is a key of dataMap the bean's media-self account and channel are written into that
    // link's attribute map. The same dataMap instance is returned.
    List<String> urlList = new ArrayList<>(dataMap.keySet());
    // Run the source-verification crawler for all links at once
    List<MediaSelfSourceBean> sourceForwardList = MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList);
    for(MediaSelfSourceBean msfb : sourceForwardList){
        // A single lookup covers both "link was not requested" and "link has a null
        // attribute map"; previously the latter caused a NullPointerException.
        Map<String,Object> data = dataMap.get(msfb.getUrl());
        if(data == null){
            continue;
        }
        // The attribute map is mutated in place, so it does not need to be put back.
        data.put("自媒体号", msfb.getMediaself());
        data.put("频道", msfb.getChannel());
    }
    return dataMap;
}
/**
 * @Title: getMediaSelfSource
 * @author hero
 * @Description: Matches each link to its media-self account.
 * @param urlList links to look up
 * @return a map keyed by every link in urlList; the value is getMediaself() of the
 *         bean returned for that link, or null when no bean was returned for it
 */
public static Map<String,String> getMediaSelfSource(List<String> urlList){
    // Seed every requested link with null so links without a result still appear in the output
    Map<String,String> accountByUrl = new HashMap<>();
    for(String link : urlList){
        accountByUrl.put(link, null);
    }
    // Run the source-verification crawler and merge its results
    for(MediaSelfSourceBean bean : MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList)){
        String link = bean.getUrl();
        if(!accountByUrl.containsKey(link)){
            // Bean refers to a link that was not requested; ignore it
            continue;
        }
        accountByUrl.put(link, bean.getMediaself());
    }
    return accountByUrl;
}
/**
 *
 * @Title: getMediaSelfSource
 * @author hero
 * @Description: Matches a single link to its media-self account.
 * @param url link to look up
 * @return getMediaself() of the first bean returned for the link, or null when
 *         no bean was returned
 */
public static String getMediaSelfSource(String url){
    // Run the source-verification crawler for this one link
    List<String> urlList = new ArrayList<>();
    urlList.add(url);
    List<MediaSelfSourceBean> sourceForwardList = MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList);
    // Only the first result is used; state that explicitly instead of a loop that
    // always returns on its first iteration.
    if(sourceForwardList.isEmpty()){
        return null;
    }
    return sourceForwardList.get(0).getMediaself();
}
/**
* @Title: getSourceForward * @Title: getSourceForward
* @author hero * @author hero
* @Description: 验证文章是否转发 * @Description: 验证文章是否转发
...@@ -117,9 +38,7 @@ public class SourceForward { ...@@ -117,9 +38,7 @@ public class SourceForward {
for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){ for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){
urlList.add(entry.getKey()); urlList.add(entry.getKey());
} }
System.out.println(urlList.size());
List<SourceForwardBean> dataList = SourceForwardCrawlerThread.getSourceForward(urlList); List<SourceForwardBean> dataList = SourceForwardCrawlerThread.getSourceForward(urlList);
System.out.println(dataList.size());
for(SourceForwardBean sfb : dataList){ for(SourceForwardBean sfb : dataList){
String url = sfb.getUrl(); String url = sfb.getUrl();
String root_source = sfb.getRoot_source(); String root_source = sfb.getRoot_source();
...@@ -161,7 +80,7 @@ public class SourceForward { ...@@ -161,7 +80,7 @@ public class SourceForward {
public static void main(String[] args) { public static void main(String[] args) {
// ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER); // ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
// List<String> urlList = new ArrayList<>(); // List<String> urlList = new ArrayList<>();
// urlList.add("http://www.toutiao.com/a6452936157751968013/"); // urlList.add("https://www.toutiao.com/a6634320415839748621");
// List<SourceForwardBean> da = SourceForward.getSourceForward(urlList); // List<SourceForwardBean> da = SourceForward.getSourceForward(urlList);
// for(SourceForwardBean sfb : da) { // for(SourceForwardBean sfb : da) {
// System.out.println(sfb.toString()); // System.out.println(sfb.toString());
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment