Commit 87e9aaf3 by yangchen

添加代理

parent 19bb2414
...@@ -4,7 +4,7 @@ public class MediaSelfSourceBean { ...@@ -4,7 +4,7 @@ public class MediaSelfSourceBean {
private String url; private String url;
private String source; private String mediaself;
private String channel; private String channel;
...@@ -16,14 +16,6 @@ public class MediaSelfSourceBean { ...@@ -16,14 +16,6 @@ public class MediaSelfSourceBean {
this.url = url; this.url = url;
} }
public String getSource() {
return source;
}
public void setSource(String source) {
this.source = source;
}
public String getChannel() { public String getChannel() {
return channel; return channel;
} }
...@@ -36,19 +28,29 @@ public class MediaSelfSourceBean { ...@@ -36,19 +28,29 @@ public class MediaSelfSourceBean {
super(); super();
} }
public MediaSelfSourceBean(String url, String source, String channel) { @Override
public String toString() {
return "MediaSelfSourceBean [url=" + url + ", mediaself=" + mediaself
+ ", channel=" + channel + "]";
}
public MediaSelfSourceBean(String url, String mediaself, String channel) {
super(); super();
this.url = url; this.url = url;
this.source = source; this.mediaself = mediaself;
this.channel = channel; this.channel = channel;
} }
@Override public String getMediaself() {
public String toString() { return mediaself;
return "MediaSelfSourceBean [url=" + url + ", source=" + source
+ ", channel=" + channel + "]";
} }
public void setMediaself(String mediaself) {
this.mediaself = mediaself;
}
/** /**
......
package com.zhiwei.source_forward.config;
import java.io.InputStream;
import java.util.Properties;
/**
 * Proxy settings loaded once, at class-initialization time, from the
 * classpath resource {@code proxyip.properties}.
 *
 * <p>Loading is best-effort: if the resource is missing or cannot be read, a
 * diagnostic is written to standard error and both fields stay {@code null}.
 */
public class ProxyConfig {
    /** Value of the {@code registry} property, or {@code null} if it could not be loaded. */
    public static String registry;
    /** Value of the {@code group} property, or {@code null} if it could not be loaded. */
    public static String group;

    static {
        // try-with-resources closes the stream even when load() throws;
        // a null resource is tolerated by the construct and handled below.
        try (InputStream is = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream("proxyip.properties")) {
            if (is == null) {
                // Report the real cause instead of the NullPointerException that
                // Properties.load(null) would otherwise raise.
                System.err.println("ProxyConfig: proxyip.properties not found on the classpath");
            } else {
                Properties conf = new Properties();
                conf.load(is);
                registry = conf.getProperty("registry");
                group = conf.getProperty("group");
            }
        } catch (Exception e) {
            // Deliberately non-fatal: a failure must not abort class initialization.
            e.printStackTrace();
        }
    }
}
...@@ -3,6 +3,8 @@ package com.zhiwei.source_forward.run; ...@@ -3,6 +3,8 @@ package com.zhiwei.source_forward.run;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
...@@ -17,31 +19,40 @@ public class ContentMatch { ...@@ -17,31 +19,40 @@ public class ContentMatch {
private static Logger logger = LogManager.getLogger(ContentMatch.class); private static Logger logger = LogManager.getLogger(ContentMatch.class);
public static List<ContentBean> getContentMatch(List<String> urlList){ /**
//启动获取链接来源 * @Title: getContent
List<ContentBean> dataList = ContentMatchCrawlerThread.getContentMatch(urlList); * @author hero
return dataList; * @Description: 获取链接正文
* @param @param dataMap
* @param @return 设定文件
* @return Map<String,Map<String,Object>> 返回类型
*/
public static Map<String,Map<String,Object>> getContent(Map<String,Map<String,Object>> dataMap){
//启动验证来源程序
List<String> urlList = new ArrayList<>();
for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){
urlList.add(entry.getKey());
} }
public static void main(String[] args) { List<ContentBean> contentList = ContentMatchCrawlerThread.getContentMatch(urlList);
List<String> urlList = new ArrayList<>(); for(ContentBean cb : contentList){
urlList.add("http://sh.qihoo.com/pc/99493b3bf136d8e20?sign=360_e39369d1"); String url = cb.getUrl();
urlList.add("http://news.ctocio.com.cn/383/14543883.shtml"); //搜集原文
urlList.add("http://www.jn001.com/news/2018-07/05/content_561091.htm"); if(dataMap.containsKey(url)){
urlList.add("http://www.ca800.com/fFa8D/bOTUBC1QfF/40944.aspx"); Map<String,Object> data = dataMap.get(url);
urlList.add("http://sh.qihoo.com/pc/988470164f6c5ca14?sign=360_e39369d1"); String content = cb.getContent() + "";
urlList.add("http://news.jstv.com/a/20180705/1530731642686.shtml?jsbcApp=1"); data.put("content", content);
urlList.add("https://tech.sina.cn/i/gn/2018-07-05/detail-ihexfcvi8155439.d.html?pos=18"); dataMap.put(url, data);
urlList.add("http://sh.qihoo.com/pc/983b3d157f91af18b?sign=360_e39369d1"); }
urlList.add("http://china.rednet.cn/c/2018/07/05/4671927.htm");
urlList.add("http://news.enorth.com.cn/system/2018/07/05/035782857.shtml");
urlList.add("https://www.toutiao.com/i6573922350037729796/");
urlList.add("http://news.cnhubei.com/xw/sh/201807/t4132048.shtml");
urlList.add("https://www.toutiao.com/a6573774143949373956/");
List<ContentBean> da = ContentMatch.getContentMatch(urlList);
for(ContentBean sfb : da) {
System.out.println(sfb.toString());
} }
return dataMap;
}
public static List<ContentBean> getContentMatch(List<String> urlList){
//启动获取链接正文
List<ContentBean> dataList = ContentMatchCrawlerThread.getContentMatch(urlList);
return dataList;
} }
static class ContentMatchCrawlerThread extends Thread{ static class ContentMatchCrawlerThread extends Thread{
......
...@@ -21,18 +21,9 @@ public class MediaSelfSource { ...@@ -21,18 +21,9 @@ public class MediaSelfSource {
return list; return list;
} }
public static void main(String[] args) {
List<String> urlList = new ArrayList<>();
urlList.add("https://baijiahao.baidu.com/s?id=1606950814338460255&wfr=spider&for=pc&qq-pf-to=pcqq.c2c");
List<MediaSelfSourceBean> da = MediaSelfSource.getMediaSelfSource(urlList);
for(MediaSelfSourceBean mssb : da) {
System.out.println(mssb.toString());
}
}
static class MediaSelfSourceCrawlerThread extends Thread{ static class MediaSelfSourceCrawlerThread extends Thread{
private static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList){ static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList){
try{ try{
MediaSelfSourceCrawler crawler = new MediaSelfSourceCrawler(); MediaSelfSourceCrawler crawler = new MediaSelfSourceCrawler();
List<MediaSelfSourceBean> list = Collections.synchronizedList(new ArrayList<MediaSelfSourceBean>()); List<MediaSelfSourceBean> list = Collections.synchronizedList(new ArrayList<MediaSelfSourceBean>());
......
...@@ -2,14 +2,19 @@ package com.zhiwei.source_forward.run; ...@@ -2,14 +2,19 @@ package com.zhiwei.source_forward.run;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
import com.zhiwei.source_forward.bean.SourceForwardBean; import com.zhiwei.source_forward.bean.SourceForwardBean;
import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution; import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution;
import com.zhiwei.source_forward.crawler.SourceForwardCrawler; import com.zhiwei.source_forward.crawler.SourceForwardCrawler;
import com.zhiwei.source_forward.run.MediaSelfSource.MediaSelfSourceCrawlerThread;
import com.zhiwei.source_forward.util.SourceForwardDataCallBack; import com.zhiwei.source_forward.util.SourceForwardDataCallBack;
/** /**
...@@ -22,32 +27,157 @@ public class SourceForward { ...@@ -22,32 +27,157 @@ public class SourceForward {
private static Logger logger = LogManager.getLogger(SourceForward.class); private static Logger logger = LogManager.getLogger(SourceForward.class);
/**
 * Matches each URL in {@code dataMap} to its self-media account and channel.
 *
 * <p>The keys of {@code dataMap} are passed as the URL list to
 * {@code MediaSelfSourceCrawlerThread.getMediaSelfSource}. For every returned
 * bean whose URL is a key of the map, the record stored under that URL gets
 * its "自媒体号" and "频道" entries set from the bean.
 *
 * @author hero
 * @param dataMap records keyed by URL; modified in place
 * @return the same {@code dataMap} instance that was passed in
 */
public static Map<String,Map<String,Object>> getMediaSelfSource(Map<String,Map<String,Object>> dataMap){
    // The map keys are the URLs to look up.
    List<String> urls = new ArrayList<>(dataMap.keySet());
    List<MediaSelfSourceBean> beans = MediaSelfSourceCrawlerThread.getMediaSelfSource(urls);
    for(MediaSelfSourceBean bean : beans){
        String key = bean.getUrl();
        if(!dataMap.containsKey(key)){
            // The bean's URL is not a key of dataMap: nothing to update.
            continue;
        }
        Map<String,Object> record = dataMap.get(key);
        record.put("自媒体号", bean.getMediaself());
        record.put("频道", bean.getChannel());
        dataMap.put(key, record);
    }
    return dataMap;
}
/**
 * Matches each URL in {@code urlList} to its self-media account name.
 *
 * <p>Every URL in the list is first mapped to {@code null}; the value is then
 * replaced by {@code getMediaself()} of each bean returned by
 * {@code MediaSelfSourceCrawlerThread.getMediaSelfSource} whose URL is a key
 * of the map.
 *
 * @author hero
 * @param urlList URLs to look up
 * @return a new map from URL to account name; URLs with no matching bean map to {@code null}
 */
public static Map<String,String> getMediaSelfSource(List<String> urlList){
    // Seed the result so that every requested URL is present as a key.
    Map<String,String> accountByUrl = new HashMap<>();
    for(String requested : urlList){
        accountByUrl.put(requested, null);
    }
    for(MediaSelfSourceBean bean : MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList)){
        String link = bean.getUrl();
        if(!accountByUrl.containsKey(link)){
            // The bean's URL is not one of the seeded keys: ignore it.
            continue;
        }
        accountByUrl.put(link, bean.getMediaself());
    }
    return accountByUrl;
}
/**
 * Matches a single URL to its self-media account name.
 *
 * <p>The URL is wrapped in a one-element list and passed to
 * {@code MediaSelfSourceCrawlerThread.getMediaSelfSource}; the account name of
 * the first returned bean is the result.
 *
 * @author hero
 * @param url URL to look up
 * @return {@code getMediaself()} of the first returned bean, or {@code null} when no bean is returned
 */
public static String getMediaSelfSource(String url){
    List<String> single = new ArrayList<>();
    single.add(url);
    List<MediaSelfSourceBean> beans = MediaSelfSourceCrawlerThread.getMediaSelfSource(single);
    // Only the first bean, if any, is consulted.
    return beans.isEmpty() ? null : beans.get(0).getMediaself();
}
/**
 * Marks each record in {@code dataMap} as original ("原创") or forwarded ("转发").
 *
 * <p>The keys of {@code dataMap} are passed as the URL list to
 * {@code SourceForwardCrawlerThread.getSourceForward}. For every returned bean
 * whose URL is a key of the map:
 * <ul>
 *   <li>the record counts as original when the bean's root source is
 *       {@code null} or equals the record's "来源" value after upper-casing
 *       and trimming both; otherwise it counts as forwarded;</li>
 *   <li>for URLs containing {@code mp.weixin.qq.com} that verdict is replaced
 *       by the bean's own {@code getIsforward()} value, and "原来源" / "频道"
 *       are not written;</li>
 *   <li>for all other URLs "原来源" and "频道" are set from the bean;</li>
 *   <li>the verdict is stored under "是否转发".</li>
 * </ul>
 *
 * @author hero
 * @param dataMap records keyed by URL; modified in place
 * @return the same {@code dataMap} instance that was passed in
 */
public static Map<String,Map<String,Object>> getSourceForward(Map<String,Map<String,Object>> dataMap){
    // The map keys are the URLs to verify.
    List<String> urls = new ArrayList<>(dataMap.keySet());
    for(SourceForwardBean bean : SourceForwardCrawlerThread.getSourceForward(urls)){
        String link = bean.getUrl();
        String rootSource = bean.getRoot_source();
        if(!dataMap.containsKey(link)){
            // The bean's URL is not a key of dataMap: nothing to update.
            continue;
        }
        Map<String,Object> record = dataMap.get(link);
        // A missing "来源" entry becomes the literal text "null" through concatenation.
        String declaredSource = record.get("来源")+"";
        boolean sameSource = rootSource == null
                || rootSource.toUpperCase().trim().equals(declaredSource.toUpperCase().trim());
        String verdict = sameSource ? "原创" : "转发";
        if(link.contains("mp.weixin.qq.com")){
            // For these URLs the bean's own flag overrides the comparison above.
            verdict = bean.getIsforward();
        }else{
            record.put("原来源", rootSource);
            record.put("频道", bean.getChannel());
        }
        record.put("是否转发", verdict);
        dataMap.put(link, record);
    }
    return dataMap;
}
/**
*
* @Description 批量传入链接获取数据
* @param urlList
* @return
*/
public static List<SourceForwardBean> getSourceForward(List<String> urlList){ public static List<SourceForwardBean> getSourceForward(List<String> urlList){
//启动获取链接来源 //启动获取链接来源
List<SourceForwardBean> dataList = SourceForwardCrawlerThread.getSourceForward(urlList); List<SourceForwardBean> dataList = SourceForwardCrawlerThread.getSourceForward(urlList);
return dataList; return dataList;
} }
public static void main(String[] args) { // public static void main(String[] args) {
List<String> urlList = new ArrayList<>(); // List<String> urlList = new ArrayList<>();
urlList.add("http://sh.qihoo.com/pc/99493b3bf136d8e20?sign=360_e39369d1"); // urlList.add("http://sh.qihoo.com/pc/99493b3bf136d8e20?sign=360_e39369d1");
urlList.add("http://news.ctocio.com.cn/383/14543883.shtml"); // urlList.add("http://news.ctocio.com.cn/383/14543883.shtml");
urlList.add("http://www.jn001.com/news/2018-07/05/content_561091.htm"); // urlList.add("http://www.jn001.com/news/2018-07/05/content_561091.htm");
urlList.add("http://www.ca800.com/fFa8D/bOTUBC1QfF/40944.aspx"); // urlList.add("http://www.ca800.com/fFa8D/bOTUBC1QfF/40944.aspx");
urlList.add("http://sh.qihoo.com/pc/988470164f6c5ca14?sign=360_e39369d1"); // urlList.add("http://sh.qihoo.com/pc/988470164f6c5ca14?sign=360_e39369d1");
urlList.add("http://news.jstv.com/a/20180705/1530731642686.shtml?jsbcApp=1"); // urlList.add("http://news.jstv.com/a/20180705/1530731642686.shtml?jsbcApp=1");
urlList.add("https://tech.sina.cn/i/gn/2018-07-05/detail-ihexfcvi8155439.d.html?pos=18"); // urlList.add("https://tech.sina.cn/i/gn/2018-07-05/detail-ihexfcvi8155439.d.html?pos=18");
urlList.add("http://sh.qihoo.com/pc/983b3d157f91af18b?sign=360_e39369d1"); // urlList.add("http://sh.qihoo.com/pc/983b3d157f91af18b?sign=360_e39369d1");
urlList.add("http://china.rednet.cn/c/2018/07/05/4671927.htm"); // urlList.add("http://china.rednet.cn/c/2018/07/05/4671927.htm");
urlList.add("http://news.enorth.com.cn/system/2018/07/05/035782857.shtml"); // urlList.add("http://news.enorth.com.cn/system/2018/07/05/035782857.shtml");
urlList.add("https://www.toutiao.com/i6573922350037729796/"); // urlList.add("https://www.toutiao.com/i6573922350037729796/");
urlList.add("http://news.cnhubei.com/xw/sh/201807/t4132048.shtml"); // urlList.add("http://news.cnhubei.com/xw/sh/201807/t4132048.shtml");
urlList.add("https://www.toutiao.com/a6573774143949373956/"); // urlList.add("https://www.toutiao.com/a6573774143949373956/");
List<SourceForwardBean> da = SourceForward.getSourceForward(urlList); // List<SourceForwardBean> da = SourceForward.getSourceForward(urlList);
for(SourceForwardBean sfb : da) { // for(SourceForwardBean sfb : da) {
System.out.println(sfb.toString()); // System.out.println(sfb.toString());
} // }
} // }
static class SourceForwardCrawlerThread extends Thread{ static class SourceForwardCrawlerThread extends Thread{
......
...@@ -38,12 +38,6 @@ public class URLLive { ...@@ -38,12 +38,6 @@ public class URLLive {
List<UrlLiveBean> dataList = UrlLiveCrawlerThread.getUrlLiveCrawle(urlList); List<UrlLiveBean> dataList = UrlLiveCrawlerThread.getUrlLiveCrawle(urlList);
for(UrlLiveBean ub : dataList){ for(UrlLiveBean ub : dataList){
String url = ub.getUrl(); String url = ub.getUrl();
if(!url.contains("http")){
url = "http://"+url;
}
if(!url.contains("www")){
url = url.replace("://", "://www.");
}
boolean live = ub.isLive(); boolean live = ub.isLive();
if(dataMap.containsKey(url)){ if(dataMap.containsKey(url)){
Map<String,Object> map = dataMap.get(url); Map<String,Object> map = dataMap.get(url);
......
...@@ -5,6 +5,7 @@ import java.net.Proxy; ...@@ -5,6 +5,7 @@ import java.net.Proxy;
import com.zhiwei.proxy.common.Definition.GroupType; import com.zhiwei.proxy.common.Definition.GroupType;
import com.zhiwei.proxy.core.ProxyClient; import com.zhiwei.proxy.core.ProxyClient;
import com.zhiwei.proxy.core.ProxyClientFactory; import com.zhiwei.proxy.core.ProxyClientFactory;
import com.zhiwei.source_forward.config.ProxyConfig;
public class ProxyClientUtil { public class ProxyClientUtil {
...@@ -27,7 +28,7 @@ public class ProxyClientUtil { ...@@ -27,7 +28,7 @@ public class ProxyClientUtil {
if(client==null) { if(client==null) {
synchronized (ProxyClientUtil.class) { synchronized (ProxyClientUtil.class) {
if(client==null) { if(client==null) {
client = ProxyClientFactory.build("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER); client = ProxyClientFactory.build(ProxyConfig.registry, ProxyConfig.group, GroupType.PROVIDER);
} }
} }
} }
......
#registry=zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
#group=hangzhou
##########################测试地址##############################
registry=zookeeper://192.168.0.36:2181
group=local
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment