Commit 87e9aaf3 by yangchen

添加代理

parent 19bb2414
......@@ -4,7 +4,7 @@ public class MediaSelfSourceBean {
private String url;
private String source;
private String mediaself;
private String channel;
......@@ -16,14 +16,6 @@ public class MediaSelfSourceBean {
this.url = url;
}
/**
 * Returns the current value of the {@code source} field.
 *
 * @return the stored source; may be {@code null} if it was never set
 */
public String getSource() {
return source;
}
/**
 * Stores the given value in the {@code source} field.
 *
 * @param source the value to store
 */
public void setSource(String source) {
this.source = source;
}
/**
 * Returns the current value of the {@code channel} field.
 *
 * @return the stored channel; may be {@code null} if it was never set
 */
public String getChannel() {
return channel;
}
......@@ -36,19 +28,29 @@ public class MediaSelfSourceBean {
super();
}
public MediaSelfSourceBean(String url, String source, String channel) {
/**
 * Builds a one-line textual form of this bean listing its url, mediaself and
 * channel values, in that order.
 *
 * @return text of the form {@code MediaSelfSourceBean [url=..., mediaself=..., channel=...]}
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("MediaSelfSourceBean [url=");
    text.append(url);
    text.append(", mediaself=").append(mediaself);
    text.append(", channel=").append(channel);
    text.append("]");
    return text.toString();
}
/**
 * Creates a bean and copies the given values into the fields of the same name.
 *
 * @param url value stored in the {@code url} field
 * @param mediaself value stored in the {@code mediaself} field
 * @param channel value stored in the {@code channel} field
 */
public MediaSelfSourceBean(String url, String mediaself, String channel) {
super();
this.url = url;
// NOTE(review): this signature has no parameter named "source", so this line would only
// assign the field to itself; presumably a leftover from an older (url, source, channel)
// signature — confirm and remove.
this.source = source;
this.mediaself = mediaself;
this.channel = channel;
}
@Override
public String toString() {
return "MediaSelfSourceBean [url=" + url + ", source=" + source
+ ", channel=" + channel + "]";
/**
 * Returns the current value of the {@code mediaself} field.
 *
 * @return the stored mediaself value; may be {@code null} if it was never set
 */
public String getMediaself() {
return mediaself;
}
/**
 * Stores the given value in the {@code mediaself} field.
 *
 * @param mediaself the value to store
 */
public void setMediaself(String mediaself) {
this.mediaself = mediaself;
}
/**
......
package com.zhiwei.source_forward.config;
import java.io.InputStream;
import java.util.Properties;
/**
 * Proxy settings read once, at class-initialization time, from the classpath
 * resource {@code proxyip.properties}.
 *
 * <p>Loading is best effort: when the resource is missing or cannot be read,
 * class initialization still succeeds and both fields stay {@code null}.
 */
public class ProxyConfig {

    /** Value of the {@code registry} property, or {@code null} when it could not be loaded. */
    public static String registry;

    /** Value of the {@code group} property, or {@code null} when it could not be loaded. */
    public static String group;

    static {
        // try-with-resources closes the stream even when load() throws; a null
        // resource is permitted and simply skipped on close.
        try (InputStream is = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream("proxyip.properties")) {
            if (is == null) {
                // getResourceAsStream returns null for an absent resource; report it
                // explicitly instead of through a NullPointerException from load(null).
                System.err.println("proxyip.properties not found on the classpath; "
                        + "ProxyConfig.registry and ProxyConfig.group are left unset");
            } else {
                Properties conf = new Properties();
                conf.load(is);
                registry = conf.getProperty("registry");
                group = conf.getProperty("group");
            }
        } catch (Exception e) {
            // Deliberately broad: a broken config file must not abort class
            // initialization with ExceptionInInitializerError.
            e.printStackTrace();
        }
    }
}
......@@ -3,6 +3,8 @@ package com.zhiwei.source_forward.run;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
......@@ -17,31 +19,40 @@ public class ContentMatch {
private static Logger logger = LogManager.getLogger(ContentMatch.class);
public static List<ContentBean> getContentMatch(List<String> urlList){
//启动获取链接来源
List<ContentBean> dataList = ContentMatchCrawlerThread.getContentMatch(urlList);
return dataList;
/**
 * Fetches the body text for every URL key of {@code dataMap} and stores it in
 * that URL's record under the key {@code "content"}.
 *
 * @author hero
 * @param dataMap records keyed by URL; the records are updated in place
 * @return the same {@code dataMap} instance that was passed in
 */
public static Map<String,Map<String,Object>> getContent(Map<String,Map<String,Object>> dataMap){
// Collect the URL keys that will be handed to the crawler.
List<String> urlList = new ArrayList<>();
for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){
urlList.add(entry.getKey());
}
// NOTE(review): the main(...) lines that follow sit inside this method body, which is not
// valid Java; presumably they are removed-side lines of a diff that were interleaved with
// this method — confirm against the repository.
public static void main(String[] args) {
List<String> urlList = new ArrayList<>();
urlList.add("http://sh.qihoo.com/pc/99493b3bf136d8e20?sign=360_e39369d1");
urlList.add("http://news.ctocio.com.cn/383/14543883.shtml");
urlList.add("http://www.jn001.com/news/2018-07/05/content_561091.htm");
urlList.add("http://www.ca800.com/fFa8D/bOTUBC1QfF/40944.aspx");
urlList.add("http://sh.qihoo.com/pc/988470164f6c5ca14?sign=360_e39369d1");
urlList.add("http://news.jstv.com/a/20180705/1530731642686.shtml?jsbcApp=1");
urlList.add("https://tech.sina.cn/i/gn/2018-07-05/detail-ihexfcvi8155439.d.html?pos=18");
urlList.add("http://sh.qihoo.com/pc/983b3d157f91af18b?sign=360_e39369d1");
urlList.add("http://china.rednet.cn/c/2018/07/05/4671927.htm");
urlList.add("http://news.enorth.com.cn/system/2018/07/05/035782857.shtml");
urlList.add("https://www.toutiao.com/i6573922350037729796/");
urlList.add("http://news.cnhubei.com/xw/sh/201807/t4132048.shtml");
urlList.add("https://www.toutiao.com/a6573774143949373956/");
List<ContentBean> da = ContentMatch.getContentMatch(urlList);
for(ContentBean sfb : da) {
System.out.println(sfb.toString());
// Run the crawler over all collected URLs.
List<ContentBean> contentList = ContentMatchCrawlerThread.getContentMatch(urlList);
for(ContentBean cb : contentList){
String url = cb.getUrl();
// Copy the crawled text into the record registered for this URL, if there is one.
if(dataMap.containsKey(url)){
Map<String,Object> data = dataMap.get(url);
// Concatenating "" means a null getContent() is stored as the text "null".
String content = cb.getContent() + "";
data.put("content", content);
dataMap.put(url, data);
}
}
return dataMap;
}
/**
 * Fetches the body text for a batch of links by delegating to
 * {@code ContentMatchCrawlerThread.getContentMatch}.
 *
 * @param urlList the links to process
 * @return whatever the crawler thread helper returns for {@code urlList}
 */
public static List<ContentBean> getContentMatch(List<String> urlList){
    // Start fetching the body text of the links.
    return ContentMatchCrawlerThread.getContentMatch(urlList);
}
static class ContentMatchCrawlerThread extends Thread{
......
......@@ -21,18 +21,9 @@ public class MediaSelfSource {
return list;
}
/**
 * Manual smoke run: passes one hard-coded link to
 * {@code MediaSelfSource.getMediaSelfSource} and prints every returned bean.
 *
 * @param args unused
 */
public static void main(String[] args) {
    List<String> sampleUrls = new ArrayList<>();
    sampleUrls.add("https://baijiahao.baidu.com/s?id=1606950814338460255&wfr=spider&for=pc&qq-pf-to=pcqq.c2c");
    for (MediaSelfSourceBean bean : MediaSelfSource.getMediaSelfSource(sampleUrls)) {
        System.out.println(bean.toString());
    }
}
static class MediaSelfSourceCrawlerThread extends Thread{
private static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList){
static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList){
try{
MediaSelfSourceCrawler crawler = new MediaSelfSourceCrawler();
List<MediaSelfSourceBean> list = Collections.synchronizedList(new ArrayList<MediaSelfSourceBean>());
......
......@@ -2,14 +2,19 @@ package com.zhiwei.source_forward.run;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
import com.zhiwei.source_forward.bean.SourceForwardBean;
import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution;
import com.zhiwei.source_forward.crawler.SourceForwardCrawler;
import com.zhiwei.source_forward.run.MediaSelfSource.MediaSelfSourceCrawlerThread;
import com.zhiwei.source_forward.util.SourceForwardDataCallBack;
/**
......@@ -22,32 +27,157 @@ public class SourceForward {
private static Logger logger = LogManager.getLogger(SourceForward.class);
/**
 * Matches each URL key of {@code dataMap} to a self-media account and writes the
 * result into that URL's record.
 *
 * <p>For every bean returned by the crawler whose URL has a non-null record in
 * {@code dataMap}, the record receives {@code "自媒体号"} (from
 * {@code getMediaself()}) and {@code "频道"} (from {@code getChannel()}).
 * Beans whose URL has no record, or a {@code null} record, are skipped.
 *
 * @author hero
 * @param dataMap records keyed by URL; the records are updated in place
 * @return the same {@code dataMap} instance that was passed in
 */
public static Map<String,Map<String,Object>> getMediaSelfSource(Map<String,Map<String,Object>> dataMap){
    // The URL keys are the crawler's input.
    List<String> urlList = new ArrayList<>(dataMap.keySet());
    List<MediaSelfSourceBean> sourceForwardList = MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList);
    for (MediaSelfSourceBean msfb : sourceForwardList) {
        // One lookup covers both "no such key" and "key mapped to null"; the old
        // containsKey + get pair threw a NullPointerException in the second case.
        Map<String,Object> data = dataMap.get(msfb.getUrl());
        if (data == null) {
            continue;
        }
        // The record is mutated in place, so it does not need to be put back.
        data.put("自媒体号", msfb.getMediaself());
        data.put("频道", msfb.getChannel());
    }
    return dataMap;
}
/**
 * Matches each link to a self-media account.
 *
 * <p>Every entry of {@code urlList} becomes a key of the result, initially mapped
 * to {@code null}. For each bean returned by the crawler whose URL is one of
 * those keys, the value is replaced by the bean's {@code getMediaself()}.
 *
 * @author hero
 * @param urlList the links to look up
 * @return a new map from link to the matched account value, or {@code null} for
 *         links the crawler returned no bean for
 */
public static Map<String,String> getMediaSelfSource(List<String> urlList){
    Map<String,String> accountByUrl = new HashMap<>();
    for (String requested : urlList) {
        accountByUrl.put(requested, null);
    }
    // Run the crawler and merge its results into the pre-seeded map.
    for (MediaSelfSourceBean bean : MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList)) {
        String link = bean.getUrl();
        if (!accountByUrl.containsKey(link)) {
            continue;
        }
        accountByUrl.put(link, bean.getMediaself());
    }
    return accountByUrl;
}
/**
 * Matches a single link to a self-media account.
 *
 * @author hero
 * @param url the link to look up
 * @return {@code getMediaself()} of the first bean the crawler returns, or
 *         {@code null} when the crawler returns no beans
 */
public static String getMediaSelfSource(String url){
    List<String> singleUrl = new ArrayList<>();
    singleUrl.add(url);
    List<MediaSelfSourceBean> found = MediaSelfSourceCrawlerThread.getMediaSelfSource(singleUrl);
    // Only the first result matters; later beans are never inspected.
    java.util.Iterator<MediaSelfSourceBean> cursor = found.iterator();
    if (cursor.hasNext()) {
        return cursor.next().getMediaself();
    }
    return null;
}
/**
 * Labels every record of {@code dataMap} as original or forwarded.
 *
 * <p>The URL keys are passed to the crawler. For each returned bean whose URL
 * has a non-null record:
 * <ul>
 *   <li>if the URL contains {@code mp.weixin.qq.com}, {@code "是否转发"} is set to
 *       the bean's {@code getIsforward()} and nothing else is written;</li>
 *   <li>otherwise {@code "原来源"} is set to the bean's root source, {@code "频道"}
 *       to its channel, and {@code "是否转发"} to {@code "原创"} when the root
 *       source is {@code null} or equals the record's {@code "来源"} after
 *       upper-casing and trimming both, else to {@code "转发"}.</li>
 * </ul>
 * Beans whose URL has no record, or a {@code null} record, are skipped.
 *
 * @author hero
 * @param dataMap records keyed by URL; the records are updated in place
 * @return the same {@code dataMap} instance that was passed in
 */
public static Map<String,Map<String,Object>> getSourceForward(Map<String,Map<String,Object>> dataMap){
    // The URL keys are the crawler's input.
    List<String> urlList = new ArrayList<>(dataMap.keySet());
    List<SourceForwardBean> dataList = SourceForwardCrawlerThread.getSourceForward(urlList);
    for (SourceForwardBean sfb : dataList) {
        String url = sfb.getUrl();
        String root_source = sfb.getRoot_source();
        // One lookup covers both "no such key" and "key mapped to null"; the old
        // containsKey + get pair threw a NullPointerException in the second case.
        Map<String,Object> data = dataMap.get(url);
        if (data == null) {
            continue;
        }
        // Concatenating "" means a missing "来源" entry is compared as the text "null".
        String source = data.get("来源") + "";
        String isForward = "转发";
        if (root_source == null || root_source.toUpperCase().trim().equals(source.toUpperCase().trim())) {
            isForward = "原创";
        }
        if (url.contains("mp.weixin.qq.com")) {
            // For these URLs the bean's own flag replaces the comparison result.
            isForward = sfb.getIsforward();
        } else {
            data.put("原来源", root_source);
            data.put("频道", sfb.getChannel());
        }
        // The record is mutated in place, so it does not need to be put back.
        data.put("是否转发", isForward);
    }
    return dataMap;
}
/**
 * Fetches source data for a batch of links by delegating to
 * {@code SourceForwardCrawlerThread.getSourceForward}.
 *
 * @param urlList the links to process
 * @return whatever the crawler thread helper returns for {@code urlList}
 */
public static List<SourceForwardBean> getSourceForward(List<String> urlList){
    // Start fetching the source of the links.
    return SourceForwardCrawlerThread.getSourceForward(urlList);
}
/**
 * Manual smoke run: passes a fixed set of sample links to
 * {@code SourceForward.getSourceForward} and prints every returned bean.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String[] sampleUrls = {
        "http://sh.qihoo.com/pc/99493b3bf136d8e20?sign=360_e39369d1",
        "http://news.ctocio.com.cn/383/14543883.shtml",
        "http://www.jn001.com/news/2018-07/05/content_561091.htm",
        "http://www.ca800.com/fFa8D/bOTUBC1QfF/40944.aspx",
        "http://sh.qihoo.com/pc/988470164f6c5ca14?sign=360_e39369d1",
        "http://news.jstv.com/a/20180705/1530731642686.shtml?jsbcApp=1",
        "https://tech.sina.cn/i/gn/2018-07-05/detail-ihexfcvi8155439.d.html?pos=18",
        "http://sh.qihoo.com/pc/983b3d157f91af18b?sign=360_e39369d1",
        "http://china.rednet.cn/c/2018/07/05/4671927.htm",
        "http://news.enorth.com.cn/system/2018/07/05/035782857.shtml",
        "https://www.toutiao.com/i6573922350037729796/",
        "http://news.cnhubei.com/xw/sh/201807/t4132048.shtml",
        "https://www.toutiao.com/a6573774143949373956/"
    };
    // Copy into a mutable list, in the same order as the samples above.
    List<String> urlList = new ArrayList<>();
    for (String sample : sampleUrls) {
        urlList.add(sample);
    }
    for (SourceForwardBean bean : SourceForward.getSourceForward(urlList)) {
        System.out.println(bean.toString());
    }
}
// public static void main(String[] args) {
// List<String> urlList = new ArrayList<>();
// urlList.add("http://sh.qihoo.com/pc/99493b3bf136d8e20?sign=360_e39369d1");
// urlList.add("http://news.ctocio.com.cn/383/14543883.shtml");
// urlList.add("http://www.jn001.com/news/2018-07/05/content_561091.htm");
// urlList.add("http://www.ca800.com/fFa8D/bOTUBC1QfF/40944.aspx");
// urlList.add("http://sh.qihoo.com/pc/988470164f6c5ca14?sign=360_e39369d1");
// urlList.add("http://news.jstv.com/a/20180705/1530731642686.shtml?jsbcApp=1");
// urlList.add("https://tech.sina.cn/i/gn/2018-07-05/detail-ihexfcvi8155439.d.html?pos=18");
// urlList.add("http://sh.qihoo.com/pc/983b3d157f91af18b?sign=360_e39369d1");
// urlList.add("http://china.rednet.cn/c/2018/07/05/4671927.htm");
// urlList.add("http://news.enorth.com.cn/system/2018/07/05/035782857.shtml");
// urlList.add("https://www.toutiao.com/i6573922350037729796/");
// urlList.add("http://news.cnhubei.com/xw/sh/201807/t4132048.shtml");
// urlList.add("https://www.toutiao.com/a6573774143949373956/");
// List<SourceForwardBean> da = SourceForward.getSourceForward(urlList);
// for(SourceForwardBean sfb : da) {
// System.out.println(sfb.toString());
// }
// }
static class SourceForwardCrawlerThread extends Thread{
......
......@@ -38,12 +38,6 @@ public class URLLive {
List<UrlLiveBean> dataList = UrlLiveCrawlerThread.getUrlLiveCrawle(urlList);
for(UrlLiveBean ub : dataList){
String url = ub.getUrl();
if(!url.contains("http")){
url = "http://"+url;
}
if(!url.contains("www")){
url = url.replace("://", "://www.");
}
boolean live = ub.isLive();
if(dataMap.containsKey(url)){
Map<String,Object> map = dataMap.get(url);
......
......@@ -5,6 +5,7 @@ import java.net.Proxy;
import com.zhiwei.proxy.common.Definition.GroupType;
import com.zhiwei.proxy.core.ProxyClient;
import com.zhiwei.proxy.core.ProxyClientFactory;
import com.zhiwei.source_forward.config.ProxyConfig;
public class ProxyClientUtil {
......@@ -27,7 +28,7 @@ public class ProxyClientUtil {
if(client==null) {
synchronized (ProxyClientUtil.class) {
if(client==null) {
client = ProxyClientFactory.build("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
client = ProxyClientFactory.build(ProxyConfig.registry, ProxyConfig.group, GroupType.PROVIDER);
}
}
}
......
#registry=zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
#group=hangzhou
##########################测试地址##############################
registry=zookeeper://192.168.0.36:2181
group=local
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment