Commit 0abfbd4a by zhiwei

添加自媒体匹配

parent 4e02a60f
This source diff could not be displayed because it is too large. You can view the blob instead.
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>source-forward</artifactId> <artifactId>source-forward</artifactId>
<version>0.2.1-SNAPSHOT</version> <version>0.2.2-SNAPSHOT</version>
<name>source-forward</name> <name>source-forward</name>
<description>验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)</description> <description>验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)</description>
<properties> <properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties> </properties>
<developers> <developers>
<developer> <developer>
<id>Bewilder</id> <id>Bewilder</id>
<name>zhiwei zhang</name> <name>zhiwei zhang</name>
<email>zhangzhiwei@zhiweidata.com</email> <email>zhangzhiwei@zhiweidata.com</email>
</developer> </developer>
</developers> </developers>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>com.zhiwei.tools</groupId> <groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId> <artifactId>zhiwei-tools</artifactId>
<version>0.1.3-SNAPSHOT</version> <version>0.1.6-SNAPSHOT</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.zhiwei.crawler</groupId> <groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId> <artifactId>crawler-core</artifactId>
<version>0.5.5.6-SNAPSHOT</version> <version>0.6.1.0-SNAPSHOT</version>
</dependency> </dependency>
</dependencies> </dependencies>
<!-- 打包管理 --> <!-- 打包管理 -->
<build> <build>
<plugins> <plugins>
<!-- 发布源码 --> <!-- 发布源码 -->
<plugin> <plugin>
<artifactId>maven-source-plugin</artifactId> <artifactId>maven-source-plugin</artifactId>
<version>2.4</version> <version>2.4</version>
<configuration> <configuration>
<attach>true</attach> <attach>true</attach>
</configuration> </configuration>
<executions> <executions>
<execution> <execution>
<phase>compile</phase> <phase>compile</phase>
<goals> <goals>
<goal>jar</goal> <goal>jar</goal>
</goals> </goals>
</execution> </execution>
</executions> </executions>
</plugin> </plugin>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId> <artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version> <version>2.10.4</version>
</plugin> </plugin>
<!-- 解决maven test命令时console出现中文乱码 --> <!-- 解决maven test命令时console出现中文乱码 -->
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId> <artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version> <version>2.19.1</version>
<configuration> <configuration>
<forkMode>once</forkMode> <forkMode>once</forkMode>
<argLine>-Dfile.encoding=UTF-8</argLine> <argLine>-Dfile.encoding=UTF-8</argLine>
</configuration> </configuration>
</plugin> </plugin>
</plugins> </plugins>
</build> </build>
<!-- 分发管理:管理distribution和supporting files --> <!-- 分发管理:管理distribution和supporting files -->
<distributionManagement> <distributionManagement>
<snapshotRepository> <snapshotRepository>
<id>nexus-releases</id> <id>nexus-releases</id>
<name>User Porject Snapshot</name> <name>User Porject Snapshot</name>
<url>http://192.168.0.30:8081/nexus/content/repositories/snapshots/</url> <url>http://192.168.0.30:8081/nexus/content/repositories/snapshots/</url>
<uniqueVersion>true</uniqueVersion> <uniqueVersion>true</uniqueVersion>
</snapshotRepository> </snapshotRepository>
<repository> <repository>
<id>nexus-releases</id> <id>nexus-releases</id>
<name>User Porject Release</name> <name>User Porject Release</name>
<url>http://192.168.0.30:8081/nexus/content/repositories/releases/</url> <url>http://192.168.0.30:8081/nexus/content/repositories/releases/</url>
</repository> </repository>
</distributionManagement> </distributionManagement>
</project> </project>
\ No newline at end of file
package com.zhiwei.source_forward.config; package com.zhiwei.source_forward.config;
import java.io.InputStream; import java.io.InputStream;
import java.util.Properties; import java.util.Properties;
public class ProxyConfig { public class ProxyConfig {
static { static {
Properties conf = null; Properties conf = null;
try { try {
InputStream is = Thread.currentThread().getContextClassLoader() InputStream is = Thread.currentThread().getContextClassLoader()
.getResourceAsStream("proxyip.properties"); .getResourceAsStream("proxyip.properties");
conf = new Properties(); conf = new Properties();
conf.load(is); conf.load(is);
is.close(); is.close();
registry = conf.getProperty("registry"); registry = conf.getProperty("registry");
group = conf.getProperty("group"); proxyid = Long.valueOf(conf.getProperty("proxyid"));
} catch (Exception e) { group = conf.getProperty("group");
e.printStackTrace();
} } catch (Exception e) {
} e.printStackTrace();
}
}
public static String registry;
public static String group;
public static String registry;
} public static Long proxyid;
public static String group;
}
package com.zhiwei.source_forward.run; package com.zhiwei.source_forward.run;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import org.apache.logging.log4j.LogManager; import com.zhiwei.proxy.config.SimpleConfig;
import org.apache.logging.log4j.Logger; import com.zhiwei.source_forward.util.ProxyInit;
import org.apache.logging.log4j.LogManager;
import com.zhiwei.common.config.GroupType; import org.apache.logging.log4j.Logger;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean; import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution; import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution;
import com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler; import com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler;
import com.zhiwei.source_forward.util.MediaSelfSourceDataCallBack; import com.zhiwei.source_forward.util.MediaSelfSourceDataCallBack;
/** /**
* *
* @ClassName: MediaSelfSource * @ClassName: MediaSelfSource
* @Description: 自媒体号匹配 * @Description: 自媒体号匹配
* @author 0xff * @author 0xff
* @date 2019年12月5日 下午4:05:08 * @date 2019年12月5日 下午4:05:08
*/ */
public class MediaSelfSource { public class MediaSelfSource {
private static Logger logger = LogManager.getLogger(MediaSelfSource.class); private static Logger logger = LogManager.getLogger(MediaSelfSource.class);
public static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList) { public static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList) {
return MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList); return MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList);
} }
public static void main(String[] args) { public static void main(String[] args) {
ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER, 10000002L); ProxyInit.initProxy();
List<String> urlList = new ArrayList<>();
urlList.add("https://wap.peopleapp.com/article/rmh12074926/0"); List<String> urlList = new ArrayList<>();
List<MediaSelfSourceBean> u = MediaSelfSource.getMediaSelfSource(urlList); urlList.add("https://www.tuicool.com/articles/nIfmu2B");
for(MediaSelfSourceBean b : u) { List<MediaSelfSourceBean> u = MediaSelfSource.getMediaSelfSource(urlList);
System.out.println(b.toString()); for(MediaSelfSourceBean b : u) {
} System.out.println(b.toString());
} }
}
static class MediaSelfSourceCrawlerThread extends Thread{
static class MediaSelfSourceCrawlerThread extends Thread{
static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList){
List<MediaSelfSourceBean> list = Collections.synchronizedList(new ArrayList<MediaSelfSourceBean>()); static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList){
try{ List<MediaSelfSourceBean> list = Collections.synchronizedList(new ArrayList<MediaSelfSourceBean>());
MediaSelfSourceCrawler crawler = new MediaSelfSourceCrawler(); try{
MediaSelfSourceDataCallBack callback = new MediaSelfSourceDataCallBack() { MediaSelfSourceCrawler crawler = new MediaSelfSourceCrawler();
MediaSelfSourceDataCallBack callback = new MediaSelfSourceDataCallBack() {
@Override
public void onData(MediaSelfSourceBean data, Attribution attr) { @Override
list.add(data); public void onData(MediaSelfSourceBean data, Attribution attr) {
logger.info("列表大小:::{}",list.size()); list.add(data);
} logger.info("列表大小:::{}",list.size());
}
};
crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await(); };
}catch (Exception e){ crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await();
logger.error(" 网媒自媒体号 判断 {} ",e); }catch (Exception e){
} logger.error(" 网媒自媒体号 判断 {} ",e);
return list; }
} return list;
} }
}
}
}
package com.zhiwei.source_forward.run; package com.zhiwei.source_forward.run;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import org.apache.logging.log4j.LogManager; import com.zhiwei.source_forward.util.ProxyInit;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.source_forward.bean.SourceForwardBean;
import com.zhiwei.source_forward.bean.SourceForwardBean; import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution;
import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution; import com.zhiwei.source_forward.crawler.SourceForwardCrawler;
import com.zhiwei.source_forward.crawler.SourceForwardCrawler; import com.zhiwei.source_forward.util.SourceForwardDataCallBack;
import com.zhiwei.source_forward.util.SourceForwardDataCallBack;
/**
/** * @ClassName: SourceForward
* @ClassName: SourceForward * @Description: 验证文章是否为转发
* @Description: 验证文章是否为转发 * @author hero
* @author hero * @date 2017年12月5日 下午7:03:57
* @date 2017年12月5日 下午7:03:57 */
*/ public class SourceForward {
public class SourceForward {
private static Logger logger = LogManager.getLogger(SourceForward.class);
private static Logger logger = LogManager.getLogger(SourceForward.class);
/**
/** * @Title: getSourceForward
* @Title: getSourceForward * @author hero
* @author hero * @Description: 验证文章是否转发
* @Description: 验证文章是否转发 * @param @param dataMap
* @param @param dataMap * @param @return 设定文件
* @param @return 设定文件 * @return Map<String,Map<String,Object>> 返回类型
* @return Map<String,Map<String,Object>> 返回类型 */
*/ public static Map<String,Map<String,Object>> getSourceForward(Map<String,Map<String,Object>> dataMap){
public static Map<String,Map<String,Object>> getSourceForward(Map<String,Map<String,Object>> dataMap){ //启动验证来源程序
//启动验证来源程序 List<String> urlList = new ArrayList<>();
List<String> urlList = new ArrayList<>(); for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){
for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){ urlList.add(entry.getKey());
urlList.add(entry.getKey()); }
} List<SourceForwardBean> dataList = SourceForwardCrawlerThread.getSourceForward(urlList);
List<SourceForwardBean> dataList = SourceForwardCrawlerThread.getSourceForward(urlList); for(SourceForwardBean sfb : dataList){
for(SourceForwardBean sfb : dataList){ String url = sfb.getUrl();
String url = sfb.getUrl(); String root_source = sfb.getRoot_source();
String root_source = sfb.getRoot_source(); //整合数据及验证转发原创
//整合数据及验证转发原创 if(dataMap.containsKey(url)){
if(dataMap.containsKey(url)){ Map<String,Object> data = dataMap.get(url);
Map<String,Object> data = dataMap.get(url); String source = data.get("来源")+"";
String source = data.get("来源")+""; String isForward = "转发";
String isForward = "转发"; if(root_source == null || root_source.toUpperCase().trim().equals(source.toUpperCase().trim())){
if(root_source == null || root_source.toUpperCase().trim().equals(source.toUpperCase().trim())){ isForward = "原创";
isForward = "原创"; }
}
if(url.contains("mp.weixin.qq.com")){
if(url.contains("mp.weixin.qq.com")){ isForward = sfb.getIsforward();
isForward = sfb.getIsforward(); }else{
}else{ data.put("原来源", root_source);
data.put("原来源", root_source); data.put("频道", sfb.getChannel());
data.put("频道", sfb.getChannel()); }
}
data.put("是否转发", isForward);
data.put("是否转发", isForward); dataMap.put(url, data);
dataMap.put(url, data); }
} }
} return dataMap;
return dataMap; }
}
/**
/** *
* * @Description 批量传入链接获取数据
* @Description 批量传入链接获取数据 * @param urlList
* @param urlList * @return
* @return */
*/ public static List<SourceForwardBean> getSourceForward(List<String> urlList){
public static List<SourceForwardBean> getSourceForward(List<String> urlList){ //启动获取链接来源
//启动获取链接来源 return SourceForwardCrawlerThread.getSourceForward(urlList);
return SourceForwardCrawlerThread.getSourceForward(urlList); }
}
public static void main(String[] args) {
public static void main(String[] args) { ProxyInit.initProxy();
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER, 10000002); List<String> urlList = new ArrayList<>();
List<String> urlList = new ArrayList<>(); urlList.add("http://software.it168.com/a2019/0621/6005/000006005693.shtml");
urlList.add("http://software.it168.com/a2019/0621/6005/000006005693.shtml"); List<SourceForwardBean> da = SourceForward.getSourceForward(urlList);
List<SourceForwardBean> da = SourceForward.getSourceForward(urlList); for(SourceForwardBean sfb : da) {
for(SourceForwardBean sfb : da) { System.out.println(sfb.toString());
System.out.println(sfb.toString()); }
} }
}
static class SourceForwardCrawlerThread extends Thread{
static class SourceForwardCrawlerThread extends Thread{
private static List<SourceForwardBean> getSourceForward(List<String> urlList){
private static List<SourceForwardBean> getSourceForward(List<String> urlList){ List<SourceForwardBean> list = Collections.synchronizedList(new ArrayList<SourceForwardBean>());
List<SourceForwardBean> list = Collections.synchronizedList(new ArrayList<SourceForwardBean>()); try{
try{ SourceForwardCrawler crawler = new SourceForwardCrawler();
SourceForwardCrawler crawler = new SourceForwardCrawler(); SourceForwardDataCallBack callback = new SourceForwardDataCallBack() {
SourceForwardDataCallBack callback = new SourceForwardDataCallBack() {
@Override
@Override public void onData(SourceForwardBean data, Attribution attr) {
public void onData(SourceForwardBean data, Attribution attr) { list.add(data);
list.add(data); logger.info("列表大小:::{}",list.size());
logger.info("列表大小:::{}",list.size()); }
}
};
}; crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await();
crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await(); }catch (Exception e){
}catch (Exception e){ logger.error(" 来源判断 出错 {} ",e);
logger.error(" 来源判断 出错 {} ",e); }
} return list;
return list; }
} }
}
}
}
package com.zhiwei.source_forward.run; package com.zhiwei.source_forward.run;
import java.io.IOException; import java.util.ArrayList;
import java.net.Proxy; import java.util.Collections;
import java.util.ArrayList; import java.util.List;
import java.util.Collections; import java.util.Map;
import java.util.List; import java.util.Map.Entry;
import java.util.Map;
import java.util.Map.Entry; import com.zhiwei.source_forward.util.ProxyInit;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.Logger;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.async.TaskBoot; import com.zhiwei.source_forward.bean.UrlLiveBean;
import com.zhiwei.common.config.GroupType; import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.source_forward.crawler.UrlLiveCrawler;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.source_forward.util.UrlLiveDataCallback;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; /**
import com.zhiwei.source_forward.bean.UrlLiveBean; * @ClassName: URLLive
import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution; * @Description: 验证链接是否已删除
import com.zhiwei.source_forward.crawler.UrlLiveCrawler; * @author hero
import com.zhiwei.source_forward.crawler.UrlLiveCrawlerNew; * @date 2017年12月6日 上午9:22:49
import com.zhiwei.source_forward.util.UrlLiveDataCallback; */
public class URLLive {
import okhttp3.Request;
import okhttp3.Response; private static Logger logger = LogManager.getLogger(URLLive.class);
/** private static HttpBoot httpBoot = new HttpBoot.Builder().build();
* @ClassName: URLLive
* @Description: 验证链接是否已删除 /**
* @author hero * @Title: verificationURLLive
* @date 2017年12月6日 上午9:22:49 * @author hero
*/ * @Description: 验证数据是否已删除
public class URLLive { * @param @param dataMap
* @param @return 设定文件
private static Logger logger = LogManager.getLogger(URLLive.class); * @return Map<String,Map<String,Object>> 返回类型
*/
private static HttpBoot httpBoot = new HttpBoot.Builder().build(); public static Map<String,Map<String,Object>> verificationURLLive(Map<String,Map<String,Object>> dataMap){
List<String> urlList = new ArrayList<>();
/** //启动验证链接是否有效程序程序
* @Title: verificationURLLive for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){
* @author hero urlList.add(entry.getKey());
* @Description: 验证数据是否已删除 }
* @param @param dataMap System.out.println(urlList.size());
* @param @return 设定文件 //验证数据是否已删除
* @return Map<String,Map<String,Object>> 返回类型 List<UrlLiveBean> dataList = UrlLiveCrawlerThread.getUrlLiveCrawle(urlList);
*/ for(UrlLiveBean ub : dataList){
public static Map<String,Map<String,Object>> verificationURLLive(Map<String,Map<String,Object>> dataMap){ String url = ub.getUrl();
List<String> urlList = new ArrayList<>(); int i = ub.isLive();
//启动验证链接是否有效程序程序 if(dataMap.containsKey(url)){
for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){ Map<String,Object> map = dataMap.get(url);
urlList.add(entry.getKey()); if(i == 1) {
} map.put("是否删除", true);
System.out.println(urlList.size()); }else if(i == 0) {
//验证数据是否已删除 map.put("是否删除", false);
List<UrlLiveBean> dataList = UrlLiveCrawlerThread.getUrlLiveCrawle(urlList); }
for(UrlLiveBean ub : dataList){ map.put("title", ub.getTitle());
String url = ub.getUrl(); dataMap.put(url, map);
int i = ub.isLive(); }
if(dataMap.containsKey(url)){ }
Map<String,Object> map = dataMap.get(url); return dataMap;
if(i == 1) { }
map.put("是否删除", true);
}else if(i == 0) { /**
map.put("是否删除", false); *
} * @Description (TODO这里用一句话描述这个方法的作用)
map.put("title", ub.getTitle()); * @param urlList
dataMap.put(url, map); * @return UrlLiveBean 1 已删除 2 未删除 -1 访问失败
} */
} public static List<UrlLiveBean> verificationURLLive(List<String> urlList){
return dataMap; //启动验证链接是否有效程序程序
} return UrlLiveCrawlerThread.getUrlLiveCrawle(urlList);
}
/**
* public static void main(String[] args) {
* @Description (TODO这里用一句话描述这个方法的作用) ProxyInit.initProxy();
* @param urlList List<String> urlList = new ArrayList<>();
* @return UrlLiveBean 1 已删除 2 未删除 -1 访问失败 urlList.add("http://a.mp.uc.cn/article.html?uc_param_str=frdnsnpfvecpntnwprdssskt#!wm_aid=038b8207b444418c845f43e4d2d3a754");
*/
public static List<UrlLiveBean> verificationURLLive(List<String> urlList){ List<UrlLiveBean> u = URLLive.verificationURLLive(urlList);
//启动验证链接是否有效程序程序 for(UrlLiveBean b : u) {
return UrlLiveCrawlerThread.getUrlLiveCrawle(urlList); System.out.println(b.toString());
} }
}
public static void main(String[] args) {
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER, 10000002); static class UrlLiveCrawlerThread extends Thread{
List<String> urlList = new ArrayList<>();
urlList.add("http://a.mp.uc.cn/article.html?uc_param_str=frdnsnpfvecpntnwprdssskt#!wm_aid=038b8207b444418c845f43e4d2d3a754"); private static List<UrlLiveBean> getUrlLiveCrawle(List<String> urlList){
List<UrlLiveBean> list = Collections.synchronizedList(new ArrayList<UrlLiveBean>());
List<UrlLiveBean> u = URLLive.verificationURLLive(urlList); try{
for(UrlLiveBean b : u) { UrlLiveCrawler crawler = new UrlLiveCrawler();
System.out.println(b.toString()); UrlLiveDataCallback callback = new UrlLiveDataCallback() {
}
} @Override
public void onData(UrlLiveBean data, Attribution attr) {
static class UrlLiveCrawlerThread extends Thread{ list.add(data);
System.out.println("列表大小:::"+list.size());
private static List<UrlLiveBean> getUrlLiveCrawle(List<String> urlList){ }
List<UrlLiveBean> list = Collections.synchronizedList(new ArrayList<UrlLiveBean>());
try{ };
UrlLiveCrawler crawler = new UrlLiveCrawler(); crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await();
UrlLiveDataCallback callback = new UrlLiveDataCallback() { }catch (Exception e){
logger.error(" 数据采集运行有问题 {} ", e);
@Override }
public void onData(UrlLiveBean data, Attribution attr) { return list;
list.add(data); }
System.out.println("列表大小:::"+list.size()); }
}
}
};
crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await();
}catch (Exception e){
logger.error(" 数据采集运行有问题 {} ", e);
}
return list;
}
}
}
package com.zhiwei.source_forward.util;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.source_forward.config.ProxyConfig;
/**
 * Initializes the crawler proxy from the values exposed by {@link ProxyConfig}.
 *
 * @author xMx
 * @date 2020-01-06 09:29:04
 */
public class ProxyInit {

    /** Application name passed to the proxy configuration builder. */
    private static final String APP_NAME = "xumiaoxin";

    /**
     * Builds a {@link SimpleConfig} from the registry address, application id and
     * group held by {@link ProxyConfig} and hands it to {@link ProxyFactory#init}.
     *
     * @throws IllegalStateException if {@code ProxyConfig.proxyid} was never set
     */
    public static void initProxy() {
        // ProxyConfig.proxyid is a boxed Long. ProxyConfig's static initializer
        // catches any loading/parsing exception and only prints it, which leaves
        // the field null; unboxing it directly would then fail with a bare
        // NullPointerException that gives no hint about the real cause.
        Long configuredId = ProxyConfig.proxyid;
        if (configuredId == null) {
            throw new IllegalStateException(
                    "ProxyConfig.proxyid is not set; check that proxyip.properties is on the"
                            + " classpath and defines a numeric 'proxyid'");
        }
        long appId = configuredId;

        ProxyFactory.init(SimpleConfig.builder()
                .registry(ProxyConfig.registry)
                .appName(APP_NAME)
                .appId(appId)
                .group(ProxyConfig.group)
                .build());
    }
}
#registry=zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181 #registry=zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
#group=hangzhou #group=hangzhou
##########################测试地址############################## ##########################测试地址##############################
registry=zookeeper://192.168.0.36:2181 registry=zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181
proxyid=10000002
group=local group=local
\ No newline at end of file
//package com.zhiwei.source_forward.sourceforward.test; //package com.zhiwei.source_forward.sourceforward.test;
// //
//import java.util.HashMap; //import java.util.HashMap;
//import java.util.Map; //import java.util.Map;
// //
//import org.junit.Test; //import org.junit.Test;
// //
//import com.zhiwei.source_forward.run.SourceForward; //import com.zhiwei.source_forward.run.SourceForward;
// //
///** ///**
// * @ClassName: SourceForwardTest // * @ClassName: SourceForwardTest
// * @Description: 来源验证 // * @Description: 来源验证
// * @author hero // * @author hero
// * @date 2017年12月6日 上午9:55:13 // * @date 2017年12月6日 上午9:55:13
// */ // */
//public class MediaSelfSourceTest { //public class MediaSelfSourceTest {
// //
// @Test // @Test
// public void sourceForwardTest(){ // public void sourceForwardTest(){
// Map<String,Map<String,Object>> dataMap = new HashMap<String,Map<String,Object>>(); // Map<String,Map<String,Object>> dataMap = new HashMap<String,Map<String,Object>>();
// String url = "https://www.toutiao.com/a6549872248428167687/"; // String url = "https://www.toutiao.com/a6549872248428167687/";
// Map<String,Object> data = new HashMap<String,Object>(); // Map<String,Object> data = new HashMap<String,Object>();
// dataMap.put(url, data); // dataMap.put(url, data);
// //
// SourceForward.getMediaSelfSource(dataMap); // SourceForward.getMediaSelfSource(dataMap);
// //
// } // }
// //
// //
// //
// //
// //
// //
// //
// //
//} //}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment