Commit 0abfbd4a by zhiwei

添加自媒体匹配

parent 4e02a60f
This source diff could not be displayed because it is too large. You can view the blob instead.
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>source-forward</artifactId>
<version>0.2.1-SNAPSHOT</version>
<name>source-forward</name>
<description>验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)</description>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
<developers>
<developer>
<id>Bewilder</id>
<name>zhiwei zhang</name>
<email>zhangzhiwei@zhiweidata.com</email>
</developer>
</developers>
<dependencies>
<dependency>
<groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId>
<version>0.1.3-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.5.5.6-SNAPSHOT</version>
</dependency>
</dependencies>
<!-- Build / packaging configuration -->
<build>
<plugins>
<!-- Publish sources: attaches a sources jar; the jar goal is bound to the compile phase -->
<plugin>
<artifactId>maven-source-plugin</artifactId>
<version>2.4</version>
<configuration>
<attach>true</attach>
</configuration>
<executions>
<execution>
<phase>compile</phase>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Javadoc plugin: only the version is pinned here, no further configuration -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
</plugin>
<!-- Fixes garbled Chinese console output when running "mvn test" by passing UTF-8 as file.encoding to the test JVM -->
<!-- NOTE(review): forkMode is reported as deprecated by newer Surefire releases (forkCount / reuseForks replace it); confirm before upgrading the plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version>
<configuration>
<forkMode>once</forkMode>
<argLine>-Dfile.encoding=UTF-8</argLine>
</configuration>
</plugin>
</plugins>
</build>
<!-- Distribution management: manages distribution and supporting files (target repositories for snapshot and release artifacts) -->
<distributionManagement>
<snapshotRepository>
<!-- NOTE(review): this id is identical to the release repository id below; confirm that sharing one id is intended -->
<id>nexus-releases</id>
<!-- NOTE(review): "Porject" looks like a typo for "Project" -->
<name>User Porject Snapshot</name>
<url>http://192.168.0.30:8081/nexus/content/repositories/snapshots/</url>
<uniqueVersion>true</uniqueVersion>
</snapshotRepository>
<repository>
<id>nexus-releases</id>
<!-- NOTE(review): "Porject" looks like a typo for "Project" -->
<name>User Porject Release</name>
<url>http://192.168.0.30:8081/nexus/content/repositories/releases/</url>
</repository>
</distributionManagement>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>source-forward</artifactId>
<version>0.2.2-SNAPSHOT</version>
<name>source-forward</name>
<description>验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)</description>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
<developers>
<developer>
<id>Bewilder</id>
<name>zhiwei zhang</name>
<email>zhangzhiwei@zhiweidata.com</email>
</developer>
</developers>
<dependencies>
<dependency>
<groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId>
<version>0.1.6-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.6.1.0-SNAPSHOT</version>
</dependency>
</dependencies>
<!-- Build / packaging configuration -->
<build>
<plugins>
<!-- Publish sources: attaches a sources jar; the jar goal is bound to the compile phase -->
<plugin>
<artifactId>maven-source-plugin</artifactId>
<version>2.4</version>
<configuration>
<attach>true</attach>
</configuration>
<executions>
<execution>
<phase>compile</phase>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Javadoc plugin: only the version is pinned here, no further configuration -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.10.4</version>
</plugin>
<!-- Fixes garbled Chinese console output when running "mvn test" by passing UTF-8 as file.encoding to the test JVM -->
<!-- NOTE(review): forkMode is reported as deprecated by newer Surefire releases (forkCount / reuseForks replace it); confirm before upgrading the plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version>
<configuration>
<forkMode>once</forkMode>
<argLine>-Dfile.encoding=UTF-8</argLine>
</configuration>
</plugin>
</plugins>
</build>
<!-- Distribution management: manages distribution and supporting files (target repositories for snapshot and release artifacts) -->
<distributionManagement>
<snapshotRepository>
<!-- NOTE(review): this id is identical to the release repository id below; confirm that sharing one id is intended -->
<id>nexus-releases</id>
<!-- NOTE(review): "Porject" looks like a typo for "Project" -->
<name>User Porject Snapshot</name>
<url>http://192.168.0.30:8081/nexus/content/repositories/snapshots/</url>
<uniqueVersion>true</uniqueVersion>
</snapshotRepository>
<repository>
<id>nexus-releases</id>
<!-- NOTE(review): "Porject" looks like a typo for "Project" -->
<name>User Porject Release</name>
<url>http://192.168.0.30:8081/nexus/content/repositories/releases/</url>
</repository>
</distributionManagement>
</project>
\ No newline at end of file
package com.zhiwei.source_forward.config;
import java.io.InputStream;
import java.util.Properties;
/**
 * Proxy registry settings read once from {@code proxyip.properties} on the classpath.
 *
 * <p>The file is looked up through the current thread's context class loader while this
 * class is being initialized. If the file is missing or cannot be read, the problem is
 * reported on standard error and both fields stay {@code null}; class initialization
 * itself never fails.
 */
public class ProxyConfig {

    /** Name of the classpath resource that holds the settings. */
    private static final String RESOURCE = "proxyip.properties";

    /** Value of the {@code registry} property, or {@code null} when it was not loaded. */
    public static String registry;

    /** Value of the {@code group} property, or {@code null} when it was not loaded. */
    public static String group;

    static {
        Properties conf = new Properties();
        // try-with-resources closes the stream even when load() throws.
        try (InputStream is = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream(RESOURCE)) {
            if (is == null) {
                // Report the real cause instead of tripping over a NullPointerException in load().
                System.err.println("ProxyConfig: " + RESOURCE + " not found on the classpath");
            } else {
                conf.load(is);
                registry = conf.getProperty("registry");
                group = conf.getProperty("group");
            }
        } catch (Exception e) {
            // Kept broad on purpose: a failure here must not abort class initialization.
            System.err.println("ProxyConfig: failed to load " + RESOURCE + ": " + e);
        }
    }
}
package com.zhiwei.source_forward.config;
import java.io.InputStream;
import java.util.Properties;
/**
 * Proxy registry settings read once from {@code proxyip.properties} on the classpath.
 *
 * <p>The file is looked up through the current thread's context class loader while this
 * class is being initialized. Each property is read independently, so a missing or
 * malformed {@code proxyid} no longer prevents {@code group} from being loaded. If the
 * file is missing or cannot be read, the problem is reported on standard error and the
 * fields stay {@code null}; class initialization itself never fails.
 */
public class ProxyConfig {

    /** Name of the classpath resource that holds the settings. */
    private static final String RESOURCE = "proxyip.properties";

    /** Value of the {@code registry} property, or {@code null} when it was not loaded. */
    public static String registry;

    /** Numeric value of the {@code proxyid} property, or {@code null} when absent or not a number. */
    public static Long proxyid;

    /** Value of the {@code group} property, or {@code null} when it was not loaded. */
    public static String group;

    static {
        Properties conf = new Properties();
        // try-with-resources closes the stream even when load() throws.
        try (InputStream is = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream(RESOURCE)) {
            if (is == null) {
                // Report the real cause instead of tripping over a NullPointerException in load().
                System.err.println("ProxyConfig: " + RESOURCE + " not found on the classpath");
            } else {
                conf.load(is);
            }
        } catch (Exception e) {
            // Kept broad on purpose: a failure here must not abort class initialization.
            System.err.println("ProxyConfig: failed to load " + RESOURCE + ": " + e);
        }
        // An empty Properties object yields null for every key, matching the field defaults.
        registry = conf.getProperty("registry");
        group = conf.getProperty("group");
        proxyid = parseProxyId(conf.getProperty("proxyid"));
    }

    /** Parses the raw {@code proxyid} text; returns {@code null} when it is absent or not a valid long. */
    private static Long parseProxyId(String raw) {
        if (raw == null || raw.trim().isEmpty()) {
            return null;
        }
        try {
            return Long.valueOf(raw.trim());
        } catch (NumberFormatException e) {
            System.err.println("ProxyConfig: proxyid is not a valid number: " + raw);
            return null;
        }
    }
}
package com.zhiwei.source_forward.run;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution;
import com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler;
import com.zhiwei.source_forward.util.MediaSelfSourceDataCallBack;
/**
 * Matches article URLs against self-media accounts by submitting them to a
 * {@link MediaSelfSourceCrawler} and collecting the beans it reports.
 *
 * <p>Originally created 2019-12-05.
 *
 * @author 0xff
 */
public class MediaSelfSource {

    private static final Logger logger = LogManager.getLogger(MediaSelfSource.class);

    /**
     * Crawls the given URLs and returns every bean delivered to the crawler callback.
     *
     * @param urlList URLs to look up; {@code null} or empty yields an empty list
     * @return the collected beans in the order they were reported; never {@code null}
     */
    public static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList) {
        return MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList);
    }

    /**
     * Manual smoke test: initializes the proxy with fixed settings, looks up one sample
     * URL and prints each resulting bean.
     */
    public static void main(String[] args) {
        ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER, 10000002L);
        List<String> urlList = new ArrayList<>();
        urlList.add("https://wap.peopleapp.com/article/rmh12074926/0");
        List<MediaSelfSourceBean> beans = MediaSelfSource.getMediaSelfSource(urlList);
        for (MediaSelfSourceBean bean : beans) {
            System.out.println(bean.toString());
        }
    }

    /**
     * Holder for the blocking crawl helper. Although it extends {@link Thread}, nothing
     * in this class ever starts it; only the static helper is used.
     */
    static class MediaSelfSourceCrawlerThread extends Thread {

        /**
         * Submits all URLs to a new crawler, waits for the task to finish and returns
         * the beans gathered by the callback. Failures are logged and whatever was
         * collected up to that point is returned.
         */
        static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList) {
            // Synchronized wrapper: presumably onData may be invoked from crawler worker threads.
            final List<MediaSelfSourceBean> list =
                    Collections.synchronizedList(new ArrayList<MediaSelfSourceBean>());
            if (urlList == null || urlList.isEmpty()) {
                // Nothing to crawl; avoids relying on a caught NullPointerException.
                return list;
            }
            try {
                MediaSelfSourceCrawler crawler = new MediaSelfSourceCrawler();
                MediaSelfSourceDataCallBack callback = new MediaSelfSourceDataCallBack() {
                    @Override
                    public void onData(MediaSelfSourceBean data, Attribution attr) {
                        list.add(data);
                        logger.info("列表大小:::{}", list.size());
                    }
                };
                crawler.submitTask(callback, urlList.toArray(new String[0])).await();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // If the wait was interrupted, restore the flag so callers can observe it.
                    Thread.currentThread().interrupt();
                }
                // The exception is passed as the Throwable argument, so no "{}" placeholder is needed.
                logger.error(" 网媒自媒体号 判断 ", e);
            }
            return list;
        }
    }
}
package com.zhiwei.source_forward.run;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.source_forward.util.ProxyInit;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution;
import com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler;
import com.zhiwei.source_forward.util.MediaSelfSourceDataCallBack;
/**
 * Matches article URLs against self-media accounts by submitting them to a
 * {@link MediaSelfSourceCrawler} and collecting the beans it reports.
 *
 * <p>Originally created 2019-12-05.
 *
 * @author 0xff
 */
public class MediaSelfSource {

    private static final Logger logger = LogManager.getLogger(MediaSelfSource.class);

    /**
     * Crawls the given URLs and returns every bean delivered to the crawler callback.
     *
     * @param urlList URLs to look up; {@code null} or empty yields an empty list
     * @return the collected beans in the order they were reported; never {@code null}
     */
    public static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList) {
        return MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList);
    }

    /**
     * Manual smoke test: initializes the proxy through {@link ProxyInit}, looks up one
     * sample URL and prints each resulting bean.
     */
    public static void main(String[] args) {
        ProxyInit.initProxy();
        List<String> urlList = new ArrayList<>();
        urlList.add("https://www.tuicool.com/articles/nIfmu2B");
        List<MediaSelfSourceBean> beans = MediaSelfSource.getMediaSelfSource(urlList);
        for (MediaSelfSourceBean bean : beans) {
            System.out.println(bean.toString());
        }
    }

    /**
     * Holder for the blocking crawl helper. Although it extends {@link Thread}, nothing
     * in this class ever starts it; only the static helper is used.
     */
    static class MediaSelfSourceCrawlerThread extends Thread {

        /**
         * Submits all URLs to a new crawler, waits for the task to finish and returns
         * the beans gathered by the callback. Failures are logged and whatever was
         * collected up to that point is returned.
         */
        static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList) {
            // Synchronized wrapper: presumably onData may be invoked from crawler worker threads.
            final List<MediaSelfSourceBean> list =
                    Collections.synchronizedList(new ArrayList<MediaSelfSourceBean>());
            if (urlList == null || urlList.isEmpty()) {
                // Nothing to crawl; avoids relying on a caught NullPointerException.
                return list;
            }
            try {
                MediaSelfSourceCrawler crawler = new MediaSelfSourceCrawler();
                MediaSelfSourceDataCallBack callback = new MediaSelfSourceDataCallBack() {
                    @Override
                    public void onData(MediaSelfSourceBean data, Attribution attr) {
                        list.add(data);
                        logger.info("列表大小:::{}", list.size());
                    }
                };
                crawler.submitTask(callback, urlList.toArray(new String[0])).await();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // If the wait was interrupted, restore the flag so callers can observe it.
                    Thread.currentThread().interrupt();
                }
                // The exception is passed as the Throwable argument, so no "{}" placeholder is needed.
                logger.error(" 网媒自媒体号 判断 ", e);
            }
            return list;
        }
    }
}
package com.zhiwei.source_forward.run;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.source_forward.bean.SourceForwardBean;
import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution;
import com.zhiwei.source_forward.crawler.SourceForwardCrawler;
import com.zhiwei.source_forward.util.SourceForwardDataCallBack;
/**
 * Verifies whether articles are original or forwarded (reposted) by crawling their URLs
 * with a {@link SourceForwardCrawler}.
 *
 * <p>Originally created 2017-12-05.
 *
 * @author hero
 */
public class SourceForward {

    private static final Logger logger = LogManager.getLogger(SourceForward.class);

    /**
     * Crawls every URL key of {@code dataMap} and writes the verdict back into the
     * matching row.
     *
     * <p>For each crawled bean whose URL is a key of the map, the row receives
     * {@code "是否转发"}. For URLs containing {@code mp.weixin.qq.com} that value is taken
     * from the bean's {@code getIsforward()}. For all other URLs the row also receives
     * {@code "原来源"} (the crawled root source) and {@code "频道"} (the crawled channel),
     * and the verdict is {@code "原创"} when the root source is {@code null} or equals the
     * row's {@code "来源"} ignoring case and surrounding whitespace, otherwise {@code "转发"}.
     *
     * @author hero
     * @param dataMap rows keyed by article URL; rows are modified in place
     * @return the same {@code dataMap} instance; returned unchanged when {@code null} or empty
     */
    public static Map<String,Map<String,Object>> getSourceForward(Map<String,Map<String,Object>> dataMap){
        if (dataMap == null || dataMap.isEmpty()) {
            return dataMap;
        }
        List<String> urlList = new ArrayList<>(dataMap.keySet());
        for (SourceForwardBean sfb : SourceForwardCrawlerThread.getSourceForward(urlList)) {
            String url = sfb.getUrl();
            Map<String,Object> data = dataMap.get(url);
            if (data == null) {
                // URL not requested, or its row is null: nothing to update.
                continue;
            }
            String rootSource = sfb.getRoot_source();
            String isForward;
            if (url.contains("mp.weixin.qq.com")) {
                // For WeChat articles the bean's own verdict is used.
                isForward = sfb.getIsforward();
            } else {
                isForward = isOriginal(rootSource, data.get("来源")) ? "原创" : "转发";
                data.put("原来源", rootSource);
                data.put("频道", sfb.getChannel());
            }
            // The row is already the value stored in dataMap, so no re-put is required.
            data.put("是否转发", isForward);
        }
        return dataMap;
    }

    /**
     * Returns true when there is no root source or it matches the row's declared source.
     * Case folding uses {@code Locale.ROOT} so the result does not depend on the JVM's
     * default locale.
     */
    private static boolean isOriginal(String rootSource, Object declaredSource) {
        if (rootSource == null) {
            return true;
        }
        // String.valueOf mirrors the previous `+ ""` conversion (null becomes "null").
        String declared = String.valueOf(declaredSource);
        return rootSource.toUpperCase(java.util.Locale.ROOT).trim()
                .equals(declared.toUpperCase(java.util.Locale.ROOT).trim());
    }

    /**
     * Crawls a batch of URLs and returns the raw beans reported by the crawler.
     *
     * @param urlList URLs to crawl; {@code null} or empty yields an empty list
     * @return the collected beans; never {@code null}
     */
    public static List<SourceForwardBean> getSourceForward(List<String> urlList){
        return SourceForwardCrawlerThread.getSourceForward(urlList);
    }

    /**
     * Manual smoke test: initializes the proxy with fixed settings, crawls one sample URL
     * and prints each resulting bean.
     */
    public static void main(String[] args) {
        ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER, 10000002);
        List<String> urlList = new ArrayList<>();
        urlList.add("http://software.it168.com/a2019/0621/6005/000006005693.shtml");
        List<SourceForwardBean> beans = SourceForward.getSourceForward(urlList);
        for (SourceForwardBean sfb : beans) {
            System.out.println(sfb.toString());
        }
    }

    /**
     * Holder for the blocking crawl helper. Although it extends {@link Thread}, nothing
     * in this class ever starts it; only the static helper is used.
     */
    static class SourceForwardCrawlerThread extends Thread {

        /**
         * Submits all URLs to a new crawler, waits for the task to finish and returns
         * the beans gathered by the callback. Failures are logged and whatever was
         * collected up to that point is returned.
         */
        private static List<SourceForwardBean> getSourceForward(List<String> urlList){
            // Synchronized wrapper: presumably onData may be invoked from crawler worker threads.
            final List<SourceForwardBean> list =
                    Collections.synchronizedList(new ArrayList<SourceForwardBean>());
            if (urlList == null || urlList.isEmpty()) {
                // Nothing to crawl; avoids relying on a caught NullPointerException.
                return list;
            }
            try {
                SourceForwardCrawler crawler = new SourceForwardCrawler();
                SourceForwardDataCallBack callback = new SourceForwardDataCallBack() {
                    @Override
                    public void onData(SourceForwardBean data, Attribution attr) {
                        list.add(data);
                        logger.info("列表大小:::{}", list.size());
                    }
                };
                crawler.submitTask(callback, urlList.toArray(new String[0])).await();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // If the wait was interrupted, restore the flag so callers can observe it.
                    Thread.currentThread().interrupt();
                }
                // The exception is passed as the Throwable argument, so no "{}" placeholder is needed.
                logger.error(" 来源判断 出错 ", e);
            }
            return list;
        }
    }
}
package com.zhiwei.source_forward.run;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import com.zhiwei.source_forward.util.ProxyInit;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.zhiwei.source_forward.bean.SourceForwardBean;
import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution;
import com.zhiwei.source_forward.crawler.SourceForwardCrawler;
import com.zhiwei.source_forward.util.SourceForwardDataCallBack;
/**
 * Verifies whether articles are original or forwarded (reposted) by crawling their URLs
 * with a {@link SourceForwardCrawler}.
 *
 * <p>Originally created 2017-12-05.
 *
 * @author hero
 */
public class SourceForward {

    private static final Logger logger = LogManager.getLogger(SourceForward.class);

    /**
     * Crawls every URL key of {@code dataMap} and writes the verdict back into the
     * matching row.
     *
     * <p>For each crawled bean whose URL is a key of the map, the row receives
     * {@code "是否转发"}. For URLs containing {@code mp.weixin.qq.com} that value is taken
     * from the bean's {@code getIsforward()}. For all other URLs the row also receives
     * {@code "原来源"} (the crawled root source) and {@code "频道"} (the crawled channel),
     * and the verdict is {@code "原创"} when the root source is {@code null} or equals the
     * row's {@code "来源"} ignoring case and surrounding whitespace, otherwise {@code "转发"}.
     *
     * @author hero
     * @param dataMap rows keyed by article URL; rows are modified in place
     * @return the same {@code dataMap} instance; returned unchanged when {@code null} or empty
     */
    public static Map<String,Map<String,Object>> getSourceForward(Map<String,Map<String,Object>> dataMap){
        if (dataMap == null || dataMap.isEmpty()) {
            return dataMap;
        }
        List<String> urlList = new ArrayList<>(dataMap.keySet());
        for (SourceForwardBean sfb : SourceForwardCrawlerThread.getSourceForward(urlList)) {
            String url = sfb.getUrl();
            Map<String,Object> data = dataMap.get(url);
            if (data == null) {
                // URL not requested, or its row is null: nothing to update.
                continue;
            }
            String rootSource = sfb.getRoot_source();
            String isForward;
            if (url.contains("mp.weixin.qq.com")) {
                // For WeChat articles the bean's own verdict is used.
                isForward = sfb.getIsforward();
            } else {
                isForward = isOriginal(rootSource, data.get("来源")) ? "原创" : "转发";
                data.put("原来源", rootSource);
                data.put("频道", sfb.getChannel());
            }
            // The row is already the value stored in dataMap, so no re-put is required.
            data.put("是否转发", isForward);
        }
        return dataMap;
    }

    /**
     * Returns true when there is no root source or it matches the row's declared source.
     * Case folding uses {@code Locale.ROOT} so the result does not depend on the JVM's
     * default locale.
     */
    private static boolean isOriginal(String rootSource, Object declaredSource) {
        if (rootSource == null) {
            return true;
        }
        // String.valueOf mirrors the previous `+ ""` conversion (null becomes "null").
        String declared = String.valueOf(declaredSource);
        return rootSource.toUpperCase(java.util.Locale.ROOT).trim()
                .equals(declared.toUpperCase(java.util.Locale.ROOT).trim());
    }

    /**
     * Crawls a batch of URLs and returns the raw beans reported by the crawler.
     *
     * @param urlList URLs to crawl; {@code null} or empty yields an empty list
     * @return the collected beans; never {@code null}
     */
    public static List<SourceForwardBean> getSourceForward(List<String> urlList){
        return SourceForwardCrawlerThread.getSourceForward(urlList);
    }

    /**
     * Manual smoke test: initializes the proxy through {@link ProxyInit}, crawls one
     * sample URL and prints each resulting bean.
     */
    public static void main(String[] args) {
        ProxyInit.initProxy();
        List<String> urlList = new ArrayList<>();
        urlList.add("http://software.it168.com/a2019/0621/6005/000006005693.shtml");
        List<SourceForwardBean> beans = SourceForward.getSourceForward(urlList);
        for (SourceForwardBean sfb : beans) {
            System.out.println(sfb.toString());
        }
    }

    /**
     * Holder for the blocking crawl helper. Although it extends {@link Thread}, nothing
     * in this class ever starts it; only the static helper is used.
     */
    static class SourceForwardCrawlerThread extends Thread {

        /**
         * Submits all URLs to a new crawler, waits for the task to finish and returns
         * the beans gathered by the callback. Failures are logged and whatever was
         * collected up to that point is returned.
         */
        private static List<SourceForwardBean> getSourceForward(List<String> urlList){
            // Synchronized wrapper: presumably onData may be invoked from crawler worker threads.
            final List<SourceForwardBean> list =
                    Collections.synchronizedList(new ArrayList<SourceForwardBean>());
            if (urlList == null || urlList.isEmpty()) {
                // Nothing to crawl; avoids relying on a caught NullPointerException.
                return list;
            }
            try {
                SourceForwardCrawler crawler = new SourceForwardCrawler();
                SourceForwardDataCallBack callback = new SourceForwardDataCallBack() {
                    @Override
                    public void onData(SourceForwardBean data, Attribution attr) {
                        list.add(data);
                        logger.info("列表大小:::{}", list.size());
                    }
                };
                crawler.submitTask(callback, urlList.toArray(new String[0])).await();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // If the wait was interrupted, restore the flag so callers can observe it.
                    Thread.currentThread().interrupt();
                }
                // The exception is passed as the Throwable argument, so no "{}" placeholder is needed.
                logger.error(" 来源判断 出错 ", e);
            }
            return list;
        }
    }
}
package com.zhiwei.source_forward.run;
import java.io.IOException;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.zhiwei.async.TaskBoot;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.source_forward.bean.UrlLiveBean;
import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution;
import com.zhiwei.source_forward.crawler.UrlLiveCrawler;
import com.zhiwei.source_forward.crawler.UrlLiveCrawlerNew;
import com.zhiwei.source_forward.util.UrlLiveDataCallback;
import okhttp3.Request;
import okhttp3.Response;
/**
 * Verifies whether article links have been deleted by crawling them with a
 * {@link UrlLiveCrawler}.
 *
 * <p>Originally created 2017-12-06.
 *
 * @author hero
 */
public class URLLive {

    private static final Logger logger = LogManager.getLogger(URLLive.class);

    // NOTE(review): not referenced anywhere in this class. Kept because building it may
    // have side effects elsewhere; confirm and remove if it is truly unused.
    private static HttpBoot httpBoot = new HttpBoot.Builder().build();

    /**
     * Crawls every URL key of {@code dataMap} and records the result in the matching row.
     *
     * <p>For each crawled bean whose URL is a key of the map, {@code "是否删除"} is set to
     * {@code true} when the bean's {@code isLive()} is 1 and to {@code false} when it is 0;
     * any other status leaves that entry untouched. {@code "title"} is always set from the
     * bean's title.
     *
     * @author hero
     * @param dataMap rows keyed by article URL; rows are modified in place
     * @return the same {@code dataMap} instance; returned unchanged when {@code null} or empty
     */
    public static Map<String,Map<String,Object>> verificationURLLive(Map<String,Map<String,Object>> dataMap){
        if (dataMap == null || dataMap.isEmpty()) {
            return dataMap;
        }
        List<String> urlList = new ArrayList<>(dataMap.keySet());
        // Logged instead of printed to stdout, consistent with the other crawler entry points.
        logger.info("urls to verify: {}", urlList.size());
        for (UrlLiveBean ub : UrlLiveCrawlerThread.getUrlLiveCrawle(urlList)) {
            Map<String,Object> row = dataMap.get(ub.getUrl());
            if (row == null) {
                // URL not requested, or its row is null: nothing to update.
                continue;
            }
            int status = ub.isLive();
            if (status == 1) {
                row.put("是否删除", true);
            } else if (status == 0) {
                row.put("是否删除", false);
            }
            // The row is already the value stored in dataMap, so no re-put is required.
            row.put("title", ub.getTitle());
        }
        return dataMap;
    }

    /**
     * Crawls a batch of URLs and returns the raw beans reported by the crawler.
     *
     * <p>NOTE(review): the previous comment listed the status codes as 1 = deleted,
     * 2 = not deleted, -1 = access failed, but the map overload in this class treats 0
     * as "not deleted". Confirm the actual codes against {@code UrlLiveBean}.
     *
     * @param urlList URLs to verify; {@code null} or empty yields an empty list
     * @return the collected beans; never {@code null}
     */
    public static List<UrlLiveBean> verificationURLLive(List<String> urlList){
        return UrlLiveCrawlerThread.getUrlLiveCrawle(urlList);
    }

    /**
     * Manual smoke test: initializes the proxy with fixed settings, verifies one sample
     * URL and prints each resulting bean.
     */
    public static void main(String[] args) {
        ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER, 10000002);
        List<String> urlList = new ArrayList<>();
        urlList.add("http://a.mp.uc.cn/article.html?uc_param_str=frdnsnpfvecpntnwprdssskt#!wm_aid=038b8207b444418c845f43e4d2d3a754");
        List<UrlLiveBean> beans = URLLive.verificationURLLive(urlList);
        for (UrlLiveBean bean : beans) {
            System.out.println(bean.toString());
        }
    }

    /**
     * Holder for the blocking crawl helper. Although it extends {@link Thread}, nothing
     * in this class ever starts it; only the static helper is used.
     */
    static class UrlLiveCrawlerThread extends Thread {

        /**
         * Submits all URLs to a new crawler, waits for the task to finish and returns
         * the beans gathered by the callback. Failures are logged and whatever was
         * collected up to that point is returned.
         */
        private static List<UrlLiveBean> getUrlLiveCrawle(List<String> urlList){
            // Synchronized wrapper: presumably onData may be invoked from crawler worker threads.
            final List<UrlLiveBean> list =
                    Collections.synchronizedList(new ArrayList<UrlLiveBean>());
            if (urlList == null || urlList.isEmpty()) {
                // Nothing to crawl; avoids relying on a caught NullPointerException.
                return list;
            }
            try {
                UrlLiveCrawler crawler = new UrlLiveCrawler();
                UrlLiveDataCallback callback = new UrlLiveDataCallback() {
                    @Override
                    public void onData(UrlLiveBean data, Attribution attr) {
                        list.add(data);
                        logger.info("列表大小:::{}", list.size());
                    }
                };
                crawler.submitTask(callback, urlList.toArray(new String[0])).await();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // If the wait was interrupted, restore the flag so callers can observe it.
                    Thread.currentThread().interrupt();
                }
                // The exception is passed as the Throwable argument, so no "{}" placeholder is needed.
                logger.error(" 数据采集运行有问题 ", e);
            }
            return list;
        }
    }
}
package com.zhiwei.source_forward.run;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import com.zhiwei.source_forward.util.ProxyInit;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.source_forward.bean.UrlLiveBean;
import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution;
import com.zhiwei.source_forward.crawler.UrlLiveCrawler;
import com.zhiwei.source_forward.util.UrlLiveDataCallback;
/**
 * Verifies whether article links have been deleted by crawling them with a
 * {@link UrlLiveCrawler}.
 *
 * <p>Originally created 2017-12-06.
 *
 * @author hero
 */
public class URLLive {

    private static final Logger logger = LogManager.getLogger(URLLive.class);

    // NOTE(review): not referenced anywhere in this class. Kept because building it may
    // have side effects elsewhere; confirm and remove if it is truly unused.
    private static HttpBoot httpBoot = new HttpBoot.Builder().build();

    /**
     * Crawls every URL key of {@code dataMap} and records the result in the matching row.
     *
     * <p>For each crawled bean whose URL is a key of the map, {@code "是否删除"} is set to
     * {@code true} when the bean's {@code isLive()} is 1 and to {@code false} when it is 0;
     * any other status leaves that entry untouched. {@code "title"} is always set from the
     * bean's title.
     *
     * @author hero
     * @param dataMap rows keyed by article URL; rows are modified in place
     * @return the same {@code dataMap} instance; returned unchanged when {@code null} or empty
     */
    public static Map<String,Map<String,Object>> verificationURLLive(Map<String,Map<String,Object>> dataMap){
        if (dataMap == null || dataMap.isEmpty()) {
            return dataMap;
        }
        List<String> urlList = new ArrayList<>(dataMap.keySet());
        // Logged instead of printed to stdout, consistent with the other crawler entry points.
        logger.info("urls to verify: {}", urlList.size());
        for (UrlLiveBean ub : UrlLiveCrawlerThread.getUrlLiveCrawle(urlList)) {
            Map<String,Object> row = dataMap.get(ub.getUrl());
            if (row == null) {
                // URL not requested, or its row is null: nothing to update.
                continue;
            }
            int status = ub.isLive();
            if (status == 1) {
                row.put("是否删除", true);
            } else if (status == 0) {
                row.put("是否删除", false);
            }
            // The row is already the value stored in dataMap, so no re-put is required.
            row.put("title", ub.getTitle());
        }
        return dataMap;
    }

    /**
     * Crawls a batch of URLs and returns the raw beans reported by the crawler.
     *
     * <p>NOTE(review): the previous comment listed the status codes as 1 = deleted,
     * 2 = not deleted, -1 = access failed, but the map overload in this class treats 0
     * as "not deleted". Confirm the actual codes against {@code UrlLiveBean}.
     *
     * @param urlList URLs to verify; {@code null} or empty yields an empty list
     * @return the collected beans; never {@code null}
     */
    public static List<UrlLiveBean> verificationURLLive(List<String> urlList){
        return UrlLiveCrawlerThread.getUrlLiveCrawle(urlList);
    }

    /**
     * Manual smoke test: initializes the proxy through {@link ProxyInit}, verifies one
     * sample URL and prints each resulting bean.
     */
    public static void main(String[] args) {
        ProxyInit.initProxy();
        List<String> urlList = new ArrayList<>();
        urlList.add("http://a.mp.uc.cn/article.html?uc_param_str=frdnsnpfvecpntnwprdssskt#!wm_aid=038b8207b444418c845f43e4d2d3a754");
        List<UrlLiveBean> beans = URLLive.verificationURLLive(urlList);
        for (UrlLiveBean bean : beans) {
            System.out.println(bean.toString());
        }
    }

    /**
     * Holder for the blocking crawl helper. Although it extends {@link Thread}, nothing
     * in this class ever starts it; only the static helper is used.
     */
    static class UrlLiveCrawlerThread extends Thread {

        /**
         * Submits all URLs to a new crawler, waits for the task to finish and returns
         * the beans gathered by the callback. Failures are logged and whatever was
         * collected up to that point is returned.
         */
        private static List<UrlLiveBean> getUrlLiveCrawle(List<String> urlList){
            // Synchronized wrapper: presumably onData may be invoked from crawler worker threads.
            final List<UrlLiveBean> list =
                    Collections.synchronizedList(new ArrayList<UrlLiveBean>());
            if (urlList == null || urlList.isEmpty()) {
                // Nothing to crawl; avoids relying on a caught NullPointerException.
                return list;
            }
            try {
                UrlLiveCrawler crawler = new UrlLiveCrawler();
                UrlLiveDataCallback callback = new UrlLiveDataCallback() {
                    @Override
                    public void onData(UrlLiveBean data, Attribution attr) {
                        list.add(data);
                        logger.info("列表大小:::{}", list.size());
                    }
                };
                crawler.submitTask(callback, urlList.toArray(new String[0])).await();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // If the wait was interrupted, restore the flag so callers can observe it.
                    Thread.currentThread().interrupt();
                }
                // The exception is passed as the Throwable argument, so no "{}" placeholder is needed.
                logger.error(" 数据采集运行有问题 ", e);
            }
            return list;
        }
    }
}
package com.zhiwei.source_forward.util;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.source_forward.config.ProxyConfig;
/**
 * Initializes the crawler proxy from the settings exposed by {@link ProxyConfig}.
 *
 * <p>Originally created 2020-01-06.
 *
 * @author xMx
 */
public class ProxyInit {

    /**
     * Builds a {@link SimpleConfig} from {@code ProxyConfig.registry},
     * {@code ProxyConfig.proxyid} and {@code ProxyConfig.group}, using a fixed application
     * name, and passes it to {@link ProxyFactory#init}.
     *
     * @throws NullPointerException if {@code ProxyConfig.proxyid} is {@code null}, i.e. the
     *         proxy configuration was not loaded or contained no usable {@code proxyid}
     */
    public static void initProxy() {
        String address = ProxyConfig.registry;
        String appName = "xumiaoxin";
        // Unboxing a null Long would throw a NullPointerException without any message;
        // requireNonNull keeps the exception type but explains what is wrong.
        long appId = java.util.Objects.requireNonNull(ProxyConfig.proxyid,
                "ProxyConfig.proxyid is null: proxyid is missing or invalid in proxyip.properties");
        ProxyFactory.init(SimpleConfig.builder()
                .registry(address)
                .appName(appName)
                .appId(appId)
                .group(ProxyConfig.group)
                .build());
    }
}
# Disabled alternative settings (presumably the non-test environment; TODO confirm):
#registry=zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
#group=hangzhou
########################## test address ##############################
# Registry address; ProxyConfig reads it with getProperty("registry").
# No "group" entry is active in this file, so ProxyConfig.group stays null.
registry=zookeeper://192.168.0.36:2181
# Disabled alternative settings (presumably the non-test environment; TODO confirm):
#registry=zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
#group=hangzhou
########################## test address ##############################
# Registry address; ProxyConfig reads it with getProperty("registry").
registry=zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181
# Application id; ProxyConfig parses it with Long.valueOf, so it must be a plain integer.
proxyid=10000002
# Group name; ProxyInit passes it to the proxy configuration builder.
group=local
\ No newline at end of file
//package com.zhiwei.source_forward.sourceforward.test;
//
//import java.util.HashMap;
//import java.util.Map;
//
//import org.junit.Test;
//
//import com.zhiwei.source_forward.run.SourceForward;
//
///**
// * @ClassName: SourceForwardTest
// * @Description: 来源验证
// * @author hero
// * @date 2017年12月6日 上午9:55:13
// */
//public class MediaSelfSourceTest {
//
// @Test
// public void sourceForwardTest(){
// Map<String,Map<String,Object>> dataMap = new HashMap<String,Map<String,Object>>();
// String url = "https://www.toutiao.com/a6549872248428167687/";
// Map<String,Object> data = new HashMap<String,Object>();
// dataMap.put(url, data);
//
// SourceForward.getMediaSelfSource(dataMap);
//
// }
//
//
//
//
//
//
//
//
//}
//package com.zhiwei.source_forward.sourceforward.test;
//
//import java.util.HashMap;
//import java.util.Map;
//
//import org.junit.Test;
//
//import com.zhiwei.source_forward.run.SourceForward;
//
///**
// * @ClassName: SourceForwardTest
// * @Description: 来源验证
// * @author hero
// * @date 2017年12月6日 上午9:55:13
// */
//public class MediaSelfSourceTest {
//
// @Test
// public void sourceForwardTest(){
// Map<String,Map<String,Object>> dataMap = new HashMap<String,Map<String,Object>>();
// String url = "https://www.toutiao.com/a6549872248428167687/";
// Map<String,Object> data = new HashMap<String,Object>();
// dataMap.put(url, data);
//
// SourceForward.getMediaSelfSource(dataMap);
//
// }
//
//
//
//
//
//
//
//
//}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment