第一次git项目提交测试,来源验证及是否删除验证程序

0f93a339 · zhiwei · 0f93a339 · 0f93a339 · 0f93a339 · 0f93a339
Commit 0f93a339 authored Dec 08, 2017 by zhiwei
14 changed files
--- a/pom.xml
+++ b/pom.xml
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>com.zhiwei</groupId>
+  <artifactId>source_forward</artifactId>
+  <version>0.0.1-SNAPSHOT</version>
+  <name>source_forward</name>
+  <description>验证网媒的转发关系及链接的有效性（转发验证微信及自媒体匹配率不高）</description>
+  
+  <properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+		<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+	</properties>
+
+	<developers>
+		<developer>
+			<id>Bewilder</id>
+			<name>zhiwei zhang</name>
+			<email>zhangzhiwei@zhiweidata.com</email>
+		</developer>
+	</developers>
+  
+  <dependencies>
+  	<dependency>
+		    <groupId>cn.edu.hfut.dmic.webcollector</groupId>
+		    <artifactId>WebCollector</artifactId>
+		    <version>2.71</version>
+		</dependency>
+		
+		
+		<dependency>
+			<groupId>us.codecraft</groupId>
+			<artifactId>webmagic-core</artifactId>
+			<version>0.6.1</version>
+		</dependency>
+
+
+		<dependency>
+			<groupId>us.codecraft</groupId>
+			<artifactId>webmagic-extension</artifactId>
+			<version>0.6.1</version>
+			<exclusions>
+				<exclusion>
+					<groupId>org.slf4j</groupId>
+					<artifactId>slf4j-log4j12</artifactId>
+				</exclusion>
+			</exclusions>
+		</dependency>
+
+		<dependency>
+			<groupId>us.codecraft</groupId>
+			<artifactId>webmagic-saxon</artifactId>
+			<version>0.6.1</version>
+		</dependency>
+		<dependency>
+			<groupId>com.zhiwei</groupId>
+			<artifactId>zhiweiTools</artifactId>
+			<version>0.0.6-SNAPSHOT</version>
+		</dependency>
+  </dependencies>
+  
+  <!-- Build / packaging configuration -->
+	<build>
+		<plugins>
+			<!-- Publish sources: runs the source plugin's "jar" goal during the
+			     compile phase and attaches the resulting sources artifact. -->
+        	<plugin>
+				<artifactId>maven-source-plugin</artifactId>
+				<version>2.4</version>
+				<configuration>
+				<attach>true</attach>
+				</configuration>
+				<executions>
+					<execution>
+						<phase>compile</phase>
+						<goals>
+							<goal>jar</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+			<!-- Javadoc plugin: only the version is pinned here; no execution is bound. -->
+			<plugin>
+   				<groupId>org.apache.maven.plugins</groupId>
+   				<artifactId>maven-javadoc-plugin</artifactId>
+   				<version>2.10.4</version>
+			</plugin>
+
+			<!-- Works around garbled Chinese console output when running "mvn test":
+			     the forked test JVM is started with file.encoding=UTF-8. -->
+			<!-- NOTE(review): forkMode is reported as deprecated by recent Surefire
+			     versions in favour of forkCount/reuseForks; confirm before upgrading. -->
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-surefire-plugin</artifactId>
+				<version>2.19.1</version>
+				<configuration>
+					<forkMode>once</forkMode>
+					<argLine>-Dfile.encoding=UTF-8</argLine>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+
+
+	<!-- Distribution management: where "mvn deploy" publishes snapshot and release artifacts -->
+	<!-- NOTE(review): both repositories use the id "nexus-releases"; presumably this is
+	     so one <server> credential entry in settings.xml covers both. Confirm. -->
+	<!-- NOTE(review): the <name> values contain the typo "Porject" (Project). -->
+	<distributionManagement>
+		<snapshotRepository>
+			<id>nexus-releases</id>
+			<name>User Porject Snapshot</name>
+			<url>http://192.168.0.30:8081/nexus/content/repositories/snapshots/</url>
+			<!-- NOTE(review): uniqueVersion is believed to be ignored by Maven 3; verify. -->
+			<uniqueVersion>true</uniqueVersion>
+		</snapshotRepository>
+		<repository>
+			<id>nexus-releases</id>
+			<name>User Porject Release</name>
+			<url>http://192.168.0.30:8081/nexus/content/repositories/releases/</url>
+		</repository>
+	</distributionManagement>
+  
+  
+  
+</project>
\ No newline at end of file
--- a/src/main/java/com/zhiwei/source_forward/crawler/SourceForwardPageProcessor.java
+++ b/src/main/java/com/zhiwei/source_forward/crawler/SourceForwardPageProcessor.java
+package com.zhiwei.source_forward.crawler;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.jsoup.nodes.Node;
+import com.zhiwei.source_forward.util.SourceData;
+import com.zhiwei.source_forward.util.TreateData;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.processor.PageProcessor;
+
+/**
+ * WebMagic page processor that determines, for each crawled page, the root
+ * source the article is attributed to and the channel it belongs to.
+ *
+ * <p>The result is published as the page field {@code "data"}: a map with the
+ * keys {@code "url"}, {@code "channel"} and {@code "root_source"}.
+ */
+public class SourceForwardPageProcessor implements PageProcessor {
+
+	/** Channel reported when the page is a 404 or could not be analysed. */
+	private static final String DEFAULT_CHANNEL = "新闻";
+
+	/** Known source names, fetched once from {@link SourceData} when this class is loaded. */
+	private static List<String> sourceList = SourceData.getSourceList();
+
+	/** Crawl settings: 3 cycle retries, 1500 sleep time, 10000 timeout, browser-like headers. */
+	private Site site = Site.me()
+			.setCycleRetryTimes(3)
+			.setSleepTime(1500)
+			.setTimeOut(10000)
+			.addHeader("User-Agent",
+					"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0")
+			.addHeader("Accept",
+					"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
+
+	/**
+	 * Analyses one downloaded page and stores the outcome in the page field {@code "data"}.
+	 *
+	 * <p>Pages with status 404, and pages whose analysis throws, are reported with a
+	 * {@code null} root source and the default channel.
+	 *
+	 * @param page the downloaded page
+	 */
+	@Override
+	public void process(Page page) {
+		String rootSource = null;
+		String channel = DEFAULT_CHANNEL;
+		try {
+			if (page.getStatusCode() != 404) {
+				List<Node> headNodes = page.getHtml().getDocument().head().childNodes();
+				rootSource = TreateData.matchSource(page.getUrl().get(), page.getHtml().toString(), sourceList);
+				channel = TreateData.matchChannel(headNodes);
+			}
+		} catch (Exception e) {
+			// Any failure during matching falls back to the defaults.
+			rootSource = null;
+			channel = DEFAULT_CHANNEL;
+		}
+
+		String url = page.getUrl().get();
+		System.out.println(url+"======="+channel+"================="+rootSource);
+
+		Map<String,String> data = new HashMap<String,String>();
+		data.put("url", url);
+		data.put("channel", channel);
+		data.put("root_source", rootSource);
+		page.putField("data", data);
+	}
+
+	/**
+	 * @return the crawl settings used by this processor
+	 */
+	@Override
+	public Site getSite() {
+		return site;
+	}
+
+}
--- a/src/main/java/com/zhiwei/source_forward/crawler/UrlLivePageProcessor.java
+++ b/src/main/java/com/zhiwei/source_forward/crawler/UrlLivePageProcessor.java
+package com.zhiwei.source_forward.crawler;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Node;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import us.codecraft.webmagic.Page;
+import us.codecraft.webmagic.Site;
+import us.codecraft.webmagic.processor.PageProcessor;
+
+/**
+ * WebMagic page processor that decides whether the content behind a URL has
+ * been removed.
+ *
+ * <p>The result is published as the page field {@code "data"}: a map with the
+ * keys {@code "url"} and {@code "live"}. Despite its name, {@code "live"} is
+ * {@code true} when the page is considered deleted or invalid (HTTP 404, or
+ * one of the content rules in {@link #matchDel(Page)} matched).
+ */
+public class UrlLivePageProcessor implements PageProcessor{
+	private static Logger logger = LoggerFactory.getLogger(UrlLivePageProcessor.class);
+	private Site site = Site.me().setCycleRetryTimes(3).setSleepTime(1500)
+							.setTimeOut(15000)
+							.addHeader("User-Agent",
+									"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0")
+							.addHeader("Accept",
+									"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
+
+	/**
+	 * Classifies one downloaded page and stores the outcome in the page field {@code "data"}.
+	 *
+	 * <ul>
+	 *   <li>status 200: the flag is the result of {@link #matchDel(Page)};</li>
+	 *   <li>status 404: the flag is {@code true};</li>
+	 *   <li>any other status: the flag is {@code false}.</li>
+	 * </ul>
+	 *
+	 * @param page the downloaded page; a {@code null} page is ignored
+	 */
+	@Override
+	public void process(Page page) {
+		// The original null check did not protect the page.getUrl() call below it;
+		// with no page there is nothing to report, so return early.
+		if (page == null) {
+			return;
+		}
+
+		int status = page.getStatusCode();
+		boolean deleted;
+		if (status == 200) {
+			deleted = matchDel(page);
+		} else {
+			deleted = (status == 404);
+		}
+
+		Map<String,Object> data = new HashMap<String,Object>();
+		data.put("url", page.getUrl().get());
+		data.put("live", deleted);
+		page.putField("data", data);
+	}
+
+	/**
+	 * @return the crawl settings used by this processor
+	 */
+	@Override
+	public Site getSite() {
+		return site;
+	}
+
+	/**
+	 * Applies the "deleted page" rules in a fixed order and stops at the first match.
+	 * The step number of the deciding rule is logged.
+	 *
+	 * <p>For URLs containing {@code huanqiu.com} the decision is delegated entirely
+	 * to the huanqiu rule (step 5); later rules are not evaluated for those pages.
+	 *
+	 * @param page a successfully downloaded page
+	 * @return {@code true} if a rule classifies the page as deleted or invalid
+	 */
+	public boolean matchDel(Page page){
+		Document doc = page.getHtml().getDocument();
+		if (rulerHead(doc)) {
+			logStep(page, 1);
+			return true;
+		}
+		if (rulerYaoyan(doc)) {
+			logStep(page, 2);
+			return true;
+		}
+		if (rulerWeigui(doc)) {
+			logStep(page, 3);
+			return true;
+		}
+		if (rulerTousu(doc)) {
+			logStep(page, 4);
+			return true;
+		}
+		if (page.getUrl().get().contains("huanqiu.com")) {
+			logStep(page, 5);
+			return rulerHuanqiuWuxiao(doc);
+		}
+		if (rulerBucunzai(doc)) {
+			logStep(page, 6);
+			return true;
+		}
+		if (rulerKong(doc)) {
+			logStep(page, 7);
+			return true;
+		}
+		if (rulerZhaoshang(doc)) {
+			logStep(page, 8);
+			return true;
+		}
+		if (rulerYidian(doc)) {
+			logStep(page, 9);
+			return true;
+		}
+		return false;
+	}
+
+	/** Logs which rule step decided the outcome for the given page. */
+	private void logStep(Page page, int step) {
+		logger.info("{}检测规则：第{}步",page.getUrl(),step);
+	}
+
+	/**
+	 * WeChat "rumor" rule: the element with class {@code pic_rumor} has exactly the rumor label as text.
+	 *
+	 * @param doc parsed page
+	 * @return {@code true} if the rule matches
+	 */
+	private boolean rulerYaoyan(Document doc)
+	{
+		return "谣言".equals(doc.select(".pic_rumor").text());
+	}
+
+	/**
+	 * WeChat "content violation" rule: the first paragraph of {@code .text_area}
+	 * is exactly the violation notice.
+	 *
+	 * @param doc parsed page
+	 * @return {@code true} if the rule matches
+	 */
+	private boolean rulerWeigui(Document doc)
+	{
+		return "此内容因违规无法查看".equals(doc.select(".text_area > p:nth-child(1)").text());
+	}
+
+	/**
+	 * Warning-icon rule: the page contains an {@code <i>} whose class attribute
+	 * is exactly {@code "icon_msg warn"}.
+	 *
+	 * @param doc parsed page
+	 * @return {@code true} if the rule matches
+	 */
+	private boolean rulerTousu(Document doc)
+	{
+		return !doc.select("i[class=\"icon_msg warn\"]").isEmpty();
+	}
+
+	/**
+	 * Huanqiu rule: the page contains a {@code <div>} whose class attribute is exactly {@code "errMsg"}.
+	 *
+	 * @param doc parsed page
+	 * @return {@code true} if the rule matches
+	 */
+	private boolean rulerHuanqiuWuxiao(Document doc)
+	{
+		return !doc.select("div[class=\"errMsg\"]").isEmpty();
+	}
+
+	/**
+	 * Empty-page rule: the serialized {@code body} and the serialized {@code head}
+	 * are both shorter than 14 characters.
+	 *
+	 * @param doc parsed page
+	 * @return {@code true} if the rule matches
+	 */
+	private boolean rulerKong(Document doc)
+	{
+		return doc.select("body").toString().length() < 14
+				&& doc.select("head").toString().length() < 14;
+	}
+
+	/**
+	 * "Content does not exist" rule: the page text contains one of two known notices.
+	 *
+	 * @param doc parsed page
+	 * @return {@code true} if the rule matches
+	 */
+	private boolean rulerBucunzai(Document doc)
+	{
+		// Extract the text once instead of once per phrase.
+		String text = doc.text();
+		return text.contains("很抱歉，您访问的页面不存在") || text.contains("该内容已被发布者删除");
+	}
+
+	/**
+	 * Zhaoshang rule: the third child of the first {@code div} with class
+	 * {@code "paths"} is an empty link.
+	 *
+	 * @param doc parsed page
+	 * @return {@code true} if the rule matches; {@code false} when the element
+	 *         or its third child is absent
+	 */
+	private boolean rulerZhaoshang(Document doc)
+	{
+		// Explicit presence checks replace the former catch-all around first()/child(2).
+		org.jsoup.nodes.Element paths = doc.select("div[class=\"paths\"]").first();
+		if (paths == null || paths.children().size() < 3) {
+			return false;
+		}
+		return "<a href=\"\"> </a>".equals(paths.child(2).toString());
+	}
+
+	/**
+	 * Yidian rule: a {@code div} whose class attribute is exactly {@code "content"}
+	 * contains the "article not found" notice.
+	 *
+	 * @param doc parsed page
+	 * @return {@code true} if the rule matches
+	 */
+	private boolean rulerYidian(Document doc)
+	{
+		return doc.select("div[class=\"content\"]").text().contains("文章没有找到");
+	}
+
+	/**
+	 * Title rule: a {@code <title>} inside the document head contains {@code "404"}.
+	 *
+	 * <p>The title is read through the parsed DOM rather than by splitting the
+	 * serialized HTML, so a title element carrying attributes is also
+	 * recognised and an empty title no longer relies on a caught exception.
+	 *
+	 * @param doc parsed page
+	 * @return {@code true} if the rule matches; {@code false} when there is no head
+	 */
+	private boolean rulerHead(Document doc)
+	{
+		org.jsoup.nodes.Element head = doc.head();
+		if (head == null) {
+			return false;
+		}
+		return head.select("title").text().contains("404");
+	}
+
+}
--- a/src/main/java/com/zhiwei/source_forward/downloader/MyDownLoader.java
+++ b/src/main/java/com/zhiwei/source_forward/downloader/MyDownLoader.java
--- a/src/main/java/com/zhiwei/source_forward/pipeline/SourceForwardDataPipeline.java
+++ b/src/main/java/com/zhiwei/source_forward/pipeline/SourceForwardDataPipeline.java
+package com.zhiwei.source_forward.pipeline;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import us.codecraft.webmagic.ResultItems;
+import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.pipeline.Pipeline;
+
+/**
+ * Pipeline that collects the {@code "data"} field of every processed page into a list.
+ *
+ * <p>The spider that uses this pipeline is started with several threads, so
+ * {@link #process(ResultItems, Task)} can be invoked concurrently. All access
+ * to the backing list is therefore synchronized on this instance; an
+ * unsynchronized {@code ArrayList.add} could lose entries or throw.
+ */
+public class SourceForwardDataPipeline implements Pipeline {
+    private List<Map<String, Object>> dataList;
+
+	/**
+	 * Creates a pipeline that appends to the given list.
+	 *
+	 * @param dataList list that receives the collected results
+	 */
+	public SourceForwardDataPipeline(List<Map<String, Object>> dataList) {
+		super();
+		this.dataList = dataList;
+	}
+
+	/** Creates a pipeline backed by a new, empty list. */
+	public SourceForwardDataPipeline() {
+		super();
+		this.dataList = new ArrayList<>();
+	}
+
+	/**
+	 * @return the list holding the results collected so far (the live list, not a copy)
+	 */
+	public synchronized List<Map<String, Object>> getDataList() {
+		return dataList;
+	}
+
+	/**
+	 * Replaces the list that results are appended to.
+	 *
+	 * @param dataList the new backing list
+	 */
+	public synchronized void setDataList(List<Map<String, Object>> dataList) {
+		this.dataList = dataList;
+	}
+
+	/**
+	 * Appends the {@code "data"} field of the given result to the list, if present.
+	 *
+	 * @param resultItems results extracted from one page
+	 * @param task the task the page belongs to (unused)
+	 */
+	@Override
+	public synchronized void process(ResultItems resultItems, Task task) {
+		Map<String, Object> data = resultItems.get("data");
+		if (data != null) {
+			dataList.add(data);
+		}
+	}
+
+}
--- a/src/main/java/com/zhiwei/source_forward/pipeline/UrlLivePipeline.java
+++ b/src/main/java/com/zhiwei/source_forward/pipeline/UrlLivePipeline.java
+package com.zhiwei.source_forward.pipeline;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import us.codecraft.webmagic.ResultItems;
+import us.codecraft.webmagic.Task;
+import us.codecraft.webmagic.pipeline.Pipeline;
+
+/**
+ * Pipeline that collects the {@code "data"} field of every processed page into a list.
+ *
+ * <p>The spider that uses this pipeline is started with several threads, so
+ * {@link #process(ResultItems, Task)} can be invoked concurrently. All access
+ * to the backing list is therefore synchronized on this instance; an
+ * unsynchronized {@code ArrayList.add} could lose entries or throw.
+ */
+public class UrlLivePipeline implements Pipeline{
+
+	private List<Map<String, Object>> dataList;
+
+	/**
+	 * Creates a pipeline that appends to the given list.
+	 *
+	 * @param dataList list that receives the collected results
+	 */
+	public UrlLivePipeline(List<Map<String, Object>> dataList) {
+		super();
+		this.dataList = dataList;
+	}
+
+	/** Creates a pipeline backed by a new, empty list. */
+	public UrlLivePipeline() {
+		super();
+		this.dataList = new ArrayList<>();
+	}
+
+	/**
+	 * @return the list holding the results collected so far (the live list, not a copy)
+	 */
+	public synchronized List<Map<String, Object>> getDataList() {
+		return dataList;
+	}
+
+	/**
+	 * Replaces the list that results are appended to.
+	 *
+	 * @param dataList the new backing list
+	 */
+	public synchronized void setDataList(List<Map<String, Object>> dataList) {
+		this.dataList = dataList;
+	}
+
+	/**
+	 * Appends the {@code "data"} field of the given result to the list, if present.
+	 *
+	 * @param resultItems results extracted from one page
+	 * @param task the task the page belongs to (unused)
+	 */
+	@Override
+	public synchronized void process(ResultItems resultItems, Task task) {
+		Map<String, Object> data = resultItems.get("data");
+		if (data != null) {
+			dataList.add(data);
+		}
+	}
+}
--- a/src/main/java/com/zhiwei/source_forward/run/SourceForward.java
+++ b/src/main/java/com/zhiwei/source_forward/run/SourceForward.java
+package com.zhiwei.source_forward.run;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import com.zhiwei.source_forward.crawler.SourceForwardPageProcessor;
+import com.zhiwei.source_forward.downloader.MyDownLoader;
+import com.zhiwei.source_forward.pipeline.SourceForwardDataPipeline;
+
+import us.codecraft.webmagic.Spider;
+
+/**
+ * @ClassName: SourceForward 
+ * @Description: 验证文章是否为转发 
+ * @author hero 
+ * @date 2017年12月5日 下午7:03:57
+ */
+public class SourceForward {
+	
+	/**
+	 * @Title: getSourceForward 
+	 * @author hero 
+	 * @Description: 验证文章是否转发
+	 * @param @param dataMap
+	 * @param @return 设定文件 
+	 * @return Map<String,Map<String,Object>> 返回类型
+	 */
+	public static Map<String,Map<String,Object>> getSourceForward(Map<String,Map<String,Object>> dataMap){
+		//启动验证来源程序
+		SourceForwardDataPipeline pipeline = new SourceForwardDataPipeline();
+		Spider spider = Spider.create(new SourceForwardPageProcessor());
+		for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){
+			spider.addUrl(entry.getKey());
+		}
+		spider.setDownloader(new MyDownLoader());
+		spider.addPipeline(pipeline);
+		spider.thread(5).run();
+		
+		List<Map<String,Object>> sourceForwardList = pipeline.getDataList();
+		for(Map<String,Object> sourceMap : sourceForwardList){
+			String url = sourceMap.get("url")+"";
+			String root_source = sourceMap.get("root_source")!=null?sourceMap.get("root_source").toString():null;
+			String channel = sourceMap.get("channel")+"";
+			//整合数据及验证转发原创
+			if(dataMap.containsKey(url)){
+				Map<String,Object> data = dataMap.get(url);
+				String source = data.get("来源")+"";
+				String isForward = "转发";
+				if(root_source == null){
+					isForward = "原创";
+				}else if(root_source.equals(source)){
+					isForward = "原创";
+				}
+				data.put("是否转发", isForward);
+				data.put("原来源", root_source);
+				data.put("频道", channel);
+				
+				dataMap.put(url, data);
+			}
+		}
+		
+		return dataMap;
+	} 
+	
+	
+	
+	
+	
+	
+	
+	
+	
+
+}
--- a/src/main/java/com/zhiwei/source_forward/run/URLLive.java
+++ b/src/main/java/com/zhiwei/source_forward/run/URLLive.java
+package com.zhiwei.source_forward.run;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import com.zhiwei.source_forward.crawler.UrlLivePageProcessor;
+import com.zhiwei.source_forward.pipeline.UrlLivePipeline;
+
+import us.codecraft.webmagic.Spider;
+
+/**
+ * Entry point for checking whether the pages behind a set of URLs have been deleted.
+ *
+ * @author hero
+ */
+public class URLLive {
+
+	/**
+	 * Crawls every URL used as a key of {@code dataMap} and writes the
+	 * "deleted" flag into the matching rows in place.
+	 *
+	 * <p>A crawled URL is matched against the keys exactly first. Only when
+	 * there is no exact match is the legacy normalized form tried (see
+	 * {@link #normalizeUrl(String)}). Previously only the normalized form was
+	 * looked up, so keys without a {@code www} part could never be matched.
+	 *
+	 * @param dataMap rows keyed by URL; {@code null} or empty is returned unchanged
+	 * @return the same {@code dataMap} instance, with annotated rows
+	 */
+	public static Map<String,Map<String,Object>> verificationURLLive(Map<String,Map<String,Object>> dataMap){
+		if (dataMap == null || dataMap.isEmpty()) {
+			return dataMap;
+		}
+
+		// Crawl all URLs and collect the per-page results.
+		UrlLivePipeline pipeline = new UrlLivePipeline();
+		Spider spider = Spider.create(new UrlLivePageProcessor());
+		for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){
+			spider.addUrl(entry.getKey());
+		}
+		spider.addPipeline(pipeline);
+		spider.thread(5).run();
+
+		// Merge the crawl results back into the caller's rows.
+		List<Map<String,Object>> dataList = pipeline.getDataList();
+		for(Map<String,Object> data : dataList){
+			String url = data.get("url")+"";
+			String key = dataMap.containsKey(url) ? url : normalizeUrl(url);
+			Map<String,Object> map = dataMap.get(key);
+			if (map == null) {
+				continue;
+			}
+			// A missing flag is treated as "not deleted" instead of failing on unboxing.
+			boolean live = Boolean.TRUE.equals(data.get("live"));
+			map.put("是否删除", live);
+		}
+		return dataMap;
+	}
+
+	/**
+	 * Legacy key normalization: prefixes {@code http://} when the URL does not
+	 * contain {@code "http"}, then inserts {@code www.} after {@code ://} when
+	 * the URL does not contain {@code "www"}.
+	 */
+	private static String normalizeUrl(String url) {
+		String normalized = url;
+		if(!normalized.contains("http")){
+			normalized = "http://"+normalized;
+		}
+		if(!normalized.contains("www")){
+			normalized = normalized.replace("://", "://www.");
+		}
+		return normalized;
+	}
+
+}
--- a/src/main/java/com/zhiwei/source_forward/spider/MySpider.java
+++ b/src/main/java/com/zhiwei/source_forward/spider/MySpider.java
--- a/src/main/java/com/zhiwei/source_forward/util/SourceData.java
+++ b/src/main/java/com/zhiwei/source_forward/util/SourceData.java
+package com.zhiwei.source_forward.util;
+
+import java.io.BufferedReader;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import com.zhiwei.zhiweiTools.order.TreatOrder;
+
+/**
+ * Registry of known source names, loaded once from the classpath resource
+ * {@code sourceList.txt} and extendable at runtime.
+ *
+ * <p>Names are stored upper-cased; each name is mapped to its own hash code,
+ * which is the value {@link #getSourceList()} hands to the ordering helper.
+ *
+ * @author hero
+ */
+public class SourceData {
+
+	private static Map<String,Integer> sourceMap;
+
+	static {
+		initSourceList();
+	}
+
+	/**
+	 * Loads the base source list from {@code sourceList.txt}, one name per line.
+	 *
+	 * <p>Lines are trimmed and blank lines are skipped, so an empty name is
+	 * never registered. The file is decoded as UTF-8 rather than with the
+	 * platform default charset.
+	 * NOTE(review): UTF-8 is assumed because the build declares UTF-8 as the
+	 * project encoding; confirm the resource is stored that way.
+	 *
+	 * @return the populated map, or {@code null} if the resource is missing or
+	 *         cannot be read (the registry then stays empty or partially filled)
+	 */
+	private static Map<String,Integer> initSourceList(){
+		sourceMap = new HashMap<String,Integer>();
+		InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("sourceList.txt");
+		if (is == null) {
+			// Report the missing resource explicitly instead of through a caught NullPointerException.
+			System.err.println("sourceList.txt not found on the classpath; source list is empty");
+			return null;
+		}
+		// try-with-resources closes the reader (and the wrapped stream) on failure as well.
+		try (BufferedReader br = new BufferedReader(
+				new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
+			String line;
+			while((line = br.readLine())!=null)
+			{
+				String source = line.trim().toUpperCase(java.util.Locale.ROOT);
+				if (!source.isEmpty()) {
+					sourceMap.put(source, source.hashCode());
+				}
+			}
+			return sourceMap;
+		} catch (Exception e) {
+			e.printStackTrace();
+			return null;
+		}
+	}
+
+
+	/**
+	 * Registers an additional, user-defined source name.
+	 *
+	 * @param source the name to add; it is trimmed and upper-cased before being stored
+	 * @return {@code true} if the name was added; {@code false} if it is
+	 *         {@code null}, blank, already present, or the registry is unavailable
+	 */
+	public static boolean addUserSource(String source){
+		if(sourceMap == null || source == null){
+			return false;
+		}
+		String key = source.trim().toUpperCase(java.util.Locale.ROOT);
+		if(key.isEmpty() || sourceMap.containsKey(key)){
+			return false;
+		}
+		sourceMap.put(key, key.hashCode());
+		return true;
+	}
+
+
+	/**
+	 * Returns the registered source names in the order produced by
+	 * {@code TreatOrder.treatOrderByCountDesc} over the name-to-hash-code map.
+	 *
+	 * @return the ordered names, or {@code null} when no source is registered
+	 */
+	public static List<String> getSourceList(){
+		List<String> result = null;
+		if(sourceMap!=null && sourceMap.size()>0){
+			result = new ArrayList<String>();
+			List<Entry<String,Integer>> dataList = TreatOrder.treatOrderByCountDesc(sourceMap);
+			for(Entry<String,Integer> entry : dataList){
+				result.add(entry.getKey());
+			}
+		}
+		return result;
+	}
+
+
+}
--- a/src/main/java/com/zhiwei/source_forward/util/TreateData.java
+++ b/src/main/java/com/zhiwei/source_forward/util/TreateData.java
--- a/src/main/resources/sourceList.txt
+++ b/src/main/resources/sourceList.txt
--- a/src/test/java/com/zhiwei/source_forward/sourceforward/test/SourceForwardTest.java
+++ b/src/test/java/com/zhiwei/source_forward/sourceforward/test/SourceForwardTest.java
+//package com.zhiwei.source_forward.sourceforward.test;
+//
+//import java.util.ArrayList;
+//import java.util.List;
+//import java.util.Map;
+//import java.util.Map.Entry;
+//
+//import org.junit.Test;
+//
+//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
+//import com.zhiwei.source_forward.run.SourceForward;
+//import com.zhiwei.source_forward.util.ReadMediaData;
+//
+///**
+// * @ClassName: SourceForwardTest 
+// * @Description: 来源验证
+// * @author hero 
+// * @date 2017年12月6日 上午9:55:13
+// */
+//public class SourceForwardTest {
+//	
+//	@Test
+//	public void sourceForwardTest(){
+//		String path = "E://稿件汇总网媒数据//JD稿件转载情况-1206.xlsx";
+//		PoiExcelUtil poi = PoiExcelUtil.getInstance();
+//		Map<String,Object> data = poi.importExcel(path, 0);
+//		@SuppressWarnings("unchecked")
+//		List<String> headList = (List<String>)data.get("head");
+//		headList.add("频道");
+//		headList.add("原来源");
+//		headList.add("是否转发");
+//		@SuppressWarnings("unchecked")
+//		List<Map<String,Object>> dataList = (List<Map<String,Object>>)data.get("body");
+//		
+//		Map<String,Map<String,Object>> dataMap = ReadMediaData.getUrl(dataList);
+//		dataMap = SourceForward.getSourceForward(dataMap);
+//		
+//		List<Map<String,Object>> bodyList = new ArrayList<>();
+//		for(Entry<String,Map<String,Object>> dataEntry : dataMap.entrySet()){
+//			bodyList.add(dataEntry.getValue());
+//		}
+//		poi.exportExcel(path ,"匹配后数据", headList, bodyList);
+//	}
+//	
+//	
+//	
+//	
+//	
+//	
+//	
+//
+//}
--- a/src/test/java/com/zhiwei/source_forward/sourceforward/test/URLLiveTest.java
+++ b/src/test/java/com/zhiwei/source_forward/sourceforward/test/URLLiveTest.java
+package com.zhiwei.source_forward.sourceforward.test;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.junit.Test;
+
+import com.zhiwei.source_forward.run.URLLive;
+
+/**
+ * Test holder for the URL validity check ({@code URLLive}).
+ *
+ * <p>The only test method is currently commented out, so this class runs no
+ * tests. The disabled test reads rows from a local Excel file, runs
+ * {@code URLLive.verificationURLLive} on them and writes the result back.
+ *
+ * @author hero
+ */
+public class URLLiveTest {
+	
+	
+//	@Test
+//	public void urlLiveTest(){
+//		String path = "E://稿件汇总网媒数据//福莱网媒.xlsx";
+//		PoiExcelUtil poi = PoiExcelUtil.getInstance();
+//		Map<String,Object> data = poi.importExcel(path, 0);
+//		@SuppressWarnings("unchecked")
+//		List<String> headList = (List<String>)data.get("head");
+//		headList.add("是否删除");
+//		@SuppressWarnings("unchecked")
+//		List<Map<String,Object>> dataList = (List<Map<String,Object>>)data.get("body");
+//		Map<String,Map<String,Object>> dataMap = ReadMediaData.getUrlLive(dataList);
+//		dataMap = URLLive.verificationURLLive(dataMap);
+//		
+//		List<Map<String,Object>> bodyList = new ArrayList<>();
+//		for(Entry<String,Map<String,Object>> dataEntry : dataMap.entrySet()){
+//			bodyList.add(dataEntry.getValue());
+//		}
+//		poi.exportExcel(path ,"匹配后数据", headList, bodyList);
+//	}
+	
+
+}