修改ok初版提交

19bb2414 · yangchen · 76581f38 · 19bb2414 · 19bb2414 · 19bb2414
Commit 19bb2414 authored Aug 22, 2018 by yangchen
32 changed files
--- a/pom.xml
+++ b/pom.xml
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.zhiwei</groupId>
-  <artifactId>source_forward</artifactId>
-  <version>0.0.2-SNAPSHOT</version>
-  <name>source_forward</name>
+    <artifactId>source-forward</artifactId>
+    <version>0.0.3-SNAPSHOT</version>
+    <name>source-forward</name>
    <description>验证网媒的转发关系及链接的有效性（转发验证微信及自媒体匹配率不高）</description>

    <properties>
@@ -21,35 +22,14 @@

    <dependencies>
        <dependency>
-		    <groupId>cn.edu.hfut.dmic.webcollector</groupId>
-		    <artifactId>WebCollector</artifactId>
-		    <version>2.71</version>
-		</dependency>
-		<dependency>
-			<groupId>us.codecraft</groupId>
-			<artifactId>webmagic-core</artifactId>
-			<version>0.6.1</version>
-		</dependency>
-		<dependency>
-			<groupId>us.codecraft</groupId>
-			<artifactId>webmagic-extension</artifactId>
-			<version>0.6.1</version>
-			<exclusions>
-				<exclusion>
-					<groupId>org.slf4j</groupId>
-					<artifactId>slf4j-log4j12</artifactId>
-				</exclusion>
-			</exclusions>
-		</dependency>
-		<dependency>
-			<groupId>us.codecraft</groupId>
-			<artifactId>webmagic-saxon</artifactId>
-			<version>0.6.1</version>
+            <groupId>com.zhiwei.tools</groupId>
+            <artifactId>zhiwei-tools</artifactId>
+            <version>0.0.2-SNAPSHOT</version>
        </dependency>
        <dependency>
-			<groupId>com.zhiwei</groupId>
-			<artifactId>zhiweiTools</artifactId>
-			<version>0.0.6-SNAPSHOT</version>
+            <groupId>com.zhiwei.middleware</groupId>
+            <artifactId>proxy-client</artifactId>
+            <version>0.0.1-RELEASE</version>
        </dependency>
    </dependencies>

@@ -109,4 +89,13 @@



+    <dependencyManagement>
+        <dependencies>
+            <dependency>
+                <groupId>com.squareup.okhttp3</groupId>
+                <artifactId>okhttp</artifactId>
+                <version>3.11.0</version>
+            </dependency>
+        </dependencies>
+    </dependencyManagement>
 </project>
\ No newline at end of file
--- a/src/main/java/com/zhiwei/source_forward/bean/ContentBean.java
+++ b/src/main/java/com/zhiwei/source_forward/bean/ContentBean.java
+package com.zhiwei.source_forward.bean;
+
+/**
+ * Value holder pairing a URL with the content extracted for it.
+ */
+public class ContentBean {
+
+    private String url;
+
+    private String content;
+
+    /** Creates an empty bean; populate it through the setters. */
+    public ContentBean() {
+    }
+
+    /**
+     * Creates a fully populated bean.
+     *
+     * @param url     the URL the content belongs to
+     * @param content the extracted content
+     */
+    public ContentBean(String url, String content) {
+        this.url = url;
+        this.content = content;
+    }
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    public String getContent() {
+        return content;
+    }
+
+    public void setContent(String content) {
+        this.content = content;
+    }
+
+    @Override
+    public String toString() {
+        return "ContentBean [url=" + url + ", content=" + content + "]";
+    }
+
+    /**
+     * Immutable wrapper around an arbitrary attribute object.
+     *
+     * @author 0xff
+     */
+    public static class Attribution {
+        // Assigned once in the constructor and never changed afterwards.
+        private final Object attr;
+
+        /**
+         * Instances are created through {@link #of(Object)}.
+         *
+         * @param attr the attribute to wrap
+         */
+        private Attribution(Object attr) {
+            this.attr = attr;
+        }
+
+        /**
+         * Wraps the given attribute.
+         *
+         * @param attr the attribute to wrap
+         * @return a new {@code Attribution} holding {@code attr}
+         */
+        public static Attribution of(Object attr) {
+            return new Attribution(attr);
+        }
+
+        /**
+         * Returns the wrapped attribute.
+         *
+         * @return the object passed to {@link #of(Object)}
+         */
+        public Object get() {
+            return attr;
+        }
+    }
+
+}
--- a/src/main/java/com/zhiwei/source_forward/bean/MediaSelfSourceBean.java
+++ b/src/main/java/com/zhiwei/source_forward/bean/MediaSelfSourceBean.java
+package com.zhiwei.source_forward.bean;
+
+/**
+ * Value holder for a URL together with its source and channel strings.
+ */
+public class MediaSelfSourceBean {
+
+    private String url;
+
+    private String source;
+
+    private String channel;
+
+    /** Creates an empty bean; populate it through the setters. */
+    public MediaSelfSourceBean() {
+    }
+
+    /**
+     * Creates a fully populated bean.
+     *
+     * @param url     the URL
+     * @param source  the source value for the URL
+     * @param channel the channel value for the URL
+     */
+    public MediaSelfSourceBean(String url, String source, String channel) {
+        this.url = url;
+        this.source = source;
+        this.channel = channel;
+    }
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    public String getSource() {
+        return source;
+    }
+
+    public void setSource(String source) {
+        this.source = source;
+    }
+
+    public String getChannel() {
+        return channel;
+    }
+
+    public void setChannel(String channel) {
+        this.channel = channel;
+    }
+
+    @Override
+    public String toString() {
+        return "MediaSelfSourceBean [url=" + url + ", source=" + source
+                + ", channel=" + channel + "]";
+    }
+
+    /**
+     * Immutable wrapper around an arbitrary attribute object.
+     *
+     * @author 0xff
+     */
+    public static class Attribution {
+        // Assigned once in the constructor and never changed afterwards.
+        private final Object attr;
+
+        /**
+         * Instances are created through {@link #of(Object)}.
+         *
+         * @param attr the attribute to wrap
+         */
+        private Attribution(Object attr) {
+            this.attr = attr;
+        }
+
+        /**
+         * Wraps the given attribute.
+         *
+         * @param attr the attribute to wrap
+         * @return a new {@code Attribution} holding {@code attr}
+         */
+        public static Attribution of(Object attr) {
+            return new Attribution(attr);
+        }
+
+        /**
+         * Returns the wrapped attribute.
+         *
+         * @return the object passed to {@link #of(Object)}
+         */
+        public Object get() {
+            return attr;
+        }
+    }
+
+}
--- a/src/main/java/com/zhiwei/source_forward/bean/SourceForwardBean.java
+++ b/src/main/java/com/zhiwei/source_forward/bean/SourceForwardBean.java
+package com.zhiwei.source_forward.bean;
+
+/**
+ * Value holder for a URL with its channel, root source and forward flag.
+ */
+public class SourceForwardBean {
+
+    private String url;
+
+    private String channel;
+
+    private String root_source;
+
+    private String isforward;
+
+    /** Creates an empty bean; populate it through the setters. */
+    public SourceForwardBean() {
+    }
+
+    /**
+     * Creates a fully populated bean.
+     *
+     * @param url         the URL
+     * @param channel     the channel value
+     * @param root_source the root source value
+     * @param isforward   the forward flag, kept as a string
+     */
+    public SourceForwardBean(String url, String channel, String root_source,
+            String isforward) {
+        this.url = url;
+        this.channel = channel;
+        this.root_source = root_source;
+        this.isforward = isforward;
+    }
+
+    public String getIsforward() {
+        return isforward;
+    }
+
+    public void setIsforward(String isforward) {
+        this.isforward = isforward;
+    }
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    public String getChannel() {
+        return channel;
+    }
+
+    public void setChannel(String channel) {
+        this.channel = channel;
+    }
+
+    public String getRoot_source() {
+        return root_source;
+    }
+
+    public void setRoot_source(String root_source) {
+        this.root_source = root_source;
+    }
+
+    /**
+     * Returns a debug representation listing every field, including
+     * {@code isforward}.
+     */
+    @Override
+    public String toString() {
+        return "SourceForwardBean [url=" + url + ", channel=" + channel
+                + ", root_source=" + root_source + ", isforward=" + isforward + "]";
+    }
+
+    /**
+     * Immutable wrapper around an arbitrary attribute object.
+     *
+     * @author 0xff
+     */
+    public static class Attribution {
+        // Assigned once in the constructor and never changed afterwards.
+        private final Object attr;
+
+        /**
+         * Instances are created through {@link #of(Object)}.
+         *
+         * @param attr the attribute to wrap
+         */
+        private Attribution(Object attr) {
+            this.attr = attr;
+        }
+
+        /**
+         * Wraps the given attribute.
+         *
+         * @param attr the attribute to wrap
+         * @return a new {@code Attribution} holding {@code attr}
+         */
+        public static Attribution of(Object attr) {
+            return new Attribution(attr);
+        }
+
+        /**
+         * Returns the wrapped attribute.
+         *
+         * @return the object passed to {@link #of(Object)}
+         */
+        public Object get() {
+            return attr;
+        }
+    }
+
+}
--- a/src/main/java/com/zhiwei/source_forward/bean/UrlLiveBean.java
+++ b/src/main/java/com/zhiwei/source_forward/bean/UrlLiveBean.java
+package com.zhiwei.source_forward.bean;
+
+/**
+ * Value holder pairing a URL with a flag telling whether it is live.
+ */
+public class UrlLiveBean {
+
+    private String url;
+
+    private boolean isLive;
+
+    /** Creates an empty bean; {@code isLive} defaults to {@code false}. */
+    public UrlLiveBean() {
+    }
+
+    /**
+     * Creates a fully populated bean.
+     *
+     * @param url    the URL
+     * @param isLive the liveness flag for the URL
+     */
+    public UrlLiveBean(String url, boolean isLive) {
+        this.url = url;
+        this.isLive = isLive;
+    }
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    public boolean isLive() {
+        return isLive;
+    }
+
+    public void setLive(boolean isLive) {
+        this.isLive = isLive;
+    }
+
+    @Override
+    public String toString() {
+        return "UrlLiveBean [url=" + url + ", isLive=" + isLive + "]";
+    }
+
+    /**
+     * Immutable wrapper around an arbitrary attribute object.
+     *
+     * @author 0xff
+     */
+    public static class Attribution {
+        // Assigned once in the constructor and never changed afterwards.
+        private final Object attr;
+
+        /**
+         * Instances are created through {@link #of(Object)}.
+         *
+         * @param attr the attribute to wrap
+         */
+        private Attribution(Object attr) {
+            this.attr = attr;
+        }
+
+        /**
+         * Wraps the given attribute.
+         *
+         * @param attr the attribute to wrap
+         * @return a new {@code Attribution} holding {@code attr}
+         */
+        public static Attribution of(Object attr) {
+            return new Attribution(attr);
+        }
+
+        /**
+         * Returns the wrapped attribute.
+         *
+         * @return the object passed to {@link #of(Object)}
+         */
+        public Object get() {
+            return attr;
+        }
+    }
+
+}
--- a/src/main/java/com/zhiwei/source_forward/content/ContentExtractor.java
+++ b/src/main/java/com/zhiwei/source_forward/content/ContentExtractor.java
+package com.zhiwei.source_forward.content;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
+import org.jsoup.select.Elements;
+import org.jsoup.select.NodeVisitor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * ContentExtractor could extract content,title,time from news webpage
+ *
+ * @author hu
+ */
+public class ContentExtractor {
+
+    /** Logger used for extraction diagnostics. */
+    public static final Logger LOG = LoggerFactory.getLogger(ContentExtractor.class);
+
+    /** The parsed document this extractor operates on. */
+    protected Document doc;
+
+    /**
+     * Creates an extractor bound to the given document.
+     *
+     * @param doc the parsed document to analyse
+     */
+    ContentExtractor(Document doc) {
+        this.doc = doc;
+    }
+
+    /** Statistics recorded for every element visited by {@code computeInfo}. */
+    protected HashMap<Element, CountInfo> infoMap = new HashMap<Element, CountInfo>();
+
+    /**
+     * Text and tag statistics accumulated for one node and its subtree.
+     *
+     * <p>Declared {@code static} because it uses no state of the enclosing
+     * extractor; this avoids a hidden outer-instance reference in each of the
+     * many instances stored in {@code infoMap}.
+     */
+    static class CountInfo {
+
+        /** Number of text characters in the subtree. */
+        int textCount = 0;
+        /** Number of text characters that sit inside {@code a} elements. */
+        int linkTextCount = 0;
+        /** Number of elements in the subtree, including the node itself. */
+        int tagCount = 0;
+        /** Number of {@code a} elements in the subtree. */
+        int linkTagCount = 0;
+        /** Non-link text characters divided by non-link tags; 0 when either is 0. */
+        double density = 0;
+        /** Sum of the {@link #density} values of the direct children. */
+        double densitySum = 0;
+        /** Not assigned anywhere in the visible code. */
+        double score = 0;
+        /** Number of {@code p} elements in the subtree. */
+        int pCount = 0;
+        /** Length of every text node in the subtree. */
+        ArrayList<Integer> leafList = new ArrayList<Integer>();
+
+    }
+
+    /**
+     * Removes all {@code script}, {@code noscript}, {@code style},
+     * {@code iframe} and {@code br} elements from the document.
+     */
+    protected void clean() {
+        doc.select("script,noscript,style,iframe,br").remove();
+    }
+
+    /**
+     * Recursively gathers text and tag statistics for {@code node}.
+     *
+     * <p>Text nodes contribute their text length; elements aggregate the
+     * statistics of their children, are counted as a tag themselves and are
+     * recorded in {@code infoMap}. Any other node type yields empty statistics.
+     *
+     * @param node the node to analyse
+     * @return the statistics for {@code node} and its subtree
+     */
+    protected CountInfo computeInfo(Node node) {
+        if (node instanceof TextNode) {
+            // Leaf: only the text length matters.
+            int length = ((TextNode) node).text().length();
+            CountInfo leaf = new CountInfo();
+            leaf.textCount = length;
+            leaf.leafList.add(length);
+            return leaf;
+        }
+        if (!(node instanceof Element)) {
+            return new CountInfo();
+        }
+
+        Element element = (Element) node;
+        CountInfo total = new CountInfo();
+        for (Node child : element.childNodes()) {
+            CountInfo part = computeInfo(child);
+            total.textCount += part.textCount;
+            total.linkTextCount += part.linkTextCount;
+            total.tagCount += part.tagCount;
+            total.linkTagCount += part.linkTagCount;
+            total.leafList.addAll(part.leafList);
+            total.densitySum += part.density;
+            total.pCount += part.pCount;
+        }
+        total.tagCount++;
+
+        String name = element.tagName();
+        if ("a".equals(name)) {
+            // Everything below a link counts as link text.
+            total.linkTextCount = total.textCount;
+            total.linkTagCount++;
+        } else if ("p".equals(name)) {
+            total.pCount++;
+        }
+
+        int plainText = total.textCount - total.linkTextCount;
+        int plainTags = total.tagCount - total.linkTagCount;
+        total.density = (plainText == 0 || plainTags == 0) ? 0 : (plainText + 0.0) / plainTags;
+
+        infoMap.put(element, total);
+        return total;
+    }
+
+    /**
+     * Scores an element from the statistics stored for it in {@code infoMap}.
+     *
+     * <p>The score is the product of: the log of the square root of the
+     * leaf-length variance plus one, the sum of child densities, the log of the
+     * non-link text length plus one, and the base-10 log of the paragraph count
+     * plus two.
+     *
+     * @param tag an element previously visited by {@code computeInfo}
+     * @return the content score of {@code tag}
+     */
+    protected double computeScore(Element tag) {
+        CountInfo info = infoMap.get(tag);
+        double spreadFactor = Math.log(Math.sqrt(computeVar(info.leafList) + 1));
+        double textFactor = Math.log(info.textCount - info.linkTextCount + 1);
+        double paragraphFactor = Math.log10(info.pCount + 2);
+        // Same left-to-right multiplication order as before, so results match bit for bit.
+        return spreadFactor * info.densitySum * textFactor * paragraphFactor;
+    }
+
+    /**
+     * Computes the population variance of the given values.
+     *
+     * <p>Special cases: an empty list yields 0, and a single value {@code v}
+     * yields {@code v / 2} using integer division (so 5 gives 2.0).
+     *
+     * @param data the values to analyse
+     * @return the variance, or the special-case value described above
+     */
+    protected double computeVar(ArrayList<Integer> data) {
+        int count = data.size();
+        if (count == 0) {
+            return 0;
+        }
+        if (count == 1) {
+            // Integer division: the result is truncated before widening to double.
+            return data.get(0) / 2;
+        }
+
+        double total = 0;
+        for (int value : data) {
+            total += value;
+        }
+        double mean = total / count;
+
+        double squaredDeviation = 0;
+        for (int value : data) {
+            squaredDeviation += (value - mean) * (value - mean);
+        }
+        return squaredDeviation / count;
+    }
+
+    /**
+     * Finds the element most likely to hold the main content.
+     *
+     * <p>The document is cleaned, statistics are computed for the whole body,
+     * and the highest-scoring element is returned. The body itself and
+     * {@code a} elements are never candidates.
+     *
+     * @return the best-scoring element
+     * @throws Exception if no element scores above zero
+     */
+    public Element getContentElement() throws Exception {
+        clean();
+        Element body = doc.body();
+        computeInfo(body);
+
+        Element best = null;
+        double bestScore = 0;
+        for (Element candidate : infoMap.keySet()) {
+            if (candidate == body || "a".equals(candidate.tagName())) {
+                continue;
+            }
+            double candidateScore = computeScore(candidate);
+            if (candidateScore > bestScore) {
+                bestScore = candidateScore;
+                best = candidate;
+            }
+        }
+
+        if (best == null) {
+            throw new Exception("extraction failed");
+        }
+        return best;
+    }
+
+    /**
+     * Extracts the content element, URL, time and title into a {@link News}.
+     *
+     * <p>Content extraction is mandatory. Time and title extraction are best
+     * effort: a failure is logged and the field is left unset.
+     *
+     * @return the populated news object
+     * @throws Exception if the content element cannot be determined
+     */
+    public News getNews() throws Exception {
+        News news = new News();
+        Element contentElement;
+        try {
+            contentElement = getContentElement();
+            news.setContentElement(contentElement);
+        } catch (Exception ex) {
+            LOG.info("news content extraction failed,extraction abort", ex);
+            throw new Exception(ex);
+        }
+
+        if (doc.baseUri() != null) {
+            news.setUrl(doc.baseUri());
+        }
+
+        try {
+            news.setTime(getTime(contentElement));
+        } catch (Exception ex) {
+            // Fixed: this used to be logged as a title failure.
+            LOG.info("news time extraction failed", ex);
+        }
+
+        try {
+            news.setTitle(getTitle(contentElement));
+        } catch (Exception ex) {
+            LOG.info("title extraction failed", ex);
+        }
+        return news;
+    }
+
+    /**
+     * Year, month, day, hour, minute and second, each separated by one to five
+     * non-digit characters. The hour group accepts any one- or two-digit value
+     * starting with 0-2, so hours such as 00, 10 and 20 are matched as well.
+     */
+    private static final Pattern DATE_TIME_PATTERN = Pattern.compile(
+            "([1-2][0-9]{3})[^0-9]{1,5}?([0-1]?[0-9])[^0-9]{1,5}?([0-9]{1,2})"
+            + "[^0-9]{1,5}?([0-2]?[0-9])[^0-9]{1,5}?([0-9]{1,2})[^0-9]{1,5}?([0-9]{1,2})");
+
+    /**
+     * Looks for a date-time near the content element.
+     *
+     * <p>The search starts up to two ancestors above {@code contentElement}
+     * and walks further up for at most six steps, testing each element's outer
+     * HTML. If no full date-time is found, {@link #getDate(Element)} is tried.
+     *
+     * @param contentElement the element holding the main content
+     * @return {@code yyyy-M-d H:m:s} built from the matched groups, or the
+     *         date-only result of {@link #getDate(Element)}
+     * @throws Exception if neither a date-time nor a date can be found
+     */
+    protected String getTime(Element contentElement) throws Exception {
+        Element current = contentElement;
+        // Start the search a little above the content element.
+        for (int i = 0; i < 2; i++) {
+            if (current != null && current != doc.body()) {
+                Element parent = current.parent();
+                if (parent != null) {
+                    current = parent;
+                }
+            }
+        }
+        for (int i = 0; i < 6; i++) {
+            if (current == null) {
+                break;
+            }
+            Matcher matcher = DATE_TIME_PATTERN.matcher(current.outerHtml());
+            if (matcher.find()) {
+                return matcher.group(1) + "-" + matcher.group(2) + "-" + matcher.group(3)
+                        + " " + matcher.group(4) + ":" + matcher.group(5) + ":" + matcher.group(6);
+            }
+            if (current != doc.body()) {
+                current = current.parent();
+            }
+        }
+
+        try {
+            return getDate(contentElement);
+        } catch (Exception ex) {
+            // Keep the cause so the original failure stays visible in logs.
+            throw new Exception("time not found", ex);
+        }
+
+    }
+
+    /**
+     * Looks for a year-month-day date near the content element.
+     *
+     * <p>The search starts up to two ancestors above {@code contentElement}
+     * and walks further up for at most six steps, testing each element's outer
+     * HTML.
+     *
+     * @param contentElement the element holding the main content
+     * @return {@code yyyy-M-d} built from the matched groups
+     * @throws Exception if no date is found
+     */
+    protected String getDate(Element contentElement) throws Exception {
+        Pattern datePattern = Pattern.compile(
+                "([1-2][0-9]{3})[^0-9]{1,5}?([0-1]?[0-9])[^0-9]{1,5}?([0-9]{1,2})");
+
+        // Move the starting point up to two levels towards the body.
+        Element cursor = contentElement;
+        for (int step = 0; step < 2; step++) {
+            if (cursor == null || cursor == doc.body()) {
+                continue;
+            }
+            Element above = cursor.parent();
+            if (above != null) {
+                cursor = above;
+            }
+        }
+
+        for (int attempt = 0; attempt < 6 && cursor != null; attempt++) {
+            Matcher found = datePattern.matcher(cursor.outerHtml());
+            if (found.find()) {
+                return found.group(1) + "-" + found.group(2) + "-" + found.group(3);
+            }
+            if (cursor != doc.body()) {
+                cursor = cursor.parent();
+            }
+        }
+        throw new Exception("date not found");
+    }
+
+    /**
+     * Similarity of two strings based on their longest common subsequence.
+     *
+     * @param a first string
+     * @param b second string
+     * @return 0 if either string is empty or one is at least three times as
+     *         long as the other; otherwise the LCS length divided by the
+     *         length of the longer string
+     */
+    protected double strSim(String a, String b) {
+        int lengthA = a.length();
+        int lengthB = b.length();
+        if (lengthA == 0 || lengthB == 0) {
+            return 0;
+        }
+
+        int longer = Math.max(lengthA, lengthB);
+        int shorter = Math.min(lengthA, lengthB);
+        if ((longer + 0.0) / shorter >= 3) {
+            return 0;
+        }
+        return (lcs(a, b) + 0.0) / longer;
+    }
+
+    /**
+     * Determines the article title.
+     *
+     * <p>Strategies, in order:
+     * <ol>
+     * <li>among the {@code h1}-{@code h6} elements visited before the content
+     * element, the one whose text is most similar to the document title, with
+     * later headings weighted higher;</li>
+     * <li>the first element whose id or class starts or ends with
+     * {@code title}, if its text is between 6 and 39 characters long;</li>
+     * <li>{@link #getTitleByEditDistance(Element)}.</li>
+     * </ol>
+     *
+     * @param contentElement the element holding the main content
+     * @return the title text
+     * @throws Exception if no strategy produces a title
+     */
+    protected String getTitle(final Element contentElement) throws Exception {
+        final ArrayList<Element> titleList = new ArrayList<Element>();
+        final ArrayList<Double> titleSim = new ArrayList<Double>();
+        final AtomicInteger contentIndex = new AtomicInteger();
+        final String metaTitle = doc.title().trim();
+        // Compiled once instead of once per visited node.
+        final Pattern headingTag = Pattern.compile("h[1-6]");
+        if (!metaTitle.isEmpty()) {
+            doc.body().traverse(new NodeVisitor() {
+                @Override
+                public void head(Node node, int i) {
+                    if (node instanceof Element) {
+                        Element tag = (Element) node;
+                        if (tag == contentElement) {
+                            // Remember how many headings precede the content.
+                            contentIndex.set(titleList.size());
+                            return;
+                        }
+                        if (headingTag.matcher(tag.tagName()).matches()) {
+                            String title = tag.text().trim();
+                            double sim = strSim(title, metaTitle);
+                            titleSim.add(sim);
+                            titleList.add(tag);
+                        }
+                    }
+                }
+
+                @Override
+                public void tail(Node node, int i) {
+                }
+            });
+            int index = contentIndex.get();
+            if (index > 0) {
+                double maxScore = 0;
+                int maxIndex = -1;
+                for (int i = 0; i < index; i++) {
+                    // Headings closer to the content get a larger weight.
+                    double score = (i + 1) * titleSim.get(i);
+                    if (score > maxScore) {
+                        maxScore = score;
+                        maxIndex = i;
+                    }
+                }
+                if (maxIndex != -1) {
+                    return titleList.get(maxIndex).text();
+                }
+            }
+        }
+
+        Elements titles = doc.body().select("*[id^=title],*[id$=title],*[class^=title],*[class$=title]");
+        if (titles.size() > 0) {
+            String title = titles.first().text();
+            if (title.length() > 5 && title.length() < 40) {
+                return title;
+            }
+        }
+        try {
+            return getTitleByEditDistance(contentElement);
+        } catch (Exception ex) {
+            // Keep the cause so the original failure stays visible in logs.
+            throw new Exception("title not found", ex);
+        }
+
+    }
+
+    /**
+     * Returns the text node in the body that is most similar to the document
+     * title.
+     *
+     * <p>Despite the method name, similarity is measured with
+     * {@link #strSim(String, String)}, not with an edit distance.
+     *
+     * @param contentElement not used by this method
+     * @return the trimmed text of the most similar text node
+     * @throws Exception if no text node has a similarity above zero
+     */
+    protected String getTitleByEditDistance(Element contentElement) throws Exception {
+        final String metaTitle = doc.title();
+
+        // Single-element holder so the anonymous visitor can update the maximum.
+        final ArrayList<Double> max = new ArrayList<Double>();
+        max.add(0.0);
+        final StringBuilder sb = new StringBuilder();
+        doc.body().traverse(new NodeVisitor() {
+
+            @Override
+            public void head(Node node, int i) {
+
+                if (node instanceof TextNode) {
+                    TextNode tn = (TextNode) node;
+                    String text = tn.text().trim();
+                    double sim = strSim(text, metaTitle);
+                    // The maximum starts at 0, so this also rejects sim == 0.
+                    if (sim > max.get(0)) {
+                        max.set(0, sim);
+                        sb.setLength(0);
+                        sb.append(text);
+                    }
+
+                }
+            }
+
+            @Override
+            public void tail(Node node, int i) {
+            }
+        });
+        if (sb.length() > 0) {
+            return sb.toString();
+        }
+        throw new Exception("no text node resembles the document title");
+
+    }
+
+    /**
+     * Length of the longest common subsequence of two strings.
+     *
+     * @param x first string
+     * @param y second string
+     * @return the LCS length; 0 if either string is empty
+     */
+    protected int lcs(String x, String y) {
+        int rows = x.length();
+        int cols = y.length();
+        if (rows == 0 || cols == 0) {
+            return 0;
+        }
+
+        // table[i][j] = LCS length of the first i chars of x and first j chars of y.
+        int[][] table = new int[rows + 1][cols + 1];
+        for (int i = 1; i <= rows; i++) {
+            char fromX = x.charAt(i - 1);
+            for (int j = 1; j <= cols; j++) {
+                if (fromX == y.charAt(j - 1)) {
+                    table[i][j] = table[i - 1][j - 1] + 1;
+                } else {
+                    table[i][j] = Math.max(table[i - 1][j], table[i][j - 1]);
+                }
+            }
+        }
+        return table[rows][cols];
+    }
+
+    /**
+     * Levenshtein distance between two strings, with insertions, deletions
+     * and substitutions each costing 1.
+     *
+     * @param word1 first string
+     * @param word2 second string
+     * @return the minimum number of single-character edits
+     */
+    protected int editDistance(String word1, String word2) {
+        int rows = word1.length();
+        int cols = word2.length();
+
+        // cost[i][j] = distance between the first i chars of word1 and first j of word2.
+        int[][] cost = new int[rows + 1][cols + 1];
+        for (int i = 0; i <= rows; i++) {
+            cost[i][0] = i;
+        }
+        for (int j = 0; j <= cols; j++) {
+            cost[0][j] = j;
+        }
+
+        for (int i = 1; i <= rows; i++) {
+            char left = word1.charAt(i - 1);
+            for (int j = 1; j <= cols; j++) {
+                if (left == word2.charAt(j - 1)) {
+                    cost[i][j] = cost[i - 1][j - 1];
+                } else {
+                    int cheapest = Math.min(cost[i - 1][j - 1],
+                            Math.min(cost[i - 1][j], cost[i][j - 1]));
+                    cost[i][j] = cheapest + 1;
+                }
+            }
+        }
+        return cost[rows][cols];
+    }
+
+    /**
+     * Returns the element holding the main content of a parsed document.
+     *
+     * @param doc the Jsoup document
+     * @return the content element
+     * @throws Exception if extraction fails
+     */
+    public static Element getContentElementByDoc(Document doc) throws Exception {
+        return new ContentExtractor(doc).getContentElement();
+    }
+
+    /**
+     * Returns the element holding the main content of an HTML string.
+     *
+     * @param html the HTML source
+     * @return the content element
+     * @throws Exception if extraction fails
+     */
+    public static Element getContentElementByHtml(String html) throws Exception {
+        return getContentElementByDoc(Jsoup.parse(html));
+    }
+
+    /**
+     * Returns the element holding the main content of an HTML string parsed
+     * with the given base URL.
+     *
+     * @param html the HTML source
+     * @param url  the base URL passed to the parser
+     * @return the content element
+     * @throws Exception if extraction fails
+     */
+    public static Element getContentElementByHtml(String html, String url) throws Exception {
+        return getContentElementByDoc(Jsoup.parse(html, url));
+    }
+
+    /**
+     * Returns the main content text of a parsed document.
+     *
+     * @param doc the Jsoup document
+     * @return the text of the content element
+     * @throws Exception if extraction fails
+     */
+    public static String getContentByDoc(Document doc) throws Exception {
+        Element contentElement = new ContentExtractor(doc).getContentElement();
+        return contentElement.text();
+    }
+
+    /**
+     * Returns the main content text of an HTML string.
+     *
+     * @param html the HTML source
+     * @return the text of the content element
+     * @throws Exception if extraction fails
+     */
+    public static String getContentByHtml(String html) throws Exception {
+        Element contentElement = getContentElementByDoc(Jsoup.parse(html));
+        return contentElement.text();
+    }
+
+    /**
+     * Returns the main content text of an HTML string parsed with the given
+     * base URL.
+     *
+     * @param html the HTML source
+     * @param url  the base URL passed to the parser
+     * @return the text of the content element
+     * @throws Exception if extraction fails
+     */
+    public static String getContentByHtml(String html, String url) throws Exception {
+        Element contentElement = getContentElementByDoc(Jsoup.parse(html, url));
+        return contentElement.text();
+    }
+
+    /**
+     * Extracts structured news information from a parsed document.
+     *
+     * @param doc the Jsoup document
+     * @return the extracted news
+     * @throws Exception if content extraction fails
+     */
+    public static News getNewsByDoc(Document doc) throws Exception {
+        return new ContentExtractor(doc).getNews();
+    }
+
+    /**
+     * Extracts structured news information from an HTML string.
+     *
+     * @param html the HTML source
+     * @return the extracted news
+     * @throws Exception if content extraction fails
+     */
+    public static News getNewsByHtml(String html) throws Exception {
+        return getNewsByDoc(Jsoup.parse(html));
+    }
+
+    /**
+     * Extracts structured news information from an HTML string parsed with
+     * the given base URL.
+     *
+     * @param html the HTML source
+     * @param url  the base URL passed to the parser
+     * @return the extracted news
+     * @throws Exception if content extraction fails
+     */
+    public static News getNewsByHtml(String html, String url) throws Exception {
+        return getNewsByDoc(Jsoup.parse(html, url));
+    }
+
+
+}
--- a/src/main/java/com/zhiwei/source_forward/content/News.java
+++ b/src/main/java/com/zhiwei/source_forward/content/News.java
+package com.zhiwei.source_forward.content;
+
+import org.jsoup.nodes.Element;
+
+/**
+ * Holder for the pieces extracted from a news page: URL, title, time,
+ * content text and the element the content was taken from.
+ *
+ * @author hu
+ */
+public class News {
+
+    protected String url;
+    protected String title;
+    protected String content;
+    protected String time;
+
+    protected Element contentElement;
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    /**
+     * Returns the content text. If none was set explicitly, it is derived
+     * from the content element on first access and then remembered.
+     *
+     * @return the content text, or {@code null} if neither text nor element is set
+     */
+    public String getContent() {
+        if (content == null && contentElement != null) {
+            content = contentElement.text();
+        }
+        return content;
+    }
+
+    public void setContent(String content) {
+        this.content = content;
+    }
+
+    public String getTime() {
+        return time;
+    }
+
+    public void setTime(String time) {
+        this.time = time;
+    }
+
+    public Element getContentElement() {
+        return contentElement;
+    }
+
+    public void setContentElement(Element contentElement) {
+        this.contentElement = contentElement;
+    }
+
+    @Override
+    public String toString() {
+        return "URL:\n" + url
+                + "\nTITLE:\n" + title
+                + "\nTIME:\n" + time
+                + "\nCONTENT:\n" + getContent()
+                + "\nCONTENT(SOURCE):\n" + contentElement;
+    }
+
+}
--- a/src/main/java/com/zhiwei/source_forward/crawler/ContentCrawler.java
+++ b/src/main/java/com/zhiwei/source_forward/crawler/ContentCrawler.java
+package com.zhiwei.source_forward.crawler;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.zhiwei.source_forward.bean.ContentBean;
+import com.zhiwei.source_forward.bean.ContentBean.Attribution;
+import com.zhiwei.source_forward.util.ContentDataCallback;
+import com.zhiwei.source_forward.util.MatchContent;
+import com.zhiwei.source_forward.util.ProxyClientUtil;
+import com.zhiwei.tools.httpclient.HttpBoot;
+import com.zhiwei.tools.httpclient.HttpRequestBuilder;
+import com.zhiwei.tools.httpclient.asyn.MultiThreadingCounter;
+
+import okhttp3.Request;
+import okhttp3.Response;
+
+/**
+ * Fetches pages asynchronously and passes the article text extracted from
+ * each one to a {@link ContentDataCallback}.
+ */
+public class ContentCrawler {
+
+    private static final Logger logger = LogManager.getLogger(ContentCrawler.class);
+
+    /**
+     * Submits the given URLs for fetching.
+     *
+     * @param callback receives one {@link ContentBean} per fetched URL
+     * @param urls     the URLs to fetch; {@code null} entries are skipped
+     * @return the counter that is increased for every request started and
+     *         reduced when that request has been handled
+     * @throws Exception declared for callers; not thrown by the visible code
+     */
+    public MultiThreadingCounter submitTask(ContentDataCallback callback,String... urls) throws Exception {
+        MultiThreadingCounter counter = new MultiThreadingCounter();
+        start(counter, callback, urls);
+        return counter;
+    }
+
+    /**
+     * Starts one asynchronous request per non-null URL.
+     *
+     * @param counter  the counter tracking outstanding work
+     * @param callback receives the extracted content
+     * @param urls     the URLs to fetch
+     */
+    private void start(MultiThreadingCounter counter,ContentDataCallback callback, String... urls) {
+        if (urls == null || urls.length == 0) {
+            return;
+        }
+        for (String url : urls) {
+            if (url == null) {
+                continue;
+            }
+            try {
+                counter.increase();
+                search(counter, url, Attribution.of(url), callback);
+            } catch (Exception e) {
+                // Both placeholders are now filled; the trailing throwable keeps the stack trace.
+                logger.error("关键词 {} 搜索创建出错: {}", url, e.getMessage(), e);
+            } finally {
+                counter.reduce();
+            }
+        }
+    }
+
+    /**
+     * Issues an asynchronous GET for {@code url} and parses the response when
+     * it arrives.
+     *
+     * @param counter  increased before the call and reduced when the listener finishes
+     * @param url      the URL to fetch
+     * @param attr     the attribution handed through to the callback
+     * @param callback receives the extracted content
+     * @return the same {@code counter}
+     */
+    private MultiThreadingCounter search(MultiThreadingCounter counter, String url,Attribution attr, ContentDataCallback callback) {
+        logger.info("当前处理 URL: {}", url);
+        Request request = HttpRequestBuilder.newGetRequest(url, null);
+        counter.increase();
+        HttpBoot.asyncCall(request, ProxyClientUtil.getNATProxy(), false).addListeners(future -> {
+            try {
+                if (future.isSuccess()) {
+                    Response response = future.result();
+                    try {
+                        parseHtml(response, attr, callback);
+                    } catch (Exception e) {
+                        logger.error("解析出错", e);
+                    }
+                } else {
+                    logger.info("{} 搜索结果访问失败: {}", request.url().url(), future.cause().getMessage());
+                }
+            } finally {
+                // Always balance the increase above, even if the listener body throws.
+                counter.reduce();
+            }
+        });
+        return counter;
+    }
+
+    /**
+     * Extracts the article text from a response and hands it to the callback.
+     *
+     * <p>The callback is invoked even when extraction fails or the response
+     * is not successful; the bean's content is {@code null} in that case.
+     * The response is always closed.
+     *
+     * @param response the HTTP response to parse
+     * @param attr     attribution whose value is used as the URL
+     * @param callback receives the resulting bean; a warning is logged if {@code null}
+     */
+    private void parseHtml(Response response, Attribution attr,
+            ContentDataCallback callback) {
+        String content = null;
+        try {
+            if (response.isSuccessful()) {
+                String html = response.body().string();
+                content = MatchContent.matchContent(attr.get().toString(), html);
+            }
+        } catch (Exception e) {
+            // Log the exception itself; fillInStackTrace() would overwrite the real stack trace.
+            logger.info("网页链接失效", e);
+        } finally {
+            if (response != null) {
+                response.close();
+            }
+        }
+        ContentBean cb = new ContentBean(attr.get().toString(), content);
+        if (callback == null) {
+            logger.warn("DataCallback 对象为 null，无法保存数据");
+        } else {
+            callback.onData(cb, attr);
+        }
+
+    }
+
+}
--- a/src/main/java/com/zhiwei/source_forward/crawler/ContentPageProcessor.java
+++ b/src/main/java/com/zhiwei/source_forward/crawler/ContentPageProcessor.java
-package com.zhiwei.source_forward.crawler;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.zhiwei.source_forward.util.MatchContent;
-import us.codecraft.webmagic.Page;
-import us.codecraft.webmagic.Site;
-import us.codecraft.webmagic.processor.PageProcessor;
-
-/**
- * @ClassName: ContentPageProcessor 
- * @Description: 获取文章内容 
- * @author hero 
- * @date 2018年6月30日 上午9:54:02
- */
-public class ContentPageProcessor implements PageProcessor {
-
-	private static Logger logger = LoggerFactory.getLogger(ContentPageProcessor.class);
-	private Site site = Site.me().setCycleRetryTimes(3).setSleepTime(1500)
-			.setTimeOut(10000)
-			.setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0")
-			.addHeader("Accept-Encoding", "deflate, br")
-			;
-	
-	@Override
-	public Site getSite() {
-		return site;
-	}
-	
-	@Override
-	public void process(Page page) {
-		Map<String,String> data = new HashMap<String,String>();
-		String content = null;
-		try {
-			if(page.getStatusCode()!=404){
-				content = MatchContent.matchContent(page.getUrl().get(), page.getHtml().toString());
-			}
-		} catch (Exception e) {
-			logger.info("网页链接失效",e.fillInStackTrace());
-			content = null;
-		}
-		data.put("url", page.getUrl().get());
-		data.put("content", content);
-		page.putField("content", data);
-	}
-	
-}
--- a/src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourceCrawler.java
+++ b/src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourceCrawler.java
+package com.zhiwei.source_forward.crawler;
+
+import java.util.List;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Node;
+
+import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
+import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution;
+import com.zhiwei.source_forward.util.MatchChannel;
+import com.zhiwei.source_forward.util.MatchSource;
+import com.zhiwei.source_forward.util.MediaSelfSourceDataCallBack;
+import com.zhiwei.source_forward.util.ProxyClientUtil;
+import com.zhiwei.tools.httpclient.HttpBoot;
+import com.zhiwei.tools.httpclient.HttpRequestBuilder;
+import com.zhiwei.tools.httpclient.asyn.MultiThreadingCounter;
+
+import okhttp3.Request;
+import okhttp3.Response;
+
+/**
+ * Fetches article pages asynchronously and, for each URL, reports the matched
+ * self-media source and channel through a {@link MediaSelfSourceDataCallBack}.
+ *
+ * @ClassName MediaSelfSourceCrawler
+ * @Description self-media account matching
+ * @author byte-zbs
+ * @Date 2018-08-21 15:54:03
+ * @version 1.0.0
+ */
+public class MediaSelfSourceCrawler {
+
+    private static final Logger logger = LogManager.getLogger(MediaSelfSourceCrawler.class);
+
+    /**
+     * Submits the given URLs for crawling.
+     *
+     * @param callback receiver of one {@link MediaSelfSourceBean} per parsed response
+     * @param urls     URLs to fetch; {@code null} entries are skipped
+     * @return the counter that tracks the requests started by this call
+     * @throws Exception declared for callers; nothing in this method throws a checked exception
+     */
+    public MultiThreadingCounter submitTask(MediaSelfSourceDataCallBack callback,String... urls) throws Exception {
+        MultiThreadingCounter counter = new MultiThreadingCounter();
+        start(counter, callback, urls);
+        return counter;
+    }
+
+    /**
+     * Starts one asynchronous request per non-null URL.
+     *
+     * @param counter  counter shared by all requests of this submission
+     * @param callback receiver passed on to the parser
+     * @param urls     URLs to fetch; may be {@code null} or empty
+     */
+    private void start(MultiThreadingCounter counter,MediaSelfSourceDataCallBack callback, String... urls) {
+        if (urls == null) {
+            return;
+        }
+        for (String url : urls) {
+            if (url == null) {
+                continue;
+            }
+            try {
+                // This increase/reduce pair brackets the call to search(),
+                // which registers its own count for the in-flight request.
+                counter.increase();
+                search(counter, url, Attribution.of(url), callback);
+            } catch (Exception e) {
+                // Both placeholders are filled now (URL, then message); the
+                // trailing throwable makes Log4j print the stack trace as well.
+                logger.error("关键词 {} 搜索创建出错: {}", url, e.getMessage(), e);
+            } finally {
+                counter.reduce();
+            }
+        }
+    }
+
+    /**
+     * Issues an asynchronous GET for {@code url} and parses the response in the listener.
+     *
+     * @param counter  increased before dispatch and reduced once the request is finished
+     * @param url      page to fetch
+     * @param attr     attribution passed through to the parser and callback
+     * @param callback receiver passed on to the parser
+     * @return the same {@code counter} instance
+     */
+    private MultiThreadingCounter search(MultiThreadingCounter counter, String url,Attribution attr, MediaSelfSourceDataCallBack callback) {
+        logger.info("当前处理 URL: {}", url);
+        Request request = HttpRequestBuilder.newGetRequest(url, null);
+        // One count per request; every path below must release it exactly once.
+        counter.increase();
+        try {
+            HttpBoot.asyncCall(request, ProxyClientUtil.getNATProxy(), false).addListeners(future -> {
+                try {
+                    if (future.isSuccess()) {
+                        Response response = future.result();
+                        parseHtml(response, attr, callback);
+                    } else {
+                        logger.info("{} 搜索结果访问失败: {}", request.url().url(), future.cause().getMessage());
+                    }
+                } catch (Exception e) {
+                    logger.error("解析出错", e);
+                } finally {
+                    // Released in finally so a failing listener cannot leak the count.
+                    counter.reduce();
+                }
+            });
+        } catch (RuntimeException e) {
+            // Dispatch failed, so no listener will run: release the count here.
+            counter.reduce();
+            throw e;
+        }
+        return counter;
+    }
+
+    /**
+     * Extracts source and channel from a fetched page and hands them to the callback.
+     *
+     * <p>Parsing only happens for successful responses. When anything throws,
+     * the source is reset to {@code null} and a bean is still delivered.
+     *
+     * @param response the HTTP response to parse; it is closed before returning
+     * @param attr     attribution whose {@code get()} value is used as the page URL
+     * @param callback receiver of the resulting bean; when {@code null} only a
+     *                 warning is logged
+     */
+    private void parseHtml(Response response, Attribution attr,
+            MediaSelfSourceDataCallBack callback) {
+        String source = null;
+        String channel = null;
+        try {
+            if (response.isSuccessful()) {
+                String html = response.body().string();
+                String url = attr.get().toString();
+                source = MatchSource.matchMediaSelfSource(url, html);
+                // An empty match is treated the same as no match.
+                if (source != null && source.isEmpty()) {
+                    source = null;
+                }
+                channel = MatchChannel.verifyChannel(url);
+                if (channel == null) {
+                    // Fall back to matching the channel on the <head> child nodes.
+                    List<Node> nodeList = Jsoup.parse(html).head().childNodes();
+                    channel = MatchChannel.matchChannel(nodeList);
+                }
+            }
+        } catch (Exception e) {
+            // Previously swallowed silently; keep the best-effort result but record why.
+            logger.warn("解析出错", e);
+            source = null;
+        } finally {
+            // A null response reaches this point via the catch above, hence the guard.
+            if (response != null) {
+                response.close();
+            }
+        }
+        logger.info("{}================={}", attr.get(), source);
+        MediaSelfSourceBean msfb = new MediaSelfSourceBean(attr.get().toString(), source, channel);
+        if (callback == null) {
+            logger.warn("DataCallback 对象为 null，无法保存数据");
+        } else {
+            callback.onData(msfb, attr);
+        }
+    }
+
+}
--- a/src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourcePageProcessor.java
+++ b/src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourcePageProcessor.java
-package com.zhiwei.source_forward.crawler;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.jsoup.nodes.Node;
-
-import com.zhiwei.source_forward.util.MatchChannel;
-import com.zhiwei.source_forward.util.MatchSource;
-import us.codecraft.webmagic.Page;
-import us.codecraft.webmagic.Site;
-import us.codecraft.webmagic.processor.PageProcessor;
-
-public class MediaSelfSourcePageProcessor implements PageProcessor {
-
-	private Site site = Site.me().setCycleRetryTimes(3).setSleepTime(1500)
-			.setTimeOut(10000)
-			.setUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0")
-			.addHeader("Accept-Encoding", "gzip, deflate, br")
-			;
-	
-	@Override
-	public Site getSite() {
-		return site;
-	}
-	
-	@Override
-	public void process(Page page) {
-		Map<String,String> data = new HashMap<String,String>();
-		String source = null;
-		String channel = null;
-		try {
-			if(page.getStatusCode()!=404){
-				source = MatchSource.matchMediaSelfSource(page.getUrl().get(),page.getHtml().toString());
-				if(source==null || source.equals("")){
-					source = null;
-				}
-				channel = MatchChannel.verifyChannel(page.getUrl().get());
-				if(channel==null){
-					List<Node> nodeList = page.getHtml().getDocument().head().childNodes();
-					channel = MatchChannel.matchChannel(nodeList);
-				}
-			}
-		} catch (Exception e) {
-			source = null;
-		}
-		System.out.println(page.getUrl().get()+"================="+source);   
-		data.put("url", page.getUrl().get());
-		data.put("mediaself", source);
-		data.put("channel", channel);
-		
-		page.putField("mediaSelf", data);
-	}
-	
-}
--- a/src/main/java/com/zhiwei/source_forward/crawler/SourceForwardCrawler.java
+++ b/src/main/java/com/zhiwei/source_forward/crawler/SourceForwardCrawler.java
+package com.zhiwei.source_forward.crawler;
+
+import java.util.List;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Node;
+
+import com.zhiwei.source_forward.bean.SourceForwardBean;
+import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution;
+import com.zhiwei.source_forward.util.MatchChannel;
+import com.zhiwei.source_forward.util.MatchSource;
+import com.zhiwei.source_forward.util.ProxyClientUtil;
+import com.zhiwei.source_forward.util.SourceData;
+import com.zhiwei.source_forward.util.SourceForwardDataCallBack;
+import com.zhiwei.tools.httpclient.HttpBoot;
+import com.zhiwei.tools.httpclient.HttpRequestBuilder;
+import com.zhiwei.tools.httpclient.asyn.MultiThreadingCounter;
+
+import okhttp3.Request;
+import okhttp3.Response;
+
+/**
+ * Fetches pages asynchronously and, for each URL, reports channel, root
+ * source and an "original / unknown" forward marker through a
+ * {@link SourceForwardDataCallBack}.
+ */
+public class SourceForwardCrawler {
+
+    private static final Logger logger = LogManager.getLogger(SourceForwardCrawler.class);
+
+    /** Candidate source names handed to {@code MatchSource.matchSource}; loaded once. */
+    private static final List<String> sourceList = SourceData.getSourceList();
+
+    /**
+     * Submits the given URLs for crawling.
+     *
+     * @param callback receiver of one {@link SourceForwardBean} per parsed response
+     * @param urls     URLs to fetch; {@code null} entries are skipped
+     * @return the counter that tracks the requests started by this call
+     * @throws Exception declared for callers; nothing in this method throws a checked exception
+     */
+    public MultiThreadingCounter submitTask(SourceForwardDataCallBack callback,String... urls) throws Exception {
+        MultiThreadingCounter counter = new MultiThreadingCounter();
+        start(counter, callback, urls);
+        return counter;
+    }
+
+    /**
+     * Starts one asynchronous request per non-null URL.
+     *
+     * @param counter  counter shared by all requests of this submission
+     * @param callback receiver passed on to the parser
+     * @param urls     URLs to fetch; may be {@code null} or empty
+     */
+    private void start(MultiThreadingCounter counter,SourceForwardDataCallBack callback, String... urls) {
+        if (urls == null) {
+            return;
+        }
+        for (String url : urls) {
+            if (url == null) {
+                continue;
+            }
+            try {
+                // This increase/reduce pair brackets the call to search(),
+                // which registers its own count for the in-flight request.
+                counter.increase();
+                search(counter, url, Attribution.of(url), callback);
+            } catch (Exception e) {
+                // Both placeholders are filled now (URL, then message); the
+                // trailing throwable makes Log4j print the stack trace as well.
+                logger.error("关键词 {} 搜索创建出错: {}", url, e.getMessage(), e);
+            } finally {
+                counter.reduce();
+            }
+        }
+    }
+
+    /**
+     * Issues an asynchronous GET for {@code url} and parses the response in the listener.
+     *
+     * @param counter  increased before dispatch and reduced once the request is finished
+     * @param url      page to fetch
+     * @param attr     attribution passed through to the parser and callback
+     * @param callback receiver passed on to the parser
+     * @return the same {@code counter} instance
+     */
+    private MultiThreadingCounter search(MultiThreadingCounter counter, String url,Attribution attr, SourceForwardDataCallBack callback) {
+        logger.info("当前处理 URL: {}", url);
+        Request request = HttpRequestBuilder.newGetRequest(url, null);
+        // One count per request; every path below must release it exactly once.
+        counter.increase();
+        try {
+            HttpBoot.asyncCall(request, ProxyClientUtil.getNATProxy(), false).addListeners(future -> {
+                try {
+                    if (future.isSuccess()) {
+                        Response response = future.result();
+                        parseHtml(response, attr, callback);
+                    } else {
+                        logger.info("{} 搜索结果访问失败: {}", request.url().url(), future.cause().getMessage());
+                    }
+                } catch (Exception e) {
+                    logger.error("解析出错", e);
+                } finally {
+                    // Released in finally so a failing listener cannot leak the count.
+                    counter.reduce();
+                }
+            });
+        } catch (RuntimeException e) {
+            // Dispatch failed, so no listener will run: release the count here.
+            counter.reduce();
+            throw e;
+        }
+        return counter;
+    }
+
+    /**
+     * Derives channel, source and forward marker from a fetched page and hands
+     * them to the callback.
+     *
+     * <p>URLs containing {@code mp.weixin.qq.com} only get the forward marker
+     * evaluated; all other URLs get channel and source matching. When anything
+     * throws, source falls back to {@code null} and channel to its default.
+     *
+     * @param response the HTTP response to parse; it is closed before returning
+     * @param attr     attribution whose {@code get()} value is used as the page URL
+     * @param callback receiver of the resulting bean; when {@code null} only a
+     *                 warning is logged
+     */
+    private void parseHtml(Response response, Attribution attr,
+            SourceForwardDataCallBack callback) {
+        String source = null;
+        String channel = "新闻";
+        String isforward = "未知";
+        try {
+            if (response.isSuccessful()) {
+                Document document = Jsoup.parse(response.body().string());
+                String url = attr.get().toString();
+                if (url.contains("mp.weixin.qq.com")) {
+                    // Only the exact copyright-logo text "原创" is kept; anything else is "未知".
+                    isforward = document.select("div#meta_content").select("span#copyright_logo").text();
+                    if (!"原创".equals(isforward)) {
+                        isforward = "未知";
+                    }
+                } else {
+                    channel = MatchChannel.verifyChannel(url);
+                    if (channel == null) {
+                        // Fall back to matching the channel on the <head> child nodes.
+                        List<Node> nodeList = document.head().childNodes();
+                        channel = MatchChannel.matchChannel(nodeList);
+                    }
+                    source = MatchSource.matchSource(url, document.toString(), sourceList);
+                }
+            }
+        } catch (Exception e) {
+            // Previously swallowed silently; keep the fallback values but record why.
+            logger.warn("解析出错", e);
+            source = null;
+            channel = "新闻";
+        } finally {
+            // A null response reaches this point via the catch above, hence the guard.
+            if (response != null) {
+                response.close();
+            }
+        }
+        logger.info("{}======={}================={}", attr.get().toString(), channel, source);
+        SourceForwardBean sfb = new SourceForwardBean(attr.get().toString(), channel, source,isforward);
+        if (callback == null) {
+            logger.warn("DataCallback 对象为 null，无法保存数据");
+        } else {
+            callback.onData(sfb, attr);
+        }
+    }
+
+}
--- a/src/main/java/com/zhiwei/source_forward/crawler/SourceForwardPageProcessor.java
+++ b/src/main/java/com/zhiwei/source_forward/crawler/SourceForwardPageProcessor.java
-package com.zhiwei.source_forward.crawler;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Node;
-
-import com.zhiwei.source_forward.util.MatchChannel;
-import com.zhiwei.source_forward.util.MatchSource;
-import com.zhiwei.source_forward.util.SourceData;
-import us.codecraft.webmagic.Page;
-import us.codecraft.webmagic.Site;
-import us.codecraft.webmagic.processor.PageProcessor;
-
-public class SourceForwardPageProcessor implements PageProcessor {
-
-	private static List<String> sourceList = SourceData.getSourceList();
-	
-	private Site site = Site.me().setCycleRetryTimes(3).setSleepTime(1500)
-			.setTimeOut(10000)
-			.addHeader("User-Agent",
-					"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0")
-			.addHeader("Accept",
-					"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
-			;
-	
-	@Override
-	public Site getSite() {
-		return site;
-	}
-	
-	@Override
-	public void process(Page page) {
-		Map<String,String> data = new HashMap<String,String>();
-		String source = null;
-		String channel = "新闻";
-		try {
-			if(page.getStatusCode()!=404){
-				if(page.getUrl().get().contains("mp.weixin.qq.com")){
-					String isforward = "未知";
-					Document document = page.getHtml().getDocument();
-					isforward = document.select("div#meta_content").select("span#copyright_logo").text();
-					if(!"原创".equals(isforward)){
-						isforward = "未知";
-					}
-					data.put("isforward", isforward);
-				}else{
-					channel = MatchChannel.verifyChannel(page.getUrl().get());
-					if(channel==null){
-						List<Node> nodeList = page.getHtml().getDocument().head().childNodes();
-						channel = MatchChannel.matchChannel(nodeList);
-					}
-					source = MatchSource.matchSource(page.getUrl().get(),page.getHtml().toString(), sourceList);
-				}
-			}
-		} catch (Exception e) {
-			source = null;
-			channel = "新闻";
-		}
-		System.out.println(page.getUrl().get()+"======="+channel+"================="+source);   
-		data.put("url", page.getUrl().get());
-		data.put("channel", channel);
-		data.put("root_source", source);
-		
-		page.putField("sourceForward", data);
-	}
-	
-}
--- a/src/main/java/com/zhiwei/source_forward/crawler/UrlLivePageProcessor.java
+++ b/src/main/java/com/zhiwei/source_forward/crawler/UrlLivePageProcessor.java
 package com.zhiwei.source_forward.crawler;
-import java.util.HashMap;
+
+import java.io.IOException;
 import java.util.List;
-import java.util.Map;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Node;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import us.codecraft.webmagic.Page;
-import us.codecraft.webmagic.Site;
-import us.codecraft.webmagic.processor.PageProcessor;

-public class UrlLivePageProcessor implements PageProcessor{
-	private static Logger logger = LoggerFactory.getLogger(UrlLivePageProcessor.class);
-	private Site site = Site.me().setCycleRetryTimes(3).setSleepTime(1500)
-							.setTimeOut(15000)
-							.addHeader("User-Agent",
-									"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0")
-							.addHeader("Accept",
-									"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
-	@Override
-	public void process(Page page) {
+import com.zhiwei.source_forward.bean.UrlLiveBean;
+import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution;
+import com.zhiwei.source_forward.util.ProxyClientUtil;
+import com.zhiwei.source_forward.util.UrlLiveDataCallback;
+import com.zhiwei.tools.httpclient.HttpBoot;
+import com.zhiwei.tools.httpclient.HttpRequestBuilder;
+import com.zhiwei.tools.httpclient.asyn.MultiThreadingCounter;
+
+import okhttp3.Request;
+import okhttp3.Response;
+
+/**
+ * 
+ * @ClassName UrlLiveCrawler
+ * @Description 判断页面是否存在
+ * @author byte-zbs
+ * @Date 2018年8月20日 下午3:34:57
+ * @version 1.0.0
+ */
+public class UrlLiveCrawler {
+
+    private static final Logger logger = LogManager.getLogger(UrlLiveCrawler.class);
+
+    /**
+     * Submits the given URLs for a liveness check.
+     *
+     * @param callback receiver of one {@link UrlLiveBean} per parsed response
+     * @param urls     URLs to fetch; {@code null} entries are skipped
+     * @return the counter that tracks the requests started by this call
+     * @throws Exception declared for callers; nothing in this method throws a checked exception
+     */
+    public MultiThreadingCounter submitTask(UrlLiveDataCallback callback,String... urls) throws Exception {
+        final MultiThreadingCounter pending = new MultiThreadingCounter();
+        start(pending, callback, urls);
+        return pending;
+    }
+
+    /**
+     * Starts one asynchronous request per non-null URL.
+     *
+     * @param counter  counter shared by all requests of this submission
+     * @param callback receiver passed on to the parser
+     * @param urls     URLs to fetch; may be {@code null} or empty
+     */
+    private void start(MultiThreadingCounter counter,UrlLiveDataCallback callback, String... urls) {
+        if (urls == null) {
+            return;
+        }
+        for (String url : urls) {
+            if (url == null) {
+                continue;
+            }
+            try {
+                // This increase/reduce pair brackets the call to search(),
+                // which registers its own count for the in-flight request.
+                counter.increase();
+                search(counter, url, Attribution.of(url), callback);
+            } catch (Exception e) {
+                // Both placeholders are filled now (URL, then message); the
+                // trailing throwable makes Log4j print the stack trace as well.
+                logger.error("关键词 {} 搜索创建出错: {}", url, e.getMessage(), e);
+            } finally {
+                counter.reduce();
+            }
+        }
+    }
+
+    /**
+     * Issues an asynchronous GET for {@code url} and parses the response in the listener.
+     *
+     * @param counter  increased before dispatch and reduced once the request is finished
+     * @param url      page to fetch
+     * @param attr     attribution passed through to the parser and callback
+     * @param callback receiver passed on to the parser
+     * @return the same {@code counter} instance
+     */
+    private MultiThreadingCounter search(MultiThreadingCounter counter, String url,
+            Attribution attr, UrlLiveDataCallback callback) {
+        logger.info("当前处理 URL: {}", url);
+        Request request = HttpRequestBuilder.newGetRequest(url, null);
+        // One count per request; every path below must release it exactly once.
+        counter.increase();
+        try {
+            HttpBoot.asyncCall(request, ProxyClientUtil.getNATProxy(), false).addListeners(future -> {
+                try {
+                    if (future.isSuccess()) {
+                        Response response = future.result();
+                        parseHtml(response, attr, callback);
+                    } else {
+                        logger.info("{} 搜索结果访问失败: {}", request.url().url(), future.cause().getMessage());
+                    }
+                } catch (Exception e) {
+                    logger.error("解析出错", e);
+                } finally {
+                    // Released in finally so a failing listener cannot leak the count.
+                    counter.reduce();
+                }
+            });
+        } catch (RuntimeException e) {
+            // Dispatch failed, so no listener will run: release the count here.
+            counter.reduce();
+            throw e;
+        }
+        return counter;
+    }
+
+    /**
+     * Classifies a fetched page and hands the verdict to the callback.
+     *
+     * <p>The flag follows the mapping of the page-processor code this method
+     * replaces: a successful response is judged by {@link #matchDel}, a 404 is
+     * {@code true}, every other status is {@code false}. If the body cannot be
+     * read the flag keeps its initial value {@code true}.
+     * NOTE(review): the flag used to be stored under the key "live" although
+     * {@code true} is what a 404 produces; confirm the polarity with consumers.
+     *
+     * @param response the HTTP response to classify; it is always closed before returning
+     * @param attr     attribution whose {@code get()} value is used as the page URL
+     * @param callback receiver of the resulting bean; when {@code null} only a
+     *                 warning is logged
+     */
+    private void parseHtml(Response response, Attribution attr,
+            UrlLiveDataCallback callback) {
-         /***验证网页是否能够连通*/
+        // Verify whether the page can still be reached.
         boolean f = true;
-		if(page!=null){
-			if(page.getStatusCode()==200){
-				f = matchDel(page);
-			}else if(page.getStatusCode()==404){
-				f = true;
-         }else{
-             f = false;
-         }
-         }
-		
-		Map<String,Object> data = new HashMap<String,Object>();
-		data.put("url", page.getUrl().get());
-		data.put("live", f);
-		page.putField("urlLive", data);
-	}
-
-	@Override
-	public Site getSite() {
-		return site;
+        try {
+            if (response.isSuccessful()) {
+                // Only a page that was actually delivered can be inspected for
+                // "removed / invalid" markers.
+                f = matchDel(response.body().string(), attr.get().toString());
+            } else {
+                f = response.code() == 404;
+            }
+        } catch (IOException e) {
+            logger.info("数据判断出错 {}",e.getMessage());
+        } finally {
+            // Close on every path; previously successful responses were never closed.
+            response.close();
+        }
+        UrlLiveBean ulb = new UrlLiveBean(attr.get().toString(), f);
+        if (callback == null) {
+            logger.warn("DataCallback 对象为 null，无法保存数据");
+        } else {
+            callback.onData(ulb, attr);
+        }
     }
    
-	
-	
    /***
     * @Title: matchDel 
     * @author hero 
@@ -53,59 +109,59 @@ public class UrlLivePageProcessor implements PageProcessor{
     * @param @return 设定文件 
     * @return boolean 返回类型
     */
-	public boolean matchDel(Page page){
+    public boolean matchDel(String result,String url){
+        // Parses the HTML in `result` and runs the ruler* checks in a fixed order.
+        // The first check that matches logs its step number and returns true;
+        // false is returned when none of them match.
+        // `step` is only used for that log line. Its runtime value goes 1..9, so
+        // the trailing //7, //8, //9, //11 markers below do not match it.
         int step = 1;
-		Document doc = page.getHtml().getDocument();
+        Document doc = Jsoup.parse(result);
         if(rulerHead(doc)){
-        	logger.info("{}检测规则：第{}步",page.getUrl(),step);
+            logger.info("{}检测规则：第{}步",url,step);
             return true;
         }
         step++;
         if (rulerYaoyan(doc))
         {
-        	logger.info("{}检测规则：第{}步",page.getUrl(),step);
+            logger.info("{}检测规则：第{}步",url,step);
             return true;
         }
         step++;
         if (rulerWeigui(doc))
         {
-        	logger.info("{}检测规则：第{}步",page.getUrl(),step);
+            logger.info("{}检测规则：第{}步",url,step);
             return true;
         }
         step++;
         if (rulerTousu(doc))
         {
-        	logger.info("{}检测规则：第{}步",page.getUrl(),step);
+            logger.info("{}检测规则：第{}步",url,step);
             return true;
         }
         step++;
+        // URLs containing "huanqiu.com" are decided solely by rulerHuanqiuWuxiao:
+        // its result is returned directly and the remaining checks are skipped.
-        if (page.getUrl().get().contains("huanqiu.com"))
+        if (url.contains("huanqiu.com"))
         {
-        	logger.info("{}检测规则：第{}步",page.getUrl(),step);
+            logger.info("{}检测规则：第{}步",url,step);
             return rulerHuanqiuWuxiao(doc);
         }
         step++;//7
         if (rulerBucunzai(doc))
         {
-        	logger.info("{}检测规则：第{}步",page.getUrl(),step);
+            logger.info("{}检测规则：第{}步",url,step);
             return true;
         }
         step++;//8
         if (rulerKong(doc))
         {
-        	logger.info("{}检测规则：第{}步",page.getUrl(),step);
+            logger.info("{}检测规则：第{}步",url,step);
             return true;
         }
         step++;//9
         if (rulerZhaoshang(doc))
         {
-        	logger.info("{}检测规则：第{}步",page.getUrl(),step);
+            logger.info("{}检测规则：第{}步",url,step);
             return true;
         }
         step++;//11
         if (rulerYidian(doc))
         {
-        	logger.info("{}检测规则：第{}步",page.getUrl(),step);
+            logger.info("{}检测规则：第{}步",url,step);
             return true;
         }
         return false;
@@ -302,6 +358,12 @@ public class UrlLivePageProcessor implements PageProcessor{
                        return true;
                    }
                }
+                if (node.outerHtml().contains("meta")) {
+                    String meta = node.toString();
+                    if(meta.contains("公益404页面")) {
+                        return true;
+                    }
+                }
            }
        } catch (Exception e) {
            return false;

--- a/src/main/java/com/zhiwei/source_forward/downloader/MyDownLoader.java
+++ b/src/main/java/com/zhiwei/source_forward/downloader/MyDownLoader.java
-package com.zhiwei.source_forward.downloader;
-
-import java.io.IOException;
-import java.net.UnknownHostException;
-import java.nio.charset.Charset;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.http.HttpHost;
-import org.apache.http.HttpResponse;
-import org.apache.http.NameValuePair;
-import org.apache.http.client.config.CookieSpecs;
-import org.apache.http.client.config.RequestConfig;
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpUriRequest;
-import org.apache.http.client.methods.RequestBuilder;
-import org.apache.http.conn.ConnectTimeoutException;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.util.EntityUtils;
-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import us.codecraft.webmagic.Page;
-import us.codecraft.webmagic.Request;
-import us.codecraft.webmagic.Site;
-import us.codecraft.webmagic.Task;
-import us.codecraft.webmagic.downloader.AbstractDownloader;
-import us.codecraft.webmagic.downloader.HttpClientGenerator;
-import us.codecraft.webmagic.proxy.Proxy;
-import us.codecraft.webmagic.selector.PlainText;
-import us.codecraft.webmagic.utils.HttpConstant;
-import us.codecraft.webmagic.utils.UrlUtils;
-import us.codecraft.webmagic.utils.WMCollections;
-
-public class MyDownLoader extends AbstractDownloader{
-	 private Logger logger = LoggerFactory.getLogger(getClass());
-
-	    private final Map<String, CloseableHttpClient> httpClients = new HashMap<String, CloseableHttpClient>();
-
-	    private HttpClientGenerator httpClientGenerator = new HttpClientGenerator();
-
-	    private CloseableHttpClient getHttpClient(Site site, Proxy proxy) {
-	        if (site == null) {
-	            return httpClientGenerator.getClient(null, proxy);
-	        }
-	        
-	        String domain = site.getDomain();
-	        CloseableHttpClient httpClient = httpClients.get(domain);
-	        if (httpClient == null) {
-	            synchronized (this) {
-	                httpClient = httpClients.get(domain);
-	                if (httpClient == null) {
-	                    httpClient = httpClientGenerator.getClient(site, proxy);
-	                    httpClients.put(domain, httpClient);
-	                }
-	            }
-	        }
-	        return httpClient;
-	    }
-
-	    @Override
-	    public Page download(Request request, Task task){
-	        Site site = null;
-	        if (task != null) {
-	            site = task.getSite();
-	        }
-	        Set<Integer> acceptStatCode;
-	        String charset = null;
-	        Map<String, String> headers = null;
-	        if (site != null) {
-	            acceptStatCode = site.getAcceptStatCode();
-	            charset = site.getCharset();
-	            headers = site.getHeaders();
-	        } else {
-	            acceptStatCode = WMCollections.newHashSet(200);
-	        }
-	        logger.info("downloading page {}", request.getUrl());
-	        CloseableHttpResponse httpResponse = null;
-	        int statusCode=0;
-	        try {
-	            HttpHost proxyHost = null;
-	            Proxy proxy = null; //TODO
-	            if (site.getHttpProxyPool() != null && site.getHttpProxyPool().isEnable()) {
-	                proxy = site.getHttpProxyFromPool();
-	                proxyHost = proxy.getHttpHost();
-	            } else if(site.getHttpProxy()!= null){
-	                proxyHost = site.getHttpProxy();
-	            }
-	            
-	            HttpUriRequest httpUriRequest = getHttpUriRequest(request, site, headers, proxyHost);//���������˴���
-	            httpResponse = getHttpClient(site, proxy).execute(httpUriRequest);//getHttpClient�������˴�����֤
-	            statusCode = httpResponse.getStatusLine().getStatusCode();
-	            request.putExtra(Request.STATUS_CODE, statusCode);
-	            if (statusAccept(acceptStatCode, statusCode)) {
-	                Page page = handleResponse(request, charset, httpResponse, task);
-	                onSuccess(request);
-	                return page;
-	            } else {
-	                logger.warn("get page {} error, status code {} ",request.getUrl(),statusCode);
-	                return null;
-	            }
-	        }catch (ConnectTimeoutException e ) {
-	            logger.warn("download page {} error", request.getUrl(), e);
-	            onError(request);
-	            Page page = new Page();
-	            page.setStatusCode(404);
-	            page.setUrl(new PlainText(request.getUrl()));
-	            page.setRawText(null);
-	            return page;
-	        }catch (UnknownHostException e ) {
-	            logger.warn("download page {} error", request.getUrl(), e);
-	            onError(request);
-	            Page page = new Page();
-	            page.setStatusCode(404);
-	            page.setUrl(new PlainText(request.getUrl()));
-	            page.setRawText(null);
-	            return page;
-	        }catch (IOException e ) {
-	            logger.warn("download page {} error", request.getUrl(), e);
-	            if (site.getCycleRetryTimes() > 0) {
-	                return addToCycleRetry(request, site);
-	            }
-	            onError(request);
-	            return null;
-	        } finally {
-	        	request.putExtra(Request.STATUS_CODE, statusCode);
-	            if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) {
-	                site.returnHttpProxyToPool((HttpHost) request.getExtra(Request.PROXY), (Integer) request
-	                        .getExtra(Request.STATUS_CODE));
-	            }
-	            try {
-	                if (httpResponse != null) {
-	                    //ensure the connection is released back to pool
-	                    EntityUtils.consume(httpResponse.getEntity());
-	                }
-	            } catch (IOException e) {
-	            	logger.warn("close response fail", e);
-	            }
-	        }
-	    }
-
-	    @Override
-	    public void setThread(int thread) {
-	        httpClientGenerator.setPoolSize(thread);
-	    }
-
-	    protected boolean statusAccept(Set<Integer> acceptStatCode, int statusCode) {
-	        return acceptStatCode.contains(statusCode);
-	    }
-
-	    protected HttpUriRequest getHttpUriRequest(Request request, Site site, Map<String, String> headers,HttpHost proxy) {
-	        RequestBuilder requestBuilder = selectRequestMethod(request).setUri(request.getUrl());
-	        if (headers != null) {
-	            for (Map.Entry<String, String> headerEntry : headers.entrySet()) {
-	                requestBuilder.addHeader(headerEntry.getKey(), headerEntry.getValue());
-	            }
-	        }
-	        @SuppressWarnings("deprecation")
-			RequestConfig.Builder requestConfigBuilder = RequestConfig.custom()
-	                .setConnectionRequestTimeout(site.getTimeOut())
-	                .setSocketTimeout(site.getTimeOut())
-	                .setConnectTimeout(site.getTimeOut())
-	                .setCookieSpec(CookieSpecs.BEST_MATCH);
-	        if (proxy !=null) {
-				requestConfigBuilder.setProxy(proxy);
-				request.putExtra(Request.PROXY, proxy);
-			}
-	        requestBuilder.setConfig(requestConfigBuilder.build());
-	        return requestBuilder.build();
-	    }
-
-	    protected RequestBuilder selectRequestMethod(Request request) {
-	        String method = request.getMethod();
-	        if (method == null || method.equalsIgnoreCase(HttpConstant.Method.GET)) {
-	            //default get
-	            return RequestBuilder.get();
-	        } else if (method.equalsIgnoreCase(HttpConstant.Method.POST)) {
-	            RequestBuilder requestBuilder = RequestBuilder.post();
-	            NameValuePair[] nameValuePair = (NameValuePair[]) request.getExtra("nameValuePair");
-	            if (nameValuePair != null && nameValuePair.length > 0) {
-	                requestBuilder.addParameters(nameValuePair);
-	            }
-	            return requestBuilder;
-	        } else if (method.equalsIgnoreCase(HttpConstant.Method.HEAD)) {
-	            return RequestBuilder.head();
-	        } else if (method.equalsIgnoreCase(HttpConstant.Method.PUT)) {
-	            return RequestBuilder.put();
-	        } else if (method.equalsIgnoreCase(HttpConstant.Method.DELETE)) {
-	            return RequestBuilder.delete();
-	        } else if (method.equalsIgnoreCase(HttpConstant.Method.TRACE)) {
-	            return RequestBuilder.trace();
-	        }
-	        throw new IllegalArgumentException("Illegal HTTP Method " + method);
-	    }
-
-	    protected Page handleResponse(Request request, String charset, HttpResponse httpResponse, Task task) throws IOException {
-	        String content = getContent(charset, httpResponse);
-	        
-	        Page page = new Page();
-	        page.setRawText(content);
-	        page.setUrl(new PlainText(request.getUrl()));
-	        page.setRequest(request);
-	        page.setStatusCode(httpResponse.getStatusLine().getStatusCode());
-	        return page;
-	    }
-
-	    protected String getContent(String charset, HttpResponse httpResponse) throws IOException {
-	        if (charset == null) {
-	            byte[] contentBytes = IOUtils.toByteArray(httpResponse.getEntity().getContent());
-	            
-	            String htmlCharset = getHtmlCharset(httpResponse, contentBytes);
-	            if (htmlCharset != null) {
-	                return new String(contentBytes, htmlCharset);
-	            } else {
-	                logger.warn("Charset autodetect failed, use {} as charset. Please specify charset in Site.setCharset()", Charset.defaultCharset());
-	                return new String(contentBytes);
-	            }
-	        } else {
-	            return IOUtils.toString(httpResponse.getEntity().getContent(), charset);
-	        }
-	    }
-
-	    /**
-	     * Detects the charset of an HTML response.
-	     * <p>
-	     * The charset named in the HTTP {@code Content-Type} header is tried first. When the header
-	     * is absent or yields no charset, the body is decoded with the platform default charset and
-	     * its {@code <meta>} tags are scanned for an HTML 4.01 {@code content="...; charset=X"}
-	     * declaration or an HTML5 {@code charset="X"} attribute; the first match wins.
-	     *
-	     * @param httpResponse response whose entity supplies the Content-Type header
-	     * @param contentBytes raw body bytes
-	     * @return the detected charset name, or a null/blank value when none was found
-	     * @throws IOException if decoding the body with the default charset name is unsupported
-	     */
-	    protected String getHtmlCharset(HttpResponse httpResponse, byte[] contentBytes) throws IOException {
-	        String charset = null;
-	        // 1. Encoding in the HTTP Content-Type header. The entity reports a null header when the
-	        //    server sent none, so guard before dereferencing it.
-	        if (httpResponse.getEntity().getContentType() != null) {
-	            charset = UrlUtils.getCharset(httpResponse.getEntity().getContentType().getValue());
-	        }
-	        if (StringUtils.isNotBlank(charset)) {
-	            logger.debug("Auto get charset: {}", charset);
-	            return charset;
-	        }
-	        // Decode once with the default charset just to be able to parse the meta tags.
-	        String content = new String(contentBytes, Charset.defaultCharset().name());
-	        // 2. Charset declared in a meta tag.
-	        if (StringUtils.isNotEmpty(content)) {
-	            Document document = Jsoup.parse(content);
-	            for (Element meta : document.select("meta")) {
-	                // 2.1 html4.01 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
-	                String metaContent = meta.attr("content");
-	                int charsetPos = metaContent.indexOf("charset");
-	                if (charsetPos != -1) {
-	                    // "charset" without "=value" used to throw ArrayIndexOutOfBoundsException.
-	                    String[] parts = metaContent.substring(charsetPos).split("=");
-	                    if (parts.length > 1 && StringUtils.isNotBlank(parts[1])) {
-	                        charset = parts[1].trim();
-	                        break;
-	                    }
-	                }
-	                // 2.2 html5 <meta charset="UTF-8" />
-	                String metaCharset = meta.attr("charset");
-	                if (StringUtils.isNotEmpty(metaCharset)) {
-	                    charset = metaCharset;
-	                    break;
-	                }
-	            }
-	        }
-	        logger.debug("Auto get charset: {}", charset);
-	        // 3. TODO: use a detector such as cpdetector on the raw bytes.
-	        return charset;
-	    }
-}
--- a/src/main/java/com/zhiwei/source_forward/pipeline/DataPipeline.java
+++ b/src/main/java/com/zhiwei/source_forward/pipeline/DataPipeline.java
-package com.zhiwei.source_forward.pipeline;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import us.codecraft.webmagic.ResultItems;
-import us.codecraft.webmagic.Task;
-import us.codecraft.webmagic.pipeline.Pipeline;
-/**
- * Pipeline that collects crawl results in memory, one list per result key
- * ({@code "content"}, {@code "mediaSelf"}, {@code "sourceForward"}, {@code "urlLive"}).
- * The lists are plain {@code ArrayList}s.
- * NOTE(review): confirm whether process() can be called from several spider threads at once.
- *
- * @author hero
- * @date 2018-06-30 09:54:27
- */
-public class DataPipeline implements Pipeline {
-	private List<Map<String, Object>> contentDataList = new ArrayList<Map<String, Object>>();
-	private List<Map<String, Object>> mediaSelfDataList = new ArrayList<Map<String, Object>>();
-	private List<Map<String, Object>> sourceForwardDataList = new ArrayList<Map<String, Object>>();
-	private List<Map<String, Object>> urlLivedataList = new ArrayList<Map<String, Object>>();
-
-	public DataPipeline() {
-		super();
-	}
-
-	/**
-	 * Reads the four known keys from {@code resultItems} and appends every value that is
-	 * present (non-null) to its matching list.
-	 */
-	@Override
-	public void process(ResultItems resultItems, Task task) {
-		// Look up all four values first, then store them, as the original did.
-		Map<String, Object> content = resultItems.get("content");
-		Map<String, Object> mediaSelf = resultItems.get("mediaSelf");
-		Map<String, Object> sourceForward = resultItems.get("sourceForward");
-		Map<String, Object> urlLive = resultItems.get("urlLive");
-		addIfPresent(contentDataList, content);
-		addIfPresent(mediaSelfDataList, mediaSelf);
-		addIfPresent(sourceForwardDataList, sourceForward);
-		addIfPresent(urlLivedataList, urlLive);
-	}
-
-	/** Appends {@code item} to {@code target} unless it is null. */
-	private static void addIfPresent(List<Map<String, Object>> target, Map<String, Object> item) {
-		if (item != null) {
-			target.add(item);
-		}
-	}
-
-	public List<Map<String, Object>> getContentDataList() {
-		return contentDataList;
-	}
-
-	public void setContentDataList(List<Map<String, Object>> contentDataList) {
-		this.contentDataList = contentDataList;
-	}
-
-	public List<Map<String, Object>> getMediaSelfDataList() {
-		return mediaSelfDataList;
-	}
-
-	public void setMediaSelfDataList(List<Map<String, Object>> mediaSelfDataList) {
-		this.mediaSelfDataList = mediaSelfDataList;
-	}
-
-	public List<Map<String, Object>> getSourceForwardDataList() {
-		return sourceForwardDataList;
-	}
-
-	public void setSourceForwardDataList(List<Map<String, Object>> sourceForwardDataList) {
-		this.sourceForwardDataList = sourceForwardDataList;
-	}
-
-	public List<Map<String, Object>> getUrlLivedataList() {
-		return urlLivedataList;
-	}
-
-	public void setUrlLivedataList(List<Map<String, Object>> urlLivedataList) {
-		this.urlLivedataList = urlLivedataList;
-	}
-
-}
--- a/src/main/java/com/zhiwei/source_forward/run/ContentMatch.java
+++ b/src/main/java/com/zhiwei/source_forward/run/ContentMatch.java
 package com.zhiwei.source_forward.run;

+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;

-import com.zhiwei.source_forward.crawler.ContentPageProcessor;
-import com.zhiwei.source_forward.downloader.MyDownLoader;
-import com.zhiwei.source_forward.pipeline.DataPipeline;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;

-import us.codecraft.webmagic.Spider;
+import com.zhiwei.source_forward.bean.ContentBean;
+import com.zhiwei.source_forward.bean.ContentBean.Attribution;
+import com.zhiwei.source_forward.crawler.ContentCrawler;
+import com.zhiwei.source_forward.util.ContentDataCallback;

 public class ContentMatch {
-	/**
-	 * @Title: getSourceForward 
-	 * @author hero 
-	 * @Description: 验证文章是否转发
-	 * @param @param dataMap
-	 * @param @return 设定文件 
-	 * @return Map<String,Map<String,Object>> 返回类型
-	 */
-	public static Map<String,Map<String,Object>> getContent(Map<String,Map<String,Object>> dataMap){
-		//启动验证来源程序
-		DataPipeline pipeline = new DataPipeline();
-		Spider spider = Spider.create(new ContentPageProcessor());
-		for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){
-			spider.addUrl(entry.getKey());
+
+    private static final Logger logger = LogManager.getLogger(ContentMatch.class);
+
+    /**
+     * Crawls the given URLs and returns the content beans delivered by the crawler.
+     *
+     * @param urlList URLs to crawl
+     * @return the beans received through the crawler callback; an empty list (never
+     *         {@code null}) when the crawl failed
+     */
+    public static List<ContentBean> getContentMatch(List<String> urlList){
+        return ContentMatchCrawlerThread.getContentMatch(urlList);
     }
-		spider.setDownloader(new MyDownLoader());
-		spider.addPipeline(pipeline);
-		spider.thread(5).run();
-		
-		List<Map<String,Object>> contentList = pipeline.getContentDataList();
-		for(Map<String,Object> contentMap : contentList){
-			String url = contentMap.get("url")+"";
-			//搜集原文
-			if(dataMap.containsKey(url)){
-				Map<String,Object> data = dataMap.get(url);
-				String content = contentMap.get("content")+"";
-				data.put("content", content);
-				dataMap.put(url, data);
+    
+    public static void main(String[] args) {
+        List<String> urlList = new ArrayList<>();
+        urlList.add("http://sh.qihoo.com/pc/99493b3bf136d8e20?sign=360_e39369d1");
+        urlList.add("http://news.ctocio.com.cn/383/14543883.shtml");
+        urlList.add("http://www.jn001.com/news/2018-07/05/content_561091.htm");
+        urlList.add("http://www.ca800.com/fFa8D/bOTUBC1QfF/40944.aspx");
+        urlList.add("http://sh.qihoo.com/pc/988470164f6c5ca14?sign=360_e39369d1");
+        urlList.add("http://news.jstv.com/a/20180705/1530731642686.shtml?jsbcApp=1");
+        urlList.add("https://tech.sina.cn/i/gn/2018-07-05/detail-ihexfcvi8155439.d.html?pos=18");
+        urlList.add("http://sh.qihoo.com/pc/983b3d157f91af18b?sign=360_e39369d1");
+        urlList.add("http://china.rednet.cn/c/2018/07/05/4671927.htm");
+        urlList.add("http://news.enorth.com.cn/system/2018/07/05/035782857.shtml");
+        urlList.add("https://www.toutiao.com/i6573922350037729796/");
+        urlList.add("http://news.cnhubei.com/xw/sh/201807/t4132048.shtml");
+        urlList.add("https://www.toutiao.com/a6573774143949373956/");
+        List<ContentBean> da = ContentMatch.getContentMatch(urlList);
+        for(ContentBean sfb : da) {
+            System.out.println(sfb.toString());
         }
     }
-		return dataMap;
+    
+    /**
+     * Holder for the blocking crawl helper.
+     * NOTE(review): extends Thread but declares no run() and is never started in this file;
+     * confirm whether the superclass is needed.
+     */
+    static class ContentMatchCrawlerThread extends Thread{
+
+      /**
+       * Submits all URLs to a new {@code ContentCrawler}, waits on {@code await()} and returns
+       * what the callback collected. Any failure is logged and produces an empty list.
+       */
+      private static List<ContentBean> getContentMatch(List<String> urlList){
+          try{
+              ContentCrawler crawler = new ContentCrawler();
+              // synchronized list: presumably the callback runs on crawler threads - TODO confirm
+              final List<ContentBean> list = Collections.synchronizedList(new ArrayList<ContentBean>());
+              ContentDataCallback callback = new ContentDataCallback() {
+
+                    @Override
+                    public void onData(ContentBean data, Attribution attr) {
+                        list.add(data);
+                        logger.info("列表大小：：：{}",list.size());
                     }
+                    
+              };
+              crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await();
+              return list;
+          }catch (Exception e){
+              // The old e.fillInStackTrace() only reset the trace and hid the failure.
+              if (e instanceof InterruptedException) {
+                  Thread.currentThread().interrupt();
+              }
+              logger.error("Content crawl failed", e);
+          }
+          // Callers iterate the result directly, so hand back an empty list instead of null.
+          return new ArrayList<ContentBean>();
+      }
+  }
+    
+    
 }
--- a/src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
+++ b/src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
+package com.zhiwei.source_forward.run;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
+import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution;
+import com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler;
+import com.zhiwei.source_forward.util.MediaSelfSourceDataCallBack;
+
+public class MediaSelfSource {
+    
+    private static Logger logger = LogManager.getLogger(MediaSelfSource.class);
+    
+    public static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList) {
+        List<MediaSelfSourceBean> list = MediaSelfSourceCrawlerThread.getMediaSelfSource(urlList);
+        return list;
+    }
+    
+    public static void main(String[] args) {
+        List<String> urlList = new ArrayList<>();
+        urlList.add("https://baijiahao.baidu.com/s?id=1606950814338460255&wfr=spider&for=pc&qq-pf-to=pcqq.c2c");
+        List<MediaSelfSourceBean> da = MediaSelfSource.getMediaSelfSource(urlList);
+        for(MediaSelfSourceBean mssb : da) {
+            System.out.println(mssb.toString());
+        }
+    }
+    
+    static class MediaSelfSourceCrawlerThread extends Thread{
+
+        private static List<MediaSelfSourceBean> getMediaSelfSource(List<String> urlList){
+            try{
+                MediaSelfSourceCrawler crawler = new MediaSelfSourceCrawler();
+                List<MediaSelfSourceBean> list = Collections.synchronizedList(new ArrayList<MediaSelfSourceBean>());
+                MediaSelfSourceDataCallBack callback = new MediaSelfSourceDataCallBack() {
+
+                      @Override
+                      public void onData(MediaSelfSourceBean data, Attribution attr) {
+                          list.add(data);
+                          logger.info("列表大小：：：{}",list.size());
+                      }
+                      
+                };
+                crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await();
+                return list;
+            }catch (Exception e){
+                e.fillInStackTrace();
+            }
+            return null;
+        }
+    }
+       
+    
+}
--- a/src/main/java/com/zhiwei/source_forward/run/SourceForward.java
+++ b/src/main/java/com/zhiwei/source_forward/run/SourceForward.java
 package com.zhiwei.source_forward.run;

-import java.util.HashMap;
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;

-import com.zhiwei.source_forward.crawler.MediaSelfSourcePageProcessor;
-import com.zhiwei.source_forward.crawler.SourceForwardPageProcessor;
-import com.zhiwei.source_forward.downloader.MyDownLoader;
-import com.zhiwei.source_forward.pipeline.DataPipeline;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;

-import us.codecraft.webmagic.Spider;
+import com.zhiwei.source_forward.bean.SourceForwardBean;
+import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution;
+import com.zhiwei.source_forward.crawler.SourceForwardCrawler;
+import com.zhiwei.source_forward.util.SourceForwardDataCallBack;

 /**
 * @ClassName: SourceForward 
@@ -20,149 +20,57 @@ import us.codecraft.webmagic.Spider;
 */
 public class SourceForward {
 	
-	/**
-	 * @Title: getSourceForward 
-	 * @author hero 
-	 * @Description: 验证文章是否转发
-	 * @param @param dataMap
-	 * @param @return 设定文件 
-	 * @return Map<String,Map<String,Object>> 返回类型
-	 */
-	public static Map<String,Map<String,Object>> getSourceForward(Map<String,Map<String,Object>> dataMap){
-		//启动验证来源程序
-		DataPipeline pipeline = new DataPipeline();
-		Spider spider = Spider.create(new SourceForwardPageProcessor());
-		for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){
-			spider.addUrl(entry.getKey());
-		}
-		spider.setDownloader(new MyDownLoader());
-		spider.addPipeline(pipeline);
-		spider.thread(5).run();
+    private static final Logger logger = LogManager.getLogger(SourceForward.class);
     
-		List<Map<String,Object>> sourceForwardList = pipeline.getSourceForwardDataList();
-		for(Map<String,Object> sourceMap : sourceForwardList){
-			String url = sourceMap.get("url")+"";
-			String root_source = sourceMap.get("root_source")!=null?sourceMap.get("root_source").toString():null;
-			String isForwardWX = sourceMap.get("isforward")!=null?sourceMap.get("isforward").toString():null;
-			String channel = sourceMap.get("channel")+"";
-			//整合数据及验证转发原创
-			if(dataMap.containsKey(url)){
-				Map<String,Object> data = dataMap.get(url);
-				String source = data.get("来源")+"";
-				String isForward = "转发";
-				if(root_source == null){
-					isForward = "原创";
-				}else if(root_source.toUpperCase().trim().equals(source.toUpperCase().trim())){
-					isForward = "原创";
+	/**
+	 * Crawls the given URLs and returns the source/forward beans delivered by the crawler.
+	 *
+	 * @param urlList URLs to crawl
+	 * @return the beans received through the crawler callback; an empty list (never
+	 *         {@code null}) when the crawl failed
+	 */
+	public static List<SourceForwardBean> getSourceForward(List<String> urlList){
+        List<SourceForwardBean> dataList = SourceForwardCrawlerThread.getSourceForward(urlList);
+        // The helper yields null when the crawl failed; callers iterate the result directly.
+        return dataList != null ? dataList : new ArrayList<SourceForwardBean>();
     }
     
-				if(url.contains("mp.weixin.qq.com")){
-					isForward = isForwardWX;
-				}else{
-					data.put("原来源", root_source);
-					data.put("频道", channel);
+	public static void main(String[] args) {
+        List<String> urlList = new ArrayList<>();
+        urlList.add("http://sh.qihoo.com/pc/99493b3bf136d8e20?sign=360_e39369d1");
+        urlList.add("http://news.ctocio.com.cn/383/14543883.shtml");
+        urlList.add("http://www.jn001.com/news/2018-07/05/content_561091.htm");
+        urlList.add("http://www.ca800.com/fFa8D/bOTUBC1QfF/40944.aspx");
+        urlList.add("http://sh.qihoo.com/pc/988470164f6c5ca14?sign=360_e39369d1");
+        urlList.add("http://news.jstv.com/a/20180705/1530731642686.shtml?jsbcApp=1");
+        urlList.add("https://tech.sina.cn/i/gn/2018-07-05/detail-ihexfcvi8155439.d.html?pos=18");
+        urlList.add("http://sh.qihoo.com/pc/983b3d157f91af18b?sign=360_e39369d1");
+        urlList.add("http://china.rednet.cn/c/2018/07/05/4671927.htm");
+        urlList.add("http://news.enorth.com.cn/system/2018/07/05/035782857.shtml");
+        urlList.add("https://www.toutiao.com/i6573922350037729796/");
+        urlList.add("http://news.cnhubei.com/xw/sh/201807/t4132048.shtml");
+        urlList.add("https://www.toutiao.com/a6573774143949373956/");
+        List<SourceForwardBean> da = SourceForward.getSourceForward(urlList);
+        for(SourceForwardBean sfb : da) {
+            System.out.println(sfb.toString());
         }
-				
-				data.put("是否转发", isForward);
-				dataMap.put(url, data);
-			}
-		}
-		
-		return dataMap;
     }
 	
+    /**
+     * Holder for the blocking crawl helper.
+     * NOTE(review): extends Thread but declares no run() and is never started in this file;
+     * confirm whether the superclass is needed.
+     */
+    static class SourceForwardCrawlerThread extends Thread{

+      /**
+       * Submits all URLs to a new {@code SourceForwardCrawler}, waits on {@code await()} and
+       * returns what the callback collected, or {@code null} after logging a failure.
+       */
+      private static List<SourceForwardBean> getSourceForward(List<String> urlList){
+          try{
+              SourceForwardCrawler crawler = new SourceForwardCrawler();
+              // synchronized list: presumably the callback runs on crawler threads - TODO confirm
+              final List<SourceForwardBean> list = Collections.synchronizedList(new ArrayList<SourceForwardBean>());
+              SourceForwardDataCallBack callback = new SourceForwardDataCallBack() {

-	
-	/**
-	 * @Title: getMediaSelfSource 
-	 * @author hero 
-	 * @Description: 根据链接匹配自媒体号名称 
-	 * @param @param dataMap
-	 * @param @return 设定文件 
-	 * @return Map<String,Map<String,Object>> 返回类型
-	 */
-	public static Map<String,Map<String,Object>> getMediaSelfSource(Map<String,Map<String,Object>> dataMap){
-		//启动验证来源程序
-		DataPipeline pipeline = new DataPipeline();
-		Spider spider = Spider.create(new MediaSelfSourcePageProcessor());
-		for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){
-			spider.addUrl(entry.getKey());
+                    @Override
+                    public void onData(SourceForwardBean data, Attribution attr) {
+                        list.add(data);
+                        logger.info("列表大小：：：{}",list.size());
                     }
-		spider.setDownloader(new MyDownLoader());
-		spider.addPipeline(pipeline);
-		spider.thread(5).run();
                     
-		List<Map<String,Object>> sourceForwardList = pipeline.getMediaSelfDataList();
-		for(Map<String,Object> sourceMap : sourceForwardList){
-			String url = sourceMap.get("url")+"";
-			//整合数据及验证转发原创
-			if(dataMap.containsKey(url)){
-				Map<String,Object> data = dataMap.get(url);
-				data.put("自媒体号", sourceMap.get("mediaself"));
-				data.put("频道", sourceMap.get("channel"));
-				dataMap.put(url, data);
-			}
-		}
-		return dataMap;
-	}
-	
-	/**
-	 * @Title: getMediaSelfSource 
-	 * @author hero 
-	 * @Description: 根据链接匹配自媒体账号
-	 * @param @param urlList
-	 * @param @return 设定文件 
-	 * @return Map<String,String> 返回类型
-	 */
-	public static Map<String,String> getMediaSelfSource(List<String> urlList){
-		//启动验证来源程序
-		Map<String,String> dataMap = new HashMap<String,String>();
-		DataPipeline pipeline = new DataPipeline();
-		Spider spider = Spider.create(new MediaSelfSourcePageProcessor());
-		for(String url : urlList){
-			spider.addUrl(url);
-			dataMap.put(url, null);
-		}
-		spider.setDownloader(new MyDownLoader());
-		spider.addPipeline(pipeline);
-		spider.thread(5).run();
-		
-		List<Map<String,Object>> sourceForwardList = pipeline.getMediaSelfDataList();
-		for(Map<String,Object> sourceMap : sourceForwardList){
-			String url = sourceMap.get("url")+"";
-			//整合数据及验证转发原创
-			if(dataMap.containsKey(url)){
-				dataMap.put(url, sourceMap.get("mediaself").toString());
-			}
-		}
-		return dataMap;
-	}
-	
-	
-	/**
-	 * 
-	 * @Title: getMediaSelfSource 
-	 * @author hero 
-	 * @Description: 根据链接匹配自媒体账号
-	 * @param @param url
-	 * @param @return 设定文件 
-	 * @return String 返回类型
-	 */
-	public static String getMediaSelfSource(String url){
-		//启动验证来源程序
-		DataPipeline pipeline = new DataPipeline();
-		Spider spider = Spider.create(new MediaSelfSourcePageProcessor());
-		spider.addUrl(url);
-		spider.setDownloader(new MyDownLoader());
-		spider.addPipeline(pipeline);
-		spider.thread(1).run();
-		
-		List<Map<String,Object>> sourceForwardList = pipeline.getMediaSelfDataList();
-		for(Map<String,Object> sourceMap : sourceForwardList){
-			return sourceMap.get("mediaself").toString();
+              };
+              crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await();
+              return list;
+          }catch (Exception e){
+              // The old e.fillInStackTrace() only reset the trace and hid the failure.
+              if (e instanceof InterruptedException) {
+                  Thread.currentThread().interrupt();
+              }
+              logger.error("SourceForward crawl failed", e);
           }
           return null;
       }
+  }
 	
 }
--- a/src/main/java/com/zhiwei/source_forward/run/URLLive.java
+++ b/src/main/java/com/zhiwei/source_forward/run/URLLive.java
 package com.zhiwei.source_forward.run;

+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;

-import com.zhiwei.source_forward.crawler.UrlLivePageProcessor;
-import com.zhiwei.source_forward.pipeline.DataPipeline;
-
-import us.codecraft.webmagic.Spider;
+import com.zhiwei.source_forward.bean.UrlLiveBean;
+import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution;
+import com.zhiwei.source_forward.crawler.UrlLiveCrawler;
+import com.zhiwei.source_forward.util.UrlLiveDataCallback;

 /**
 * @ClassName: URLLive 
@@ -17,7 +19,6 @@ import us.codecraft.webmagic.Spider;
 */
 public class URLLive {
    
-	
    /**
     * @Title: verificationURLLive 
     * @author hero 
@@ -27,35 +28,60 @@ public class URLLive {
     * @return Map<String,Map<String,Object>> 返回类型
     */
    public static Map<String,Map<String,Object>> verificationURLLive(Map<String,Map<String,Object>> dataMap){
+        List<String> urlList = new ArrayList<>();
        //启动验证链接是否有效程序程序
-		DataPipeline pipeline = new DataPipeline();
-		Spider spider = Spider.create(new UrlLivePageProcessor());
        for(Entry<String,Map<String,Object>> entry : dataMap.entrySet()){
-			spider.addUrl(entry.getKey());
+            urlList.add(entry.getKey());
        }
-		spider.addPipeline(pipeline);
-		spider.thread(5).run();
        
        //验证数据是否已删除
-		List<Map<String,Object>> dataList = pipeline.getUrlLivedataList();
-		for(Map<String,Object> data : dataList){
-			String url = data.get("url")+"";
+        List<UrlLiveBean> dataList = UrlLiveCrawlerThread.getUrlLiveCrawle(urlList);
+        for(UrlLiveBean ub : dataList){
+            String url = ub.getUrl();
            if(!url.contains("http")){
                url = "http://"+url;
            }
            if(!url.contains("www")){
                url = url.replace("://", "://www.");
            }
-			boolean live = (boolean)data.get("live");
+            boolean live = ub.isLive();
            if(dataMap.containsKey(url)){
                Map<String,Object> map = dataMap.get(url);
                map.put("是否删除", live);
                dataMap.put(url, map);
            }
-			
        }
        return dataMap;
    }

+	/**
+	 * Checks the given URLs by delegating to {@code UrlLiveCrawlerThread.getUrlLiveCrawle}.
+	 *
+	 * @param urlList URLs to check
+	 * @return whatever the crawl helper returns for these URLs
+	 */
+	public static List<UrlLiveBean> verificationURLLive(List<String> urlList){
+        // Start the URL liveness check and hand its result straight back.
+        return UrlLiveCrawlerThread.getUrlLiveCrawle(urlList);
+    }
+	
+	/**
+	 * Holder for the blocking crawl helper.
+	 * NOTE(review): extends Thread but declares no run() and is never started in this file;
+	 * confirm whether the superclass is needed.
+	 */
+	static class UrlLiveCrawlerThread extends Thread{
+
+      /**
+       * Submits all URLs to a new {@code UrlLiveCrawler}, waits on {@code await()} and returns
+       * what the callback collected. Any failure is reported and produces an empty list.
+       */
+      private static List<UrlLiveBean> getUrlLiveCrawle(List<String> urlList){
+          try{
+              UrlLiveCrawler crawler = new UrlLiveCrawler();
+              // synchronized list: presumably the callback runs on crawler threads - TODO confirm
+              final List<UrlLiveBean> list = Collections.synchronizedList(new ArrayList<UrlLiveBean>());
+              UrlLiveDataCallback callback = new UrlLiveDataCallback() {
+
+                    @Override
+                    public void onData(UrlLiveBean data, Attribution attr) {
+                        list.add(data);
+                        System.out.println("列表大小：：："+list.size());
+                    }
+                    
+              };
+              crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await();
+              return list;
+          }catch (Exception e){
+              // The old e.fillInStackTrace() only reset the trace and hid the failure.
+              if (e instanceof InterruptedException) {
+                  Thread.currentThread().interrupt();
+              }
+              // This class has no logger in scope, so report on stderr.
+              e.printStackTrace();
+          }
+          // Callers iterate the result directly, so hand back an empty list instead of null.
+          return new ArrayList<UrlLiveBean>();
+      }
+  }
 	
 }
--- a/src/main/java/com/zhiwei/source_forward/spider/MySpider.java
+++ b/src/main/java/com/zhiwei/source_forward/spider/MySpider.java
-package com.zhiwei.source_forward.spider;
-
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Date;
-import java.util.List;
-import java.util.UUID;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.locks.Condition;
-import java.util.concurrent.locks.ReentrantLock;
-import org.apache.commons.collections.CollectionUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import us.codecraft.webmagic.Page;
-import us.codecraft.webmagic.Request;
-import us.codecraft.webmagic.Site;
-import us.codecraft.webmagic.SpiderListener;
-import us.codecraft.webmagic.Task;
-import us.codecraft.webmagic.downloader.Downloader;
-import us.codecraft.webmagic.downloader.HttpClientDownloader;
-import us.codecraft.webmagic.pipeline.CollectorPipeline;
-import us.codecraft.webmagic.pipeline.ConsolePipeline;
-import us.codecraft.webmagic.pipeline.Pipeline;
-import us.codecraft.webmagic.pipeline.ResultItemsCollectorPipeline;
-import us.codecraft.webmagic.processor.PageProcessor;
-import us.codecraft.webmagic.scheduler.QueueScheduler;
-import us.codecraft.webmagic.scheduler.Scheduler;
-import us.codecraft.webmagic.thread.CountableThreadPool;
-import us.codecraft.webmagic.utils.UrlUtils;
-import us.codecraft.webmagic.utils.WMCollections;
-
-public class MySpider implements Runnable, Task {
-
-	    protected Downloader downloader;
-
-	    protected List<Pipeline> pipelines = new ArrayList<Pipeline>();
-
-	    protected PageProcessor pageProcessor;
-
-	    protected List<Request> startRequests;
-
-	    protected Site site;
-
-	    protected String uuid;
-
-	    protected Scheduler scheduler = new QueueScheduler();
-
-	    protected Logger logger = LoggerFactory.getLogger(getClass());
-
-	    protected CountableThreadPool threadPool;
-
-	    protected ExecutorService executorService;
-
-	    protected int threadNum = 1;
-
-	    protected AtomicInteger stat = new AtomicInteger(STAT_INIT);
-
-	    protected boolean exitWhenComplete = true;
-
-	    protected final static int STAT_INIT = 0;
-
-	    protected final static int STAT_RUNNING = 1;
-
-	    protected final static int STAT_STOPPED = 2;
-
-	    protected boolean spawnUrl = true;
-
-	    protected boolean destroyWhenExit = true;
-
-	    private ReentrantLock newUrlLock = new ReentrantLock();
-
-	    private Condition newUrlCondition = newUrlLock.newCondition();
-
-	    private List<SpiderListener> spiderListeners;
-
-	    private final AtomicLong pageCount = new AtomicLong(0);
-
-	    private Date startTime;
-
-	    private int emptySleepTime = 30000;
-
-	    /**
-	     * Static factory: builds a spider around the given page processor.
-	     *
-	     * @param pageProcessor processor handed to the constructor
-	     * @return the newly created spider
-	     * @see PageProcessor
-	     */
-	    public static MySpider create(PageProcessor pageProcessor) {
-	        final MySpider spider = new MySpider(pageProcessor);
-	        return spider;
-	    }
-
-	    /**
-	     * Creates a spider for the given page processor. The site and the initial start
-	     * requests are both read from {@code pageProcessor.getSite()}.
-	     *
-	     * @param pageProcessor processor whose site configuration seeds this spider
-	     */
-	    public MySpider(PageProcessor pageProcessor) {
-	        this.pageProcessor = pageProcessor;
-	        this.site = pageProcessor.getSite();
-	        this.startRequests = pageProcessor.getSite().getStartRequests();
-	    }
-
-	    /**
-	     * Replaces the start requests with requests converted from the given URLs.<br>
-	     * Overrides the start requests taken from the Site in the constructor.
-	     *
-	     * @param startUrls URLs to start crawling from
-	     * @return this
-	     */
-	    public MySpider startUrls(List<String> startUrls) {
-	        checkIfRunning();
-	        final List<Request> converted = UrlUtils.convertToRequests(startUrls);
-	        this.startRequests = converted;
-	        return this;
-	    }
-
-	    /**
-	     * Replaces the start requests of this spider with the given list (stored by reference).<br>
-	     * Overrides the start requests taken from the Site in the constructor.
-	     *
-	     * @param startRequests requests to start crawling from
-	     * @return this
-	     */
-	    public MySpider startRequest(List<Request> startRequests) {
-	        checkIfRunning();
-	        this.startRequests = startRequests;
-	        return this;
-	    }
-
-	    /**
-	     * Sets the uuid of this spider.<br>
-	     * Default uuid is domain of site.<br>
-	     *
-	     * @param uuid uuid to store
-	     * @return this
-	     */
-	    public MySpider setUUID(String uuid) {
-	        this.uuid = uuid;
-	        return this;
-	    }
-
-	    /**
-	     * Sets the scheduler for this spider by delegating to {@code setScheduler}.
-	     *
-	     * @param scheduler scheduler
-	     * @return this
-	     * @see #setScheduler(us.codecraft.webmagic.scheduler.Scheduler)
-	     * @deprecated use {@link #setScheduler(us.codecraft.webmagic.scheduler.Scheduler)} instead
-	     */
-	    @Deprecated
-	    public MySpider scheduler(Scheduler scheduler) {
-	        return setScheduler(scheduler);
-	    }
-
-	    /**
-	     * Installs a new scheduler and moves every request still queued in the previous
-	     * scheduler over to it.
-	     *
-	     * @param scheduler scheduler to use from now on
-	     * @return this
-	     * @see Scheduler
-	     * @since 0.2.1
-	     */
-	    public MySpider setScheduler(Scheduler scheduler) {
-	        checkIfRunning();
-	        final Scheduler previous = this.scheduler;
-	        this.scheduler = scheduler;
-	        if (previous == null) {
-	            return this;
-	        }
-	        // Drain the old scheduler until it has nothing left to poll.
-	        for (Request pending = previous.poll(this); pending != null; pending = previous.poll(this)) {
-	            this.scheduler.push(pending, this);
-	        }
-	        return this;
-	    }
-
-	    /**
-	     * Adds a pipeline to this spider by delegating to {@code addPipeline}.
-	     *
-	     * @param pipeline pipeline
-	     * @return this
-	     * @see #addPipeline(us.codecraft.webmagic.pipeline.Pipeline)
-	     * @deprecated use {@link #addPipeline(us.codecraft.webmagic.pipeline.Pipeline)} instead
-	     */
-	    @Deprecated
-	    public MySpider pipeline(Pipeline pipeline) {
-	        return addPipeline(pipeline);
-	    }
-
-	    /**
-	     * Appends a pipeline to this spider's pipeline list.
-	     * Calls {@code checkIfRunning()} before the list is modified.
-	     *
-	     * @param pipeline pipeline to append
-	     * @return this
-	     * @see Pipeline
-	     * @since 0.2.1
-	     */
-	    public MySpider addPipeline(Pipeline pipeline) {
-	        checkIfRunning();
-	        this.pipelines.add(pipeline);
-	        return this;
-	    }
-
-	    /**
-	     * Replaces this spider's pipeline list with the given list (stored by reference).
-	     * Calls {@code checkIfRunning()} before the list is replaced.
-	     *
-	     * @param pipelines pipelines to use
-	     * @return this
-	     * @see Pipeline
-	     * @since 0.4.1
-	     */
-	    public MySpider setPipelines(List<Pipeline> pipelines) {
-	        checkIfRunning();
-	        this.pipelines = pipelines;
-	        return this;
-	    }
-
-	    /**
-	     * Drops all configured pipelines by installing a fresh empty list.
-	     *
-	     * @return this
-	     */
-	    public MySpider clearPipeline() {
-	        pipelines = new ArrayList<Pipeline>();
-	        return this;
-	    }
-
-	    /**
-	     * Sets the downloader of this spider by delegating to {@code setDownloader}.
-	     *
-	     * @param downloader downloader
-	     * @return this
-	     * @see #setDownloader(us.codecraft.webmagic.downloader.Downloader)
-	     * @deprecated use {@link #setDownloader(us.codecraft.webmagic.downloader.Downloader)} instead
-	     */
-	    @Deprecated
-	    public MySpider downloader(Downloader downloader) {
-	        return setDownloader(downloader);
-	    }
-
-	    /**
-	     * Sets the downloader used by this spider.
-	     * Calls {@code checkIfRunning()} before the downloader is replaced.
-	     *
-	     * @param downloader downloader to use
-	     * @return this
-	     * @see Downloader
-	     */
-	    public MySpider setDownloader(Downloader downloader) {
-	        checkIfRunning();
-	        this.downloader = downloader;
-	        return this;
-	    }
-
-	    protected void initComponent() {
-	        if (downloader == null) {
-	            this.downloader = new HttpClientDownloader();
-	        }
-	        if (pipelines.isEmpty()) {
-	            pipelines.add(new ConsolePipeline());
-	        }
-	        downloader.setThread(threadNum);
-	        if (threadPool == null || threadPool.isShutdown()) {
-	            if (executorService != null && !executorService.isShutdown()) {
-	                threadPool = new CountableThreadPool(threadNum, executorService);
-	            } else {
-	                threadPool = new CountableThreadPool(threadNum);
-	            }
-	        }
-	        if (startRequests != null) {
-	            for (Request request : startRequests) {
-	                addRequest(request);
-	            }
-	            startRequests.clear();
-	        }
-	        startTime = new Date();
-	    }
-
-	    @Override
-	    public void run() {
-	        checkRunningStat();
-	        initComponent();
-	        logger.info("Spider " + getUUID() + " started!");
-	        while (!Thread.currentThread().isInterrupted() && stat.get() == STAT_RUNNING) {
-	            Request request = scheduler.poll(this);
-	            if (request == null) {
-	                if (threadPool.getThreadAlive() == 0 && exitWhenComplete) {
-	                    break;
-	                }
-	                // wait until new url added
-	                waitNewUrl();
-	            } else {
-	                final Request requestFinal = request;
-	                threadPool.execute(new Runnable() {
-	                    @Override
-	                    public void run() {
-	                        try {
-	                            processRequest(requestFinal);
-	                            onSuccess(requestFinal);
-	                        } catch (Exception e) {
-	                            onError(requestFinal);
-	                            logger.error("process request " + requestFinal + " error", e);
-	                        } finally {
-	                            pageCount.incrementAndGet();
-	                            signalNewUrl();
-	                        }
-	                    }
-	                });
-	            }
-	        }
-	        stat.set(STAT_STOPPED);
-	        // release some resources
-	        if (destroyWhenExit) {
-	            close();
-	        }
-	    }
-
-	    protected void onError(Request request) {
-	        if (CollectionUtils.isNotEmpty(spiderListeners)) {
-	            for (SpiderListener spiderListener : spiderListeners) {
-	                spiderListener.onError(request);
-	            }
-	        }
-	    }
-
-	    protected void onSuccess(Request request) {
-	        if (CollectionUtils.isNotEmpty(spiderListeners)) {
-	            for (SpiderListener spiderListener : spiderListeners) {
-	                spiderListener.onSuccess(request);
-	            }
-	        }
-	    }
-
-	    private void checkRunningStat() {
-	        while (true) {
-	            int statNow = stat.get();
-	            if (statNow == STAT_RUNNING) {
-	                throw new IllegalStateException("Spider is already running!");
-	            }
-	            if (stat.compareAndSet(statNow, STAT_RUNNING)) {
-	                break;
-	            }
-	        }
-	    }
-
-	    public void close() {
-	        destroyEach(downloader);
-	        destroyEach(pageProcessor);
-	        destroyEach(scheduler);
-	        for (Pipeline pipeline : pipelines) {
-	            destroyEach(pipeline);
-	        }
-	        threadPool.shutdown();
-	    }
-
-	    private void destroyEach(Object object) {
-	        if (object instanceof Closeable) {
-	            try {
-	                ((Closeable) object).close();
-	            } catch (IOException e) {
-	                e.printStackTrace();
-	            }
-	        }
-	    }
-
-	    /**
-	     * Process specific urls without url discovering.
-	     *
-	     * @param urls urls to process
-	     */
-	    public void test(String... urls) {
-	        initComponent();
-	        if (urls.length > 0) {
-	            for (String url : urls) {
-	                processRequest(new Request(url));
-	            }
-	        }
-	    }
-
-	    protected void processRequest(Request request) {
-	        Page page = downloader.download(request, this);
-	        if (page == null) {
-	            sleep(site.getSleepTime());
-	            onError(request);
-	            return;
-	        }
-	        // for cycle retry
-	        if (page.isNeedCycleRetry()) {
-	            extractAndAddRequests(page, true);
-	            sleep(site.getRetrySleepTime());
-	            return;
-	        }
-	        pageProcessor.process(page);
-	        extractAndAddRequests(page, spawnUrl);
-	        if (!page.getResultItems().isSkip()) {
-	            for (Pipeline pipeline : pipelines) {
-	                pipeline.process(page.getResultItems(), this);
-	            }
-	        }
-	        //for proxy status management
-	        request.putExtra(Request.STATUS_CODE, page.getStatusCode());
-	        sleep(site.getSleepTime());
-	    }
-
-	    protected void sleep(int time) {
-	        try {
-	            Thread.sleep(time);
-	        } catch (InterruptedException e) {
-	            e.printStackTrace();
-	        }
-	    }
-
-	    protected void extractAndAddRequests(Page page, boolean spawnUrl) {
-	        if (spawnUrl && CollectionUtils.isNotEmpty(page.getTargetRequests())) {
-	            for (Request request : page.getTargetRequests()) {
-	                addRequest(request);
-	            }
-	        }
-	    }
-
-	    private void addRequest(Request request) {
-	        if (site.getDomain() == null && request != null && request.getUrl() != null) {
-	            site.setDomain(UrlUtils.getDomain(request.getUrl()));
-	        }
-	        scheduler.push(request, this);
-	    }
-
-	    protected void checkIfRunning() {
-	        if (stat.get() == STAT_RUNNING) {
-	            throw new IllegalStateException("Spider is already running!");
-	        }
-	    }
-
-	    public void runAsync() {
-	        Thread thread = new Thread(this);
-	        thread.setDaemon(false);
-	        thread.start();
-	    }
-
-	    /**
-	     * Add urls to crawl. <br>
-	     *
-	     * @param urls urls
-	     * @return this
-	     */
-	    public MySpider addUrl(String... urls) {
-	        for (String url : urls) {
-	            addRequest(new Request(url));
-	        }
-	        signalNewUrl();
-	        return this;
-	    }
-
-	    /**
-	     * Download urls synchronizing.
-	     *
-	     * @param urls urls
-	     * @return list downloaded
-	     */
-	    @SuppressWarnings({ "rawtypes", "unchecked" })
-	    public <T> List<T> getAll(Collection<String> urls) {
-	        destroyWhenExit = false;
-	        spawnUrl = false;
-	        startRequests.clear();
-	        for (Request request : UrlUtils.convertToRequests(urls)) {
-	            addRequest(request);
-	        }
-			CollectorPipeline collectorPipeline = getCollectorPipeline();
-	        pipelines.add(collectorPipeline);
-	        run();
-	        spawnUrl = true;
-	        destroyWhenExit = true;
-	        return collectorPipeline.getCollected();
-	    }
-
-	    @SuppressWarnings("rawtypes")
-		protected CollectorPipeline getCollectorPipeline() {
-	        return new ResultItemsCollectorPipeline();
-	    }
-
-		public <T> T get(String url) {
-	        List<String> urls = WMCollections.newArrayList(url);
-	        List<T> resultItemses = getAll(urls);
-	        if (resultItemses != null && resultItemses.size() > 0) {
-	            return resultItemses.get(0);
-	        } else {
-	            return null;
-	        }
-	    }
-
-	    /**
-	     * Add urls with information to crawl.<br>
-	     *
-	     * @param requests requests
-	     * @return this
-	     */
-	    public MySpider addRequest(Request... requests) {
-	        for (Request request : requests) {
-	            addRequest(request);
-	        }
-	        signalNewUrl();
-	        return this;
-	    }
-
-	    private void waitNewUrl() {
-	        newUrlLock.lock();
-	        try {
-	            //double check
-	            if (threadPool.getThreadAlive() == 0 && exitWhenComplete) {
-	                return;
-	            }
-	            newUrlCondition.await(emptySleepTime, TimeUnit.MILLISECONDS);
-	        } catch (InterruptedException e) {
-	            logger.warn("waitNewUrl - interrupted, error {}", e);
-	        } finally {
-	            newUrlLock.unlock();
-	        }
-	    }
-
-	    private void signalNewUrl() {
-	        try {
-	            newUrlLock.lock();
-	            newUrlCondition.signalAll();
-	        } finally {
-	            newUrlLock.unlock();
-	        }
-	    }
-
-	    public void start() {
-	        runAsync();
-	    }
-
-	    public void stop() {
-	        if (stat.compareAndSet(STAT_RUNNING, STAT_STOPPED)) {
-	            logger.info("Spider " + getUUID() + " stop success!");
-	        } else {
-	            logger.info("Spider " + getUUID() + " stop fail!");
-	        }
-	    }
-
-	    /**
-	     * start with more than one threads
-	     *
-	     * @param threadNum threadNum
-	     * @return this
-	     */
-	    public MySpider thread(int threadNum) {
-	        checkIfRunning();
-	        this.threadNum = threadNum;
-	        if (threadNum <= 0) {
-	            throw new IllegalArgumentException("threadNum should be more than one!");
-	        }
-	        return this;
-	    }
-
-	    /**
-	     * start with more than one threads
-	     *
-	     * @param executorService executorService to run the spider
-	     * @param threadNum threadNum
-	     * @return this
-	     */
-	    public MySpider thread(ExecutorService executorService, int threadNum) {
-	        checkIfRunning();
-	        this.threadNum = threadNum;
-	        if (threadNum <= 0) {
-	            throw new IllegalArgumentException("threadNum should be more than one!");
-	        }
-	        return this;
-	    }
-
-	    public boolean isExitWhenComplete() {
-	        return exitWhenComplete;
-	    }
-
-	    /**
-	     * Exit when complete. <br>
-	     * True: exit when all url of the site is downloaded. <br>
-	     * False: not exit until call stop() manually.<br>
-	     *
-	     * @param exitWhenComplete exitWhenComplete
-	     * @return this
-	     */
-	    public MySpider setExitWhenComplete(boolean exitWhenComplete) {
-	        this.exitWhenComplete = exitWhenComplete;
-	        return this;
-	    }
-
-	    public boolean isSpawnUrl() {
-	        return spawnUrl;
-	    }
-
-	    /**
-	     * Get page count downloaded by spider.
-	     *
-	     * @return total downloaded page count
-	     * @since 0.4.1
-	     */
-	    public long getPageCount() {
-	        return pageCount.get();
-	    }
-
-	    /**
-	     * Get running status by spider.
-	     *
-	     * @return running status
-	     * @see Status
-	     * @since 0.4.1
-	     */
-	    public Status getStatus() {
-	        return Status.fromValue(stat.get());
-	    }
-
-
-	    public enum Status {
-	        Init(0), Running(1), Stopped(2);
-
-	        private Status(int value) {
-	            this.value = value;
-	        }
-
-	        private int value;
-
-	        int getValue() {
-	            return value;
-	        }
-
-	        public static Status fromValue(int value) {
-	            for (Status status : Status.values()) {
-	                if (status.getValue() == value) {
-	                    return status;
-	                }
-	            }
-	            //default value
-	            return Init;
-	        }
-	    }
-
-	    /**
-	     * Get thread count which is running
-	     *
-	     * @return thread count which is running
-	     * @since 0.4.1
-	     */
-	    public int getThreadAlive() {
-	        if (threadPool == null) {
-	            return 0;
-	        }
-	        return threadPool.getThreadAlive();
-	    }
-
-	    /**
-	     * Whether add urls extracted to download.<br>
-	     * Add urls to download when it is true, and just download seed urls when it is false. <br>
-	     * DO NOT set it unless you know what it means!
-	     *
-	     * @param spawnUrl spawnUrl
-	     * @return this
-	     * @since 0.4.0
-	     */
-	    public MySpider setSpawnUrl(boolean spawnUrl) {
-	        this.spawnUrl = spawnUrl;
-	        return this;
-	    }
-
-	    @Override
-	    public String getUUID() {
-	        if (uuid != null) {
-	            return uuid;
-	        }
-	        if (site != null) {
-	            return site.getDomain();
-	        }
-	        uuid = UUID.randomUUID().toString();
-	        return uuid;
-	    }
-
-	    public MySpider setExecutorService(ExecutorService executorService) {
-	        checkIfRunning();
-	        this.executorService = executorService;
-	        return this;
-	    }
-
-	    @Override
-	    public Site getSite() {
-	        return site;
-	    }
-
-	    public List<SpiderListener> getSpiderListeners() {
-	        return spiderListeners;
-	    }
-
-	    public MySpider setSpiderListeners(List<SpiderListener> spiderListeners) {
-	        this.spiderListeners = spiderListeners;
-	        return this;
-	    }
-
-	    public Date getStartTime() {
-	        return startTime;
-	    }
-
-	    public Scheduler getScheduler() {
-	        return scheduler;
-	    }
-
-	    /**
-	     * Set wait time when no url is polled.<br><br>
-	     *
-	     * @param emptySleepTime In MILLISECONDS.
-	     */
-	    public void setEmptySleepTime(int emptySleepTime) {
-	        this.emptySleepTime = emptySleepTime;
-	    }
-}
--- a/src/main/java/com/zhiwei/source_forward/util/ContentDataCallback.java
+++ b/src/main/java/com/zhiwei/source_forward/util/ContentDataCallback.java
+package com.zhiwei.source_forward.util;
+
+import com.zhiwei.source_forward.bean.ContentBean;
+import com.zhiwei.source_forward.bean.ContentBean.Attribution;
+
+/**
+ * Callback that receives a {@link ContentBean} together with its
+ * {@link Attribution}.
+ */
+public interface ContentDataCallback {
+    
+    /**
+     * Receives one data item.
+     *
+     * @param data the content record being delivered
+     * @param attr the {@code ContentBean.Attribution} passed along with {@code data}
+     *             (NOTE(review): its meaning is defined in ContentBean - confirm there)
+     */
+    void onData(ContentBean data, Attribution attr);
+    
+}
--- a/src/main/java/com/zhiwei/source_forward/util/MatchContent.java
+++ b/src/main/java/com/zhiwei/source_forward/util/MatchContent.java
@@ -5,8 +5,8 @@ import org.jsoup.nodes.Document;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import cn.edu.hfut.dmic.contentextractor.ContentExtractor;
-import cn.edu.hfut.dmic.contentextractor.News;
+import com.zhiwei.source_forward.content.ContentExtractor;
+import com.zhiwei.source_forward.content.News;

 /**
 * @ClassName: MatchChannel 

--- a/src/main/java/com/zhiwei/source_forward/util/MatchSource.java
+++ b/src/main/java/com/zhiwei/source_forward/util/MatchSource.java
@@ -5,8 +5,8 @@ import java.util.List;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;

-import cn.edu.hfut.dmic.contentextractor.ContentExtractor;
-import cn.edu.hfut.dmic.contentextractor.News;
+import com.zhiwei.source_forward.content.ContentExtractor;
+import com.zhiwei.source_forward.content.News;

 /**
 * @ClassName: MatchSource 

--- a/src/main/java/com/zhiwei/source_forward/util/MediaSelfSourceDataCallBack.java
+++ b/src/main/java/com/zhiwei/source_forward/util/MediaSelfSourceDataCallBack.java
+package com.zhiwei.source_forward.util;
+
+import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
+import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution;
+
+/**
+ * Callback that receives a {@link MediaSelfSourceBean} together with its
+ * {@link Attribution}.
+ */
+public interface MediaSelfSourceDataCallBack {
+    
+    /**
+     * Receives one data item.
+     *
+     * @param data the self-media source record being delivered
+     * @param attr the {@code MediaSelfSourceBean.Attribution} passed along with {@code data}
+     *             (NOTE(review): its meaning is defined in MediaSelfSourceBean - confirm there)
+     */
+    void onData(MediaSelfSourceBean data, Attribution attr);
+    
+}
--- a/src/main/java/com/zhiwei/source_forward/util/ProxyClientUtil.java
+++ b/src/main/java/com/zhiwei/source_forward/util/ProxyClientUtil.java
+package com.zhiwei.source_forward.util;
+
+import java.net.Proxy;
+
+import com.zhiwei.proxy.common.Definition.GroupType;
+import com.zhiwei.proxy.core.ProxyClient;
+import com.zhiwei.proxy.core.ProxyClientFactory;
+
+
+public class ProxyClientUtil {
+	
+	private static volatile ProxyClient client;
+	
+	/**
+	 * @Title: getNATProxy 
+	 * @author hero 
+	 * @Description: 获取NAT机代理IP 
+	 * @param @return 设定文件 
+	 * @return Proxy 返回类型
+	 */
+	public static Proxy getNATProxy(){
+		return getClient().getNATProxy();
+	}
+	
+	
+	public static ProxyClient getClient() {
+		if(client==null) {
+			synchronized (ProxyClientUtil.class) {
+				if(client==null) {
+					client = ProxyClientFactory.build("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
+				}
+			}			
+		}
+		return client;
+	}
+
+}
--- a/src/main/java/com/zhiwei/source_forward/util/SourceData.java
+++ b/src/main/java/com/zhiwei/source_forward/util/SourceData.java
@@ -9,7 +9,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;

-import com.zhiwei.zhiweiTools.order.TreatOrder;
+import com.zhiwei.tools.order.TreatOrder;
+

 /**
 * @ClassName: SourceData 
@@ -82,7 +83,7 @@ public class SourceData {
 	public static List<String> getSourceList(){
 		List<String> result = null;
 		if(sourceMap!=null && sourceMap.size()>0){
-			result = new ArrayList<String>();
+			result = new ArrayList<>();
 			List<Entry<String,Integer>> dataList = TreatOrder.treatOrderByCountDesc(sourceMap);
 			for(Entry<String,Integer> entry : dataList){
 				result.add(entry.getKey());

--- a/src/main/java/com/zhiwei/source_forward/util/SourceForwardDataCallBack.java
+++ b/src/main/java/com/zhiwei/source_forward/util/SourceForwardDataCallBack.java
+package com.zhiwei.source_forward.util;
+
+import com.zhiwei.source_forward.bean.SourceForwardBean;
+import com.zhiwei.source_forward.bean.SourceForwardBean.Attribution;
+
+/**
+ * Callback that receives a {@link SourceForwardBean} together with its
+ * {@link Attribution}.
+ */
+public interface SourceForwardDataCallBack {
+    
+    /**
+     * Invoked when a data item is passed in.
+     *
+     * @param data the source-forward record being delivered
+     * @param attr the {@code SourceForwardBean.Attribution} passed along with {@code data}
+     */
+    void onData(SourceForwardBean data, Attribution attr);
+    
+}
--- a/src/main/java/com/zhiwei/source_forward/util/TreateData.java
+++ b/src/main/java/com/zhiwei/source_forward/util/TreateData.java
--- a/src/main/java/com/zhiwei/source_forward/util/UrlLiveDataCallback.java
+++ b/src/main/java/com/zhiwei/source_forward/util/UrlLiveDataCallback.java
+/** 
+ * @Title: UrlLiveDataCallback.java 
+ * @Package com.zhiwei.source_forward.util 
+ * @author 0xff 
+ * @date 2018-06-29 16:44:38 
+ */
+package com.zhiwei.source_forward.util;
+
+import com.zhiwei.source_forward.bean.UrlLiveBean;
+import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution;
+
+/**
+ * @ClassName: UrlLiveDataCallback
+ * @Description: Interface for saving whether a link has been deleted.
+ * @author 0xff
+ * @date 2018-06-29 16:44:38
+ */
+public interface UrlLiveDataCallback {
+
+	/**
+	 * Invoked when a data item is passed in.
+	 *
+	 * @param data the URL liveness record being delivered
+	 * @param attr the {@code UrlLiveBean.Attribution} passed along with {@code data}
+	 */
+	void onData(UrlLiveBean data, Attribution attr);
+}
--- a/src/main/resources/log4j2.xml
+++ b/src/main/resources/log4j2.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- status: log level of Log4j2's own internal diagnostics -->
+<Configuration status="WARN">
+	<properties>
+		<!-- No trailing separator here: the appender paths below add their own "/" -->
+		<property name="LOG_HOME">Log</property>
+		<property name="LOG_FILE">crawler</property>
+	</properties>
+	<Appenders>
+		<!-- Log output destinations -->
+		<Console name="Console" target="SYSTEM_OUT">
+			<PatternLayout pattern="%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger{36} - %msg%n" />
+		</Console>
+		<!-- Rolls over daily and whenever the active file reaches 20 MB; archives are grouped by month -->
+		<RollingRandomAccessFile name="LogFile"
+			fileName="${LOG_HOME}/${LOG_FILE}.log"
+			filePattern="${LOG_HOME}/$${date:yyyy-MM}/${LOG_FILE}-%d{yyyy-MM-dd}-%i.log">
+			<PatternLayout
+				pattern="%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger{36} - %msg%n" />
+			<Policies>
+				<TimeBasedTriggeringPolicy interval="1" />
+				<SizeBasedTriggeringPolicy size="20 MB" />
+			</Policies>
+			<DefaultRolloverStrategy max="20" />
+		</RollingRandomAccessFile>
+	</Appenders>
+	<Loggers>
+		<!-- Root at info: both appender refs already filter at info, so a lower
+		     root level only caused events to be created and then discarded -->
+		<Root level="info">
+			<AppenderRef ref="Console" level="info" />
+			<AppenderRef ref="LogFile" level="info" />
+		</Root>
+	</Loggers>
+</Configuration>
\ No newline at end of file
--- a/src/test/java/com/zhiwei/source_forward/sourceforward/test/URLLiveTest.java
+++ b/src/test/java/com/zhiwei/source_forward/sourceforward/test/URLLiveTest.java
-package com.zhiwei.source_forward.sourceforward.test;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-
-import org.junit.Test;
-
-import com.zhiwei.source_forward.run.URLLive;
-
-/**
- * @ClassName: URLLiveTest 
- * @Description: 验证链接有效性 
- * @author hero 
- * @date 2017年12月6日 下午1:30:26
- */
-public class URLLiveTest {
-	
-	
-//	@Test
-//	public void urlLiveTest(){
-//		String path = "E://稿件汇总网媒数据//福莱网媒.xlsx";
-//		PoiExcelUtil poi = PoiExcelUtil.getInstance();
-//		Map<String,Object> data = poi.importExcel(path, 0);
-//		@SuppressWarnings("unchecked")
-//		List<String> headList = (List<String>)data.get("head");
-//		headList.add("是否删除");
-//		@SuppressWarnings("unchecked")
-//		List<Map<String,Object>> dataList = (List<Map<String,Object>>)data.get("body");
-//		Map<String,Map<String,Object>> dataMap = ReadMediaData.getUrlLive(dataList);
-//		dataMap = URLLive.verificationURLLive(dataMap);
-//		
-//		List<Map<String,Object>> bodyList = new ArrayList<>();
-//		for(Entry<String,Map<String,Object>> dataEntry : dataMap.entrySet()){
-//			bodyList.add(dataEntry.getValue());
-//		}
-//		poi.exportExcel(path ,"匹配后数据", headList, bodyList);
-//	}
-	
-
-}
+//package com.zhiwei.source_forward.sourceforward.test;
+//
+//import java.util.ArrayList;
+//import java.util.List;
+//import java.util.Map;
+//import java.util.Map.Entry;
+//
+//import org.junit.Test;
+//
+//import com.zhiwei.source_forward.run.URLLive;
+//
+///**
+// * @ClassName: URLLiveTest 
+// * @Description: 验证链接有效性 
+// * @author hero 
+// * @date 2017年12月6日 下午1:30:26
+// */
+//public class URLLiveTest {
+//	
+//	
+////	@Test
+////	public void urlLiveTest(){
+////		String path = "E://稿件汇总网媒数据//福莱网媒.xlsx";
+////		PoiExcelUtil poi = PoiExcelUtil.getInstance();
+////		Map<String,Object> data = poi.importExcel(path, 0);
+////		@SuppressWarnings("unchecked")
+////		List<String> headList = (List<String>)data.get("head");
+////		headList.add("是否删除");
+////		@SuppressWarnings("unchecked")
+////		List<Map<String,Object>> dataList = (List<Map<String,Object>>)data.get("body");
+////		Map<String,Map<String,Object>> dataMap = ReadMediaData.getUrlLive(dataList);
+////		dataMap = URLLive.verificationURLLive(dataMap);
+////		
+////		List<Map<String,Object>> bodyList = new ArrayList<>();
+////		for(Entry<String,Map<String,Object>> dataEntry : dataMap.entrySet()){
+////			bodyList.add(dataEntry.getValue());
+////		}
+////		poi.exportExcel(path ,"匹配后数据", headList, bodyList);
+////	}
+//	
+//
+//}