Commit 4fafcc87 by zhiwei

1.添加微信文章原创识别

parent 4ac6afdf
...@@ -3,6 +3,8 @@ package com.zhiwei.source_forward.crawler; ...@@ -3,6 +3,8 @@ package com.zhiwei.source_forward.crawler;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node; import org.jsoup.nodes.Node;
import com.zhiwei.source_forward.util.SourceData; import com.zhiwei.source_forward.util.SourceData;
import com.zhiwei.source_forward.util.TreateData; import com.zhiwei.source_forward.util.TreateData;
...@@ -40,6 +42,16 @@ public class SourceForwardPageProcessor implements PageProcessor { ...@@ -40,6 +42,16 @@ public class SourceForwardPageProcessor implements PageProcessor {
channel = TreateData.matchChannel(nodeList); channel = TreateData.matchChannel(nodeList);
} }
source = TreateData.matchSource(page.getUrl().get(),page.getHtml().toString(), sourceList); source = TreateData.matchSource(page.getUrl().get(),page.getHtml().toString(), sourceList);
if(page.getUrl().get().contains("mp.weixin.qq.com")){
    // WeChat article page: read the "original article" badge text from the meta area.
    // The two class names are chained with '.' so the selector matches a single span
    // carrying both classes; separated by a space, jsoup would instead look for a
    // descendant element whose tag name is "meta_original_tag" and never match.
    // NOTE(review): assumes the badge is rendered as
    // <span class="rich_media_meta meta_original_tag"> - confirm against a live page.
    Document document = page.getHtml().getDocument();
    String isforward = document.select("div#meta_content").select("span.rich_media_meta.meta_original_tag").text();
    // select() never returns null and text() yields "" when nothing matched,
    // so the extracted text itself is the presence check.
    // No "isforward" entry is written when the badge is absent.
    if(!isforward.isEmpty()){
        data.put("isforward", isforward);
    }
}
} }
} catch (Exception e) { } catch (Exception e) {
source = null; source = null;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment