Commit 252ee982 by zhiwei

来源转发添加腾讯自选股

parent dd6b6b30
...@@ -5,6 +5,7 @@ import java.util.List; ...@@ -5,6 +5,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
...@@ -70,6 +71,11 @@ public class SourceForwardCrawler { ...@@ -70,6 +71,11 @@ public class SourceForwardCrawler {
if(url.contains("china.prcfe.com")) { if(url.contains("china.prcfe.com")) {
url = "http://china.prcfe.com/e/extend/ShowSource/?id=" + url.split("/")[url.split("/").length-1].split("\\.")[0]; url = "http://china.prcfe.com/e/extend/ShowSource/?id=" + url.split("/")[url.split("/").length-1].split("\\.")[0];
} }
if(url.contains("gu.qq.com")) {
String id = url.split("\\?id=")[1];
url = "https://snp.tenpay.com/cgi-bin/snpgw_unified_newsinfo.fcgi?&filter=0&zappid=zxg_h5&sign=b2aceeb8a8ef093862608d806c1d6ab8&nonce=8464&reserve=1572995&&channel=zxg&user_openid=undefined&user_skey=undefined&&news_id=" + id;
headers.put("referer", "https://gu.qq.com/resources/shy/news/detail-v2/index.html");
}
Request request = RequestUtils.wrapGet(url, headers); Request request = RequestUtils.wrapGet(url, headers);
counter.add(); counter.add();
httpBoot.asyncCall(request, ProxyHolder.NAT_HEAVY_PROXY, true).whenComplete((rs,ex) -> { httpBoot.asyncCall(request, ProxyHolder.NAT_HEAVY_PROXY, true).whenComplete((rs,ex) -> {
...@@ -94,8 +100,8 @@ public class SourceForwardCrawler { ...@@ -94,8 +100,8 @@ public class SourceForwardCrawler {
String channel = "新闻"; String channel = "新闻";
String isforward = "未知"; String isforward = "未知";
try { try {
Document document = Jsoup.parse(body);
if(attr.get().toString().contains("mp.weixin.qq.com")){ if(attr.get().toString().contains("mp.weixin.qq.com")){
Document document = Jsoup.parse(body);
isforward = document.select("div#meta_content").select("span#copyright_logo").text(); isforward = document.select("div#meta_content").select("span#copyright_logo").text();
if(isforward.contains("原创")){ if(isforward.contains("原创")){
isforward = "原创"; isforward = "原创";
...@@ -106,15 +112,21 @@ public class SourceForwardCrawler { ...@@ -106,15 +112,21 @@ public class SourceForwardCrawler {
if(body.contains("isOriginal") && body.contains("isOriginal: true")){ if(body.contains("isOriginal") && body.contains("isOriginal: true")){
isforward = "原创"; isforward = "原创";
} }
}else if(attr.get().toString().contains("snp.tenpay.com") || attr.get().toString().contains("gu.qq.com")){
if(body.contains("source")){
source = body.split("\"media_name\":\"")[1].split("\"")[0];
}
}else{ }else{
Document document = Jsoup.parse(body);
source = MatchSource.matchSource(attr.get().toString(),document.toString(), sourceList);
channel = MatchChannel.verifyChannel(attr.get().toString()); channel = MatchChannel.verifyChannel(attr.get().toString());
if(channel==null){ if(channel==null){
List<Node> nodeList = document.head().childNodes(); List<Node> nodeList = document.head().childNodes();
channel = MatchChannel.matchChannel(nodeList); channel = MatchChannel.matchChannel(nodeList);
} }
source = MatchSource.matchSource(attr.get().toString(),document.toString(), sourceList);
} }
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace();
source = null; source = null;
channel = "新闻"; channel = "新闻";
} }
......
...@@ -80,10 +80,10 @@ public class SourceForward { ...@@ -80,10 +80,10 @@ public class SourceForward {
public static void main(String[] args) { public static void main(String[] args) {
ProxyInit.initProxy(); ProxyInit.initProxy();
List<String> urlList = new ArrayList<>(); List<String> urlList = new ArrayList<>();
urlList.add("http://www.wangjiaozixun.com/html/zx20/2020/0730/1396388.html"); urlList.add("http://gu.qq.com/resources/shy/news/detail-v2/index.html?#/index?id=SN202006091653447945411f");
List<SourceForwardBean> da = SourceForward.getSourceForward(urlList); List<SourceForwardBean> da = SourceForward.getSourceForward(urlList);
for(SourceForwardBean sfb : da) { for(SourceForwardBean sfb : da) {
System.out.println(sfb.toString()); System.out.println("=============="+sfb.toString());
} }
} }
...@@ -94,7 +94,6 @@ public class SourceForward { ...@@ -94,7 +94,6 @@ public class SourceForward {
try{ try{
SourceForwardCrawler crawler = new SourceForwardCrawler(); SourceForwardCrawler crawler = new SourceForwardCrawler();
SourceForwardDataCallBack callback = new SourceForwardDataCallBack() { SourceForwardDataCallBack callback = new SourceForwardDataCallBack() {
@Override @Override
public void onData(SourceForwardBean data, Attribution attr) { public void onData(SourceForwardBean data, Attribution attr) {
list.add(data); list.add(data);
......
//package com.zhiwei.source_forward.sourceforward.test; //package com.zhiwei.source_forward.sourceforward.test;
// //
//import java.util.ArrayList; //import java.util.ArrayList;
//import java.util.List; //import java.util.List;
//import java.util.Map; //import java.util.Map;
//import java.util.Map.Entry; //import java.util.Map.Entry;
// //
//import org.junit.Test; //import org.junit.Test;
// //
//import com.zhiwei.excelpoi.excel.PoiExcelUtil; //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.source_forward.run.SourceForward; //import com.zhiwei.source_forward.run.SourceForward;
//import com.zhiwei.source_forward.util.ReadMediaData; //import com.zhiwei.source_forward.util.ReadMediaData;
// //
///** ///**
// * @ClassName: SourceForwardTest // * @ClassName: SourceForwardTest
// * @Description: 来源验证 // * @Description: 来源验证
// * @author hero // * @author hero
// * @date 2017年12月6日 上午9:55:13 // * @date 2017年12月6日 上午9:55:13
// */ // */
//public class SourceForwardTest { //public class SourceForwardTest {
// //
// @Test // @Test
// public void sourceForwardTest(){ // public void sourceForwardTest(){
// String path = "E://稿件汇总网媒数据//JD稿件转载情况-1206.xlsx"; // String path = "E://稿件汇总网媒数据//JD稿件转载情况-1206.xlsx";
// PoiExcelUtil poi = PoiExcelUtil.getInstance(); // PoiExcelUtil poi = PoiExcelUtil.getInstance();
// Map<String,Object> data = poi.importExcel(path, 0); // Map<String,Object> data = poi.importExcel(path, 0);
// @SuppressWarnings("unchecked") // @SuppressWarnings("unchecked")
// List<String> headList = (List<String>)data.get("head"); // List<String> headList = (List<String>)data.get("head");
// headList.add("频道"); // headList.add("频道");
// headList.add("原来源"); // headList.add("原来源");
// headList.add("是否转发"); // headList.add("是否转发");
// @SuppressWarnings("unchecked") // @SuppressWarnings("unchecked")
// List<Map<String,Object>> dataList = (List<Map<String,Object>>)data.get("body"); // List<Map<String,Object>> dataList = (List<Map<String,Object>>)data.get("body");
// //
// Map<String,Map<String,Object>> dataMap = ReadMediaData.getUrl(dataList); // Map<String,Map<String,Object>> dataMap = ReadMediaData.getUrl(dataList);
// dataMap = SourceForward.getSourceForward(dataMap); // dataMap = SourceForward.getSourceForward(dataMap);
// //
// List<Map<String,Object>> bodyList = new ArrayList<>(); // List<Map<String,Object>> bodyList = new ArrayList<>();
// for(Entry<String,Map<String,Object>> dataEntry : dataMap.entrySet()){ // for(Entry<String,Map<String,Object>> dataEntry : dataMap.entrySet()){
// bodyList.add(dataEntry.getValue()); // bodyList.add(dataEntry.getValue());
// } // }
// poi.exportExcel(path ,"匹配后数据", headList, bodyList); // poi.exportExcel(path ,"匹配后数据", headList, bodyList);
// } // }
// //
// //}
//
//
//
//
//
//
//}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment