Commit 1b20782c by yangchen

修改 企鹅号解析方式

parent ac11f629
......@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>source-forward</artifactId>
<version>0.0.9-SNAPSHOT</version>
<version>0.1.1-SNAPSHOT</version>
<name>source-forward</name>
<description>验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)</description>
......
......@@ -139,7 +139,7 @@ public class MediaSelfSourceCrawler {
}
}
} catch (Exception e) {
e.printStackTrace();
logger.error("exception ",e);
source = null;
}finally {
if(response != null) {
......
......@@ -7,6 +7,8 @@ import java.util.List;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution;
import com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler;
......@@ -21,13 +23,13 @@ public class MediaSelfSource {
}
/**
 * Manual smoke-test entry point.
 *
 * Initialises the proxy factory, asks {@code getMediaSelfSource} to resolve a
 * single hard-coded sample URL, and prints every returned bean to stdout.
 * An earlier variant of this harness used the sample URL
 * http://sh.qihoo.com/pc/91d1d565fe552fa1e?sign=360_e39369d1 instead.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    // The proxy factory is initialised before the lookup is issued.
    ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);

    // Single sample article used as input for the lookup.
    List<String> sampleUrls = new ArrayList<>();
    sampleUrls.add("https://sports.qq.com/a/20190227/001177.htm");

    List<MediaSelfSourceBean> resolved = MediaSelfSource.getMediaSelfSource(sampleUrls);

    // Dump each result in the order it was returned.
    for (int idx = 0; idx < resolved.size(); idx++) {
        MediaSelfSourceBean bean = resolved.get(idx);
        System.out.println(bean.toString());
    }
}
static class MediaSelfSourceCrawlerThread extends Thread{
......
......@@ -201,9 +201,9 @@ public class MatchSource {
if(source!=null && source.length()>1){
source = "北京时间-" + source;
}
}else if(url.contains("new.qq.com/omn/")){
}else if(url.contains("qq.com/")){
//腾讯网-企鹅号
source = html.split("media\":\"")[1].split("\",\"")[0];
source = html.split("media\": \"")[1].split("\",")[0];
if(source!=null && source.length()>1){
source = "企鹅号-" + source;
}
......@@ -240,6 +240,7 @@ public class MatchSource {
}
return source;
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment