Commit 1b20782c by yangchen

修改 企鹅号解析方式

parent ac11f629
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>source-forward</artifactId> <artifactId>source-forward</artifactId>
<version>0.0.9-SNAPSHOT</version> <version>0.1.1-SNAPSHOT</version>
<name>source-forward</name> <name>source-forward</name>
<description>验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)</description> <description>验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)</description>
......
...@@ -139,7 +139,7 @@ public class MediaSelfSourceCrawler { ...@@ -139,7 +139,7 @@ public class MediaSelfSourceCrawler {
} }
} }
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); logger.error("exception ",e);
source = null; source = null;
}finally { }finally {
if(response != null) { if(response != null) {
......
...@@ -7,6 +7,8 @@ import java.util.List; ...@@ -7,6 +7,8 @@ import java.util.List;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean; import com.zhiwei.source_forward.bean.MediaSelfSourceBean;
import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution; import com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution;
import com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler; import com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler;
...@@ -21,13 +23,13 @@ public class MediaSelfSource { ...@@ -21,13 +23,13 @@ public class MediaSelfSource {
} }
public static void main(String[] args) { public static void main(String[] args) {
// ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER); ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
// List<String> urlList = new ArrayList<>(); List<String> urlList = new ArrayList<>();
// urlList.add("http://sh.qihoo.com/pc/91d1d565fe552fa1e?sign=360_e39369d1"); urlList.add("https://sports.qq.com/a/20190227/001177.htm");
// List<MediaSelfSourceBean> u = MediaSelfSource.getMediaSelfSource(urlList); List<MediaSelfSourceBean> u = MediaSelfSource.getMediaSelfSource(urlList);
// for(MediaSelfSourceBean b : u) { for(MediaSelfSourceBean b : u) {
// System.out.println(b.toString()); System.out.println(b.toString());
// } }
} }
static class MediaSelfSourceCrawlerThread extends Thread{ static class MediaSelfSourceCrawlerThread extends Thread{
......
...@@ -201,9 +201,9 @@ public class MatchSource { ...@@ -201,9 +201,9 @@ public class MatchSource {
if(source!=null && source.length()>1){ if(source!=null && source.length()>1){
source = "北京时间-" + source; source = "北京时间-" + source;
} }
}else if(url.contains("new.qq.com/omn/")){ }else if(url.contains("qq.com/")){
//腾讯网-企鹅号 //腾讯网-企鹅号
source = html.split("media\":\"")[1].split("\",\"")[0]; source = html.split("media\": \"")[1].split("\",")[0];
if(source!=null && source.length()>1){ if(source!=null && source.length()>1){
source = "企鹅号-" + source; source = "企鹅号-" + source;
} }
...@@ -240,6 +240,7 @@ public class MatchSource { ...@@ -240,6 +240,7 @@ public class MatchSource {
} }
return source; return source;
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace();
return null; return null;
} }
} }
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment