Commit 09b58307 by zhiwei

处理搜狗微信搜索链接中出现两次https的问题

parent 6c9f649a
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>wechat</artifactId> <artifactId>wechat</artifactId>
<version>1.3.1-SNAPSHOT</version> <version>1.3.2-SNAPSHOT</version>
<description> <description>
知微微信采集程序,包含 知微微信采集程序,包含
1.微信历史文章采集 1.微信历史文章采集
...@@ -85,13 +85,13 @@ ...@@ -85,13 +85,13 @@
<dependency> <dependency>
<groupId>com.zhiwei.tools</groupId> <groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId> <artifactId>zhiwei-tools</artifactId>
<version>0.1.4-SNAPSHOT</version> <version>0.1.5-SNAPSHOT</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.zhiwei.crawler</groupId> <groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId> <artifactId>crawler-core</artifactId>
<version>0.5.5.6-SNAPSHOT</version> <version>0.5.6.3-RELEASE</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
</dependencies> </dependencies>
......
...@@ -301,11 +301,9 @@ public class WechatAritcleSearch { ...@@ -301,11 +301,9 @@ public class WechatAritcleSearch {
try { try {
title = element.select("div.txt-box").select("h3").text(); title = element.select("div.txt-box").select("h3").text();
link = element.select("div.txt-box").select("h3 >a").attr("href"); link = element.select("div.txt-box").select("h3 >a").attr("href");
if(!link.contains("https")){ if(!link.contains("weixin.sogou.com")){
link = "https://weixin.sogou.com" + link; link = "https://weixin.sogou.com" + link;
} }
content = "";
if (element.select("p.txt-info").isEmpty()) { if (element.select("p.txt-info").isEmpty()) {
content = element.select("p.txt-info").text(); content = element.select("p.txt-info").text();
} else { } else {
...@@ -325,8 +323,10 @@ public class WechatAritcleSearch { ...@@ -325,8 +323,10 @@ public class WechatAritcleSearch {
} }
title = ZhiWeiTools.SBC2DBC(title); title = ZhiWeiTools.SBC2DBC(title);
content = ZhiWeiTools.SBC2DBC(content); content = ZhiWeiTools.SBC2DBC(content);
if(StringUtils.isNotBlank(title)){
wechat = new WechatAricle(link, title, source, content, date, readNum, 0, openid, "unknow"); wechat = new WechatAricle(link, title, source, content, date, readNum, 0, openid, "unknow");
result.add(wechat); result.add(wechat);
}
} catch (Exception e) { } catch (Exception e) {
logger.debug("解析数据出现错误:{}", e.getMessage()); logger.debug("解析数据出现错误:{}", e.getMessage());
continue; continue;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment