Commit e43ea617 by zhiwei

修复头条根据链接获取文章bug

parent 5578cebf
......@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>toutiao</artifactId>
<version>0.3.8-SNAPSHOT</version>
<version>0.3.9-SNAPSHOT</version>
<dependencies>
<dependency>
......@@ -18,6 +18,16 @@
<version>0.5.2-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>excelpoi</artifactId>
<version>0.0.5-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongo-java-driver</artifactId>
<version>3.8.1</version>
</dependency>
</dependencies>
<!-- 打包管理 -->
......
......@@ -12,11 +12,7 @@ package com.zhiwei.toutiao.bean;
import java.io.Serializable;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSONObject;
/**
* @Description:
......
......@@ -605,7 +605,7 @@ public class TouTiaoArticleParse {
//通过正则截取需要的js代码
Matcher matcher = Pattern.compile(regex).matcher(htmlBody);
if(matcher.find()) {
String content = matcher.group().replace("<script>var BASE_DATA = |;</script>", "");
String content = matcher.group().replaceAll("<script>var BASE_DATA = |;</script>", "");
//通过js引擎执行js代码
String jsContent = "eval(("+ content +")).articleInfo.content.toString();";
String contentHtml = scriptEngine.eval(jsContent).toString();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment