Commit 27063da5 by yangchen

修复快资讯 自媒体 来源解析失效问题

parent 633bfac9
......@@ -2,10 +2,13 @@ package com.zhiwei.source_forward.util;
import java.util.List;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.source_forward.content.ContentExtractor;
import com.zhiwei.source_forward.content.News;
......@@ -388,10 +391,18 @@ public class MatchSource {
}else if(url.contains("sh.qihoo.com") || url.contains("360kuai.com")){
//今日报点解析
source = document.select("span.source").text().trim();
if(source.length() < 2) {
if(source.length() < 1) {
source = document.select("p.article-info").select("a").text().trim();
}
if(source!=null && source.length()>1){
if(source.length() < 1 && html.contains("window.__INITIAL_DATA__ =")) {
Matcher ma = Pattern.compile("window.__INITIAL_DATA__ =[\\s\\S]+?\\</script\\>").matcher(html);
if(ma.find()) {
String result = ma.group().replaceAll("window.__INITIAL_DATA__ =|\\</script\\>|", "").trim();
JSONObject json = JSONObject.parseObject(result.substring(0,result.length()-1));
source = json.getJSONObject("detail").getString("src");
}
}
if(Objects.nonNull(source) && source.length()>1){
source = "快资讯-" + source;
}
}else if(url.contains("cj.sina.com.cn") || url.contains("finance.sina.cn") ||
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment