Commit 50524098 by zk

增加k.sina.com.cn网址的source字段的解析规则

parent 2da35a84
......@@ -52,7 +52,8 @@ public class MatchSource {
* 设定文件
* @return String 返回类型
*/
public static String matchSource(String url,String html, List<String> sourceList) {
public static String
matchSource(String url,String html, List<String> sourceList) {
String source = null;
Document document = Jsoup.parse(html);
String htmlBody = TreateData.filterSpecialCharacter(document.select("body").text().toUpperCase());
......@@ -452,11 +453,14 @@ public class MatchSource {
source = "快资讯-" + source;
}
}else if(url.contains("cj.sina.com.cn") || url.contains("finance.sina.cn") ||
url.contains("tech.sina.cn") || url.contains("news.sina.cn") || url.contains("k.sina.cn")){
url.contains("tech.sina.cn") || url.contains("news.sina.cn") || url.contains("k.sina.cn") || url.contains("k.sina.com.cn")){
source = document.select("h2.weibo_user").text();
if(Objects.isNull(source) || source.length() < 1) {
source = document.select("#top_bar > div > div.date-source > a").text();
}
if(Objects.isNull(source) || source.length() < 1) {
source = document.select("#top_bar > div > div.date-source >span > a").text();
}
if((Objects.isNull(source) || source.length() < 1) && html.contains("<meta name=\"mediaid\"")){
//新浪科技头条号
source = html.split("<meta name=\"mediaid\" content=\"")[1].split("\"")[0].trim();
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment