Commit 633bfac9 by yangchen

新浪 网易 自媒体获取修改

parent 210aee67
......@@ -25,7 +25,7 @@ public class MediaSelfSource {
public static void main(String[] args) {
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
List<String> urlList = new ArrayList<>();
urlList.add("http://auto.sina.com.cn/j_kandian.d.html?docid=hytcerm4907505&subch=bauto&hpid=00032");
urlList.add("https://tech.sina.cn/2019-07-30/detail-ihytcerm7194587.d.html");
List<MediaSelfSourceBean> u = MediaSelfSource.getMediaSelfSource(urlList);
for(MediaSelfSourceBean b : u) {
System.out.println(b.toString());
......
......@@ -394,16 +394,21 @@ public class MatchSource {
if(source!=null && source.length()>1){
source = "快资讯-" + source;
}
}else if(url.contains("cj.sina.com.cn") || url.contains("finance.sina.cn")){
}else if(url.contains("cj.sina.com.cn") || url.contains("finance.sina.cn") ||
url.contains("tech.sina.cn") || url.contains("news.sina.cn")){
source = document.select("h2.weibo_user").text();
if(Objects.isNull(source) || source.length() < 1){
//新浪财经头条号
//新浪科技头条号
if(html.contains("<meta name=\"mediaid\"")){
source = html.split("<meta name=\"mediaid\" content=\"")[1].split("\"")[0].trim();
}
}
if(Objects.isNull(source) || source.length() < 1){
//新浪财经头条号
source = document.select("body > main > section.j_main_art > section > article > time > cite").text();
}
if(source!=null && source.length()>1){
source = "财经头条-" + source;
source = "新浪-" + source;
}
}else if(url.contains("auto.sina.cn") || url.contains("auto.sina.com.cn")){
source = document.select("div.art_title > div > span:nth-child(1)").text();
......@@ -476,7 +481,7 @@ public class MatchSource {
//网易订阅-网易号
source = document.select("div.normal > div.colum_info > h4").text();
if(source!=null && source.length()>1){
source = "网易-" + source;
source = "网易-" + source;
}
}else if(url.contains("qctt.cn")){
//汽车头条
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment