Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
source_forward
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
source_forward
Commits
9557316d
Commit
9557316d
authored
May 23, 2019
by
win 10
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
新增了txt文件的来源和大部分平台来源解析
parent
6e7f47cf
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
262 additions
and
23 deletions
+262
-23
src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
+1
-1
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
+1
-1
src/main/java/com/zhiwei/source_forward/run/URLLive.java
+13
-11
src/main/java/com/zhiwei/source_forward/util/MatchSource.java
+221
-10
src/main/resources/sourceList.txt
+26
-0
No files found.
src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
View file @
9557316d
...
@@ -25,7 +25,7 @@ public class MediaSelfSource {
...
@@ -25,7 +25,7 @@ public class MediaSelfSource {
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
List
<
String
>
urlList
=
new
ArrayList
<>();
List
<
String
>
urlList
=
new
ArrayList
<>();
urlList
.
add
(
"http
s://www.360kuai.com/pc/9277f65f68bba0265?cota=3&kuai_so
=1&sign=360_e39369d1&refer_scene=so_3"
);
urlList
.
add
(
"http
://sh.qihoo.com/pc/9dcfa48989d33df34?cota
=1&sign=360_e39369d1&refer_scene=so_3"
);
List
<
MediaSelfSourceBean
>
u
=
MediaSelfSource
.
getMediaSelfSource
(
urlList
);
List
<
MediaSelfSourceBean
>
u
=
MediaSelfSource
.
getMediaSelfSource
(
urlList
);
for
(
MediaSelfSourceBean
b
:
u
)
{
for
(
MediaSelfSourceBean
b
:
u
)
{
System
.
out
.
println
(
b
.
toString
());
System
.
out
.
println
(
b
.
toString
());
...
...
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
View file @
9557316d
...
@@ -81,7 +81,7 @@ public class SourceForward {
...
@@ -81,7 +81,7 @@ public class SourceForward {
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
List
<
String
>
urlList
=
new
ArrayList
<>();
List
<
String
>
urlList
=
new
ArrayList
<>();
urlList
.
add
(
"http
://www.northnews.cn/2019/0419/3080909.s
html"
);
urlList
.
add
(
"http
s://www.jiemian.com/article/2782869.
html"
);
List
<
SourceForwardBean
>
da
=
SourceForward
.
getSourceForward
(
urlList
);
List
<
SourceForwardBean
>
da
=
SourceForward
.
getSourceForward
(
urlList
);
for
(
SourceForwardBean
sfb
:
da
)
{
for
(
SourceForwardBean
sfb
:
da
)
{
System
.
out
.
println
(
sfb
.
toString
());
System
.
out
.
println
(
sfb
.
toString
());
...
...
src/main/java/com/zhiwei/source_forward/run/URLLive.java
View file @
9557316d
...
@@ -9,6 +9,8 @@ import java.util.Map.Entry;
...
@@ -9,6 +9,8 @@ import java.util.Map.Entry;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.apache.logging.log4j.Logger
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.source_forward.bean.UrlLiveBean
;
import
com.zhiwei.source_forward.bean.UrlLiveBean
;
import
com.zhiwei.source_forward.bean.UrlLiveBean.Attribution
;
import
com.zhiwei.source_forward.bean.UrlLiveBean.Attribution
;
import
com.zhiwei.source_forward.crawler.UrlLiveCrawler
;
import
com.zhiwei.source_forward.crawler.UrlLiveCrawler
;
...
@@ -69,17 +71,17 @@ public class URLLive {
...
@@ -69,17 +71,17 @@ public class URLLive {
return
UrlLiveCrawlerThread
.
getUrlLiveCrawle
(
urlList
);
return
UrlLiveCrawlerThread
.
getUrlLiveCrawle
(
urlList
);
}
}
//
public static void main(String[] args) {
public
static
void
main
(
String
[]
args
)
{
//
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
//
List<String> urlList = new ArrayList<>();
List
<
String
>
urlList
=
new
ArrayList
<>();
// urlList.add("https://www.toutiao.com/a6680674354260345355
");
urlList
.
add
(
"http://www.ebrun.com/ebrungo/zb/316384.shtml
"
);
//
//
//
List<UrlLiveBean> u = URLLive.verificationURLLive(urlList);
List
<
UrlLiveBean
>
u
=
URLLive
.
verificationURLLive
(
urlList
);
//
for(UrlLiveBean b : u) {
for
(
UrlLiveBean
b
:
u
)
{
//
System.out.println(b.toString());
System
.
out
.
println
(
b
.
toString
());
//
}
}
//
}
}
static
class
UrlLiveCrawlerThread
extends
Thread
{
static
class
UrlLiveCrawlerThread
extends
Thread
{
...
...
src/main/java/com/zhiwei/source_forward/util/MatchSource.java
View file @
9557316d
...
@@ -3,6 +3,7 @@ package com.zhiwei.source_forward.util;
...
@@ -3,6 +3,7 @@ package com.zhiwei.source_forward.util;
import
java.util.List
;
import
java.util.List
;
import
java.util.Objects
;
import
java.util.Objects
;
import
org.checkerframework.checker.units.qual.s
;
import
org.jsoup.Jsoup
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Document
;
...
@@ -56,7 +57,10 @@ public class MatchSource {
...
@@ -56,7 +57,10 @@ public class MatchSource {
/***特定网站单独处理**/
/***特定网站单独处理**/
if
(
url
.
contains
(
"thepaper.cn"
)){
if
(
url
.
contains
(
"thepaper.cn"
)){
//单独处理澎湃数据
//单独处理澎湃数据
source
=
document
.
select
(
"div.news_about"
).
text
();
source
=
document
.
select
(
"div.news_about"
).
select
(
"p"
).
select
(
"span"
).
text
().
replaceAll
(
".*来源:"
,
""
);
if
(
source
.
length
()
==
0
)
{
source
=
document
.
select
(
"div.news_about"
).
text
().
replaceAll
(
" \\d{4}.*|.*/"
,
""
);
}
}
else
if
(
url
.
contains
(
"sports.eastday.com"
)){
}
else
if
(
url
.
contains
(
"sports.eastday.com"
)){
//单独处理东方体育网
//单独处理东方体育网
source
=
document
.
select
(
"div.article"
).
select
(
"span"
).
text
();
source
=
document
.
select
(
"div.article"
).
select
(
"span"
).
text
();
...
@@ -97,18 +101,220 @@ public class MatchSource {
...
@@ -97,18 +101,220 @@ public class MatchSource {
}
else
if
(
url
.
contains
(
"caijing.com.cn"
)){
}
else
if
(
url
.
contains
(
"caijing.com.cn"
)){
//财经网产经
//财经网产经
source
=
document
.
select
(
"#source_baidu"
).
text
();
source
=
document
.
select
(
"#source_baidu"
).
text
();
}
else
{
}
else
if
(
url
.
contains
(
"news.eastday.com"
)){
//单独处理东方网
source
=
document
.
select
(
"div#sectionleft"
).
select
(
"div"
).
select
(
"p"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"ny.chinacenn.com"
)){
//单独处理中企网
source
=
document
.
select
(
"td"
).
select
(
"span.ltutext3"
).
text
().
replaceAll
(
" \\d{4}.*"
,
""
);
}
else
if
(
url
.
contains
(
"ebrun.com"
)){
//单独处理亿邦动力网
source
=
document
.
select
(
"div.post-header"
).
select
(
"p.source"
).
select
(
"span.f-left"
).
text
().
replaceAll
(
".*来源: "
,
""
);
}
else
if
(
url
.
contains
(
"www.mnw.cn"
)){
//单独处理闽南网
source
=
document
.
select
(
"div.il"
).
select
(
"span"
).
text
().
replaceAll
(
"来源:|\\d{4}.*"
,
""
);
}
else
if
(
url
.
contains
(
"sn.cri.cn"
)){
//单独处理国际在线
source
=
document
.
select
(
"span.asource"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"sh.sina.com.cn"
)){
//单独处理新浪上海
source
=
document
.
select
(
"p.source-time"
).
select
(
"span"
).
get
(
1
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"kaixian.tv"
)){
//单独处理汉丰网
source
=
document
.
select
(
"div.content"
).
select
(
"h2.font_gray"
).
text
().
replaceAll
(
".*来源:"
,
""
);
}
else
if
(
url
.
contains
(
"lanjingtmt.com"
)){
//单独处理蓝鲸TMT
source
=
"蓝鲸TMT网"
;
}
else
if
(
url
.
contains
(
"tech.huanqiu.com"
)){
//单独处理环球网
source
=
document
.
select
(
"span.la_t_b"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"china.qianlong.com"
)){
//单独处理千龙网
source
=
document
.
select
(
"span.source"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"m.mnw.cn"
)){
//单独处理手机闽南网
source
=
document
.
select
(
"article.info"
).
select
(
"header"
).
select
(
"div"
).
select
(
"span"
).
text
().
replaceAll
(
"\\d{4}.*| "
,
""
);
}
else
if
(
url
.
contains
(
"mydrivers.com"
)){
//单独处理快科技
source
=
document
.
select
(
"div.news_bt1_left"
).
text
().
replaceAll
(
".*出处:| 作者:[\\w\\W]*"
,
""
);
}
else
if
(
url
.
contains
(
"3dmgame.com"
)){
//单独处理3DMGAME
source
=
document
.
select
(
"ul.intem"
).
select
(
"li"
).
select
(
"span.weibo"
).
text
();
}
else
if
(
url
.
contains
(
"99it.com.cn"
)){
//单独处理99科技
source
=
document
.
select
(
"div.mate"
).
select
(
"span"
).
text
().
replaceAll
(
".*来源:|编辑.*"
,
""
);
}
else
if
(
url
.
contains
(
"ciotimes.com"
)){
//单独处理CIO时代网
source
=
document
.
select
(
"p.ly.visible-xs.text-left"
).
text
().
replaceAll
(
".*来源:"
,
""
);
}
else
if
(
url
.
contains
(
"ithome.com"
)){
//单独处理IT之家
source
=
document
.
select
(
"span#source_baidu"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"techweb.com.cn"
)){
//单独处理TechWeb
source
=
document
.
select
(
"span.from"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"cniteyes.com"
)){
//单独处理T客帮
source
=
document
.
select
(
"div.item-date"
).
select
(
"span"
).
text
();
}
else
if
(
url
.
contains
(
"enorth.com.cn"
)){
//单独处理北方网
source
=
document
.
select
(
"p.col-sm-8.info"
).
select
(
"span"
).
text
().
replaceAll
(
".*来源:|编辑.*"
,
""
);
}
else
if
(
url
.
contains
(
"btime.com"
)){
//单独处理北京时间
source
=
document
.
select
(
"span.col.cite"
).
text
();
}
else
if
(
url
.
contains
(
"bianews.com"
)){
//单独处理鞭牛士
source
=
document
.
select
(
"span.name.fl"
).
text
();
}
else
if
(
url
.
contains
(
"dzwww.com"
)){
//单独处理大众网
source
=
document
.
select
(
"div.layout"
).
select
(
"div.left"
).
text
().
replaceAll
(
".*来源: |作者.*"
,
""
);
}
else
if
(
url
.
contains
(
"dsb.cn"
)){
//单独处理电商报
source
=
document
.
select
(
"div.new-content-info.clearfix"
).
select
(
"span"
).
text
().
replaceAll
(
".*作者:"
,
""
);
}
else
if
(
url
.
contains
(
"finance.eastmoney.com"
)){
//单独处理东方财富网
source
=
document
.
select
(
"div.source.data-source"
).
attr
(
"data-source"
).
toString
();
}
else
if
(
url
.
contains
(
"emwap.eastmoney.com"
)){
//单独处理东方财富网客户端
source
=
document
.
select
(
"div.where"
).
select
(
"span.source"
).
attr
(
"title"
);
}
else
if
(
url
.
contains
(
"mini.eastday.com"
)){
//单独处理东方头条
source
=
document
.
select
(
"div.article-src-time"
).
select
(
"span"
).
text
().
replaceAll
(
".*来源:"
,
""
);
}
else
if
(
url
.
contains
(
"tech.ifeng.com"
)){
//单独处理凤凰科技
source
=
document
.
select
(
"p.p_time"
).
select
(
"span"
).
select
(
"span.ss03"
).
text
();
}
else
if
(
url
.
contains
(
"finance.ifeng.com"
)){
//单独处理凤凰网
source
=
document
.
select
(
"p.p_time"
).
select
(
"span"
).
select
(
"span"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"iphone.265g.com"
)){
//单独处理265G网
source
=
document
.
select
(
"div.article_info"
).
select
(
"span"
).
text
().
replaceAll
(
".*来源:|QQ群号.*"
,
""
);
}
else
if
(
url
.
contains
(
"yicai.com"
)){
//单独处理第一财经
source
=
document
.
select
(
"div.title.f-pr"
).
select
(
"p"
).
select
(
"span"
).
text
();
}
else
if
(
url
.
contains
(
"cnblogs.com"
)){
//单独处理博客园
source
=
document
.
select
(
"div#come_from"
).
text
().
replaceAll
(
".*来自:"
,
""
);
}
else
if
(
url
.
contains
(
"chinaxiaokang.com"
)){
//单独处理中国小康网
source
=
document
.
select
(
"span#arturl"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"chinabaogao.com"
))
{
//单独处理中国报告网
source
=
document
.
select
(
"p.cbg-a-d-info"
).
select
(
"a"
).
text
().
replaceAll
(
"大 中 小 | "
,
""
);
}
else
if
(
url
.
contains
(
"anyv.net"
))
{
//单独处理爱妮微
source
=
document
.
select
(
"span.cor666"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"yingxiao360.com"
)){
//单独处理第一赢销网
source
=
"第一赢销网"
;
}
else
if
(
url
.
contains
(
"cctime.com"
)){
//单独处理飞象网
source
=
document
.
select
(
"td.dateAndSource"
).
text
().
replaceAll
(
".*\\d{2}|作 者.*| "
,
""
);
}
else
if
(
url
.
contains
(
"news.hexun.com"
)){
//单独处理和讯网
source
=
document
.
select
(
"div.tip.fl"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"finance.jrj.com.cn"
)){
//单独处理金融界
source
=
document
.
select
(
"p.inftop"
).
select
(
"span"
).
select
(
"a"
).
text
().
replaceAll
(
"价值.*| "
,
""
);
}
else
if
(
url
.
contains
(
"tech.china.com.cn"
)){
//单独处理中国网
source
=
document
.
select
(
"span.fl.time2"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"news.china.com.cn"
)){
//单独处理中国网
source
=
document
.
select
(
"div.pub_date"
).
select
(
"span#source_baidu"
).
text
().
replaceAll
(
".*来源:"
,
""
);
}
else
if
(
url
.
contains
(
"admin5.com"
)){
//单独处理站长网
source
=
document
.
select
(
"div.source"
).
select
(
"span"
).
text
().
replaceAll
(
".*来源:| "
,
""
);
}
else
if
(
url
.
contains
(
"stock.qq.com"
)){
//单独处理腾讯证券
source
=
document
.
select
(
"div.a_Info"
).
select
(
"span.a_source"
).
text
();
}
else
if
(
url
.
contains
(
"n.cztv.com"
)){
//单独处理新蓝网
source
=
document
.
select
(
"div.publish"
).
select
(
"ul"
).
select
(
"li"
).
text
().
replaceAll
(
"\\d{4}.*"
,
""
);
}
else
if
(
url
.
contains
(
"news.paidai.com"
)){
//单独处理派代网
source
=
document
.
select
(
"p.t_info"
).
select
(
"span"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"news.mydrivers.com"
)){
//单独处理快科技
source
=
document
.
select
(
"div.news_bt1_left"
).
text
().
replaceAll
(
".*出处:| 作者.*"
,
""
);
}
else
if
(
url
.
contains
(
"www.chinaz.com"
)){
//单独处理站长之家
source
=
document
.
select
(
"div.meta"
).
select
(
"span.source"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"yuncaijing.com"
)){
//单独处理云财经
source
=
document
.
select
(
"section.news-wrap"
).
select
(
"header"
).
select
(
"div"
).
text
().
replaceAll
(
".*消息来源: |\\[阅读原文.*| "
,
""
);
}
else
if
(
url
.
contains
(
"itmsc.cn"
)){
//单独处理科技传媒网
source
=
document
.
select
(
"div.arc_sc"
).
select
(
"p"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"nbd.com.cn"
)){
//单独处理每日经济新闻
source
=
document
.
select
(
"span.source"
).
text
();
}
else
if
(
url
.
contains
(
"pintu360.com"
)){
//单独处理品途商业评论
source
=
"品途商业评论"
;
}
else
if
(
url
.
contains
(
"news.qudong.com"
)){
//单独处理驱动中国
source
=
document
.
select
(
"div.news_right"
).
select
(
"dd"
).
select
(
"li"
).
select
(
"span"
).
select
(
"a"
).
text
().
replaceAll
(
" .*"
,
""
);
}
else
if
(
url
.
contains
(
"shobserver.com"
)){
//单独处理上海观察
source
=
document
.
select
(
"span.max-words"
).
get
(
0
).
text
();
}
else
if
(
url
.
contains
(
"g.pconline.com.cn"
)){
//单独处理太平洋电脑网
source
=
document
.
select
(
"div.art-info"
).
text
().
replaceAll
(
"手机|\\d{4}.*| "
,
""
);
}
else
if
(
url
.
contains
(
"news.xtol.cn"
)){
//单独处理湘潭在线
source
=
document
.
select
(
"span.date"
).
text
().
replaceAll
(
".*来源:"
,
""
);
}
else
if
(
url
.
contains
(
"bjnews.com.cn"
)){
//单独处理新京报网
source
=
document
.
select
(
"span.author"
).
text
().
replaceAll
(
" 记者.*"
,
""
);
}
else
if
(
url
.
contains
(
"telworld.com.cn"
)){
//单独处理运营商世界
source
=
document
.
select
(
"div.news_xiang_tit_2_left"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"thehour.cn"
)){
//单独处理浙江24小时
source
=
document
.
select
(
"div.newsInfo"
).
select
(
"span"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"sh.zol.com.cn"
)){
//单独处理中关村在线
source
=
document
.
select
(
"div.article-aboute"
).
select
(
"span.source_baidu"
).
text
();
}
else
if
(
url
.
contains
(
"ec.com.cn"
)){
//单独处理中国国际电子商务网
source
=
document
.
select
(
"span.article_resource"
).
text
().
replaceAll
(
".*来源:"
,
""
);
}
else
if
(
url
.
contains
(
"cqn.com.cn"
)){
//单独处理中国质量新闻网
source
=
document
.
select
(
"span.from"
).
text
().
replaceAll
(
"-.*"
,
""
);
}
else
if
(
url
.
contains
(
"sc.stock.cnfol.com"
)){
//单独处理中金在线
source
=
document
.
select
(
"div.artDes"
).
select
(
"span"
).
select
(
"a"
).
text
();
}
else
if
(
url
.
contains
(
"zczj.com"
)){
//单独处理众筹之家
source
=
document
.
select
(
"div.news-info"
).
select
(
"span"
).
text
().
replaceAll
(
"来源:|作者.*"
,
""
);
}
else
if
(
url
.
contains
(
"cqcb.com"
)){
//单独处理重庆晨报
source
=
document
.
select
(
"span.label_nr"
).
text
();
}
else
if
(
url
.
contains
(
"stock.10jqka.com.cn"
)){
//单独处理重庆晨报
source
=
document
.
select
(
"span.label_nr"
).
text
();
}
else
if
(
url
.
contains
(
"jiemian.com"
)){
//单独处理界面新闻
source
=
document
.
select
(
"div.article-info"
).
select
(
"span"
).
text
().
replaceAll
(
".*来源:| 字体[\\w\\W]*"
,
""
);
}
if
(
Objects
.
nonNull
(
source
)
&&
source
.
length
()
!=
0
)
{
return
source
;
}
else
{
//其他网站处理
//其他网站处理
source
=
mathchOtherSource
(
html
,
htmlBody
,
sourceList
);
source
=
mathchOtherSource
(
html
,
htmlBody
,
sourceList
);
}
if
(
source
!=
null
){
if
(
source
!=
null
){
//验证来源
//验证来源
//
for (String sourceMatch : sourceList) {
for
(
String
sourceMatch
:
sourceList
)
{
//
if (source.contains(sourceMatch)) {
if
(
source
.
contains
(
sourceMatch
))
{
//
return sourceMatch;
return
sourceMatch
;
//
}
}
//
}
}
return
source
;
}
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
...
@@ -213,12 +419,17 @@ public class MatchSource {
...
@@ -213,12 +419,17 @@ public class MatchSource {
if
(
source
!=
null
&&
source
.
length
()>
1
){
if
(
source
!=
null
&&
source
.
length
()>
1
){
source
=
"汽车之家-"
+
source
;
source
=
"汽车之家-"
+
source
;
}
}
}
else
if
(
url
.
contains
(
"item.btime.com"
)){
}
else
if
(
url
.
contains
(
"item.btime.com"
)){
//北京时间
//北京时间
source
=
document
.
select
(
"a.author"
).
text
();
source
=
document
.
select
(
"a.author"
).
text
();
if
(
source
!=
null
&&
source
.
length
()>
1
){
if
(
source
!=
null
&&
source
.
length
()>
1
){
source
=
"北京时间-"
+
source
;
source
=
"北京时间-"
+
source
;
}
}
}
else
if
(
url
.
contains
(
"item.btime.com"
)){
//北京时间
source
=
document
.
select
(
"span.col cite"
).
text
();
}
else
if
(
url
.
contains
(
"qq.com/"
)){
}
else
if
(
url
.
contains
(
"qq.com/"
)){
//腾讯网-企鹅号
//腾讯网-企鹅号
source
=
html
.
split
(
"media\": \""
)[
1
].
split
(
"\","
)[
0
];
source
=
html
.
split
(
"media\": \""
)[
1
].
split
(
"\","
)[
0
];
...
...
src/main/resources/sourceList.txt
View file @
9557316d
...
@@ -893,6 +893,7 @@ ZOL中关村在线
...
@@ -893,6 +893,7 @@ ZOL中关村在线
华东理工大学
华东理工大学
华东在线
华东在线
华尔街见闻
华尔街见闻
华尔街见闻网
华股财经
华股财经
华龙网
华龙网
华龙网法律频道
华龙网法律频道
...
@@ -2488,6 +2489,7 @@ ZOL中关村在线
...
@@ -2488,6 +2489,7 @@ ZOL中关村在线
智慧长沙
智慧长沙
智慧长沙资讯
智慧长沙资讯
智能派
智能派
智通财经
智通财经网
智通财经网
置家网
置家网
中安在线
中安在线
...
@@ -3053,3 +3055,26 @@ ZOL中关村在线
...
@@ -3053,3 +3055,26 @@ ZOL中关村在线
最高人民法院网
最高人民法院网
最高人民检察院
最高人民检察院
今日湖北
今日湖北
中国经营报
三言财经
TechWeb.com.cn
中企网
央视新闻移动网
新浪财经-自媒体综合
T媒体
《法人》
国是直通车
科技小肆
雷帝触网
铅笔道
三秦都市报
新浪财经综合
央视财经
第一财经
第一赢销网
国际金融报
A5创业网
运营商世界网讯
中外管理杂志
上游新闻综合
新蓝网·浙江网络广播电视台
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment