Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
source_forward
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
source_forward
Commits
3342069b
Commit
3342069b
authored
Jun 06, 2019
by
win 10
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
新增了QQ看点、文汇APP、博客中国三个自媒体的来源
parent
9557316d
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
24 additions
and
6 deletions
+24
-6
Log/crawler.log
+0
-0
src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourceCrawler.java
+1
-1
src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
+1
-1
src/main/java/com/zhiwei/source_forward/util/MatchSource.java
+22
-4
No files found.
Log/crawler.log
View file @
3342069b
This source diff could not be displayed because it is too large. You can
view the blob
instead.
src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourceCrawler.java
View file @
3342069b
...
@@ -141,7 +141,7 @@ public class MediaSelfSourceCrawler {
...
@@ -141,7 +141,7 @@ public class MediaSelfSourceCrawler {
logger
.
error
(
"exception "
,
e
);
logger
.
error
(
"exception "
,
e
);
source
=
null
;
source
=
null
;
}
}
logger
.
info
(
attr
.
get
()+
"=================
"
+
source
);
logger
.
info
(
attr
.
get
()+
"=================
来源"
+
source
);
MediaSelfSourceBean
msfb
=
new
MediaSelfSourceBean
(
attr
.
get
().
toString
(),
source
,
channel
);
MediaSelfSourceBean
msfb
=
new
MediaSelfSourceBean
(
attr
.
get
().
toString
(),
source
,
channel
);
if
(
callback
==
null
)
{
if
(
callback
==
null
)
{
logger
.
warn
(
"DataCallback 对象为 null,无法保存数据"
);
logger
.
warn
(
"DataCallback 对象为 null,无法保存数据"
);
...
...
src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
View file @
3342069b
...
@@ -25,7 +25,7 @@ public class MediaSelfSource {
...
@@ -25,7 +25,7 @@ public class MediaSelfSource {
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
List
<
String
>
urlList
=
new
ArrayList
<>();
List
<
String
>
urlList
=
new
ArrayList
<>();
urlList
.
add
(
"http://
sh.qihoo.com/pc/9dcfa48989d33df34?cota=1&sign=360_e39369d1&refer_scene=so_3
"
);
urlList
.
add
(
"http://
yugang.blogchina.com/713055888.html
"
);
List
<
MediaSelfSourceBean
>
u
=
MediaSelfSource
.
getMediaSelfSource
(
urlList
);
List
<
MediaSelfSourceBean
>
u
=
MediaSelfSource
.
getMediaSelfSource
(
urlList
);
for
(
MediaSelfSourceBean
b
:
u
)
{
for
(
MediaSelfSourceBean
b
:
u
)
{
System
.
out
.
println
(
b
.
toString
());
System
.
out
.
println
(
b
.
toString
());
...
...
src/main/java/com/zhiwei/source_forward/util/MatchSource.java
View file @
3342069b
...
@@ -419,17 +419,20 @@ public class MatchSource {
...
@@ -419,17 +419,20 @@ public class MatchSource {
if
(
source
!=
null
&&
source
.
length
()>
1
){
if
(
source
!=
null
&&
source
.
length
()>
1
){
source
=
"汽车之家-"
+
source
;
source
=
"汽车之家-"
+
source
;
}
}
}
}
else
if
(
url
.
contains
(
"item.btime.com"
)){
else
if
(
url
.
contains
(
"item.btime.com"
)){
//北京时间
//北京时间
source
=
document
.
select
(
"a.author"
).
text
();
source
=
document
.
select
(
"a.author"
).
text
();
if
(
source
!=
null
&&
source
.
length
()>
1
){
if
(
source
!=
null
&&
source
.
length
()>
1
){
source
=
"北京时间-"
+
source
;
source
=
"北京时间-"
+
source
;
}
}
}
}
else
if
(
url
.
contains
(
"item.btime.com"
)){
else
if
(
url
.
contains
(
"item.btime.com"
)){
//北京时间
//北京时间
source
=
document
.
select
(
"span.col cite"
).
text
();
source
=
document
.
select
(
"span.col cite"
).
text
();
}
else
if
(
url
.
contains
(
"mp.qq.com"
)){
source
=
document
.
select
(
"div#account_top > div.puin_text > div.pname"
).
text
();
if
(
source
!=
null
&&
!
source
.
equals
(
""
)){
source
=
"QQ看点-"
+
source
;
}
}
else
if
(
url
.
contains
(
"qq.com/"
)){
}
else
if
(
url
.
contains
(
"qq.com/"
)){
//腾讯网-企鹅号
//腾讯网-企鹅号
source
=
html
.
split
(
"media\": \""
)[
1
].
split
(
"\","
)[
0
];
source
=
html
.
split
(
"media\": \""
)[
1
].
split
(
"\","
)[
0
];
...
@@ -514,6 +517,21 @@ public class MatchSource {
...
@@ -514,6 +517,21 @@ public class MatchSource {
if
(
source
!=
null
&&
!
source
.
equals
(
""
)){
if
(
source
!=
null
&&
!
source
.
equals
(
""
)){
source
=
"连线家-"
+
source
;
source
=
"连线家-"
+
source
;
}
}
}
else
if
(
url
.
contains
(
"itouchtv.cn"
)){
source
=
document
.
select
(
"div.index__article-media-20Tg_ > span:nth-child(1)"
).
text
();
if
(
source
!=
null
&&
!
source
.
equals
(
""
)){
source
=
"触电新闻-"
+
source
;
}
}
else
if
(
url
.
contains
(
"whb.cn"
)){
source
=
document
.
select
(
"div.yidian-info > span:nth-child(1)"
).
text
();
if
(
source
!=
null
&&
!
source
.
equals
(
""
)){
source
=
"文汇APP-"
+
source
;
}
}
else
if
(
url
.
contains
(
"blogchina.com"
)){
source
=
document
.
select
(
"div.meta-top > label.lm_name > span > a"
).
text
();
if
(
source
!=
null
&&
!
source
.
equals
(
""
)){
source
=
"博客中国-"
+
source
;
}
}
}
return
source
;
return
source
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment