Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
source_forward
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
source_forward
Commits
947a2179
Commit
947a2179
authored
Dec 24, 2019
by
cwy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
增加zaker客户端获取
parent
0c98f43b
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
36 additions
and
1 deletions
+36
-1
src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourceCrawler.java
+22
-0
src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
+1
-1
src/main/java/com/zhiwei/source_forward/util/MatchSource.java
+13
-0
No files found.
src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourceCrawler.java
View file @
947a2179
...
...
@@ -93,6 +93,8 @@ public class MediaSelfSourceCrawler {
map
.
put
(
"referer"
,
url
);
}
map
.
put
(
"Connection"
,
"close"
);
url
=
dealUrl
(
url
);
if
(
Objects
.
nonNull
(
url
))
{
Request
request
=
RequestUtils
.
wrapGet
(
url
,
map
);
counter
.
add
();
...
...
@@ -111,10 +113,30 @@ public class MediaSelfSourceCrawler {
counter
.
done
();
}
});
}
return
counter
;
}
/**
 * Prepares a URL for crawling.
 *
 * <p>Links containing {@code wap.peopleapp.com/article} are rewritten to the
 * {@code app.peopleapp.com} {@code getInfoUp} endpoint, using the path segment
 * that follows the first {@code "article"} occurrence as the {@code id}.
 * Every other link that starts with {@code "http"} is returned unchanged.
 *
 * @param url the link to prepare; may be {@code null}
 * @return the URL to request, or {@code null} when {@code url} is {@code null},
 *         does not start with {@code "http"}, or is a peopleapp article link
 *         from which no id segment can be extracted
 */
private String dealUrl(String url) {
    // Reject missing or non-http links up front instead of relying on a caught NPE.
    if (url == null || !url.startsWith("http")) {
        return null;
    }
    if (!url.contains("wap.peopleapp.com/article")) {
        return url;
    }
    // Everything after the first "article" token, e.g. "/rmh123/xyz".
    String[] afterArticle = url.split("article");
    if (afterArticle.length < 2) {
        return null;
    }
    // Index 0 is the text before the first '/', index 1 is the article id.
    String[] segments = afterArticle[1].split("/");
    if (segments.length < 2) {
        return null;
    }
    return "https://app.peopleapp.com/WapApi/610/ArtInfoApi/getInfoUp?id=" + segments[1];
}
/**
*
* @Description 解析文章获取相关数据
* @param response
...
...
src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
View file @
947a2179
...
...
@@ -32,7 +32,7 @@ public class MediaSelfSource {
public
static
void
main
(
String
[]
args
)
{
ProxyFactory
.
init
(
"zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181"
,
"local"
,
GroupType
.
PROVIDER
,
10000002L
);
List
<
String
>
urlList
=
new
ArrayList
<>();
urlList
.
add
(
"http
s://www.360kuai.com/pc/922e4596800e5ef0a?cota=3&kuai_so=1&sign=360_e39369d1&refer_scene=so_
3"
);
urlList
.
add
(
"http
://app.myzaker.com/news/article.php?pk=5dbef675b15ec0307572750
3"
);
List
<
MediaSelfSourceBean
>
u
=
MediaSelfSource
.
getMediaSelfSource
(
urlList
);
for
(
MediaSelfSourceBean
b
:
u
)
{
System
.
out
.
println
(
b
.
toString
());
...
...
src/main/java/com/zhiwei/source_forward/util/MatchSource.java
View file @
947a2179
...
...
@@ -439,6 +439,12 @@ public class MatchSource {
if
(
source
!=
null
&&
source
.
length
()>
1
){
source
=
"百度百家-"
+
source
;
}
}
else
if
(
url
.
contains
(
"app.myzaker.com"
)){
// zaker客户端
source
=
document
.
select
(
"#tpl_author"
).
first
().
text
().
trim
();
if
(
source
!=
null
&&
source
.
length
()>
1
){
source
=
"zaker客户端-"
+
source
;
}
}
else
if
(
url
.
contains
(
"yidianzixun.com"
)){
//一点资讯
if
(
html
.
contains
(
"related_wemedia"
)){
...
...
@@ -462,6 +468,13 @@ public class MatchSource {
if
(
Objects
.
nonNull
(
source
)
&&
!
source
.
isEmpty
()){
source
=
"it时代网-"
+
source
;
}
}
else
if
(
url
.
contains
(
"wap.peopleapp.com"
)){
// 人民日报客户端
JSONObject
json
=
JSONObject
.
parseObject
(
html
);
source
=
json
.
getJSONObject
(
"data"
).
getString
(
"authors"
);
if
(
Objects
.
nonNull
(
source
)
&&
!
source
.
isEmpty
()){
source
=
"人民日报客户端-"
+
source
;
}
}
else
if
(
url
.
contains
(
"guancha.cn"
)){
// 风闻社区
source
=
document
.
select
(
"div.main-tow > div.box-left > div.article-content > div:nth-child(3) > div.user-main > h4 > a"
).
text
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment