Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
source_forward
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
source_forward
Commits
9f440187
Commit
9f440187
authored
Mar 26, 2018
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加微信文章验证是否原创功能
parent
4fafcc87
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
14 deletions
+20
-14
src/main/java/com/zhiwei/source_forward/crawler/SourceForwardPageProcessor.java
+11
-11
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
+9
-3
No files found.
src/main/java/com/zhiwei/source_forward/crawler/SourceForwardPageProcessor.java
View file @
9f440187
...
...
@@ -36,21 +36,21 @@ public class SourceForwardPageProcessor implements PageProcessor {
String
channel
=
"新闻"
;
try
{
if
(
page
.
getStatusCode
()!=
404
){
channel
=
TreateData
.
verifyChannel
(
page
.
getUrl
().
get
());
if
(
channel
==
null
){
List
<
Node
>
nodeList
=
page
.
getHtml
().
getDocument
().
head
().
childNodes
();
channel
=
TreateData
.
matchChannel
(
nodeList
);
}
source
=
TreateData
.
matchSource
(
page
.
getUrl
().
get
(),
page
.
getHtml
().
toString
(),
sourceList
);
if
(
page
.
getUrl
().
get
().
contains
(
"mp.weixin.qq.com"
)){
String
isforward
=
"未知"
;
Document
document
=
page
.
getHtml
().
getDocument
();
if
(
document
.
select
(
"div#meta_content"
).
select
(
"span.rich_media_meta meta_original_tag"
)!=
null
&&
!
""
.
equals
(
document
.
select
(
"div#meta_content"
).
select
(
"span.rich_media_meta meta_original_tag"
))){
isforward
=
document
.
select
(
"div#meta_content"
).
select
(
"span.rich_media_meta meta_original_tag"
).
text
();
data
.
put
(
"isforward"
,
isforward
);
isforward
=
document
.
select
(
"div#meta_content"
).
select
(
"span#copyright_logo"
).
text
();
if
(!
"原创"
.
equals
(
isforward
)){
isforward
=
"未知"
;
}
data
.
put
(
"isforward"
,
isforward
);
}
else
{
channel
=
TreateData
.
verifyChannel
(
page
.
getUrl
().
get
());
if
(
channel
==
null
){
List
<
Node
>
nodeList
=
page
.
getHtml
().
getDocument
().
head
().
childNodes
();
channel
=
TreateData
.
matchChannel
(
nodeList
);
}
source
=
TreateData
.
matchSource
(
page
.
getUrl
().
get
(),
page
.
getHtml
().
toString
(),
sourceList
);
}
}
}
catch
(
Exception
e
)
{
...
...
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
View file @
9f440187
...
...
@@ -43,6 +43,7 @@ public class SourceForward {
for
(
Map
<
String
,
Object
>
sourceMap
:
sourceForwardList
){
String
url
=
sourceMap
.
get
(
"url"
)+
""
;
String
root_source
=
sourceMap
.
get
(
"root_source"
)!=
null
?
sourceMap
.
get
(
"root_source"
).
toString
():
null
;
String
isForwardWX
=
sourceMap
.
get
(
"isforward"
)!=
null
?
sourceMap
.
get
(
"isforward"
).
toString
():
null
;
String
channel
=
sourceMap
.
get
(
"channel"
)+
""
;
//整合数据及验证转发原创
if
(
dataMap
.
containsKey
(
url
)){
...
...
@@ -54,10 +55,15 @@ public class SourceForward {
}
else
if
(
root_source
.
toUpperCase
().
trim
().
equals
(
source
.
toUpperCase
().
trim
())){
isForward
=
"原创"
;
}
data
.
put
(
"是否转发"
,
isForward
);
data
.
put
(
"原来源"
,
root_source
);
data
.
put
(
"频道"
,
channel
);
if
(
url
.
contains
(
"mp.weixin.qq.com"
)){
isForward
=
isForwardWX
;
}
else
{
data
.
put
(
"原来源"
,
root_source
);
data
.
put
(
"频道"
,
channel
);
}
data
.
put
(
"是否转发"
,
isForward
);
dataMap
.
put
(
url
,
data
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment