Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
source_forward
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
source_forward
Commits
90fc68cf
Commit
90fc68cf
authored
Jan 03, 2018
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1.添加渠道匹配规则
2.添加英文来源匹配规则
parent
f67a402c
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
9 deletions
+16
-9
pom.xml
+0
-5
src/main/java/com/zhiwei/source_forward/crawler/SourceForwardPageProcessor.java
+15
-3
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
+1
-1
No files found.
pom.xml
View file @
90fc68cf
...
@@ -25,15 +25,11 @@
...
@@ -25,15 +25,11 @@
<artifactId>
WebCollector
</artifactId>
<artifactId>
WebCollector
</artifactId>
<version>
2.71
</version>
<version>
2.71
</version>
</dependency>
</dependency>
<dependency>
<dependency>
<groupId>
us.codecraft
</groupId>
<groupId>
us.codecraft
</groupId>
<artifactId>
webmagic-core
</artifactId>
<artifactId>
webmagic-core
</artifactId>
<version>
0.6.1
</version>
<version>
0.6.1
</version>
</dependency>
</dependency>
<dependency>
<dependency>
<groupId>
us.codecraft
</groupId>
<groupId>
us.codecraft
</groupId>
<artifactId>
webmagic-extension
</artifactId>
<artifactId>
webmagic-extension
</artifactId>
...
@@ -45,7 +41,6 @@
...
@@ -45,7 +41,6 @@
</exclusion>
</exclusion>
</exclusions>
</exclusions>
</dependency>
</dependency>
<dependency>
<dependency>
<groupId>
us.codecraft
</groupId>
<groupId>
us.codecraft
</groupId>
<artifactId>
webmagic-saxon
</artifactId>
<artifactId>
webmagic-saxon
</artifactId>
...
...
src/main/java/com/zhiwei/source_forward/crawler/SourceForwardPageProcessor.java
View file @
90fc68cf
...
@@ -66,12 +66,16 @@ public class SourceForwardPageProcessor implements PageProcessor {
...
@@ -66,12 +66,16 @@ public class SourceForwardPageProcessor implements PageProcessor {
*/
*/
private
static
String
verifyChannel
(
String
url
){
private
static
String
verifyChannel
(
String
url
){
String
channel
=
null
;
String
channel
=
null
;
if
(
url
.
contains
(
"news."
)){
if
(
url
.
contains
(
"news."
)
||
url
.
contains
(
"cj.sina.com.cn"
)
||
url
.
contains
(
"wemedia.ifeng.com"
)){
channel
=
"新闻"
;
channel
=
"新闻"
;
}
else
if
(
url
.
contains
(
"finance."
)
||
url
.
contains
(
"business."
)
}
else
if
(
url
.
contains
(
"finance."
)
||
url
.
contains
(
"business."
)
||
url
.
contains
(
"money."
)){
||
url
.
contains
(
"money."
)
||
url
.
contains
(
"stock."
)
||
url
.
contains
(
"10jqka.com.cn"
)){
channel
=
"财经"
;
channel
=
"财经"
;
}
else
if
(
url
.
contains
(
"tech."
)
||
url
.
contains
(
"it."
)){
}
else
if
(
url
.
contains
(
"tech."
)
||
url
.
contains
(
"it."
)
||
url
.
contains
(
"pcedu."
)
||
url
.
contains
(
"mobile."
)
||
url
.
contains
(
"vr."
)){
channel
=
"科技"
;
channel
=
"科技"
;
}
else
if
(
url
.
contains
(
"sports."
)){
}
else
if
(
url
.
contains
(
"sports."
)){
channel
=
"体育"
;
channel
=
"体育"
;
...
@@ -90,6 +94,14 @@ public class SourceForwardPageProcessor implements PageProcessor {
...
@@ -90,6 +94,14 @@ public class SourceForwardPageProcessor implements PageProcessor {
channel
=
"房产"
;
channel
=
"房产"
;
}
else
if
(
url
.
contains
(
"games."
)){
}
else
if
(
url
.
contains
(
"games."
)){
channel
=
"游戏"
;
channel
=
"游戏"
;
}
else
if
(
url
.
contains
(
"intl."
)){
channel
=
"国际"
;
}
else
if
(
url
.
contains
(
"science."
)){
channel
=
"科学"
;
}
else
if
(
url
.
contains
(
"city."
)){
channel
=
"城市"
;
}
else
if
(
url
.
contains
(
"sc."
)){
channel
=
"市场"
;
}
}
return
channel
;
return
channel
;
...
...
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
View file @
90fc68cf
...
@@ -49,7 +49,7 @@ public class SourceForward {
...
@@ -49,7 +49,7 @@ public class SourceForward {
String
isForward
=
"转发"
;
String
isForward
=
"转发"
;
if
(
root_source
==
null
){
if
(
root_source
==
null
){
isForward
=
"原创"
;
isForward
=
"原创"
;
}
else
if
(
root_source
.
equals
(
source
)){
}
else
if
(
root_source
.
toUpperCase
().
trim
().
equals
(
source
.
toUpperCase
().
trim
()
)){
isForward
=
"原创"
;
isForward
=
"原创"
;
}
}
data
.
put
(
"是否转发"
,
isForward
);
data
.
put
(
"是否转发"
,
isForward
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment