Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
source_forward
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
source_forward
Commits
37ac4e23
Commit
37ac4e23
authored
Dec 21, 2018
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
提交修改后版本
parent
bde825dd
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
12 additions
and
98 deletions
+12
-98
pom.xml
+1
-1
src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourceCrawler.java
+1
-1
src/main/java/com/zhiwei/source_forward/crawler/SourceForwardCrawler.java
+1
-3
src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
+8
-11
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
+1
-82
No files found.
pom.xml
View file @
37ac4e23
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
<modelVersion>
4.0.0
</modelVersion>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
<artifactId>
source-forward
</artifactId>
<artifactId>
source-forward
</artifactId>
<version>
0.0.
7
-SNAPSHOT
</version>
<version>
0.0.
8
-SNAPSHOT
</version>
<name>
source-forward
</name>
<name>
source-forward
</name>
<description>
验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)
</description>
<description>
验证网媒的转发关系及链接的有效性(转发验证微信及自媒体匹配率不高)
</description>
...
...
src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourceCrawler.java
View file @
37ac4e23
...
@@ -45,7 +45,7 @@ public class MediaSelfSourceCrawler {
...
@@ -45,7 +45,7 @@ public class MediaSelfSourceCrawler {
* @throws Exception
* @throws Exception
*/
*/
public
MultiThreadingCounter
submitTask
(
MediaSelfSourceDataCallBack
callback
,
String
...
urls
)
throws
Exception
{
public
MultiThreadingCounter
submitTask
(
MediaSelfSourceDataCallBack
callback
,
String
...
urls
)
throws
Exception
{
MultiThreadingCounter
counter
=
new
MultiThreadingCounter
(
"任务======= "
,
15
,
TimeUnit
.
SECOND
S
,
true
);
MultiThreadingCounter
counter
=
new
MultiThreadingCounter
(
"任务======= "
,
15
,
TimeUnit
.
MINUTE
S
,
true
);
start
(
counter
,
callback
,
urls
);
start
(
counter
,
callback
,
urls
);
return
counter
;
return
counter
;
}
}
...
...
src/main/java/com/zhiwei/source_forward/crawler/SourceForwardCrawler.java
View file @
37ac4e23
...
@@ -100,11 +100,9 @@ public class SourceForwardCrawler {
...
@@ -100,11 +100,9 @@ public class SourceForwardCrawler {
isforward
=
"未知"
;
isforward
=
"未知"
;
}
}
}
else
if
(
attr
.
get
().
toString
().
contains
(
"www.toutiao.com"
)){
}
else
if
(
attr
.
get
().
toString
().
contains
(
"www.toutiao.com"
)){
if
(
body
.
contains
(
"isOriginal"
)){
if
(
body
.
contains
(
"isOriginal"
)
&&
body
.
contains
(
"isOriginal: true"
)){
if
(
body
.
contains
(
"isOriginal: true"
)){
isforward
=
"原创"
;
isforward
=
"原创"
;
}
}
}
}
else
{
}
else
{
channel
=
MatchChannel
.
verifyChannel
(
attr
.
get
().
toString
());
channel
=
MatchChannel
.
verifyChannel
(
attr
.
get
().
toString
());
if
(
channel
==
null
){
if
(
channel
==
null
){
...
...
src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
View file @
37ac4e23
...
@@ -7,8 +7,6 @@ import java.util.List;
...
@@ -7,8 +7,6 @@ import java.util.List;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.apache.logging.log4j.Logger
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.source_forward.bean.MediaSelfSourceBean
;
import
com.zhiwei.source_forward.bean.MediaSelfSourceBean
;
import
com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution
;
import
com.zhiwei.source_forward.bean.MediaSelfSourceBean.Attribution
;
import
com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler
;
import
com.zhiwei.source_forward.crawler.MediaSelfSourceCrawler
;
...
@@ -19,18 +17,17 @@ public class MediaSelfSource {
...
@@ -19,18 +17,17 @@ public class MediaSelfSource {
private
static
Logger
logger
=
LogManager
.
getLogger
(
MediaSelfSource
.
class
);
private
static
Logger
logger
=
LogManager
.
getLogger
(
MediaSelfSource
.
class
);
public
static
List
<
MediaSelfSourceBean
>
getMediaSelfSource
(
List
<
String
>
urlList
)
{
public
static
List
<
MediaSelfSourceBean
>
getMediaSelfSource
(
List
<
String
>
urlList
)
{
List
<
MediaSelfSourceBean
>
list
=
MediaSelfSourceCrawlerThread
.
getMediaSelfSource
(
urlList
);
return
MediaSelfSourceCrawlerThread
.
getMediaSelfSource
(
urlList
);
return
list
;
}
}
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
//
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
List
<
String
>
urlList
=
new
ArrayList
<>();
//
List<String> urlList = new ArrayList<>();
urlList
.
add
(
"http://sh.qihoo.com/pc/91d1d565fe552fa1e?sign=360_e39369d1"
);
//
urlList.add("http://sh.qihoo.com/pc/91d1d565fe552fa1e?sign=360_e39369d1");
List
<
MediaSelfSourceBean
>
u
=
MediaSelfSource
.
getMediaSelfSource
(
urlList
);
//
List<MediaSelfSourceBean> u = MediaSelfSource.getMediaSelfSource(urlList);
for
(
MediaSelfSourceBean
b
:
u
)
{
//
for(MediaSelfSourceBean b : u) {
System
.
out
.
println
(
b
.
toString
());
//
System.out.println(b.toString());
}
//
}
}
}
static
class
MediaSelfSourceCrawlerThread
extends
Thread
{
static
class
MediaSelfSourceCrawlerThread
extends
Thread
{
...
...
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
View file @
37ac4e23
...
@@ -2,7 +2,6 @@ package com.zhiwei.source_forward.run;
...
@@ -2,7 +2,6 @@ package com.zhiwei.source_forward.run;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Map.Entry
;
import
java.util.Map.Entry
;
...
@@ -10,11 +9,9 @@ import java.util.Map.Entry;
...
@@ -10,11 +9,9 @@ import java.util.Map.Entry;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.apache.logging.log4j.Logger
;
import
com.zhiwei.source_forward.bean.MediaSelfSourceBean
;
import
com.zhiwei.source_forward.bean.SourceForwardBean
;
import
com.zhiwei.source_forward.bean.SourceForwardBean
;
import
com.zhiwei.source_forward.bean.SourceForwardBean.Attribution
;
import
com.zhiwei.source_forward.bean.SourceForwardBean.Attribution
;
import
com.zhiwei.source_forward.crawler.SourceForwardCrawler
;
import
com.zhiwei.source_forward.crawler.SourceForwardCrawler
;
import
com.zhiwei.source_forward.run.MediaSelfSource.MediaSelfSourceCrawlerThread
;
import
com.zhiwei.source_forward.util.SourceForwardDataCallBack
;
import
com.zhiwei.source_forward.util.SourceForwardDataCallBack
;
/**
/**
...
@@ -28,82 +25,6 @@ public class SourceForward {
...
@@ -28,82 +25,6 @@ public class SourceForward {
private
static
Logger
logger
=
LogManager
.
getLogger
(
SourceForward
.
class
);
private
static
Logger
logger
=
LogManager
.
getLogger
(
SourceForward
.
class
);
/**
/**
* @Title: getMediaSelfSource
* @author hero
* @Description: 根据链接匹配自媒体号名称
* @param @param dataMap
* @param @return 设定文件
* @return Map<String,Map<String,Object>> 返回类型
*/
public
static
Map
<
String
,
Map
<
String
,
Object
>>
getMediaSelfSource
(
Map
<
String
,
Map
<
String
,
Object
>>
dataMap
){
//启动验证来源程序
List
<
String
>
urlList
=
new
ArrayList
<>();
for
(
Entry
<
String
,
Map
<
String
,
Object
>>
entry
:
dataMap
.
entrySet
()){
urlList
.
add
(
entry
.
getKey
());
}
List
<
MediaSelfSourceBean
>
sourceForwardList
=
MediaSelfSourceCrawlerThread
.
getMediaSelfSource
(
urlList
);
for
(
MediaSelfSourceBean
msfb
:
sourceForwardList
){
String
url
=
msfb
.
getUrl
();
//整合数据及验证转发原创
if
(
dataMap
.
containsKey
(
url
)){
Map
<
String
,
Object
>
data
=
dataMap
.
get
(
url
);
data
.
put
(
"自媒体号"
,
msfb
.
getMediaself
());
data
.
put
(
"频道"
,
msfb
.
getChannel
());
dataMap
.
put
(
url
,
data
);
}
}
return
dataMap
;
}
/**
* @Title: getMediaSelfSource
* @author hero
* @Description: 根据链接匹配自媒体账号
* @param @param urlList
* @param @return 设定文件
* @return Map<String,String> 返回类型
*/
public
static
Map
<
String
,
String
>
getMediaSelfSource
(
List
<
String
>
urlList
){
//启动验证来源程序
Map
<
String
,
String
>
dataMap
=
new
HashMap
<>();
for
(
String
url
:
urlList
){
dataMap
.
put
(
url
,
null
);
}
List
<
MediaSelfSourceBean
>
sourceForwardList
=
MediaSelfSourceCrawlerThread
.
getMediaSelfSource
(
urlList
);
for
(
MediaSelfSourceBean
mssb
:
sourceForwardList
){
String
url
=
mssb
.
getUrl
();
//整合数据及验证转发原创
if
(
dataMap
.
containsKey
(
url
)){
dataMap
.
put
(
url
,
mssb
.
getMediaself
());
}
}
return
dataMap
;
}
/**
*
* @Title: getMediaSelfSource
* @author hero
* @Description: 根据链接匹配自媒体账号
* @param @param url
* @param @return 设定文件
* @return String 返回类型
*/
public
static
String
getMediaSelfSource
(
String
url
){
//启动验证来源程序
List
<
String
>
urlList
=
new
ArrayList
<>();
urlList
.
add
(
url
);
List
<
MediaSelfSourceBean
>
sourceForwardList
=
MediaSelfSourceCrawlerThread
.
getMediaSelfSource
(
urlList
);
for
(
MediaSelfSourceBean
sourceMap
:
sourceForwardList
){
return
sourceMap
.
getMediaself
();
}
return
null
;
}
/**
* @Title: getSourceForward
* @Title: getSourceForward
* @author hero
* @author hero
* @Description: 验证文章是否转发
* @Description: 验证文章是否转发
...
@@ -117,9 +38,7 @@ public class SourceForward {
...
@@ -117,9 +38,7 @@ public class SourceForward {
for
(
Entry
<
String
,
Map
<
String
,
Object
>>
entry
:
dataMap
.
entrySet
()){
for
(
Entry
<
String
,
Map
<
String
,
Object
>>
entry
:
dataMap
.
entrySet
()){
urlList
.
add
(
entry
.
getKey
());
urlList
.
add
(
entry
.
getKey
());
}
}
System
.
out
.
println
(
urlList
.
size
());
List
<
SourceForwardBean
>
dataList
=
SourceForwardCrawlerThread
.
getSourceForward
(
urlList
);
List
<
SourceForwardBean
>
dataList
=
SourceForwardCrawlerThread
.
getSourceForward
(
urlList
);
System
.
out
.
println
(
dataList
.
size
());
for
(
SourceForwardBean
sfb
:
dataList
){
for
(
SourceForwardBean
sfb
:
dataList
){
String
url
=
sfb
.
getUrl
();
String
url
=
sfb
.
getUrl
();
String
root_source
=
sfb
.
getRoot_source
();
String
root_source
=
sfb
.
getRoot_source
();
...
@@ -161,7 +80,7 @@ public class SourceForward {
...
@@ -161,7 +80,7 @@ public class SourceForward {
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
// ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
// ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
// List<String> urlList = new ArrayList<>();
// List<String> urlList = new ArrayList<>();
// urlList.add("http
://www.toutiao.com/a6452936157751968013/
");
// urlList.add("http
s://www.toutiao.com/a6634320415839748621
");
// List<SourceForwardBean> da = SourceForward.getSourceForward(urlList);
// List<SourceForwardBean> da = SourceForward.getSourceForward(urlList);
// for(SourceForwardBean sfb : da) {
// for(SourceForwardBean sfb : da) {
// System.out.println(sfb.toString());
// System.out.println(sfb.toString());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment