Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
source_forward
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
source_forward
Commits
c799bd00
Commit
c799bd00
authored
Jan 23, 2019
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
调整 采集问题(未测试)
parent
16314308
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
43 additions
and
17 deletions
+43
-17
src/main/java/com/zhiwei/source_forward/crawler/ContentCrawler.java
+5
-0
src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourceCrawler.java
+8
-4
src/main/java/com/zhiwei/source_forward/crawler/SourceForwardCrawler.java
+8
-4
src/main/java/com/zhiwei/source_forward/crawler/UrlLiveCrawler.java
+14
-5
src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
+1
-1
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
+1
-2
src/main/java/com/zhiwei/source_forward/run/URLLive.java
+6
-1
No files found.
src/main/java/com/zhiwei/source_forward/crawler/ContentCrawler.java
View file @
c799bd00
...
@@ -32,9 +32,14 @@ public class ContentCrawler {
...
@@ -32,9 +32,14 @@ public class ContentCrawler {
*/
*/
public
MultiThreadingCounter
submitTask
(
ContentDataCallback
callback
,
public
MultiThreadingCounter
submitTask
(
ContentDataCallback
callback
,
String
...
urls
)
{
String
...
urls
)
{
try
{
MultiThreadingCounter
counter
=
new
MultiThreadingCounter
(
15
,
TimeUnit
.
MINUTES
,
false
);
MultiThreadingCounter
counter
=
new
MultiThreadingCounter
(
15
,
TimeUnit
.
MINUTES
,
false
);
start
(
counter
,
callback
,
urls
);
start
(
counter
,
callback
,
urls
);
return
counter
;
return
counter
;
}
catch
(
Exception
e
)
{
logger
.
error
(
" exception {}"
,
e
);
return
null
;
}
}
}
/**
/**
...
...
src/main/java/com/zhiwei/source_forward/crawler/MediaSelfSourceCrawler.java
View file @
c799bd00
...
@@ -44,10 +44,15 @@ public class MediaSelfSourceCrawler {
...
@@ -44,10 +44,15 @@ public class MediaSelfSourceCrawler {
* @return
* @return
* @throws Exception
* @throws Exception
*/
*/
public
MultiThreadingCounter
submitTask
(
MediaSelfSourceDataCallBack
callback
,
String
...
urls
)
throws
Exception
{
public
MultiThreadingCounter
submitTask
(
MediaSelfSourceDataCallBack
callback
,
String
...
urls
)
{
try
{
MultiThreadingCounter
counter
=
new
MultiThreadingCounter
(
"任务======= "
,
15
,
TimeUnit
.
MINUTES
,
true
);
MultiThreadingCounter
counter
=
new
MultiThreadingCounter
(
"任务======= "
,
15
,
TimeUnit
.
MINUTES
,
true
);
start
(
counter
,
callback
,
urls
);
start
(
counter
,
callback
,
urls
);
return
counter
;
return
counter
;
}
catch
(
Exception
e
)
{
logger
.
error
(
" exception {}"
,
e
);
return
null
;
}
}
}
/**
/**
...
@@ -60,16 +65,15 @@ public class MediaSelfSourceCrawler {
...
@@ -60,16 +65,15 @@ public class MediaSelfSourceCrawler {
private
void
start
(
MultiThreadingCounter
counter
,
MediaSelfSourceDataCallBack
callback
,
String
...
urls
)
{
private
void
start
(
MultiThreadingCounter
counter
,
MediaSelfSourceDataCallBack
callback
,
String
...
urls
)
{
if
(
urls
!=
null
&&
urls
.
length
>
0
)
{
if
(
urls
!=
null
&&
urls
.
length
>
0
)
{
for
(
String
url
:
urls
)
{
for
(
String
url
:
urls
)
{
counter
.
increase
();
if
(
url
!=
null
)
{
if
(
url
!=
null
)
{
try
{
try
{
counter
.
increase
();
search
(
counter
,
url
,
Attribution
.
of
(
url
),
callback
);
search
(
counter
,
url
,
Attribution
.
of
(
url
),
callback
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"搜索创建出错"
,
e
);
logger
.
error
(
"搜索创建出错"
,
e
);
}
finally
{
counter
.
reduce
();
}
}
}
}
counter
.
reduce
();
}
}
}
}
}
}
...
...
src/main/java/com/zhiwei/source_forward/crawler/SourceForwardCrawler.java
View file @
c799bd00
...
@@ -32,25 +32,29 @@ public class SourceForwardCrawler {
...
@@ -32,25 +32,29 @@ public class SourceForwardCrawler {
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
List
<
String
>
sourceList
=
SourceData
.
getSourceList
();
private
static
List
<
String
>
sourceList
=
SourceData
.
getSourceList
();
public
MultiThreadingCounter
submitTask
(
SourceForwardDataCallBack
callback
,
String
...
urls
)
throws
Exception
{
public
MultiThreadingCounter
submitTask
(
SourceForwardDataCallBack
callback
,
String
...
urls
)
{
try
{
MultiThreadingCounter
counter
=
new
MultiThreadingCounter
(
15
,
TimeUnit
.
MINUTES
,
false
);
MultiThreadingCounter
counter
=
new
MultiThreadingCounter
(
15
,
TimeUnit
.
MINUTES
,
false
);
start
(
counter
,
callback
,
urls
);
start
(
counter
,
callback
,
urls
);
return
counter
;
return
counter
;
}
catch
(
Exception
e
)
{
logger
.
error
(
" exception "
,
e
);
return
null
;
}
}
}
private
void
start
(
MultiThreadingCounter
counter
,
SourceForwardDataCallBack
callback
,
String
...
urls
)
{
private
void
start
(
MultiThreadingCounter
counter
,
SourceForwardDataCallBack
callback
,
String
...
urls
)
{
if
(
urls
!=
null
&&
urls
.
length
>
0
)
{
if
(
urls
!=
null
&&
urls
.
length
>
0
)
{
for
(
String
url
:
urls
)
{
for
(
String
url
:
urls
)
{
counter
.
increase
();
if
(
url
!=
null
)
{
if
(
url
!=
null
)
{
try
{
try
{
counter
.
increase
();
search
(
counter
,
url
,
Attribution
.
of
(
url
),
callback
);
search
(
counter
,
url
,
Attribution
.
of
(
url
),
callback
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"搜索创建出错"
,
e
);
logger
.
error
(
"搜索创建出错"
,
e
);
}
finally
{
counter
.
reduce
();
}
}
}
}
counter
.
reduce
();
}
}
}
}
}
}
...
...
src/main/java/com/zhiwei/source_forward/crawler/UrlLiveCrawler.java
View file @
c799bd00
package
com
.
zhiwei
.
source_forward
.
crawler
;
package
com
.
zhiwei
.
source_forward
.
crawler
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.Arrays
;
import
java.util.Arrays
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -37,22 +39,29 @@ public class UrlLiveCrawler {
...
@@ -37,22 +39,29 @@ public class UrlLiveCrawler {
private
static
final
Logger
logger
=
LogManager
.
getLogger
(
UrlLiveCrawler
.
class
);
private
static
final
Logger
logger
=
LogManager
.
getLogger
(
UrlLiveCrawler
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
public
MultiThreadingCounter
submitTask
(
UrlLiveDataCallback
callback
,
String
...
urls
)
throws
Exception
{
public
MultiThreadingCounter
submitTask
(
UrlLiveDataCallback
callback
,
String
...
urls
)
{
try
{
MultiThreadingCounter
counter
=
new
MultiThreadingCounter
(
10
,
TimeUnit
.
MINUTES
,
false
);
MultiThreadingCounter
counter
=
new
MultiThreadingCounter
(
10
,
TimeUnit
.
MINUTES
,
false
);
start
(
counter
,
callback
,
urls
);
start
(
counter
,
callback
,
urls
);
return
counter
;
return
counter
;
}
catch
(
Exception
e
)
{
logger
.
error
(
" 判断链接是否删除 {} "
,
e
);
return
null
;
}
}
}
private
void
start
(
MultiThreadingCounter
counter
,
UrlLiveDataCallback
callback
,
String
...
urls
)
{
private
void
start
(
MultiThreadingCounter
counter
,
UrlLiveDataCallback
callback
,
String
...
urls
)
{
if
(
urls
!=
null
&&
urls
.
length
>
0
)
{
if
(
nonNull
(
urls
)
&&
urls
.
length
>
0
)
{
for
(
String
url
:
urls
)
{
for
(
String
url
:
urls
)
{
if
(
url
!=
null
)
{
counter
.
increase
();
if
(
nonNull
(
url
))
{
try
{
try
{
search
(
counter
,
url
,
Attribution
.
of
(
url
,
1
),
callback
);
search
(
counter
,
url
,
Attribution
.
of
(
url
,
1
),
callback
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"搜索创建出错:"
,
e
);
logger
.
error
(
"搜索创建出错:"
,
e
);
}
}
}
}
counter
.
reduce
();
}
}
}
}
}
}
...
@@ -83,7 +92,7 @@ public class UrlLiveCrawler {
...
@@ -83,7 +92,7 @@ public class UrlLiveCrawler {
}
}
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析出错"
,
e
);
logger
.
error
(
"解析出错
{}
"
,
e
);
}
finally
{
}
finally
{
if
(
response
!=
null
)
{
if
(
response
!=
null
)
{
response
.
close
();
response
.
close
();
...
@@ -103,7 +112,7 @@ public class UrlLiveCrawler {
...
@@ -103,7 +112,7 @@ public class UrlLiveCrawler {
}
}
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
" 数据是否删除 采集出错 {} "
,
e
);
}
finally
{
}
finally
{
counter
.
reduce
();
counter
.
reduce
();
}
}
...
...
src/main/java/com/zhiwei/source_forward/run/MediaSelfSource.java
View file @
c799bd00
...
@@ -47,7 +47,7 @@ public class MediaSelfSource {
...
@@ -47,7 +47,7 @@ public class MediaSelfSource {
};
};
crawler
.
submitTask
(
callback
,
urlList
.
toArray
(
new
String
[
urlList
.
size
()])).
await
();
crawler
.
submitTask
(
callback
,
urlList
.
toArray
(
new
String
[
urlList
.
size
()])).
await
();
}
catch
(
Exception
e
){
}
catch
(
Exception
e
){
e
.
printStackTrace
(
);
logger
.
error
(
" 网媒自媒体号 判断 {} "
,
e
);
}
}
return
list
;
return
list
;
}
}
...
...
src/main/java/com/zhiwei/source_forward/run/SourceForward.java
View file @
c799bd00
...
@@ -62,7 +62,6 @@ public class SourceForward {
...
@@ -62,7 +62,6 @@ public class SourceForward {
dataMap
.
put
(
url
,
data
);
dataMap
.
put
(
url
,
data
);
}
}
}
}
System
.
out
.
println
(
"success"
);
return
dataMap
;
return
dataMap
;
}
}
...
@@ -104,7 +103,7 @@ public class SourceForward {
...
@@ -104,7 +103,7 @@ public class SourceForward {
};
};
crawler
.
submitTask
(
callback
,
urlList
.
toArray
(
new
String
[
urlList
.
size
()])).
await
();
crawler
.
submitTask
(
callback
,
urlList
.
toArray
(
new
String
[
urlList
.
size
()])).
await
();
}
catch
(
Exception
e
){
}
catch
(
Exception
e
){
e
.
printStackTrace
(
);
logger
.
error
(
" 来源判断 出错 {} "
,
e
);
}
}
return
list
;
return
list
;
}
}
...
...
src/main/java/com/zhiwei/source_forward/run/URLLive.java
View file @
c799bd00
...
@@ -6,6 +6,9 @@ import java.util.List;
...
@@ -6,6 +6,9 @@ import java.util.List;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Map.Entry
;
import
java.util.Map.Entry
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
com.zhiwei.source_forward.bean.UrlLiveBean
;
import
com.zhiwei.source_forward.bean.UrlLiveBean
;
import
com.zhiwei.source_forward.bean.UrlLiveBean.Attribution
;
import
com.zhiwei.source_forward.bean.UrlLiveBean.Attribution
;
import
com.zhiwei.source_forward.crawler.UrlLiveCrawler
;
import
com.zhiwei.source_forward.crawler.UrlLiveCrawler
;
...
@@ -19,6 +22,8 @@ import com.zhiwei.source_forward.util.UrlLiveDataCallback;
...
@@ -19,6 +22,8 @@ import com.zhiwei.source_forward.util.UrlLiveDataCallback;
*/
*/
public
class
URLLive
{
public
class
URLLive
{
private
static
Logger
logger
=
LogManager
.
getLogger
(
URLLive
.
class
);
/**
/**
* @Title: verificationURLLive
* @Title: verificationURLLive
* @author hero
* @author hero
...
@@ -93,7 +98,7 @@ public class URLLive {
...
@@ -93,7 +98,7 @@ public class URLLive {
};
};
crawler
.
submitTask
(
callback
,
urlList
.
toArray
(
new
String
[
urlList
.
size
()])).
await
();
crawler
.
submitTask
(
callback
,
urlList
.
toArray
(
new
String
[
urlList
.
size
()])).
await
();
}
catch
(
Exception
e
){
}
catch
(
Exception
e
){
e
.
printStackTrace
(
);
logger
.
error
(
" 数据采集运行有问题 {} "
,
e
);
}
}
return
list
;
return
list
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment