Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
soubao_crawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
soubao_crawler
Commits
95487743
Commit
95487743
authored
Oct 18, 2018
by
[zhangzhiwei]
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
因修改采集核心包版本,修改相应的方法
parent
f09faf1a
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
6 additions
and
15 deletions
+6
-15
pom.xml
+2
-7
src/main/java/com/zhiwei/crawler/run/MainRun.java
+2
-3
src/main/java/com/zhiwei/crawler/soubao/Crawler.java
+0
-0
src/main/java/com/zhiwei/crawler/soubao/SouBaoCrawlerThread.java
+2
-5
No files found.
pom.xml
View file @
95487743
...
@@ -25,22 +25,17 @@
...
@@ -25,22 +25,17 @@
<version>
3.8.1
</version>
<version>
3.8.1
</version>
</dependency>
</dependency>
<dependency>
<groupId>
com.zhiwei.middleware
</groupId>
<artifactId>
proxy-client
</artifactId>
<version>
0.0.2-RELEASE
</version>
</dependency>
<dependency>
<dependency>
<groupId>
com.zhiwei.tools
</groupId>
<groupId>
com.zhiwei.tools
</groupId>
<artifactId>
zhiwei-tools
</artifactId>
<artifactId>
zhiwei-tools
</artifactId>
<version>
0.0.5-SNAPSHOT
</version>
<version>
0.0.8-SNAPSHOT
</version>
</dependency>
</dependency>
<dependency>
<dependency>
<groupId>
com.zhiwei.middleware
</groupId>
<groupId>
com.zhiwei.middleware
</groupId>
<artifactId>
cleaner-unified-urlfilter
</artifactId>
<artifactId>
cleaner-unified-urlfilter
</artifactId>
<version>
1.0.0.RELEASE
</version>
<version>
1.0.6.RELEASE
</version>
</dependency>
</dependency>
<dependency>
<dependency>
...
...
src/main/java/com/zhiwei/crawler/run/MainRun.java
View file @
95487743
...
@@ -4,9 +4,9 @@ import java.util.concurrent.Executors;
...
@@ -4,9 +4,9 @@ import java.util.concurrent.Executors;
import
java.util.concurrent.ScheduledExecutorService
;
import
java.util.concurrent.ScheduledExecutorService
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.config.ProxyConfig
;
import
com.zhiwei.crawler.config.ProxyConfig
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.proxy.common.Definition.GroupType
;
public
class
MainRun
{
public
class
MainRun
{
private
ScheduledExecutorService
scheduExec
;
private
ScheduledExecutorService
scheduExec
;
...
@@ -21,8 +21,7 @@ public class MainRun {
...
@@ -21,8 +21,7 @@ public class MainRun {
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
/** 初始化代理IP **/
/** 初始化代理IP **/
ProxyFactory
.
init
(
ProxyConfig
.
registry
,
ProxyConfig
.
group
,
GroupType
.
PROVIDER
,
ProxyFactory
.
init
(
ProxyConfig
.
registry
,
ProxyConfig
.
group
,
GroupType
.
PROVIDER
);
ProxyFactory
::
getNatProxy
);
new
MainRun
().
showTimer
();
new
MainRun
().
showTimer
();
}
}
...
...
src/main/java/com/zhiwei/crawler/soubao/Crawler.java
View file @
95487743
This diff is collapsed.
Click to expand it.
src/main/java/com/zhiwei/crawler/soubao/SouBaoCrawlerThread.java
View file @
95487743
package
com
.
zhiwei
.
crawler
.
soubao
;
package
com
.
zhiwei
.
crawler
.
soubao
;
import
java.net.Proxy
;
import
java.util.concurrent.BlockingQueue
;
import
java.util.concurrent.BlockingQueue
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.apache.logging.log4j.Logger
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.run.SoubaoCrawlerRun
;
import
com.zhiwei.crawler.run.SoubaoCrawlerRun
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -19,7 +18,6 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
...
@@ -19,7 +18,6 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
public
class
SouBaoCrawlerThread
extends
Thread
{
public
class
SouBaoCrawlerThread
extends
Thread
{
private
static
final
Logger
logger
=
LogManager
.
getLogger
(
SoubaoCrawlerRun
.
class
);
private
static
final
Logger
logger
=
LogManager
.
getLogger
(
SoubaoCrawlerRun
.
class
);
private
BlockingQueue
<
String
>
wordsQueue
;
private
BlockingQueue
<
String
>
wordsQueue
;
public
SouBaoCrawlerThread
(
BlockingQueue
<
String
>
wordsQueue
)
{
public
SouBaoCrawlerThread
(
BlockingQueue
<
String
>
wordsQueue
)
{
this
.
wordsQueue
=
wordsQueue
;
this
.
wordsQueue
=
wordsQueue
;
}
}
...
@@ -30,11 +28,10 @@ public class SouBaoCrawlerThread extends Thread{
...
@@ -30,11 +28,10 @@ public class SouBaoCrawlerThread extends Thread{
while
(
wordsQueue
!=
null
&&
wordsQueue
.
size
()>
0
){
while
(
wordsQueue
!=
null
&&
wordsQueue
.
size
()>
0
){
try
{
try
{
String
word
=
wordsQueue
.
take
();
String
word
=
wordsQueue
.
take
();
Proxy
proxy
=
ProxyFactory
.
proxyCallback
().
getProxy
();
/***开始采集**/
/***开始采集**/
logger
.
info
(
"开始采集:{}搜报网关键词,目前未采集的关键词为:{}"
,
word
,
wordsQueue
.
size
());
logger
.
info
(
"开始采集:{}搜报网关键词,目前未采集的关键词为:{}"
,
word
,
wordsQueue
.
size
());
long
s
=
System
.
currentTimeMillis
();
long
s
=
System
.
currentTimeMillis
();
Crawler
.
start
(
1
,
word
,
proxy
);
Crawler
.
start
(
1
,
word
,
ProxyHolder
.
NAT_PROXY
.
getProxy
()
);
long
e
=
System
.
currentTimeMillis
();
long
e
=
System
.
currentTimeMillis
();
logger
.
info
(
"采集:::{}搜报网关键词结束,采集所用时间为:{}"
,
word
,
(
e
-
s
));
logger
.
info
(
"采集:::{}搜报网关键词结束,采集所用时间为:{}"
,
word
,
(
e
-
s
));
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment