Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
soubao_crawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
soubao_crawler
Commits
223c421c
Commit
223c421c
authored
Jun 29, 2018
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
提交数据
parent
893da226
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
15 additions
and
8 deletions
+15
-8
src/main/java/com/zhiwei/crawler/dao/WordsDao.java
+7
-5
src/main/java/com/zhiwei/crawler/run/MainRun.java
+1
-1
src/main/java/com/zhiwei/crawler/run/SoubaoCrawlerRun.java
+6
-1
src/main/java/com/zhiwei/crawler/soubao/SouBaoCrawlerThread.java
+1
-1
No files found.
src/main/java/com/zhiwei/crawler/dao/WordsDao.java
View file @
223c421c
...
@@ -33,8 +33,7 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
...
@@ -33,8 +33,7 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
try
{
try
{
BlockingQueue
<
String
>
list
=
new
LinkedBlockingQueue
<
String
>();
BlockingQueue
<
String
>
list
=
new
LinkedBlockingQueue
<
String
>();
DBObject
query
=
new
BasicDBObject
();
DBObject
query
=
new
BasicDBObject
();
query
.
put
(
"company"
,
"美赞臣"
);
DBCursor
cur
=
this
.
getReadColl
().
find
();
DBCursor
cur
=
this
.
getReadColl
().
find
(
query
);
while
(
cur
.
hasNext
()){
while
(
cur
.
hasNext
()){
DBObject
doc
=
cur
.
next
();
DBObject
doc
=
cur
.
next
();
list
.
add
(
doc
.
get
(
"word"
).
toString
());
list
.
add
(
doc
.
get
(
"word"
).
toString
());
...
@@ -46,9 +45,6 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
...
@@ -46,9 +45,6 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
}
}
}
}
/**
/**
* @Title: getWordList
* @Title: getWordList
* @author hero
* @author hero
...
@@ -59,7 +55,13 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
...
@@ -59,7 +55,13 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
*/
*/
public
BlockingQueue
<
String
>
getWordList
(
String
company
){
public
BlockingQueue
<
String
>
getWordList
(
String
company
){
DBObject
query
=
new
BasicDBObject
();
DBObject
query
=
new
BasicDBObject
();
if
(
company
.
contains
(
"-"
))
{
company
=
company
.
replace
(
"-"
,
""
);
query
.
put
(
"company"
,
new
BasicDBObject
(
"$ne"
,
company
));
}
else
{
query
.
put
(
"company"
,
company
);
query
.
put
(
"company"
,
company
);
}
try
{
try
{
BlockingQueue
<
String
>
list
=
new
LinkedBlockingQueue
<
String
>();
BlockingQueue
<
String
>
list
=
new
LinkedBlockingQueue
<
String
>();
...
...
src/main/java/com/zhiwei/crawler/run/MainRun.java
View file @
223c421c
...
@@ -12,7 +12,7 @@ public class MainRun {
...
@@ -12,7 +12,7 @@ public class MainRun {
}
}
public
void
showTimer
()
{
public
void
showTimer
()
{
scheduExec
.
scheduleAtFixedRate
(
new
SoubaoCrawlerRun
(),
1
000
,
60
*
60
*
1000
,
TimeUnit
.
MILLISECOND
S
);
scheduExec
.
scheduleAtFixedRate
(
new
SoubaoCrawlerRun
(),
1
,
60
*
3
,
TimeUnit
.
MINUTE
S
);
}
}
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
...
...
src/main/java/com/zhiwei/crawler/run/SoubaoCrawlerRun.java
View file @
223c421c
...
@@ -37,7 +37,12 @@ public class SoubaoCrawlerRun implements Runnable{
...
@@ -37,7 +37,12 @@ public class SoubaoCrawlerRun implements Runnable{
logger
.
info
(
"采集开始....."
);
logger
.
info
(
"采集开始....."
);
long
s
=
System
.
currentTimeMillis
();
long
s
=
System
.
currentTimeMillis
();
int
thread
=
5
;
int
thread
=
5
;
BlockingQueue
<
String
>
wordesQueue
=
wordsDao
.
getAllWordList
();
//美赞臣数据优先
BlockingQueue
<
String
>
wordesQueue
=
wordsDao
.
getWordList
(
"美赞臣"
);
//其他组数据采集关键词
BlockingQueue
<
String
>
otherWordQueue
=
wordsDao
.
getWordList
(
"-美赞臣"
);
wordesQueue
.
addAll
(
otherWordQueue
);
SouBaoCrawlerThread
[]
souBaoCrawlerThread
=
new
SouBaoCrawlerThread
[
thread
];
SouBaoCrawlerThread
[]
souBaoCrawlerThread
=
new
SouBaoCrawlerThread
[
thread
];
ExecutorService
service
=
Executors
.
newFixedThreadPool
(
2
);
ExecutorService
service
=
Executors
.
newFixedThreadPool
(
2
);
for
(
int
i
=
0
;
i
<
thread
;
i
++)
{
for
(
int
i
=
0
;
i
<
thread
;
i
++)
{
...
...
src/main/java/com/zhiwei/crawler/soubao/SouBaoCrawlerThread.java
View file @
223c421c
...
@@ -35,7 +35,7 @@ public class SouBaoCrawlerThread extends Thread{
...
@@ -35,7 +35,7 @@ public class SouBaoCrawlerThread extends Thread{
String
word
=
wordsQueue
.
take
();
String
word
=
wordsQueue
.
take
();
Proxy
proxy
=
ProxyClientUtil
.
getProxy
();
Proxy
proxy
=
ProxyClientUtil
.
getProxy
();
/***开始采集**/
/***开始采集**/
logger
.
info
(
"开始采集:
::{}搜报网关键词"
,
word
);
logger
.
info
(
"开始采集:
{}搜报网关键词,目前未采集的关键词为:{}"
,
word
,
wordsQueue
.
size
()
);
long
s
=
System
.
currentTimeMillis
();
long
s
=
System
.
currentTimeMillis
();
Crawler
.
start
(
1
,
word
,
proxy
);
Crawler
.
start
(
1
,
word
,
proxy
);
long
e
=
System
.
currentTimeMillis
();
long
e
=
System
.
currentTimeMillis
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment