Commit 223c421c by zhiwei

提交数据

parent 893da226
......@@ -33,8 +33,7 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
try {
BlockingQueue<String> list = new LinkedBlockingQueue<String>();
DBObject query = new BasicDBObject();
query.put("company", "美赞臣");
DBCursor cur = this.getReadColl().find(query);
DBCursor cur = this.getReadColl().find();
while(cur.hasNext()){
DBObject doc = cur.next();
list.add(doc.get("word").toString());
......@@ -46,9 +45,6 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
}
}
/**
* @Title: getWordList
* @author hero
......@@ -59,7 +55,13 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
*/
public BlockingQueue<String> getWordList(String company){
DBObject query = new BasicDBObject();
if(company.contains("-")) {
company = company.replace("-", "");
query.put("company", new BasicDBObject("$ne", company));
}else {
query.put("company", company);
}
try {
BlockingQueue<String> list = new LinkedBlockingQueue<String>();
......
......@@ -12,7 +12,7 @@ public class MainRun {
}
/**
 * Registers a new {@code SoubaoCrawlerRun} task on {@code scheduExec} for
 * fixed-rate execution.
 *
 * NOTE(review): this text comes from a diff view rendered without +/- markers,
 * so the two calls below are presumably the old and new versions of a single
 * line rather than two schedules meant to run together. If both were really
 * present, the crawler task would be registered twice. Confirm against the
 * actual file.
 */
public void showTimer() {
// Arguments are 1000, 60 * 60 * 1000, MILLISECONDS; presumably initial delay and period (1 s, then hourly) - confirm scheduExec's type.
scheduExec.scheduleAtFixedRate(new SoubaoCrawlerRun(), 1000, 60 * 60 * 1000, TimeUnit.MILLISECONDS);
// Arguments are 1, 60*3, MINUTES; presumably a 1-minute initial delay and a 180-minute (3-hour) period.
scheduExec.scheduleAtFixedRate(new SoubaoCrawlerRun(), 1, 60*3, TimeUnit.MINUTES);
}
public static void main(String[] args) {
......
......@@ -37,7 +37,12 @@ public class SoubaoCrawlerRun implements Runnable{
logger.info("采集开始.....");
long s = System.currentTimeMillis();
int thread = 5;
BlockingQueue<String> wordesQueue = wordsDao.getAllWordList();
//美赞臣数据优先
BlockingQueue<String> wordesQueue = wordsDao.getWordList("美赞臣");
//其他组数据采集关键词
BlockingQueue<String> otherWordQueue = wordsDao.getWordList("-美赞臣");
wordesQueue.addAll(otherWordQueue);
SouBaoCrawlerThread[] souBaoCrawlerThread = new SouBaoCrawlerThread[thread];
ExecutorService service = Executors.newFixedThreadPool(2);
for (int i = 0; i < thread; i++) {
......
......@@ -35,7 +35,7 @@ public class SouBaoCrawlerThread extends Thread{
String word = wordsQueue.take();
Proxy proxy = ProxyClientUtil.getProxy();
/***开始采集**/
logger.info("开始采集:::{}搜报网关键词", word);
logger.info("开始采集:{}搜报网关键词,目前未采集的关键词为:{}", word, wordsQueue.size());
long s = System.currentTimeMillis();
Crawler.start(1, word, proxy);
long e = System.currentTimeMillis();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment