Commit 223c421c by zhiwei

提交数据

parent 893da226
...@@ -33,8 +33,7 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class); ...@@ -33,8 +33,7 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
try { try {
BlockingQueue<String> list = new LinkedBlockingQueue<String>(); BlockingQueue<String> list = new LinkedBlockingQueue<String>();
DBObject query = new BasicDBObject(); DBObject query = new BasicDBObject();
query.put("company", "美赞臣"); DBCursor cur = this.getReadColl().find();
DBCursor cur = this.getReadColl().find(query);
while(cur.hasNext()){ while(cur.hasNext()){
DBObject doc = cur.next(); DBObject doc = cur.next();
list.add(doc.get("word").toString()); list.add(doc.get("word").toString());
...@@ -46,9 +45,6 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class); ...@@ -46,9 +45,6 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
} }
} }
/** /**
* @Title: getWordList * @Title: getWordList
* @author hero * @author hero
...@@ -59,7 +55,13 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class); ...@@ -59,7 +55,13 @@ private static Logger logger = LoggerFactory.getLogger(WordsDao.class);
*/ */
public BlockingQueue<String> getWordList(String company){ public BlockingQueue<String> getWordList(String company){
DBObject query = new BasicDBObject(); DBObject query = new BasicDBObject();
query.put("company", company);
if(company.contains("-")) {
company = company.replace("-", "");
query.put("company", new BasicDBObject("$ne", company));
}else {
query.put("company", company);
}
try { try {
BlockingQueue<String> list = new LinkedBlockingQueue<String>(); BlockingQueue<String> list = new LinkedBlockingQueue<String>();
......
...@@ -12,7 +12,7 @@ public class MainRun { ...@@ -12,7 +12,7 @@ public class MainRun {
} }
public void showTimer() { public void showTimer() {
scheduExec.scheduleAtFixedRate(new SoubaoCrawlerRun(), 1000, 60 * 60 * 1000, TimeUnit.MILLISECONDS); scheduExec.scheduleAtFixedRate(new SoubaoCrawlerRun(), 1, 60*3, TimeUnit.MINUTES);
} }
public static void main(String[] args) { public static void main(String[] args) {
......
...@@ -37,7 +37,12 @@ public class SoubaoCrawlerRun implements Runnable{ ...@@ -37,7 +37,12 @@ public class SoubaoCrawlerRun implements Runnable{
logger.info("采集开始....."); logger.info("采集开始.....");
long s = System.currentTimeMillis(); long s = System.currentTimeMillis();
int thread = 5; int thread = 5;
BlockingQueue<String> wordesQueue = wordsDao.getAllWordList(); //美赞臣数据优先
BlockingQueue<String> wordesQueue = wordsDao.getWordList("美赞臣");
//其他组数据采集关键词
BlockingQueue<String> otherWordQueue = wordsDao.getWordList("-美赞臣");
wordesQueue.addAll(otherWordQueue);
SouBaoCrawlerThread[] souBaoCrawlerThread = new SouBaoCrawlerThread[thread]; SouBaoCrawlerThread[] souBaoCrawlerThread = new SouBaoCrawlerThread[thread];
ExecutorService service = Executors.newFixedThreadPool(2); ExecutorService service = Executors.newFixedThreadPool(2);
for (int i = 0; i < thread; i++) { for (int i = 0; i < thread; i++) {
......
...@@ -35,7 +35,7 @@ public class SouBaoCrawlerThread extends Thread{ ...@@ -35,7 +35,7 @@ public class SouBaoCrawlerThread extends Thread{
String word = wordsQueue.take(); String word = wordsQueue.take();
Proxy proxy = ProxyClientUtil.getProxy(); Proxy proxy = ProxyClientUtil.getProxy();
/***开始采集**/ /***开始采集**/
logger.info("开始采集:::{}搜报网关键词", word); logger.info("开始采集:{}搜报网关键词,目前未采集的关键词为:{}", word, wordsQueue.size());
long s = System.currentTimeMillis(); long s = System.currentTimeMillis();
Crawler.start(1, word, proxy); Crawler.start(1, word, proxy);
long e = System.currentTimeMillis(); long e = System.currentTimeMillis();
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment