Commit 644ed491 by zhiwei

修复热搜采集问题

parent 6f72ce80
......@@ -15,6 +15,7 @@ import com.mongodb.Mongo;
import com.mongodb.MongoClient;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.mongodb.WriteResult;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.tools.timeparse.TimeParse;
......@@ -31,12 +32,12 @@ public class HotSearchListTest{
DB db = mongo.getDB("hot_search_list");
DBCollection coll = db.getCollection("hot_search_list2019_09");
MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());
ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort);
Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew));
DB dbNew = mongoNew.getDB("hot_search_list");
// MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());
// ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort);
// Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew));
// DB dbNew = mongoNew.getDB("hot_search_list");
Map<String,String> timLine = TimeParse.getTimeMap("2019-10-02 00:00:00", "2019-10-09 23:59:59", "dd", 1);
Map<String,String> timLine = TimeParse.getTimeMap("2019-10-01 00:00:00", "2019-10-09 23:59:59", "dd", 1);
timLine.forEach((start, end) ->{
......@@ -47,50 +48,49 @@ public class HotSearchListTest{
String collName = "hot_search_list"+year+"_"+month;
System.out.println("collName=========="+collName);
DBCollection collNew = dbNew.getCollection(collName);
DBObject countIndexDoc = new BasicDBObject();
countIndexDoc.put("count", -1);
DBObject timeIndexDoc = new BasicDBObject();
timeIndexDoc.put("time", -1);
DBObject rankIndexDoc = new BasicDBObject();
rankIndexDoc.put("rank", -1);
DBObject nameIndexDoc = new BasicDBObject();
nameIndexDoc.put("name", -1);
DBObject typeIndexDoc = new BasicDBObject();
typeIndexDoc.put("type", -1);
try {
collNew.createIndex(countIndexDoc, new BasicDBObject("name", "count_desc"));
collNew.createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
collNew.createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
collNew.createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
collNew.createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
} catch (Exception e) {
e.printStackTrace();
}
// DBCollection collNew = dbNew.getCollection(collName);
// DBObject countIndexDoc = new BasicDBObject();
// countIndexDoc.put("count", -1);
// DBObject timeIndexDoc = new BasicDBObject();
// timeIndexDoc.put("time", -1);
// DBObject rankIndexDoc = new BasicDBObject();
// rankIndexDoc.put("rank", -1);
// DBObject nameIndexDoc = new BasicDBObject();
// nameIndexDoc.put("name", -1);
// DBObject typeIndexDoc = new BasicDBObject();
// typeIndexDoc.put("type", -1);
// try {
// collNew.createIndex(countIndexDoc, new BasicDBObject("name", "count_desc"));
// collNew.createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
// collNew.createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
// collNew.createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
// collNew.createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
// } catch (Exception e) {
// e.printStackTrace();
// }
DBObject query = new BasicDBObject(new BasicDBObject("time",
new BasicDBObject("$gte",startDate).append("$lte", endDate)));
System.out.println(query);
int i = 0;
DBCursor cur = coll.find(query).skip(i);
System.out.println(query +"======="+ cur.count());
List<DBObject> dataList = new ArrayList<>();
while(cur.hasNext()) {
DBObject doc = cur.next();
try {
System.out.println(i+"====");
collNew.save(doc);
i++;
WriteResult wr = coll.remove(query);
System.out.println("========"+wr.getN());
// int i = 0;
// DBCursor cur = coll.remove(query);
// System.out.println(query +"======="+ cur.count());
// List<DBObject> dataList = new ArrayList<>();
// while(cur.hasNext()) {
// DBObject doc = cur.next();
// try {
//// collNew.save(doc);
// i++;
// coll.remove(doc);
} catch (Exception e2) {
e2.printStackTrace();
}
dataList.add(doc);
}
System.out.println(collName +"数据量大小" +dataList.size());
cur.close();
// } catch (Exception e2) {
// e2.printStackTrace();
// }
// dataList.add(doc);
// }
// System.out.println(collName +"数据量大小" +dataList.size());
// cur.close();
// if(!dataList.isEmpty()) {
// try {
// collNew.insert(dataList);
......
......@@ -29,6 +29,7 @@ public class BaiduHotSearchRun extends Thread{
TimeUnit.MINUTES.sleep(5);
} catch (Exception e) {
e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
}
ZhiWeiTools.sleep(50);
}
......
......@@ -28,6 +28,7 @@ public class DouyinHotSearchRun extends Thread{
TimeUnit.MINUTES.sleep(10);
} catch (Exception e) {
e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
}
ZhiWeiTools.sleep(50);
}
......
......@@ -27,6 +27,7 @@ public class SougoHotSearchRun extends Thread {
TimeUnit.MINUTES.sleep(5);
} catch (Exception e) {
e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
}
ZhiWeiTools.sleep(50);
}
......
......@@ -28,6 +28,7 @@ public class WeiboHotSearchRun extends Thread{
TimeUnit.MINUTES.sleep(1);
} catch (Exception e) {
e.fillInStackTrace();
ZhiWeiTools.sleep(60*1000);
}
ZhiWeiTools.sleep(50);
}
......
......@@ -28,6 +28,7 @@ public class WeiboTopicRun extends Thread{
TimeUnit.DAYS.sleep(1);
} catch (Exception e) {
e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
}
ZhiWeiTools.sleep(50);
}
......@@ -41,7 +42,7 @@ public class WeiboTopicRun extends Thread{
logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>();
for(WeiboTopic topic : list){
System.out.println("topic::::"+topic);
logger.info("topic::::{}", topic);
DBObject doc = new BasicDBObject();
doc.put("_id", topic.getId());
doc.put("name", topic.getTopicName());
......
......@@ -27,6 +27,7 @@ public class ZhihuHotSearchRun extends Thread{
TimeUnit.MINUTES.sleep(10);
} catch (Exception e) {
e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
}
ZhiWeiTools.sleep(50);
}
......
#registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182
#group=hangzhou
registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182
group=hangzhou
########################################################
registry=zookeeper://192.168.0.36:2181
group=local
\ No newline at end of file
#registry=zookeeper://192.168.0.36:2181
#group=local
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment