Commit 644ed491 by zhiwei

修复热搜采集问题

parent 6f72ce80
...@@ -15,6 +15,7 @@ import com.mongodb.Mongo; ...@@ -15,6 +15,7 @@ import com.mongodb.Mongo;
import com.mongodb.MongoClient; import com.mongodb.MongoClient;
import com.mongodb.MongoCredential; import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress; import com.mongodb.ServerAddress;
import com.mongodb.WriteResult;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.config.Config; import com.zhiwei.searchhotcrawler.config.Config;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
...@@ -31,12 +32,12 @@ public class HotSearchListTest{ ...@@ -31,12 +32,12 @@ public class HotSearchListTest{
DB db = mongo.getDB("hot_search_list"); DB db = mongo.getDB("hot_search_list");
DBCollection coll = db.getCollection("hot_search_list2019_09"); DBCollection coll = db.getCollection("hot_search_list2019_09");
MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray()); // MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());
ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort); // ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort);
Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew)); // Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew));
DB dbNew = mongoNew.getDB("hot_search_list"); // DB dbNew = mongoNew.getDB("hot_search_list");
Map<String,String> timLine = TimeParse.getTimeMap("2019-10-02 00:00:00", "2019-10-09 23:59:59", "dd", 1); Map<String,String> timLine = TimeParse.getTimeMap("2019-10-01 00:00:00", "2019-10-09 23:59:59", "dd", 1);
timLine.forEach((start, end) ->{ timLine.forEach((start, end) ->{
...@@ -47,50 +48,49 @@ public class HotSearchListTest{ ...@@ -47,50 +48,49 @@ public class HotSearchListTest{
String collName = "hot_search_list"+year+"_"+month; String collName = "hot_search_list"+year+"_"+month;
System.out.println("collName=========="+collName); System.out.println("collName=========="+collName);
DBCollection collNew = dbNew.getCollection(collName); // DBCollection collNew = dbNew.getCollection(collName);
// DBObject countIndexDoc = new BasicDBObject();
DBObject countIndexDoc = new BasicDBObject(); // countIndexDoc.put("count", -1);
countIndexDoc.put("count", -1); // DBObject timeIndexDoc = new BasicDBObject();
DBObject timeIndexDoc = new BasicDBObject(); // timeIndexDoc.put("time", -1);
timeIndexDoc.put("time", -1); // DBObject rankIndexDoc = new BasicDBObject();
DBObject rankIndexDoc = new BasicDBObject(); // rankIndexDoc.put("rank", -1);
rankIndexDoc.put("rank", -1); // DBObject nameIndexDoc = new BasicDBObject();
DBObject nameIndexDoc = new BasicDBObject(); // nameIndexDoc.put("name", -1);
nameIndexDoc.put("name", -1); // DBObject typeIndexDoc = new BasicDBObject();
DBObject typeIndexDoc = new BasicDBObject(); // typeIndexDoc.put("type", -1);
typeIndexDoc.put("type", -1); // try {
// collNew.createIndex(countIndexDoc, new BasicDBObject("name", "count_desc"));
try { // collNew.createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
collNew.createIndex(countIndexDoc, new BasicDBObject("name", "count_desc")); // collNew.createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
collNew.createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc")); // collNew.createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
collNew.createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc")); // collNew.createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
collNew.createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc")); // } catch (Exception e) {
collNew.createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc")); // e.printStackTrace();
} catch (Exception e) { // }
e.printStackTrace();
}
DBObject query = new BasicDBObject(new BasicDBObject("time", DBObject query = new BasicDBObject(new BasicDBObject("time",
new BasicDBObject("$gte",startDate).append("$lte", endDate))); new BasicDBObject("$gte",startDate).append("$lte", endDate)));
System.out.println(query); System.out.println(query);
int i = 0; WriteResult wr = coll.remove(query);
DBCursor cur = coll.find(query).skip(i); System.out.println("========"+wr.getN());
System.out.println(query +"======="+ cur.count()); // int i = 0;
List<DBObject> dataList = new ArrayList<>(); // DBCursor cur = coll.remove(query);
while(cur.hasNext()) { // System.out.println(query +"======="+ cur.count());
DBObject doc = cur.next(); // List<DBObject> dataList = new ArrayList<>();
try { // while(cur.hasNext()) {
System.out.println(i+"===="); // DBObject doc = cur.next();
collNew.save(doc); // try {
i++; //// collNew.save(doc);
// i++;
// coll.remove(doc); // coll.remove(doc);
} catch (Exception e2) { // } catch (Exception e2) {
e2.printStackTrace(); // e2.printStackTrace();
} // }
dataList.add(doc); // dataList.add(doc);
} // }
System.out.println(collName +"数据量大小" +dataList.size()); // System.out.println(collName +"数据量大小" +dataList.size());
cur.close(); // cur.close();
// if(!dataList.isEmpty()) { // if(!dataList.isEmpty()) {
// try { // try {
// collNew.insert(dataList); // collNew.insert(dataList);
......
...@@ -29,6 +29,7 @@ public class BaiduHotSearchRun extends Thread{ ...@@ -29,6 +29,7 @@ public class BaiduHotSearchRun extends Thread{
TimeUnit.MINUTES.sleep(5); TimeUnit.MINUTES.sleep(5);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
} }
ZhiWeiTools.sleep(50); ZhiWeiTools.sleep(50);
} }
......
...@@ -28,6 +28,7 @@ public class DouyinHotSearchRun extends Thread{ ...@@ -28,6 +28,7 @@ public class DouyinHotSearchRun extends Thread{
TimeUnit.MINUTES.sleep(10); TimeUnit.MINUTES.sleep(10);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
} }
ZhiWeiTools.sleep(50); ZhiWeiTools.sleep(50);
} }
......
...@@ -27,6 +27,7 @@ public class SougoHotSearchRun extends Thread { ...@@ -27,6 +27,7 @@ public class SougoHotSearchRun extends Thread {
TimeUnit.MINUTES.sleep(5); TimeUnit.MINUTES.sleep(5);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
} }
ZhiWeiTools.sleep(50); ZhiWeiTools.sleep(50);
} }
......
...@@ -28,6 +28,7 @@ public class WeiboHotSearchRun extends Thread{ ...@@ -28,6 +28,7 @@ public class WeiboHotSearchRun extends Thread{
TimeUnit.MINUTES.sleep(1); TimeUnit.MINUTES.sleep(1);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*1000);
} }
ZhiWeiTools.sleep(50); ZhiWeiTools.sleep(50);
} }
......
...@@ -28,6 +28,7 @@ public class WeiboTopicRun extends Thread{ ...@@ -28,6 +28,7 @@ public class WeiboTopicRun extends Thread{
TimeUnit.DAYS.sleep(1); TimeUnit.DAYS.sleep(1);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
} }
ZhiWeiTools.sleep(50); ZhiWeiTools.sleep(50);
} }
...@@ -41,7 +42,7 @@ public class WeiboTopicRun extends Thread{ ...@@ -41,7 +42,7 @@ public class WeiboTopicRun extends Thread{
logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>(); List<DBObject> data = new ArrayList<>();
for(WeiboTopic topic : list){ for(WeiboTopic topic : list){
System.out.println("topic::::"+topic); logger.info("topic::::{}", topic);
DBObject doc = new BasicDBObject(); DBObject doc = new BasicDBObject();
doc.put("_id", topic.getId()); doc.put("_id", topic.getId());
doc.put("name", topic.getTopicName()); doc.put("name", topic.getTopicName());
......
...@@ -27,6 +27,7 @@ public class ZhihuHotSearchRun extends Thread{ ...@@ -27,6 +27,7 @@ public class ZhihuHotSearchRun extends Thread{
TimeUnit.MINUTES.sleep(10); TimeUnit.MINUTES.sleep(10);
} catch (Exception e) { } catch (Exception e) {
e.fillInStackTrace(); e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
} }
ZhiWeiTools.sleep(50); ZhiWeiTools.sleep(50);
} }
......
#registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182 registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182
#group=hangzhou group=hangzhou
######################################################## ########################################################
registry=zookeeper://192.168.0.36:2181 #registry=zookeeper://192.168.0.36:2181
group=local #group=local
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment