Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
644ed491
Commit
644ed491
authored
Oct 14, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修复热搜采集问题
parent
6f72ce80
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
56 additions
and
50 deletions
+56
-50
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchListTest.java
+44
-44
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+1
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+1
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
+1
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
+1
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
+1
-0
src/main/resources/proxyip.properties
+5
-5
No files found.
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchListTest.java
View file @
644ed491
...
...
@@ -15,6 +15,7 @@ import com.mongodb.Mongo;
import
com.mongodb.MongoClient
;
import
com.mongodb.MongoCredential
;
import
com.mongodb.ServerAddress
;
import
com.mongodb.WriteResult
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.tools.timeparse.TimeParse
;
...
...
@@ -31,12 +32,12 @@ public class HotSearchListTest{
DB
db
=
mongo
.
getDB
(
"hot_search_list"
);
DBCollection
coll
=
db
.
getCollection
(
"hot_search_list2019_09"
);
MongoCredential
credentialNew
=
MongoCredential
.
createCredential
(
"datapush"
,
"admin"
,
"4d8ce5c42073c"
.
toCharArray
());
ServerAddress
addressNew
=
new
ServerAddress
(
Config
.
mongoIp
,
Config
.
mongoPort
);
Mongo
mongoNew
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credentialNew
));
DB
dbNew
=
mongoNew
.
getDB
(
"hot_search_list"
);
// MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());
// ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort);
// Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew));
// DB dbNew = mongoNew.getDB("hot_search_list");
Map
<
String
,
String
>
timLine
=
TimeParse
.
getTimeMap
(
"2019-10-02 00:00:00"
,
"2019-10-09 23:59:59"
,
"dd"
,
1
);
Map
<
String
,
String
>
timLine
=
TimeParse
.
getTimeMap
(
"2019-10-01 00:00:00"
,
"2019-10-09 23:59:59"
,
"dd"
,
1
);
timLine
.
forEach
((
start
,
end
)
->{
...
...
@@ -47,50 +48,49 @@ public class HotSearchListTest{
String
collName
=
"hot_search_list"
+
year
+
"_"
+
month
;
System
.
out
.
println
(
"collName=========="
+
collName
);
DBCollection
collNew
=
dbNew
.
getCollection
(
collName
);
DBObject
countIndexDoc
=
new
BasicDBObject
();
countIndexDoc
.
put
(
"count"
,
-
1
);
DBObject
timeIndexDoc
=
new
BasicDBObject
();
timeIndexDoc
.
put
(
"time"
,
-
1
);
DBObject
rankIndexDoc
=
new
BasicDBObject
();
rankIndexDoc
.
put
(
"rank"
,
-
1
);
DBObject
nameIndexDoc
=
new
BasicDBObject
();
nameIndexDoc
.
put
(
"name"
,
-
1
);
DBObject
typeIndexDoc
=
new
BasicDBObject
();
typeIndexDoc
.
put
(
"type"
,
-
1
);
try
{
collNew
.
createIndex
(
countIndexDoc
,
new
BasicDBObject
(
"name"
,
"count_desc"
));
collNew
.
createIndex
(
timeIndexDoc
,
new
BasicDBObject
(
"name"
,
"time_desc"
));
collNew
.
createIndex
(
rankIndexDoc
,
new
BasicDBObject
(
"name"
,
"rank_desc"
));
collNew
.
createIndex
(
nameIndexDoc
,
new
BasicDBObject
(
"name"
,
"name_desc"
));
collNew
.
createIndex
(
typeIndexDoc
,
new
BasicDBObject
(
"name"
,
"type_desc"
));
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
// DBCollection collNew = dbNew.getCollection(collName);
// DBObject countIndexDoc = new BasicDBObject();
// countIndexDoc.put("count", -1);
// DBObject timeIndexDoc = new BasicDBObject();
// timeIndexDoc.put("time", -1);
// DBObject rankIndexDoc = new BasicDBObject();
// rankIndexDoc.put("rank", -1);
// DBObject nameIndexDoc = new BasicDBObject();
// nameIndexDoc.put("name", -1);
// DBObject typeIndexDoc = new BasicDBObject();
// typeIndexDoc.put("type", -1);
// try {
// collNew.createIndex(countIndexDoc, new BasicDBObject("name", "count_desc"));
// collNew.createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
// collNew.createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
// collNew.createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
// collNew.createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
// } catch (Exception e) {
// e.printStackTrace();
// }
DBObject
query
=
new
BasicDBObject
(
new
BasicDBObject
(
"time"
,
new
BasicDBObject
(
"$gte"
,
startDate
).
append
(
"$lte"
,
endDate
)));
System
.
out
.
println
(
query
);
int
i
=
0
;
DBCursor
cur
=
coll
.
find
(
query
).
skip
(
i
);
System
.
out
.
println
(
query
+
"======="
+
cur
.
count
());
List
<
DBObject
>
dataList
=
new
ArrayList
<>();
while
(
cur
.
hasNext
())
{
DBObject
doc
=
cur
.
next
();
try
{
System
.
out
.
println
(
i
+
"===="
);
collNew
.
save
(
doc
);
i
++;
WriteResult
wr
=
coll
.
remove
(
query
);
System
.
out
.
println
(
"========"
+
wr
.
getN
());
// int i = 0;
// DBCursor cur = coll.remove(query);
// System.out.println(query +"======="+ cur.count());
// List<DBObject> dataList = new ArrayList<>();
// while(cur.hasNext()) {
// DBObject doc = cur.next();
// try {
//// collNew.save(doc);
// i++;
// coll.remove(doc);
}
catch
(
Exception
e2
)
{
e2
.
printStackTrace
();
}
dataList
.
add
(
doc
);
}
System
.
out
.
println
(
collName
+
"数据量大小"
+
dataList
.
size
());
cur
.
close
();
//
} catch (Exception e2) {
//
e2.printStackTrace();
//
}
//
dataList.add(doc);
//
}
//
System.out.println(collName +"数据量大小" +dataList.size());
//
cur.close();
// if(!dataList.isEmpty()) {
// try {
// collNew.insert(dataList);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
View file @
644ed491
...
...
@@ -29,6 +29,7 @@ public class BaiduHotSearchRun extends Thread{
TimeUnit
.
MINUTES
.
sleep
(
5
);
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
ZhiWeiTools
.
sleep
(
50
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
View file @
644ed491
...
...
@@ -28,6 +28,7 @@ public class DouyinHotSearchRun extends Thread{
TimeUnit
.
MINUTES
.
sleep
(
10
);
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
ZhiWeiTools
.
sleep
(
50
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
View file @
644ed491
...
...
@@ -27,6 +27,7 @@ public class SougoHotSearchRun extends Thread {
TimeUnit
.
MINUTES
.
sleep
(
5
);
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
ZhiWeiTools
.
sleep
(
50
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
View file @
644ed491
...
...
@@ -28,6 +28,7 @@ public class WeiboHotSearchRun extends Thread{
TimeUnit
.
MINUTES
.
sleep
(
1
);
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
1000
);
}
ZhiWeiTools
.
sleep
(
50
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
View file @
644ed491
...
...
@@ -28,6 +28,7 @@ public class WeiboTopicRun extends Thread{
TimeUnit
.
DAYS
.
sleep
(
1
);
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
ZhiWeiTools
.
sleep
(
50
);
}
...
...
@@ -41,7 +42,7 @@ public class WeiboTopicRun extends Thread{
logger
.
info
(
"{}, 微博超话此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<>();
for
(
WeiboTopic
topic
:
list
){
System
.
out
.
println
(
"topic::::"
+
topic
);
logger
.
info
(
"topic::::{}"
,
topic
);
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"name"
,
topic
.
getTopicName
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
View file @
644ed491
...
...
@@ -27,6 +27,7 @@ public class ZhihuHotSearchRun extends Thread{
TimeUnit
.
MINUTES
.
sleep
(
10
);
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
ZhiWeiTools
.
sleep
(
50
);
}
...
...
src/main/resources/proxyip.properties
View file @
644ed491
#registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182
#group=hangzhou
registry
=
zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182
group
=
hangzhou
########################################################
registry
=
zookeeper://192.168.0.36:2181
group
=
local
\ No newline at end of file
#registry=zookeeper://192.168.0.36:2181
#group=local
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment