Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
644ed491
Commit
644ed491
authored
Oct 14, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修复热搜采集问题
parent
6f72ce80
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
56 additions
and
50 deletions
+56
-50
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchListTest.java
+44
-44
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+1
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+1
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
+1
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
+1
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
+1
-0
src/main/resources/proxyip.properties
+5
-5
No files found.
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchListTest.java
View file @
644ed491
...
@@ -15,6 +15,7 @@ import com.mongodb.Mongo;
...
@@ -15,6 +15,7 @@ import com.mongodb.Mongo;
import
com.mongodb.MongoClient
;
import
com.mongodb.MongoClient
;
import
com.mongodb.MongoCredential
;
import
com.mongodb.MongoCredential
;
import
com.mongodb.ServerAddress
;
import
com.mongodb.ServerAddress
;
import
com.mongodb.WriteResult
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
...
@@ -31,12 +32,12 @@ public class HotSearchListTest{
...
@@ -31,12 +32,12 @@ public class HotSearchListTest{
DB
db
=
mongo
.
getDB
(
"hot_search_list"
);
DB
db
=
mongo
.
getDB
(
"hot_search_list"
);
DBCollection
coll
=
db
.
getCollection
(
"hot_search_list2019_09"
);
DBCollection
coll
=
db
.
getCollection
(
"hot_search_list2019_09"
);
MongoCredential
credentialNew
=
MongoCredential
.
createCredential
(
"datapush"
,
"admin"
,
"4d8ce5c42073c"
.
toCharArray
());
//
MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());
ServerAddress
addressNew
=
new
ServerAddress
(
Config
.
mongoIp
,
Config
.
mongoPort
);
//
ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort);
Mongo
mongoNew
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credentialNew
));
//
Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew));
DB
dbNew
=
mongoNew
.
getDB
(
"hot_search_list"
);
//
DB dbNew = mongoNew.getDB("hot_search_list");
Map
<
String
,
String
>
timLine
=
TimeParse
.
getTimeMap
(
"2019-10-0
2
00:00:00"
,
"2019-10-09 23:59:59"
,
"dd"
,
1
);
Map
<
String
,
String
>
timLine
=
TimeParse
.
getTimeMap
(
"2019-10-0
1
00:00:00"
,
"2019-10-09 23:59:59"
,
"dd"
,
1
);
timLine
.
forEach
((
start
,
end
)
->{
timLine
.
forEach
((
start
,
end
)
->{
...
@@ -47,50 +48,49 @@ public class HotSearchListTest{
...
@@ -47,50 +48,49 @@ public class HotSearchListTest{
String
collName
=
"hot_search_list"
+
year
+
"_"
+
month
;
String
collName
=
"hot_search_list"
+
year
+
"_"
+
month
;
System
.
out
.
println
(
"collName=========="
+
collName
);
System
.
out
.
println
(
"collName=========="
+
collName
);
DBCollection
collNew
=
dbNew
.
getCollection
(
collName
);
// DBCollection collNew = dbNew.getCollection(collName);
// DBObject countIndexDoc = new BasicDBObject();
DBObject
countIndexDoc
=
new
BasicDBObject
();
// countIndexDoc.put("count", -1);
countIndexDoc
.
put
(
"count"
,
-
1
);
// DBObject timeIndexDoc = new BasicDBObject();
DBObject
timeIndexDoc
=
new
BasicDBObject
();
// timeIndexDoc.put("time", -1);
timeIndexDoc
.
put
(
"time"
,
-
1
);
// DBObject rankIndexDoc = new BasicDBObject();
DBObject
rankIndexDoc
=
new
BasicDBObject
();
// rankIndexDoc.put("rank", -1);
rankIndexDoc
.
put
(
"rank"
,
-
1
);
// DBObject nameIndexDoc = new BasicDBObject();
DBObject
nameIndexDoc
=
new
BasicDBObject
();
// nameIndexDoc.put("name", -1);
nameIndexDoc
.
put
(
"name"
,
-
1
);
// DBObject typeIndexDoc = new BasicDBObject();
DBObject
typeIndexDoc
=
new
BasicDBObject
();
// typeIndexDoc.put("type", -1);
typeIndexDoc
.
put
(
"type"
,
-
1
);
// try {
// collNew.createIndex(countIndexDoc, new BasicDBObject("name", "count_desc"));
try
{
// collNew.createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
collNew
.
createIndex
(
countIndexDoc
,
new
BasicDBObject
(
"name"
,
"count_desc"
));
// collNew.createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
collNew
.
createIndex
(
timeIndexDoc
,
new
BasicDBObject
(
"name"
,
"time_desc"
));
// collNew.createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
collNew
.
createIndex
(
rankIndexDoc
,
new
BasicDBObject
(
"name"
,
"rank_desc"
));
// collNew.createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
collNew
.
createIndex
(
nameIndexDoc
,
new
BasicDBObject
(
"name"
,
"name_desc"
));
// } catch (Exception e) {
collNew
.
createIndex
(
typeIndexDoc
,
new
BasicDBObject
(
"name"
,
"type_desc"
));
// e.printStackTrace();
}
catch
(
Exception
e
)
{
// }
e
.
printStackTrace
();
}
DBObject
query
=
new
BasicDBObject
(
new
BasicDBObject
(
"time"
,
DBObject
query
=
new
BasicDBObject
(
new
BasicDBObject
(
"time"
,
new
BasicDBObject
(
"$gte"
,
startDate
).
append
(
"$lte"
,
endDate
)));
new
BasicDBObject
(
"$gte"
,
startDate
).
append
(
"$lte"
,
endDate
)));
System
.
out
.
println
(
query
);
System
.
out
.
println
(
query
);
int
i
=
0
;
WriteResult
wr
=
coll
.
remove
(
query
);
DBCursor
cur
=
coll
.
find
(
query
).
skip
(
i
);
System
.
out
.
println
(
"========"
+
wr
.
getN
());
System
.
out
.
println
(
query
+
"======="
+
cur
.
count
());
// int i = 0;
List
<
DBObject
>
dataList
=
new
ArrayList
<>();
// DBCursor cur = coll.remove(query);
while
(
cur
.
hasNext
())
{
// System.out.println(query +"======="+ cur.count());
DBObject
doc
=
cur
.
next
();
// List<DBObject> dataList = new ArrayList<>();
try
{
// while(cur.hasNext()) {
System
.
out
.
println
(
i
+
"===="
);
// DBObject doc = cur.next();
collNew
.
save
(
doc
);
// try {
i
++;
//// collNew.save(doc);
// i++;
// coll.remove(doc);
// coll.remove(doc);
}
catch
(
Exception
e2
)
{
//
} catch (Exception e2) {
e2
.
printStackTrace
();
//
e2.printStackTrace();
}
//
}
dataList
.
add
(
doc
);
//
dataList.add(doc);
}
//
}
System
.
out
.
println
(
collName
+
"数据量大小"
+
dataList
.
size
());
//
System.out.println(collName +"数据量大小" +dataList.size());
cur
.
close
();
//
cur.close();
// if(!dataList.isEmpty()) {
// if(!dataList.isEmpty()) {
// try {
// try {
// collNew.insert(dataList);
// collNew.insert(dataList);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
View file @
644ed491
...
@@ -29,6 +29,7 @@ public class BaiduHotSearchRun extends Thread{
...
@@ -29,6 +29,7 @@ public class BaiduHotSearchRun extends Thread{
TimeUnit
.
MINUTES
.
sleep
(
5
);
TimeUnit
.
MINUTES
.
sleep
(
5
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
}
ZhiWeiTools
.
sleep
(
50
);
ZhiWeiTools
.
sleep
(
50
);
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
View file @
644ed491
...
@@ -28,6 +28,7 @@ public class DouyinHotSearchRun extends Thread{
...
@@ -28,6 +28,7 @@ public class DouyinHotSearchRun extends Thread{
TimeUnit
.
MINUTES
.
sleep
(
10
);
TimeUnit
.
MINUTES
.
sleep
(
10
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
}
ZhiWeiTools
.
sleep
(
50
);
ZhiWeiTools
.
sleep
(
50
);
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
View file @
644ed491
...
@@ -27,6 +27,7 @@ public class SougoHotSearchRun extends Thread {
...
@@ -27,6 +27,7 @@ public class SougoHotSearchRun extends Thread {
TimeUnit
.
MINUTES
.
sleep
(
5
);
TimeUnit
.
MINUTES
.
sleep
(
5
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
}
ZhiWeiTools
.
sleep
(
50
);
ZhiWeiTools
.
sleep
(
50
);
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
View file @
644ed491
...
@@ -28,6 +28,7 @@ public class WeiboHotSearchRun extends Thread{
...
@@ -28,6 +28,7 @@ public class WeiboHotSearchRun extends Thread{
TimeUnit
.
MINUTES
.
sleep
(
1
);
TimeUnit
.
MINUTES
.
sleep
(
1
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
1000
);
}
}
ZhiWeiTools
.
sleep
(
50
);
ZhiWeiTools
.
sleep
(
50
);
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
View file @
644ed491
...
@@ -28,6 +28,7 @@ public class WeiboTopicRun extends Thread{
...
@@ -28,6 +28,7 @@ public class WeiboTopicRun extends Thread{
TimeUnit
.
DAYS
.
sleep
(
1
);
TimeUnit
.
DAYS
.
sleep
(
1
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
}
ZhiWeiTools
.
sleep
(
50
);
ZhiWeiTools
.
sleep
(
50
);
}
}
...
@@ -41,7 +42,7 @@ public class WeiboTopicRun extends Thread{
...
@@ -41,7 +42,7 @@ public class WeiboTopicRun extends Thread{
logger
.
info
(
"{}, 微博超话此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
logger
.
info
(
"{}, 微博超话此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<>();
List
<
DBObject
>
data
=
new
ArrayList
<>();
for
(
WeiboTopic
topic
:
list
){
for
(
WeiboTopic
topic
:
list
){
System
.
out
.
println
(
"topic::::"
+
topic
);
logger
.
info
(
"topic::::{}"
,
topic
);
DBObject
doc
=
new
BasicDBObject
();
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"name"
,
topic
.
getTopicName
());
doc
.
put
(
"name"
,
topic
.
getTopicName
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
View file @
644ed491
...
@@ -27,6 +27,7 @@ public class ZhihuHotSearchRun extends Thread{
...
@@ -27,6 +27,7 @@ public class ZhihuHotSearchRun extends Thread{
TimeUnit
.
MINUTES
.
sleep
(
10
);
TimeUnit
.
MINUTES
.
sleep
(
10
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
}
ZhiWeiTools
.
sleep
(
50
);
ZhiWeiTools
.
sleep
(
50
);
}
}
...
...
src/main/resources/proxyip.properties
View file @
644ed491
#
registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182
registry
=
zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182
#
group=hangzhou
group
=
hangzhou
########################################################
########################################################
registry
=
zookeeper://192.168.0.36:2181
#registry=zookeeper://192.168.0.36:2181
group
=
local
#
group
=
local
\ No newline at end of file
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment