Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
9ce337bb
Commit
9ce337bb
authored
Mar 10, 2020
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加总热搜表缓存+mongo修改为多节点读取+微博话题类型错误修复
parent
dd95f27b
Show whitespace changes
Inline
Side-by-side
Showing
24 changed files
with
712 additions
and
794 deletions
+712
-794
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchCache.java
+100
-0
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchList.java
+1
-6
src/main/java/com/zhiwei/searchhotcrawler/config/DBConfig.java
+5
-12
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+9
-8
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
+126
-0
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchListDAO.java
+21
-104
src/main/java/com/zhiwei/searchhotcrawler/dao/WechatUserDao.java
+0
-67
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiboSuperTopicDAO.java
+17
-47
src/main/java/com/zhiwei/searchhotcrawler/dbtemplate/MongoDBTemplate.java
+77
-75
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+7
-6
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchListTest.java
+0
-123
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+7
-4
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+7
-4
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
+122
-125
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
+124
-127
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
+6
-2
src/main/java/com/zhiwei/searchhotcrawler/timer/UpdateWechatUserRun.java
+47
-50
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
+6
-9
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboSuperTopicRun.java
+3
-7
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
+7
-4
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
+9
-3
src/main/resources/db.properties
+8
-8
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchCache.java
0 → 100644
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
import
lombok.Data
;
import
lombok.ToString
;
import
java.util.Date
;
@ToString
@Data
public
class
HotSearchCache
{
/**
* 主键
*/
private
String
id
;
/**
* 消息链接
*/
private
String
url
;
/**
* 热搜关键词,且为消息主键
*/
private
String
name
;
/**
* 热搜或话题导语
*/
private
String
topicLead
;
/**
* 最高热搜值
*/
private
Integer
highestCount
;
/**
* 最新热搜热度值
*/
private
Integer
lastCount
;
/**
* 状态(true 为热搜; false为时时上升)
*/
private
Boolean
hot
;
/**
* 话题开始时间
*/
private
Date
startTime
;
/**
* 话题结束时间
*/
private
Date
endTime
;
/**
* 最高排名
*/
private
Integer
highestRank
;
/**
* 最新排名
*/
private
Integer
lastRank
;
/**
* 热搜分类
*/
private
String
type
;
/**
* 热搜持续时长
*/
private
Integer
duration
;
public
HotSearchCache
(
String
url
,
String
name
,
String
topicLead
,
Integer
highestCount
,
Integer
lastCount
,
Boolean
hot
,
Date
startTime
,
Date
endTime
,
Integer
highestRank
,
Integer
lastRank
,
String
type
,
Integer
duration
){
this
.
id
=
name
+
"_"
+
type
;
this
.
url
=
url
;
this
.
name
=
name
;
this
.
topicLead
=
topicLead
;
this
.
hot
=
hot
;
this
.
highestCount
=
highestCount
;
this
.
lastCount
=
lastCount
;
this
.
hot
=
hot
;
this
.
startTime
=
startTime
;
this
.
endTime
=
endTime
;
this
.
highestRank
=
highestRank
;
this
.
lastRank
=
lastRank
;
this
.
type
=
type
;
this
.
duration
=
duration
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchList.java
View file @
9ce337bb
...
...
@@ -40,7 +40,7 @@ public class HotSearchList implements Serializable{
private
String
topicLead
;
/**
*
时时
热搜量
* 热搜量
*/
private
Integer
count
;
...
...
@@ -60,11 +60,6 @@ public class HotSearchList implements Serializable{
private
Date
time
;
/**
* 据上分钟变化量
*/
private
Integer
changeCount
;
/**
* 排名
*/
private
Integer
rank
;
...
...
src/main/java/com/zhiwei/searchhotcrawler/config/Config.java
→
src/main/java/com/zhiwei/searchhotcrawler/config/
DB
Config.java
View file @
9ce337bb
...
...
@@ -3,7 +3,7 @@ package com.zhiwei.searchhotcrawler.config;
import
java.io.InputStream
;
import
java.util.Properties
;
public
class
Config
{
public
class
DB
Config
{
static
{
Properties
conf
=
null
;
try
{
...
...
@@ -12,29 +12,22 @@ public class Config {
conf
=
new
Properties
();
conf
.
load
(
is
);
is
.
close
();
mongoIp
=
conf
.
getProperty
(
"mongoIp"
);
mongoPort
=
Integer
.
valueOf
(
conf
.
getProperty
(
"mongoPort"
));
userName
=
conf
.
getProperty
(
"db.username"
);
userPwd
=
conf
.
getProperty
(
"db.paasword"
);
authDB
=
conf
.
getProperty
(
"db.certifiedDB"
);
mongoUri
=
conf
.
getProperty
(
"mongoUri"
);
dbName
=
conf
.
getProperty
(
"dbName"
);
searchCollName
=
conf
.
getProperty
(
"searchCollName"
);
searchCacheCollName
=
conf
.
getProperty
(
"searchCacheCollName"
);
topicCollName
=
conf
.
getProperty
(
"topicCollName"
);
collWechatUserName
=
conf
.
getProperty
(
"collWechatUserName"
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
}
public
static
String
mongoIp
;
public
static
int
mongoPort
;
public
static
String
userName
;
public
static
String
userPwd
;
public
static
String
authDB
;
public
static
String
mongoUri
;
public
static
String
dbName
;
public
static
String
searchCollName
;
public
static
String
searchCacheCollName
;
public
static
String
topicCollName
;
public
static
String
collWechatUserName
;
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.*
;
import
lombok.extern.log4j.Log4j2
;
import
org.apache.commons.lang3.StringUtils
;
...
...
@@ -107,6 +103,8 @@ public class WeiboHotSearchCrawler {
* @return void 返回类型
*/
public
static
List
<
HotSearchList
>
weiboHotSearchByPhone
(){
for
(
int
count
=
0
;
count
<=
5
;
count
++){
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"
;
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
);
...
...
@@ -123,9 +121,10 @@ public class WeiboHotSearchCrawler {
try
{
JSONObject
card
=
cards
.
getJSONObject
(
i
);
JSONArray
cardGroup
=
card
.
getJSONArray
(
"card_group"
);
if
(
Objects
.
nonNull
(
cardGroup
)
&&
!
cardGroup
.
isEmpty
()){
String
title
=
card
.
getString
(
"title"
);
boolean
hot
=
true
;
if
(
title
.
contains
(
"实时上升热点"
)){
if
(
Objects
.
nonNull
(
title
)
&&
title
.
contains
(
"实时上升热点"
)){
hot
=
false
;
rank
=
50
;
}
...
...
@@ -142,6 +141,9 @@ public class WeiboHotSearchCrawler {
result
.
add
(
hotSearch
);
rank
++;
}
}
else
{
log
.
info
(
"card 数据结构为:{}"
,
card
);
}
}
catch
(
Exception
e
)
{
log
.
error
(
"解析微博时时热搜时出现解析错误"
,
e
);
continue
;
...
...
@@ -150,14 +152,13 @@ public class WeiboHotSearchCrawler {
return
result
;
}
catch
(
Exception
e
)
{
log
.
error
(
"解析微博时时热搜时出现解析错误,数据不是json结构"
,
e
);
return
Collections
.
emptyList
();
}
}
else
{
log
.
info
(
"解析微博时时热搜时出现解析错误,页面结构有问题"
);
}
}
catch
(
IOException
e1
)
{
log
.
error
(
"解析微博时时热搜时出现连接失败"
,
e1
);
return
Collections
.
emptyList
();
}
}
return
Collections
.
emptyList
();
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
View file @
9ce337bb
...
...
@@ -61,7 +61,7 @@ public class WeiboSuperTopicCrawler {
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
try
{
System
.
out
.
println
(
"pageUrl=========="
+
pageUrl
);
//
System.out.println("pageUrl=========="+pageUrl);
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
pageUrl
,
headMap
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"desc1"
))
{
topicList
.
addAll
(
parseTopicRankHtml
(
page
,
htmlBody
,
type
));
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
View file @
9ce337bb
...
...
@@ -134,7 +134,7 @@ public class WeiboTopicCrawler {
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
5
;
retryTimes
++)
{
try
{
log
.
info
(
"pageUrl::{}"
,
pageUrl
);
//
log.info("pageUrl::{}", pageUrl);
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
pageUrl
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"top_mark_text"
))
{
topicList
.
addAll
(
parseTopicHtml
(
htmlBody
));
...
...
@@ -202,7 +202,7 @@ public class WeiboTopicCrawler {
}
return
topicList
;
}
else
{
log
.
info
(
"html:{}"
,
htmlBody
);
//
log.info("html:{}",htmlBody);
}
}
catch
(
Exception
e
)
{
log
.
error
(
"解析榜单列表页面时出现错误,错误为:{}"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
0 → 100644
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.mongodb.client.MongoCollection
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.config.DBConfig
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.Objects
;
/**
* 热搜基础结果表
*/
@Log4j2
public
class
HotSearchCacheDAO
{
private
static
MongoCollection
collection
=
MongoDBTemplate
.
getCollection
(
DBConfig
.
dbName
,
DBConfig
.
searchCacheCollName
);
/**
* 添加及更新相应数据表中的数据
* @param document
*/
public
void
addAndUpdateData
(
Document
document
){
try
{
String
name
=
document
.
getString
(
"name"
);
String
type
=
document
.
getString
(
"type"
);
int
lastRank
=
document
.
getInteger
(
"rank"
)!=
null
?
document
.
getInteger
(
"rank"
):
-
1
;
int
lastCount
=
document
.
getInteger
(
"count"
)!=
null
?
document
.
getInteger
(
"count"
):
-
1
;
Date
startTime
=
document
.
getDate
(
"time"
);
Date
endTime
=
new
Date
(
startTime
.
getTime
()
+
(
60
*
1000
));
String
topicLead
=
document
.
getString
(
"topic_lead"
)!=
null
?
document
.
getString
(
"topic_lead"
):
null
;
boolean
hot
=
document
.
getBoolean
(
"hot"
)!=
null
?
document
.
getBoolean
(
"hot"
):
true
;
String
url
=
document
.
getString
(
"url"
)!=
null
?
document
.
getString
(
"url"
):
null
;
String
id
=
name
+
"_"
+
type
;
Document
query
=
new
Document
(
"_id"
,
id
);
Document
nowDoc
=
(
Document
)
collection
.
find
(
query
).
first
();
if
(
Objects
.
nonNull
(
nowDoc
))
{
int
highestRank
=
nowDoc
.
getInteger
(
"highestRank"
);
int
highestCount
=
nowDoc
.
getInteger
(
"highestCount"
);
//判断最大热度值
if
(
lastCount
>
highestCount
)
{
highestCount
=
lastCount
;
}
//判断最高排名
if
(
lastRank
<
highestRank
)
{
highestRank
=
lastRank
;
}
//计算热搜时长
int
duration
=
nowDoc
.
getInteger
(
"duration"
);
int
durationNow
=
getDuration
(
type
,
duration
);
endTime
=
new
Date
(
new
Date
().
getTime
()
+
(
60
*
1000
));
//更新相应信息
nowDoc
.
put
(
"endTime"
,
endTime
);
nowDoc
.
put
(
"lastRank"
,
lastRank
);
nowDoc
.
put
(
"lastCount"
,
lastCount
);
nowDoc
.
put
(
"highestRank"
,
highestRank
);
nowDoc
.
put
(
"highestCount"
,
highestCount
);
nowDoc
.
put
(
"duration"
,
durationNow
);
collection
.
replaceOne
(
query
,
nowDoc
);
}
else
{
nowDoc
=
new
Document
();
int
durationNow
=
getDuration
(
type
,
0
);
nowDoc
.
put
(
"_id"
,
id
);
nowDoc
.
put
(
"url"
,
url
);
nowDoc
.
put
(
"name"
,
name
);
nowDoc
.
put
(
"hot"
,
hot
);
nowDoc
.
put
(
"topicLead"
,
topicLead
);
nowDoc
.
put
(
"type"
,
type
);
nowDoc
.
put
(
"lastRank"
,
lastRank
);
nowDoc
.
put
(
"highestRank"
,
lastRank
);
nowDoc
.
put
(
"lastCount"
,
lastCount
);
nowDoc
.
put
(
"highestCount"
,
lastCount
);
nowDoc
.
put
(
"startTime"
,
startTime
);
nowDoc
.
put
(
"endTime"
,
endTime
);
nowDoc
.
put
(
"duration"
,
durationNow
);
collection
.
insertOne
(
nowDoc
);
}
}
catch
(
Exception
e
){
log
.
info
(
"数据存储时出错:{}"
,
e
);
}
}
/**
* 计算热搜时长
* @param type
* @param duration
* @return
*/
private
int
getDuration
(
String
type
,
int
duration
){
switch
(
type
){
case
"微博热搜"
:
duration
=
duration
+
1
;
break
;
case
"百度热搜"
:
duration
=
duration
+
5
;
break
;
case
"知乎热搜"
:
duration
=
duration
+
10
;
break
;
case
"抖音热搜"
:
duration
=
duration
+
10
;
break
;
case
"搜狗微信热搜"
:
duration
=
duration
+
5
;
break
;
case
"微博话题"
:
duration
=
duration
+
3
;
break
;
default
:
duration
=
duration
+
1
;
}
return
duration
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchListDAO.java
View file @
9ce337bb
...
...
@@ -6,7 +6,12 @@ import java.util.Date;
import
java.util.List
;
import
java.util.Objects
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.mongodb.client.ListIndexesIterable
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoDatabase
;
import
com.mongodb.client.model.IndexOptions
;
import
com.zhiwei.searchhotcrawler.config.DBConfig
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
lombok.extern.log4j.Log4j2
;
import
com.mongodb.BasicDBObject
;
...
...
@@ -14,138 +19,50 @@ import com.mongodb.DBCursor;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.cache.CacheManager
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
org.bson.BsonDocument
;
import
org.bson.Document
;
import
org.bson.conversions.Bson
;
@Log4j2
public
class
HotSearchListDAO
extends
MongoDBTemplate
{
public
class
HotSearchListDAO
{
public
static
MongoDatabase
mongoDatabase
=
MongoDBTemplate
.
getDB
(
DBConfig
.
dbName
);
public
static
MongoCollection
mongoCollection
;
public
HotSearchListDAO
()
{
super
();
super
.
setDbName
(
Config
.
dbName
);
String
time
=
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd"
);
String
year
=
time
.
substring
(
0
,
4
);
String
month
=
time
.
substring
(
5
,
7
);
String
collName
=
Config
.
searchCollName
+
year
+
"_"
+
month
;
super
.
setCollName
(
collName
);
String
collName
=
DB
Config
.
searchCollName
+
year
+
"_"
+
month
;
mongoCollection
=
mongoDatabase
.
getCollection
(
collName
);
//给数据表创建索引
createIndex
(
);
MongoDBTemplate
.
createIndex
(
DBConfig
.
dbName
,
collName
);
}
/**
* 初次创建表及创建相应的索引
*/
private
void
createIndex
(){
List
<
DBObject
>
indexList
=
this
.
getReadColl
().
getIndexInfo
();
if
(
Objects
.
isNull
(
indexList
)
&&
indexList
.
isEmpty
()){
DBObject
countIndexDoc
=
new
BasicDBObject
();
countIndexDoc
.
put
(
"count"
,
-
1
);
DBObject
timeIndexDoc
=
new
BasicDBObject
();
timeIndexDoc
.
put
(
"time"
,
-
1
);
DBObject
rankIndexDoc
=
new
BasicDBObject
();
rankIndexDoc
.
put
(
"rank"
,
-
1
);
DBObject
nameIndexDoc
=
new
BasicDBObject
();
nameIndexDoc
.
put
(
"name"
,
-
1
);
DBObject
typeIndexDoc
=
new
BasicDBObject
();
typeIndexDoc
.
put
(
"type"
,
-
1
);
try
{
super
.
getReadColl
().
createIndex
(
countIndexDoc
,
new
BasicDBObject
(
"name"
,
"count_desc"
));
super
.
getReadColl
().
createIndex
(
timeIndexDoc
,
new
BasicDBObject
(
"name"
,
"time_desc"
));
super
.
getReadColl
().
createIndex
(
rankIndexDoc
,
new
BasicDBObject
(
"name"
,
"rank_desc"
));
super
.
getReadColl
().
createIndex
(
nameIndexDoc
,
new
BasicDBObject
(
"name"
,
"name_desc"
));
super
.
getReadColl
().
createIndex
(
typeIndexDoc
,
new
BasicDBObject
(
"name"
,
"type_desc"
));
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
}
}
/**
* 添加数据入库
* @param list
*/
public
void
addHotSearchList
(
List
<
DBObject
>
list
){
try
{
this
.
getReadColl
().
insert
(
list
);
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
public
void
addHotSearch
(
DBObject
doc
){
try
{
this
.
getReadColl
().
insert
(
doc
);
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
/**
* 查询据上次变化量
* @Title: getChangeCount
* @author hero
* @param @param weiboHotSearch
* @param @return 设定文件
* @return int 返回类型
*/
public
int
getChangeCount
(
HotSearchList
weiboHotSearch
){
int
result
=
0
;
DBObject
query
=
new
BasicDBObject
();
query
.
put
(
"name"
,
weiboHotSearch
.
getName
());
DBObject
sort
=
new
BasicDBObject
();
sort
.
put
(
"time"
,
-
1
);
public
void
addHotSearchList
(
List
<
Document
>
list
){
try
{
DBCursor
cur
=
this
.
getReadColl
().
find
(
query
).
sort
(
sort
).
limit
(
1
);
while
(
cur
.
hasNext
()){
DBObject
doc
=
cur
.
next
();
if
(
doc
.
get
(
"count"
)!=
null
)
{
result
=
weiboHotSearch
.
getCount
()
-
Integer
.
valueOf
(
doc
.
get
(
"count"
).
toString
());
break
;
}
}
cur
.
close
();
mongoCollection
.
insertMany
(
list
);
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
return
result
;
}
return
result
;
}
/**
* @Title: getWeiboHotOneHour
* @author hero
* @Description: 查询最近1小时内新增的微博热搜
* @param @return 设定文件
* @return List<DBObject> 返回类型
*/
public
List
<
DBObject
>
getHotOneHour
(
String
type
){
List
<
DBObject
>
list
=
new
ArrayList
<>();
Date
date
=
new
Date
((
new
Date
().
getTime
()-
60
*
60
*
1000
));
DBObject
query
=
new
BasicDBObject
();
query
.
put
(
"time"
,
new
BasicDBObject
(
"$gte"
,
date
));
query
.
put
(
"changeCount"
,
0
);
query
.
put
(
"type"
,
type
);
public
void
addHotSearch
(
Document
doc
){
try
{
DBCursor
cur
=
this
.
getReadColl
().
find
(
query
);
while
(
cur
.
hasNext
()){
DBObject
doc
=
cur
.
next
();
String
name
=
doc
.
get
(
"name"
).
toString
();
if
(
CacheManager
.
getCacheByKey
(
name
)==
null
){
CacheManager
.
putCache
(
name
,
doc
,
48
*
60
*
60
*
1000
);
list
.
add
(
doc
);
}
}
cur
.
close
();
mongoCollection
.
insertOne
(
doc
);
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
return
list
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/WechatUserDao.java
deleted
100644 → 0
View file @
dd95f27b
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
java.util.Collections
;
import
java.util.List
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
lombok.extern.log4j.Log4j2
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
@Log4j2
public
class
WechatUserDao
extends
MongoDBTemplate
{
public
WechatUserDao
()
{
super
();
super
.
setDbName
(
Config
.
dbName
);
super
.
setCollName
(
Config
.
collWechatUserName
);
}
/**
* 添加分组用户
* @param userlist
* @param groupName
* @param groupId
*/
public
void
addWechatUser
(
List
<
String
>
userlist
,
String
groupName
,
Integer
groupId
){
for
(
int
i
=
0
;
i
<
3
;
i
++){
try
{
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
groupId
+
"-"
+
groupName
);
doc
.
put
(
"groupId"
,
groupId
);
doc
.
put
(
"groupName"
,
groupName
);
doc
.
put
(
"user"
,
userlist
);
this
.
getReadColl
().
save
(
doc
);
break
;
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
}
/**
* 根据分组名称查询分组用户
* @param group
* @return
*/
@SuppressWarnings
(
"unchecked"
)
public
List
<
String
>
getWechatUserByGroup
(
String
group
){
try
{
DBObject
query
=
new
BasicDBObject
();
query
.
put
(
"groupName"
,
group
);
DBObject
doc
=
this
.
getReadColl
().
findOne
(
query
);
if
(
doc
!=
null
){
return
(
List
<
String
>)
doc
.
get
(
"user"
);
}
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
return
Collections
.
emptyList
();
}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiboSuperTopicDAO.java
View file @
9ce337bb
...
...
@@ -3,76 +3,46 @@ package com.zhiwei.searchhotcrawler.dao;
import
java.util.Date
;
import
java.util.List
;
import
java.util.Objects
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
lombok.extern.log4j.Log4j2
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoDatabase
;
import
com.zhiwei.searchhotcrawler.config.DBConfig
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
lombok.extern.log4j.Log4j2
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
org.bson.Document
;
@Log4j2
public
class
WeiboSuperTopicDAO
extends
MongoDBTemplate
{
public
class
WeiboSuperTopicDAO
{
public
static
MongoDatabase
mongoDatabase
=
MongoDBTemplate
.
getDB
(
DBConfig
.
dbName
);
public
static
MongoCollection
mongoCollection
;
public
WeiboSuperTopicDAO
()
{
super
();
super
.
setDbName
(
Config
.
dbName
);
String
time
=
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd"
);
String
year
=
time
.
substring
(
0
,
4
);
String
month
=
time
.
substring
(
5
,
7
);
String
collName
=
Config
.
topic
CollName
+
year
+
"_"
+
month
;
super
.
setCollName
(
collName
);
String
collName
=
DBConfig
.
search
CollName
+
year
+
"_"
+
month
;
mongoCollection
=
mongoDatabase
.
getCollection
(
collName
);
createIndex
();
//给数据表创建索引
MongoDBTemplate
.
createIndex
(
DBConfig
.
dbName
,
collName
);
}
/**
* 初次创建表及创建相应的索引
*/
private
void
createIndex
(){
List
<
DBObject
>
indexList
=
this
.
getReadColl
().
getIndexInfo
();
if
(
Objects
.
isNull
(
indexList
)
&&
indexList
.
isEmpty
()){
DBObject
countIndexDoc
=
new
BasicDBObject
();
countIndexDoc
.
put
(
"score_num"
,
-
1
);
DBObject
timeIndexDoc
=
new
BasicDBObject
();
timeIndexDoc
.
put
(
"time"
,
-
1
);
DBObject
rankIndexDoc
=
new
BasicDBObject
();
rankIndexDoc
.
put
(
"rank"
,
-
1
);
DBObject
nameIndexDoc
=
new
BasicDBObject
();
nameIndexDoc
.
put
(
"name"
,
-
1
);
DBObject
typeIndexDoc
=
new
BasicDBObject
();
typeIndexDoc
.
put
(
"type"
,
-
1
);
try
{
super
.
getReadColl
().
createIndex
(
countIndexDoc
,
new
BasicDBObject
(
"name"
,
"score_desc"
));
super
.
getReadColl
().
createIndex
(
timeIndexDoc
,
new
BasicDBObject
(
"name"
,
"time_desc"
));
super
.
getReadColl
().
createIndex
(
rankIndexDoc
,
new
BasicDBObject
(
"name"
,
"rank_desc"
));
super
.
getReadColl
().
createIndex
(
nameIndexDoc
,
new
BasicDBObject
(
"name"
,
"name_desc"
));
super
.
getReadColl
().
createIndex
(
typeIndexDoc
,
new
BasicDBObject
(
"name"
,
"type_desc"
));
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
}
}
/**
* 添加数据入库
* @param list
*/
public
void
addTopicList
(
List
<
D
BObjec
t
>
list
){
public
void
addTopicList
(
List
<
D
ocumen
t
>
list
){
try
{
this
.
getReadColl
().
insert
(
list
);
mongoCollection
.
insertMany
(
list
);
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
public
void
addTopic
(
D
BObjec
t
doc
){
public
void
addTopic
(
D
ocumen
t
doc
){
try
{
this
.
getReadColl
().
insert
(
doc
);
mongoCollection
.
insertOne
(
doc
);
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/dbtemplate/MongoDBTemplate.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
dbtemplate
;
import
java.util.Arrays
;
import
com.mongodb.DB
;
import
com.mongodb.DBCollection
;
import
com.mongodb.Mongo
;
import
com.mongodb.MongoClient
;
import
com.mongodb.MongoCredential
;
import
com.mongodb.MongoException
;
import
com.mongodb.ServerAddress
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.mongodb.MongoClientOptions
;
import
com.mongodb.MongoClientURI
;
import
com.mongodb.WriteConcern
;
import
com.mongodb.client.ListIndexesIterable
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoDatabase
;
import
com.mongodb.client.model.IndexOptions
;
import
com.zhiwei.searchhotcrawler.config.DBConfig
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
java.util.Objects
;
@Log4j2
public
enum
MongoDBTemplate
{
instance
;
private
MongoClient
mongoClient
;
static
{
MongoClientOptions
options
=
new
MongoClientOptions
.
Builder
()
.
connectionsPerHost
(
300
)
//连接池设置为300个连接,默认为100
.
connectTimeout
(
15000
)
//连接超时,推荐>3000毫秒
.
maxWaitTime
(
5000
)
.
socketTimeout
(
0
)
// 套接字超时时间,0无限制
.
threadsAllowedToBlockForConnectionMultiplier
(
5000
)
// 线程队列数,如果连接线程排满了队列就会抛出“Out of semaphores to get db”错误。
.
writeConcern
(
WriteConcern
.
W1
)
//
.
build
();
log
.
info
(
"MongoDBTemplate.static initializer : {}"
,
DBConfig
.
mongoUri
);
MongoClientURI
mongoClientURI
=
new
MongoClientURI
(
DBConfig
.
mongoUri
);
instance
.
mongoClient
=
new
MongoClient
(
mongoClientURI
);
}
/**
/**
* 获取DB实例 - 指定DB
*
* @Description: MongoDB模板类
* @author Tou Tang
* @date 2014-11-14 下午3:24:40
* @param databaseName
* @return
*/
public
class
MongoDBTemplate
{
protected
static
Mongo
reader
;
protected
static
Mongo
writer
;
protected
String
collName
;
protected
String
dbName
;
@SuppressWarnings
(
"deprecation"
)
public
MongoDBTemplate
()
{
try
{
MongoCredential
credential
=
MongoCredential
.
createCredential
(
Config
.
userName
,
Config
.
authDB
,
Config
.
userPwd
.
toCharArray
());
ServerAddress
address
=
new
ServerAddress
(
Config
.
mongoIp
,
Config
.
mongoPort
);
if
(
reader
==
null
)
{
reader
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credential
));
// reader = new MongoClient(address);
}
if
(
writer
==
null
)
{
writer
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credential
));
// writer = new MongoClient(address);
}
}
catch
(
MongoException
e
)
{
e
.
printStackTrace
();
}
}
public
DBCollection
getReadColl
()
{
@SuppressWarnings
(
"deprecation"
)
final
DB
db
=
getReader
().
getDB
(
dbName
);
final
DBCollection
coll
=
db
.
getCollection
(
collName
);
return
coll
;
}
protected
Mongo
getReader
()
{
return
reader
;
public
static
MongoDatabase
getDB
(
String
databaseName
)
{
return
instance
.
mongoClient
.
getDatabase
(
databaseName
);
}
public
DBCollection
getWriteColl
()
{
@SuppressWarnings
(
"deprecation"
)
final
DB
db
=
getWriter
().
getDB
(
dbName
);
final
DBCollection
coll
=
db
.
getCollection
(
collName
);
return
coll
;
}
protected
Mongo
getWriter
()
{
return
writer
;
/**
* 获取collection对象 - 指定Collection
*
* @param databaseName
* @param collectionName
* @return
*/
public
static
MongoCollection
<
Document
>
getCollection
(
String
databaseName
,
String
collectionName
)
{
MongoDatabase
db
=
instance
.
mongoClient
.
getDatabase
(
databaseName
);
return
db
.
getCollection
(
collectionName
)
;
}
protected
void
setCollName
(
final
String
collName
)
{
this
.
collName
=
collName
;
}
protected
void
setDbName
(
final
String
dbName
)
{
this
.
dbName
=
dbName
;
}
@SuppressWarnings
(
"static-access"
)
protected
void
setReader
(
final
Mongo
reader
)
{
this
.
reader
=
reader
;
/**
* 创建索引
* @param databaseName
* @param collectionName
*/
public
static
void
createIndex
(
String
databaseName
,
String
collectionName
){
MongoDatabase
db
=
instance
.
mongoClient
.
getDatabase
(
databaseName
);
MongoCollection
mongoCollection
=
db
.
getCollection
(
collectionName
);
ListIndexesIterable
<
Document
>
indexList
=
mongoCollection
.
listIndexes
();
if
(
Objects
.
isNull
(
indexList
)){
Document
countIndexDoc
=
new
Document
();
countIndexDoc
.
put
(
"score_num"
,
-
1
);
Document
timeIndexDoc
=
new
Document
();
timeIndexDoc
.
put
(
"time"
,
-
1
);
Document
rankIndexDoc
=
new
Document
();
rankIndexDoc
.
put
(
"rank"
,
-
1
);
Document
nameIndexDoc
=
new
Document
();
nameIndexDoc
.
put
(
"name"
,
-
1
);
Document
typeIndexDoc
=
new
Document
();
typeIndexDoc
.
put
(
"type"
,
-
1
);
try
{
mongoCollection
.
createIndex
(
countIndexDoc
,
new
IndexOptions
().
name
(
"count_desc"
));
mongoCollection
.
createIndex
(
timeIndexDoc
,
new
IndexOptions
().
name
(
"time_desc"
));
mongoCollection
.
createIndex
(
rankIndexDoc
,
new
IndexOptions
().
name
(
"rank_desc"
));
mongoCollection
.
createIndex
(
nameIndexDoc
,
new
IndexOptions
().
name
(
"name_desc"
));
mongoCollection
.
createIndex
(
typeIndexDoc
,
new
IndexOptions
().
name
(
"type_desc"
));
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
@SuppressWarnings
(
"static-access"
)
protected
void
setWriter
(
final
Mongo
writer
)
{
this
.
writer
=
writer
;
}
public
static
void
main
(
String
[]
args
)
{
}
}
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
9ce337bb
...
...
@@ -20,9 +20,12 @@ public class HotSearchRun {
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
ProxyFactory
.
init
(
simpleConfig
);
new
UpdateWechatUserRun
().
start
();
ZhiWeiTools
.
sleep
(
10000
);
new
CacheListener
().
startListen
();
// new UpdateWechatUserRun().start();
// ZhiWeiTools.sleep(10000);
// new CacheListener().startListen();
//推送程序启动
// new SendWeiboHotSearchRun().start();
// new SendZhihuHotSearchRun().start();
// ScheduledExecutorService scheduledThreadPool = Executors.newScheduledThreadPool(6);
//
...
...
@@ -47,8 +50,6 @@ public class HotSearchRun {
new
ZhihuHotSearchRun
().
start
();
new
WeiboSuperTopicRun
().
start
();
new
WeiboTopicRun
().
start
();
//推送程序启动
new
SendWeiboHotSearchRun
().
start
();
new
SendZhihuHotSearchRun
().
start
();
}
}
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchListTest.java
deleted
100644 → 0
View file @
dd95f27b
package
com
.
zhiwei
.
searchhotcrawler
.
test
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
public
class
HotSearchListTest
{
public
static
void
main
(
String
[]
args
)
{
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"zzw"
).
build
();
ProxyFactory
.
init
(
simpleConfig
);
String
url
=
"http://app.myzaker.com/news/app.php?f="
;
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
Elements
elements
=
Jsoup
.
parse
(
htmlBody
).
select
(
"div.titlebar>a"
);
for
(
Element
element
:
elements
){
String
lableUrl
=
"http://app.myzaker.com/news/app.php"
+
element
.
attr
(
"href"
);
System
.
out
.
println
(
"lableUrl========="
+
lableUrl
);
String
htmlBodyLable
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
lableUrl
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
Elements
elementsLable
=
Jsoup
.
parse
(
htmlBodyLable
).
select
(
"div#infinite_scroll>a"
);
for
(
Element
elementLable
:
elementsLable
){
System
.
out
.
println
(
elementLable
.
attr
(
"href"
)
+
"============="
+
elementLable
.
text
());
}
}
}
catch
(
Exception
e
){
e
.
printStackTrace
();
}
// MongoCredential credential = MongoCredential.createCredential(Config.userName, Config.authDB, Config.userPwd.toCharArray());
// ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);
// Mongo mongo = new MongoClient(address, Arrays.asList(credential));
//
// DB db = mongo.getDB("hot_search_list");
// DBCollection coll = db.getCollection("hot_search_list2019_09");
//
//// MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());
//// ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort);
//// Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew));
//// DB dbNew = mongoNew.getDB("hot_search_list");
//
// Map<String,String> timLine = TimeParse.getTimeMap("2019-10-01 00:00:00", "2019-10-09 23:59:59", "dd", 1);
//
// timLine.forEach((start, end) ->{
//
// String year = end.substring(0,4);
// String month = end.substring(5,7);
// Date startDate = TimeParse.stringFormartDate(start);
// Date endDate = TimeParse.stringFormartDate(end);
//
// String collName = "hot_search_list"+year+"_"+month;
// System.out.println("collName=========="+collName);
//// DBCollection collNew = dbNew.getCollection(collName);
//// DBObject countIndexDoc = new BasicDBObject();
//// countIndexDoc.put("count", -1);
//// DBObject timeIndexDoc = new BasicDBObject();
//// timeIndexDoc.put("time", -1);
//// DBObject rankIndexDoc = new BasicDBObject();
//// rankIndexDoc.put("rank", -1);
//// DBObject nameIndexDoc = new BasicDBObject();
//// nameIndexDoc.put("name", -1);
//// DBObject typeIndexDoc = new BasicDBObject();
//// typeIndexDoc.put("type", -1);
//// try {
//// collNew.createIndex(countIndexDoc, new BasicDBObject("name", "count_desc"));
//// collNew.createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
//// collNew.createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
//// collNew.createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
//// collNew.createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
//// } catch (Exception e) {
//// e.printStackTrace();
//// }
//
// DBObject query = new BasicDBObject(new BasicDBObject("time",
// new BasicDBObject("$gte",startDate).append("$lte", endDate)));
// System.out.println(query);
// WriteResult wr = coll.remove(query);
// System.out.println("========"+wr.getN());
//// int i = 0;
//// DBCursor cur = coll.remove(query);
//// System.out.println(query +"======="+ cur.count());
//// List<DBObject> dataList = new ArrayList<>();
//// while(cur.hasNext()) {
//// DBObject doc = cur.next();
//// try {
////// collNew.save(doc);
//// i++;
//// coll.remove(doc);
//// } catch (Exception e2) {
//// e2.printStackTrace();
//// }
//// dataList.add(doc);
//// }
//// System.out.println(collName +"数据量大小" +dataList.size());
//// cur.close();
//// if(!dataList.isEmpty()) {
//// try {
//// collNew.insert(dataList);
//// } catch (Exception e) {
//// e.printStackTrace();
//// }
//// }
// });
// mongo.close();
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
View file @
9ce337bb
...
...
@@ -6,7 +6,9 @@ import java.util.List;
import
java.util.Objects
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -39,23 +41,24 @@ public class BaiduHotSearchRun extends Thread{
private
void
getHotList
()
{
log
.
info
(
"百度风云榜采集开始........"
);
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
List
<
HotSearchList
>
list
=
BaiDuHotSearchCrawler
.
baiduHotSearch
();
log
.
info
(
"{}, 此轮百度风云榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
saveDataList
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
saveDataList
=
new
ArrayList
<>();
if
(
Objects
.
nonNull
(
list
)
&&
!
list
.
isEmpty
())
{
list
.
forEach
(
baiduHotSearch
->{
int
changeCount
=
hotSearchDAO
.
getChangeCount
(
baiduHotSearch
);
DBObject
doc
=
new
BasicDBObject
();
Document
doc
=
new
Document
();
doc
.
put
(
"_id"
,
baiduHotSearch
.
getId
());
doc
.
put
(
"name"
,
baiduHotSearch
.
getName
());
doc
.
put
(
"url"
,
baiduHotSearch
.
getUrl
());
doc
.
put
(
"count"
,
baiduHotSearch
.
getCount
());
doc
.
put
(
"day"
,
baiduHotSearch
.
getDay
());
doc
.
put
(
"time"
,
baiduHotSearch
.
getTime
());
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"rank"
,
baiduHotSearch
.
getRank
());
doc
.
put
(
"type"
,
baiduHotSearch
.
getType
());
saveDataList
.
add
(
doc
);
hotSearchCacheDAO
.
addAndUpdateData
(
doc
);
});
}
hotSearchDAO
.
addHotSearchList
(
saveDataList
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
View file @
9ce337bb
...
...
@@ -5,7 +5,9 @@ import java.util.Date;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -42,23 +44,24 @@ public class DouyinHotSearchRun extends Thread{
private
void
getHotList
()
{
log
.
info
(
"抖音热搜榜采集开始........"
);
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
List
<
HotSearchList
>
list
=
DouyinHotSearchCrawler
.
getMobileDouyinHotList
();
log
.
info
(
"{}, 抖音热搜榜此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
data
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
douyinHotSearch
:
list
){
int
changeCount
=
hotSearchDAO
.
getChangeCount
(
douyinHotSearch
);
DBObject
douyin
=
new
BasicDBObject
();
Document
douyin
=
new
Document
();
douyin
.
put
(
"_id"
,
douyinHotSearch
.
getId
());
douyin
.
put
(
"name"
,
douyinHotSearch
.
getName
());
douyin
.
put
(
"rank"
,
douyinHotSearch
.
getRank
());
douyin
.
put
(
"count"
,
douyinHotSearch
.
getCount
());
douyin
.
put
(
"hot"
,
douyinHotSearch
.
getHot
());
douyin
.
put
(
"day"
,
douyinHotSearch
.
getDay
());
douyin
.
put
(
"time"
,
douyinHotSearch
.
getTime
());
douyin
.
put
(
"changeCount"
,
changeCount
);
douyin
.
put
(
"url"
,
null
);
douyin
.
put
(
"type"
,
douyinHotSearch
.
getType
());
data
.
add
(
douyin
);
hotSearchDAO
.
addHotSearch
(
douyin
);
hotSearchCacheDAO
.
addAndUpdateData
(
douyin
);
}
log
.
info
(
"抖音热搜榜采集结束........"
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.Calendar
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
lombok.extern.log4j.Log4j2
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.dao.WechatUserDao
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.util.Template
;
import
com.zhiwei.searchhotcrawler.util.WechatCodeUtil
;
import
com.zhiwei.searchhotcrawler.util.WechatConstant
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
@Log4j2
public
class
SendWeiboHotSearchRun
extends
Thread
{
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
private
static
WechatUserDao
wechatUserDao
=
new
WechatUserDao
();
@Override
public
void
run
()
{
while
(
true
)
{
try
{
Calendar
calendar
=
Calendar
.
getInstance
();
int
hour
=
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
);
log
.
info
(
"微博推送,当前系统时间为:"
+
hour
);
if
(
hour
>
6
&&
hour
<
23
)
{
List
<
DBObject
>
list
=
hotSearchDAO
.
getHotOneHour
(
HotSearchType
.
微博热搜
.
name
());
if
(
list
!=
null
&&
!
list
.
isEmpty
())
{
for
(
DBObject
weibo
:
list
)
{
String
title
=
weibo
.
get
(
"name"
).
toString
();
String
time
=
TimeParse
.
dateFormartString
((
Date
)
weibo
.
get
(
"time"
),
"yyyy-MM-dd HH:mm:ss"
);
String
url
=
weibo
.
get
(
"url"
).
toString
();
sendTemplateByUserIds
(
title
,
time
,
url
);
}
}
else
{
log
.
info
(
"微博最近一小时无数据"
);
sendTemplateByUserIds
(
"最近一小时无数据"
,
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd HH:mm:ss"
),
null
);
}
}
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
}
catch
(
Exception
e
)
{
log
.
debug
(
"微博热搜推送出现问题,问题为:::{}"
,
e
.
fillInStackTrace
());
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
continue
;
}
}
}
/**
* @Title: sendTemplateByUserIds
* @author hero
* @Description: 发送模版消息
* @param @param
* microTouTiao
* @param @param
* userList 设定文件
* @return void 返回类型
*/
public
static
void
sendTemplateByUserIds
(
String
title
,
String
time
,
String
url
)
{
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
JSONObject
first
=
new
JSONObject
();
first
.
put
(
"value"
,
"您好,有一条来自微博热搜榜的预警通知。"
);
dataMap
.
put
(
"first"
,
first
);
JSONObject
keyword1
=
new
JSONObject
();
keyword1
.
put
(
"value"
,
title
);
keyword1
.
put
(
"color"
,
"#173177"
);
dataMap
.
put
(
"keyword1"
,
keyword1
);
JSONObject
keyword2
=
new
JSONObject
();
keyword2
.
put
(
"value"
,
"微博热搜榜"
);
keyword2
.
put
(
"color"
,
"#173177"
);
dataMap
.
put
(
"keyword2"
,
keyword2
);
JSONObject
keyword3
=
new
JSONObject
();
keyword3
.
put
(
"value"
,
time
);
keyword3
.
put
(
"color"
,
"#173177"
);
dataMap
.
put
(
"keyword3"
,
keyword3
);
JSONObject
remark
=
new
JSONObject
();
remark
.
put
(
"value"
,
"知微情报监测服务"
);
dataMap
.
put
(
"remark"
,
remark
);
List
<
String
>
userList
=
getUserList
();
if
(
userList
!=
null
&&
userList
.
size
()
>
0
)
{
for
(
String
openId
:
userList
)
{
Template
template
=
new
Template
();
template
.
setTouser
(
openId
);
if
(
url
!=
null
)
{
template
.
setUrl
(
url
);
}
template
.
setTemplate_id
(
WechatConstant
.
WECHAT_TEMPLATEID_EARLY_IT
);
template
.
setData
(
dataMap
);
JSONObject
templateJson
=
(
JSONObject
)
JSONObject
.
toJSON
(
template
);
WechatCodeUtil
.
sendDataJson
(
templateJson
);
}
}
else
{
log
.
info
(
"拉取微博用户列表失败"
);
}
}
/**
* @Title: getUserList
* @author hero
* @Description: 用户列表
* @param @param
* projectName
* @param @return
* 设定文件
* @return List<String> 返回类型
*/
public
static
List
<
String
>
getUserList
()
{
List
<
String
>
userList
=
wechatUserDao
.
getWechatUserByGroup
(
"weibohot"
);
if
(
userList
==
null
){
userList
=
WechatCodeUtil
.
getUserListByGroupName
(
"weibohot"
);
}
return
userList
;
}
}
//package com.zhiwei.searchhotcrawler.timer;
//
//import java.util.Calendar;
//import java.util.Date;
//import java.util.HashMap;
//import java.util.List;
//import java.util.Map;
//
//import lombok.extern.log4j.Log4j2;
//
//import com.alibaba.fastjson.JSONObject;
//import com.mongodb.DBObject;
//import com.zhiwei.searchhotcrawler.bean.HotSearchType;
//import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
//import com.zhiwei.searchhotcrawler.util.Template;
//import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
//import com.zhiwei.searchhotcrawler.util.WechatConstant;
//import com.zhiwei.tools.timeparse.TimeParse;
//import com.zhiwei.tools.tools.ZhiWeiTools;
//
//@Log4j2
//public class SendWeiboHotSearchRun extends Thread {
// private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
// private static WechatUserDao wechatUserDao = new WechatUserDao();
// @Override
// public void run() {
// while (true) {
// try {
// Calendar calendar = Calendar.getInstance();
// int hour = calendar.get(Calendar.HOUR_OF_DAY);
// log.info("微博推送,当前系统时间为:" + hour);
// if (hour > 6 && hour < 23) {
// List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.微博热搜.name());
// if (list != null && !list.isEmpty()) {
// for (DBObject weibo : list) {
// String title = weibo.get("name").toString();
// String time = TimeParse.dateFormartString((Date) weibo.get("time"), "yyyy-MM-dd HH:mm:ss");
// String url = weibo.get("url").toString();
// sendTemplateByUserIds(title, time, url);
// }
// } else {
// log.info("微博最近一小时无数据");
// sendTemplateByUserIds("最近一小时无数据",
// TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
// }
// }
// ZhiWeiTools.sleep(1 * 60 * 60 * 1000);
// } catch (Exception e) {
// log.debug("微博热搜推送出现问题,问题为:::{}", e.fillInStackTrace());
// ZhiWeiTools.sleep(1 * 60 * 60 * 1000);
// continue;
// }
// }
// }
//
// /**
// * @Title: sendTemplateByUserIds
// * @author hero
// * @Description: 发送模版消息
// * @param @param
// * microTouTiao
// * @param @param
// * userList 设定文件
// * @return void 返回类型
// */
// public static void sendTemplateByUserIds(String title, String time, String url) {
// Map<String, Object> dataMap = new HashMap<String, Object>();
// JSONObject first = new JSONObject();
// first.put("value", "您好,有一条来自微博热搜榜的预警通知。");
// dataMap.put("first", first);
// JSONObject keyword1 = new JSONObject();
// keyword1.put("value", title);
// keyword1.put("color", "#173177");
// dataMap.put("keyword1", keyword1);
// JSONObject keyword2 = new JSONObject();
// keyword2.put("value", "微博热搜榜");
// keyword2.put("color", "#173177");
// dataMap.put("keyword2", keyword2);
// JSONObject keyword3 = new JSONObject();
// keyword3.put("value", time);
// keyword3.put("color", "#173177");
// dataMap.put("keyword3", keyword3);
// JSONObject remark = new JSONObject();
// remark.put("value", "知微情报监测服务");
// dataMap.put("remark", remark);
// List<String> userList = getUserList();
// if (userList != null && userList.size() > 0) {
// for (String openId : userList) {
// Template template = new Template();
// template.setTouser(openId);
// if (url != null) {
// template.setUrl(url);
// }
// template.setTemplate_id(WechatConstant.WECHAT_TEMPLATEID_EARLY_IT);
// template.setData(dataMap);
//
// JSONObject templateJson = (JSONObject) JSONObject.toJSON(template);
// WechatCodeUtil.sendDataJson(templateJson);
// }
// } else {
// log.info("拉取微博用户列表失败");
// }
// }
//
// /**
// * @Title: getUserList
// * @author hero
// * @Description: 用户列表
// * @param @param
// * projectName
// * @param @return
// * 设定文件
// * @return List<String> 返回类型
// */
// public static List<String> getUserList() {
// List<String> userList = wechatUserDao.getWechatUserByGroup("weibohot");
// if(userList==null){
// userList = WechatCodeUtil.getUserListByGroupName("weibohot");
// }
// return userList;
// }
//}
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.Calendar
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
lombok.extern.log4j.Log4j2
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.dao.WechatUserDao
;
import
com.zhiwei.searchhotcrawler.util.Template
;
import
com.zhiwei.searchhotcrawler.util.WechatCodeUtil
;
import
com.zhiwei.searchhotcrawler.util.WechatConstant
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
@Log4j2
public
class
SendZhihuHotSearchRun
extends
Thread
{
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
private
static
WechatUserDao
wechatUserDao
=
new
WechatUserDao
();
@Override
public
void
run
()
{
while
(
true
)
{
try
{
Calendar
calendar
=
Calendar
.
getInstance
();
int
hour
=
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
);
log
.
info
(
"知乎推送,当前系统时间为:"
+
hour
);
if
(
hour
>
6
&&
hour
<
23
){
List
<
DBObject
>
list
=
hotSearchDAO
.
getHotOneHour
(
HotSearchType
.
知乎热搜
.
name
());
if
(
list
!=
null
&&
!
list
.
isEmpty
()){
for
(
DBObject
zhihu
:
list
){
String
title
=
zhihu
.
get
(
"display_query"
).
toString
();
String
time
=
TimeParse
.
dateFormartString
((
Date
)
zhihu
.
get
(
"time"
),
"yyyy-MM-dd HH:mm:ss"
);
String
url
=
zhihu
.
get
(
"_id"
).
toString
();
if
(
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
)
>
6
&&
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
)
<
23
){
sendTemplateByUserIds
(
title
,
time
,
url
);
}
}
}
else
{
log
.
info
(
"知乎最近一小时无数据"
);
sendTemplateByUserIds
(
"最近一小时无数据"
,
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd HH:mm:ss"
),
null
);
}
}
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
}
catch
(
Exception
e
)
{
log
.
debug
(
"知乎热搜推送出现问题,问题为:::{}"
,
e
.
fillInStackTrace
());
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
}
}
}
/**
* @Title: sendTemplateByUserIds
* @author hero
* @Description: 发送模版消息
* @param @param microTouTiao
* @param @param userList 设定文件
* @return void 返回类型
*/
public
static
void
sendTemplateByUserIds
(
String
title
,
String
time
,
String
url
)
{
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<>();
JSONObject
first
=
new
JSONObject
();
first
.
put
(
"value"
,
"您好,有一条来自知乎热搜榜的预警通知。"
);
dataMap
.
put
(
"first"
,
first
);
JSONObject
keyword1
=
new
JSONObject
();
keyword1
.
put
(
"value"
,
title
);
keyword1
.
put
(
"color"
,
"#173177"
);
dataMap
.
put
(
"keyword1"
,
keyword1
);
JSONObject
keyword2
=
new
JSONObject
();
keyword2
.
put
(
"value"
,
"知乎热搜榜"
);
keyword2
.
put
(
"color"
,
"#173177"
);
dataMap
.
put
(
"keyword2"
,
keyword2
);
JSONObject
keyword3
=
new
JSONObject
();
keyword3
.
put
(
"value"
,
time
);
keyword3
.
put
(
"color"
,
"#173177"
);
dataMap
.
put
(
"keyword3"
,
keyword3
);
JSONObject
remark
=
new
JSONObject
();
remark
.
put
(
"value"
,
"知微情报监测服务"
);
dataMap
.
put
(
"remark"
,
remark
);
List
<
String
>
userList
=
getUserList
();
if
(
userList
!=
null
&&
!
userList
.
isEmpty
())
{
for
(
String
openId
:
userList
)
{
Template
template
=
new
Template
();
template
.
setTouser
(
openId
);
if
(
url
!=
null
){
template
.
setUrl
(
url
);
}
template
.
setTemplate_id
(
WechatConstant
.
WECHAT_TEMPLATEID_EARLY_IT
);
template
.
setData
(
dataMap
);
JSONObject
templateJson
=
(
JSONObject
)
JSONObject
.
toJSON
(
template
);
WechatCodeUtil
.
sendDataJson
(
templateJson
);
}
}
else
{
log
.
info
(
"知乎推送拉取用户列表失败"
);
}
}
/**
* @Title: getUserList
* @author hero
* @Description: 用户列表
* @param @param projectName
* @param @return 设定文件
* @return List<String> 返回类型
*/
private
static
List
<
String
>
getUserList
()
{
List
<
String
>
userList
=
wechatUserDao
.
getWechatUserByGroup
(
"LP组"
);
if
(
userList
==
null
){
userList
=
WechatCodeUtil
.
getUserListByGroupName
(
"LP组"
);
}
return
userList
;
}
}
//package com.zhiwei.searchhotcrawler.timer;
//
//import java.util.Calendar;
//import java.util.Date;
//import java.util.HashMap;
//import java.util.List;
//import java.util.Map;
//
//import lombok.extern.log4j.Log4j2;
//
//import com.alibaba.fastjson.JSONObject;
//import com.mongodb.DBObject;
//import com.zhiwei.searchhotcrawler.bean.HotSearchType;
//import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
//import com.zhiwei.searchhotcrawler.util.Template;
//import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
//import com.zhiwei.searchhotcrawler.util.WechatConstant;
//import com.zhiwei.tools.timeparse.TimeParse;
//import com.zhiwei.tools.tools.ZhiWeiTools;
//
//@Log4j2
//public class SendZhihuHotSearchRun extends Thread{
// private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
// private static WechatUserDao wechatUserDao = new WechatUserDao();
// @Override
// public void run() {
//
// while(true) {
// try {
// Calendar calendar = Calendar.getInstance();
// int hour = calendar.get(Calendar.HOUR_OF_DAY);
// log.info("知乎推送,当前系统时间为:"+hour);
// if(hour > 6 && hour <23){
// List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.知乎热搜.name());
// if(list!=null && !list.isEmpty()){
// for(DBObject zhihu : list){
// String title = zhihu.get("display_query").toString();
// String time = TimeParse.dateFormartString((Date)zhihu.get("time"), "yyyy-MM-dd HH:mm:ss");
// String url = zhihu.get("_id").toString();
// if(calendar.get(Calendar.HOUR_OF_DAY) > 6 && calendar.get(Calendar.HOUR_OF_DAY) < 23){
// sendTemplateByUserIds(title, time, url);
// }
// }
// }else{
// log.info("知乎最近一小时无数据");
// sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
// }
// }
// ZhiWeiTools.sleep(1*60*60*1000);
// } catch (Exception e) {
// log.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
// ZhiWeiTools.sleep(1*60*60*1000);
// }
// }
// }
//
// /**
// * @Title: sendTemplateByUserIds
// * @author hero
// * @Description: 发送模版消息
// * @param @param microTouTiao
// * @param @param userList 设定文件
// * @return void 返回类型
// */
// public static void sendTemplateByUserIds(String title,String time, String url) {
//
// Map<String, Object> dataMap = new HashMap<>();
// JSONObject first = new JSONObject();
// first.put("value", "您好,有一条来自知乎热搜榜的预警通知。");
// dataMap.put("first", first);
// JSONObject keyword1 = new JSONObject();
// keyword1.put("value", title);
// keyword1.put("color", "#173177");
// dataMap.put("keyword1", keyword1);
// JSONObject keyword2 = new JSONObject();
// keyword2.put("value", "知乎热搜榜");
// keyword2.put("color", "#173177");
// dataMap.put("keyword2", keyword2);
// JSONObject keyword3 = new JSONObject();
// keyword3.put("value", time);
// keyword3.put("color", "#173177");
// dataMap.put("keyword3", keyword3);
// JSONObject remark = new JSONObject();
// remark.put("value", "知微情报监测服务");
// dataMap.put("remark", remark);
//
// List<String> userList = getUserList();
// if(userList!=null && !userList.isEmpty()) {
// for (String openId : userList) {
// Template template = new Template();
// template.setTouser(openId);
// if(url!=null){
// template.setUrl(url);
// }
// template.setTemplate_id(WechatConstant.WECHAT_TEMPLATEID_EARLY_IT);
// template.setData(dataMap);
//
// JSONObject templateJson = (JSONObject)JSONObject.toJSON(template);
// WechatCodeUtil.sendDataJson(templateJson);
// }
// }else {
// log.info("知乎推送拉取用户列表失败");
// }
//
// }
//
// /**
// * @Title: getUserList
// * @author hero
// * @Description: 用户列表
// * @param @param projectName
// * @param @return 设定文件
// * @return List<String> 返回类型
// */
//// private static List<String> getUserList()
//// {
//// List<String> userList = wechatUserDao.getWechatUserByGroup("LP组");
//// if(userList==null){
//// userList = WechatCodeUtil.getUserListByGroupName("LP组");
//// }
//// return userList;
//// }
//
//}
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
View file @
9ce337bb
...
...
@@ -5,7 +5,9 @@ import java.util.Date;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -37,12 +39,13 @@ public class SougoHotSearchRun extends Thread {
private
void
getHotList
()
{
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
log
.
info
(
"搜狗微信采集开始........"
);
List
<
HotSearchList
>
list
=
SougoHotSearchCrawler
.
sougoHotSearch
();
log
.
info
(
"{}, 此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
data
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
sougoHotSearch
:
list
){
D
BObject
doc
=
new
BasicDBObjec
t
();
D
ocument
doc
=
new
Documen
t
();
doc
.
put
(
"_id"
,
sougoHotSearch
.
getId
());
doc
.
put
(
"name"
,
sougoHotSearch
.
getName
());
doc
.
put
(
"url"
,
sougoHotSearch
.
getUrl
());
...
...
@@ -51,6 +54,7 @@ public class SougoHotSearchRun extends Thread {
doc
.
put
(
"rank"
,
sougoHotSearch
.
getRank
());
doc
.
put
(
"type"
,
sougoHotSearch
.
getType
());
data
.
add
(
doc
);
hotSearchCacheDAO
.
addAndUpdateData
(
doc
);
}
hotSearchDAO
.
addHotSearchList
(
data
);
log
.
info
(
"搜狗微信采集结束........"
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/UpdateWechatUserRun.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.Calendar
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map.Entry
;
import
lombok.extern.log4j.Log4j2
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.searchhotcrawler.dao.WechatUserDao
;
import
com.zhiwei.searchhotcrawler.util.WechatCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
@Log4j2
public
class
UpdateWechatUserRun
extends
Thread
{
private
WechatUserDao
wechatUserDao
=
new
WechatUserDao
();
@Override
public
void
run
()
{
log
.
info
(
"开始更新用户数据"
);
while
(
true
)
{
try
{
Calendar
calendar
=
Calendar
.
getInstance
();
int
hour
=
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
);
if
(
hour
>
6
){
Map
<
String
,
Integer
>
groupMap
=
WechatCodeUtil
.
getAllGroupIp
();
log
.
info
(
"此公众号的分组数量为:::{}"
,
groupMap
.
size
());
if
(!
groupMap
.
isEmpty
()
&&
groupMap
!=
null
){
for
(
Entry
<
String
,
Integer
>
group
:
groupMap
.
entrySet
()){
log
.
info
(
"此公众号的分组名称及IP为:::{},{}"
,
group
.
getKey
(),
group
.
getValue
());
List
<
String
>
userList
=
WechatCodeUtil
.
getUserListByGroupId
(
group
.
getValue
());
log
.
info
(
"{},此分组下的用户数量为::{}"
,
group
.
getKey
(),
userList
.
size
());
if
(
userList
!=
null
&&
!
userList
.
isEmpty
()){
wechatUserDao
.
addWechatUser
(
userList
,
group
.
getKey
(),
group
.
getValue
());
}
}
}
}
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
}
catch
(
Exception
e
)
{
log
.
debug
(
"知乎热搜推送出现问题,问题为:::{}"
,
e
.
fillInStackTrace
());
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
continue
;
}
}
}
}
//package com.zhiwei.searchhotcrawler.timer;
//
//import java.util.Calendar;
//import java.util.List;
//import java.util.Map;
//import java.util.Map.Entry;
//
//import lombok.extern.log4j.Log4j2;
//
//import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
//import com.zhiwei.tools.tools.ZhiWeiTools;
//
//@Log4j2
//public class UpdateWechatUserRun extends Thread{
// private WechatUserDao wechatUserDao = new WechatUserDao();
// @Override
// public void run() {
// log.info("开始更新用户数据");
// while(true) {
// try {
// Calendar calendar = Calendar.getInstance();
// int hour = calendar.get(Calendar.HOUR_OF_DAY);
// if(hour > 6 ){
// Map<String,Integer> groupMap = WechatCodeUtil.getAllGroupIp();
// log.info("此公众号的分组数量为:::{}", groupMap.size());
// if(!groupMap.isEmpty() && groupMap!=null){
// for(Entry<String,Integer> group : groupMap.entrySet()){
// log.info("此公众号的分组名称及IP为:::{},{}", group.getKey(), group.getValue());
// List<String> userList = WechatCodeUtil.getUserListByGroupId(group.getValue());
// log.info("{},此分组下的用户数量为::{}", group.getKey(), userList.size());
// if(userList!=null && !userList.isEmpty()){
// wechatUserDao.addWechatUser(userList, group.getKey(), group.getValue());
// }
// }
// }
// }
// ZhiWeiTools.sleep(1*60*60*1000);
// } catch (Exception e) {
// log.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
// ZhiWeiTools.sleep(1*60*60*1000);
// continue;
// }
// }
// }
//
//
//}
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
View file @
9ce337bb
...
...
@@ -5,16 +5,13 @@ import java.util.Date;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
lombok.extern.log4j.Log4j2
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
org.bson.Document
;
@Log4j2
public
class
WeiboHotSearchRun
extends
Thread
{
...
...
@@ -38,13 +35,13 @@ public class WeiboHotSearchRun extends Thread{
private
void
getHotList
()
{
log
.
info
(
"微博话题采集开始........"
);
HotSearchListDAO
weiboHotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
List
<
HotSearchList
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearchByPhone
();
log
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
data
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
weiboHotSearch
:
list
){
int
changeCount
=
weiboHotSearchDAO
.
getChangeCount
(
weiboHotSearch
);
DBObject
doc
=
new
BasicDBObject
();
Document
doc
=
new
Document
();
doc
.
put
(
"_id"
,
weiboHotSearch
.
getId
());
doc
.
put
(
"name"
,
weiboHotSearch
.
getName
());
doc
.
put
(
"url"
,
weiboHotSearch
.
getUrl
());
...
...
@@ -52,11 +49,11 @@ public class WeiboHotSearchRun extends Thread{
doc
.
put
(
"hot"
,
weiboHotSearch
.
getHot
());
doc
.
put
(
"day"
,
weiboHotSearch
.
getDay
());
doc
.
put
(
"time"
,
weiboHotSearch
.
getTime
());
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"rank"
,
weiboHotSearch
.
getRank
());
doc
.
put
(
"type"
,
weiboHotSearch
.
getType
());
doc
.
put
(
"icon"
,
weiboHotSearch
.
getIcon
());
data
.
add
(
doc
);
hotSearchCacheDAO
.
addAndUpdateData
(
doc
);
}
weiboHotSearchDAO
.
addHotSearchList
(
data
);
log
.
info
(
"微博话题采集结束........"
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboSuperTopicRun.java
View file @
9ce337bb
...
...
@@ -9,11 +9,7 @@ import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
import
com.zhiwei.searchhotcrawler.crawler.WeiboSuperTopicCrawler
;
import
com.zhiwei.searchhotcrawler.dao.WeiboSuperTopicDAO
;
import
lombok.extern.log4j.Log4j2
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
org.bson.Document
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
@Log4j2
...
...
@@ -40,10 +36,10 @@ public class WeiboSuperTopicRun extends Thread{
log
.
info
(
"微博超话采集开始........"
);
List
<
WeiboSuperTopic
>
list
=
WeiboSuperTopicCrawler
.
startCrawler
();
log
.
info
(
"{}, 微博超话此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
data
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
data
=
new
ArrayList
<>();
for
(
WeiboSuperTopic
topic
:
list
){
log
.
info
(
"topic::::{}"
,
topic
);
D
BObject
doc
=
new
BasicDBObjec
t
();
D
ocument
doc
=
new
Documen
t
();
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"name"
,
topic
.
getTopicName
());
doc
.
put
(
"rank"
,
topic
.
getRank
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboTopicCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
...
...
@@ -32,12 +33,13 @@ public class WeiboTopicRun extends Thread{
private
void
getTopicList
()
{
HotSearchListDAO
weiboHotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
log
.
info
(
"微博话题采集开始........"
);
List
<
HotSearchList
>
list
=
WeiboTopicCrawler
.
startCrawlerByPhone
();
log
.
info
(
"{}, 微博话题此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
data
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
topic
:
list
){
D
BObject
doc
=
new
BasicDBObjec
t
();
D
ocument
doc
=
new
Documen
t
();
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"name"
,
topic
.
getName
());
doc
.
put
(
"url"
,
topic
.
getUrl
());
...
...
@@ -50,6 +52,7 @@ public class WeiboTopicRun extends Thread{
doc
.
put
(
"topic_lead"
,
topic
.
getTopicLead
());
doc
.
put
(
"comment_count"
,
topic
.
getCommentCount
());
data
.
add
(
doc
);
hotSearchCacheDAO
.
addAndUpdateData
(
doc
);
}
weiboHotSearchDAO
.
addHotSearchList
(
data
);
log
.
info
(
"微博话题采集结束........"
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
...
...
@@ -36,13 +39,15 @@ public class ZhihuHotSearchRun extends Thread{
private
void
getHotList
()
{
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
log
.
info
(
"知乎话题采集开始...,当前线程名字:{}"
,
Thread
.
currentThread
().
getName
());
List
<
HotSearchList
>
list
=
ZhihuHotSearchCrawler
.
getZhihuHotList
();
List
<
HotSearchList
>
mobilelist
=
ZhihuHotSearchCrawler
.
getMobileZhihuHotList
();
list
.
addAll
(
mobilelist
);
log
.
info
(
"{}, 知乎此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
Document
>
dataList
=
new
ArrayList
<>();
for
(
HotSearchList
zhihuHotSearch
:
list
){
D
BObject
zhihu
=
new
BasicDBObjec
t
();
D
ocument
zhihu
=
new
Documen
t
();
zhihu
.
put
(
"_id"
,
zhihuHotSearch
.
getId
());
zhihu
.
put
(
"name"
,
zhihuHotSearch
.
getName
());
zhihu
.
put
(
"url"
,
zhihuHotSearch
.
getUrl
());
...
...
@@ -50,11 +55,12 @@ public class ZhihuHotSearchRun extends Thread{
zhihu
.
put
(
"hot"
,
zhihuHotSearch
.
getHot
());
zhihu
.
put
(
"day"
,
zhihuHotSearch
.
getDay
());
zhihu
.
put
(
"time"
,
zhihuHotSearch
.
getTime
());
zhihu
.
put
(
"changeCount"
,
0
);
zhihu
.
put
(
"rank"
,
zhihuHotSearch
.
getRank
());
zhihu
.
put
(
"type"
,
zhihuHotSearch
.
getType
());
hotSearchDAO
.
addHotSearch
(
zhihu
);
dataList
.
add
(
zhihu
);
hotSearchCacheDAO
.
addAndUpdateData
(
zhihu
);
}
hotSearchDAO
.
addHotSearchList
(
dataList
);
log
.
info
(
"知乎话题采集结束........"
);
}
...
...
src/main/resources/db.properties
View file @
9ce337bb
#mongoIp=202.107.192.94
mongoIp
=
192.168.0.101
mongoPort
=
30000
#mongoIp=192.168.0.81
#mongoPort=27017
db.username
=
searchhotcrawleruser
db.paasword
=
searchhotcrawler1q2w3e4r
db.certifiedDB
=
admin
#local service
#mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@202.107.192.94:30000/istarshine_data?authSource=admin&authMechanism=SCRAM-SHA-1
#local
#mongoUri=mongodb://192.168.0.81:27017/istarshine_data
#service
mongoUri
=
mongodb://istarshineuser:istarshine1q2w3e4r@192.168.0.101:30000,192.168.0.106:30000,192.168.0.108:30000/istarshine_data?authSource=admin&authMechanism=SCRAM-SHA-1
dbName
=
hot_search_list
searchCollName
=
hot_search_list
searchCacheCollName
=
hot_search_cache
topicCollName
=
topic_list
collWechatUserName
=
wechat_user
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment