Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
9ce337bb
Commit
9ce337bb
authored
Mar 10, 2020
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加总热搜表缓存+mongo修改为多节点读取+微博话题类型错误修复
parent
dd95f27b
Show whitespace changes
Inline
Side-by-side
Showing
24 changed files
with
712 additions
and
794 deletions
+712
-794
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchCache.java
+100
-0
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchList.java
+1
-6
src/main/java/com/zhiwei/searchhotcrawler/config/DBConfig.java
+5
-12
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+9
-8
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
+126
-0
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchListDAO.java
+21
-104
src/main/java/com/zhiwei/searchhotcrawler/dao/WechatUserDao.java
+0
-67
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiboSuperTopicDAO.java
+17
-47
src/main/java/com/zhiwei/searchhotcrawler/dbtemplate/MongoDBTemplate.java
+77
-75
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+7
-6
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchListTest.java
+0
-123
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+7
-4
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+7
-4
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
+122
-125
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
+124
-127
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
+6
-2
src/main/java/com/zhiwei/searchhotcrawler/timer/UpdateWechatUserRun.java
+47
-50
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
+6
-9
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboSuperTopicRun.java
+3
-7
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
+7
-4
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
+9
-3
src/main/resources/db.properties
+8
-8
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchCache.java
0 → 100644
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
import
lombok.Data
;
import
lombok.ToString
;
import
java.util.Date
;
/**
 * Aggregated ("cache") view of a single hot-search entry: one object per
 * keyword/category pair carrying its latest and best-ever rank and count.
 *
 * <p>Getters, setters, {@code equals}/{@code hashCode} and {@code toString}
 * are generated by Lombok.
 */
@ToString
@Data
public class HotSearchCache {

    /** Primary key; the constructor builds it as {@code name + "_" + type}. */
    private String id;

    /** Link to the message. */
    private String url;

    /** Hot-search keyword; also used as part of the primary key. */
    private String name;

    /** Lead (introductory) text of the hot search or topic. */
    private String topicLead;

    /** Highest hot-search count observed. */
    private Integer highestCount;

    /** Most recent hot-search heat value. */
    private Integer lastCount;

    /** Status: {@code true} for a hot search, {@code false} for a "real-time rising" entry. */
    private Boolean hot;

    /** Time the topic started. */
    private Date startTime;

    /** Time the topic ended. */
    private Date endTime;

    /** Best rank reached. */
    private Integer highestRank;

    /** Most recent rank. */
    private Integer lastRank;

    /** Hot-search category. */
    private String type;

    /** How long the entry has stayed on the list (unit not shown here; presumably minutes - TODO confirm). */
    private Integer duration;

    /**
     * Creates a cache entry and derives its {@link #id} from {@code name} and {@code type}.
     *
     * @param url          link to the message
     * @param name         hot-search keyword
     * @param topicLead    lead text of the hot search or topic
     * @param highestCount highest count observed
     * @param lastCount    most recent count
     * @param hot          {@code true} for a hot search, {@code false} for a rising entry
     * @param startTime    topic start time
     * @param endTime      topic end time
     * @param highestRank  best rank reached
     * @param lastRank     most recent rank
     * @param type         hot-search category
     * @param duration     time spent on the list
     */
    public HotSearchCache(String url, String name, String topicLead, Integer highestCount,
                          Integer lastCount, Boolean hot, Date startTime, Date endTime,
                          Integer highestRank, Integer lastRank, String type, Integer duration) {
        this.id = name + "_" + type;
        this.url = url;
        this.name = name;
        this.topicLead = topicLead;
        this.highestCount = highestCount;
        this.lastCount = lastCount;
        this.hot = hot;
        this.startTime = startTime;
        this.endTime = endTime;
        this.highestRank = highestRank;
        this.lastRank = lastRank;
        this.type = type;
        this.duration = duration;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchList.java
View file @
9ce337bb
...
@@ -40,7 +40,7 @@ public class HotSearchList implements Serializable{
...
@@ -40,7 +40,7 @@ public class HotSearchList implements Serializable{
private
String
topicLead
;
private
String
topicLead
;
/**
/**
*
时时
热搜量
* 热搜量
*/
*/
private
Integer
count
;
private
Integer
count
;
...
@@ -60,11 +60,6 @@ public class HotSearchList implements Serializable{
...
@@ -60,11 +60,6 @@ public class HotSearchList implements Serializable{
private
Date
time
;
private
Date
time
;
/**
/**
* 据上分钟变化量
*/
private
Integer
changeCount
;
/**
* 排名
* 排名
*/
*/
private
Integer
rank
;
private
Integer
rank
;
...
...
src/main/java/com/zhiwei/searchhotcrawler/config/Config.java
→
src/main/java/com/zhiwei/searchhotcrawler/config/
DB
Config.java
View file @
9ce337bb
...
@@ -3,7 +3,7 @@ package com.zhiwei.searchhotcrawler.config;
...
@@ -3,7 +3,7 @@ package com.zhiwei.searchhotcrawler.config;
import
java.io.InputStream
;
import
java.io.InputStream
;
import
java.util.Properties
;
import
java.util.Properties
;
public
class
Config
{
public
class
DB
Config
{
static
{
static
{
Properties
conf
=
null
;
Properties
conf
=
null
;
try
{
try
{
...
@@ -12,29 +12,22 @@ public class Config {
...
@@ -12,29 +12,22 @@ public class Config {
conf
=
new
Properties
();
conf
=
new
Properties
();
conf
.
load
(
is
);
conf
.
load
(
is
);
is
.
close
();
is
.
close
();
mongoIp
=
conf
.
getProperty
(
"mongoIp"
);
mongoUri
=
conf
.
getProperty
(
"mongoUri"
);
mongoPort
=
Integer
.
valueOf
(
conf
.
getProperty
(
"mongoPort"
));
userName
=
conf
.
getProperty
(
"db.username"
);
userPwd
=
conf
.
getProperty
(
"db.paasword"
);
authDB
=
conf
.
getProperty
(
"db.certifiedDB"
);
dbName
=
conf
.
getProperty
(
"dbName"
);
dbName
=
conf
.
getProperty
(
"dbName"
);
searchCollName
=
conf
.
getProperty
(
"searchCollName"
);
searchCollName
=
conf
.
getProperty
(
"searchCollName"
);
searchCacheCollName
=
conf
.
getProperty
(
"searchCacheCollName"
);
topicCollName
=
conf
.
getProperty
(
"topicCollName"
);
topicCollName
=
conf
.
getProperty
(
"topicCollName"
);
collWechatUserName
=
conf
.
getProperty
(
"collWechatUserName"
);
collWechatUserName
=
conf
.
getProperty
(
"collWechatUserName"
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
}
}
public
static
String
mongoIp
;
public
static
String
mongoUri
;
public
static
int
mongoPort
;
public
static
String
userName
;
public
static
String
userPwd
;
public
static
String
authDB
;
public
static
String
dbName
;
public
static
String
dbName
;
public
static
String
searchCollName
;
public
static
String
searchCollName
;
public
static
String
searchCacheCollName
;
public
static
String
topicCollName
;
public
static
String
topicCollName
;
public
static
String
collWechatUserName
;
public
static
String
collWechatUserName
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.*
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
...
@@ -107,6 +103,8 @@ public class WeiboHotSearchCrawler {
...
@@ -107,6 +103,8 @@ public class WeiboHotSearchCrawler {
* @return void 返回类型
* @return void 返回类型
*/
*/
public
static
List
<
HotSearchList
>
weiboHotSearchByPhone
(){
public
static
List
<
HotSearchList
>
weiboHotSearchByPhone
(){
for
(
int
count
=
0
;
count
<=
5
;
count
++){
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"
;
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"
;
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
);
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
);
...
@@ -123,9 +121,10 @@ public class WeiboHotSearchCrawler {
...
@@ -123,9 +121,10 @@ public class WeiboHotSearchCrawler {
try
{
try
{
JSONObject
card
=
cards
.
getJSONObject
(
i
);
JSONObject
card
=
cards
.
getJSONObject
(
i
);
JSONArray
cardGroup
=
card
.
getJSONArray
(
"card_group"
);
JSONArray
cardGroup
=
card
.
getJSONArray
(
"card_group"
);
if
(
Objects
.
nonNull
(
cardGroup
)
&&
!
cardGroup
.
isEmpty
()){
String
title
=
card
.
getString
(
"title"
);
String
title
=
card
.
getString
(
"title"
);
boolean
hot
=
true
;
boolean
hot
=
true
;
if
(
title
.
contains
(
"实时上升热点"
)){
if
(
Objects
.
nonNull
(
title
)
&&
title
.
contains
(
"实时上升热点"
)){
hot
=
false
;
hot
=
false
;
rank
=
50
;
rank
=
50
;
}
}
...
@@ -142,6 +141,9 @@ public class WeiboHotSearchCrawler {
...
@@ -142,6 +141,9 @@ public class WeiboHotSearchCrawler {
result
.
add
(
hotSearch
);
result
.
add
(
hotSearch
);
rank
++;
rank
++;
}
}
}
else
{
log
.
info
(
"card 数据结构为:{}"
,
card
);
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析微博时时热搜时出现解析错误"
,
e
);
log
.
error
(
"解析微博时时热搜时出现解析错误"
,
e
);
continue
;
continue
;
...
@@ -150,14 +152,13 @@ public class WeiboHotSearchCrawler {
...
@@ -150,14 +152,13 @@ public class WeiboHotSearchCrawler {
return
result
;
return
result
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析微博时时热搜时出现解析错误,数据不是json结构"
,
e
);
log
.
error
(
"解析微博时时热搜时出现解析错误,数据不是json结构"
,
e
);
return
Collections
.
emptyList
();
}
}
}
else
{
}
else
{
log
.
info
(
"解析微博时时热搜时出现解析错误,页面结构有问题"
);
log
.
info
(
"解析微博时时热搜时出现解析错误,页面结构有问题"
);
}
}
}
catch
(
IOException
e1
)
{
}
catch
(
IOException
e1
)
{
log
.
error
(
"解析微博时时热搜时出现连接失败"
,
e1
);
log
.
error
(
"解析微博时时热搜时出现连接失败"
,
e1
);
return
Collections
.
emptyList
();
}
}
}
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
View file @
9ce337bb
...
@@ -61,7 +61,7 @@ public class WeiboSuperTopicCrawler {
...
@@ -61,7 +61,7 @@ public class WeiboSuperTopicCrawler {
//重试三次
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
for
(
int
retryTimes
=
1
;
retryTimes
<=
3
;
retryTimes
++)
{
try
{
try
{
System
.
out
.
println
(
"pageUrl=========="
+
pageUrl
);
//
System.out.println("pageUrl=========="+pageUrl);
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
pageUrl
,
headMap
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
pageUrl
,
headMap
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"desc1"
))
{
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"desc1"
))
{
topicList
.
addAll
(
parseTopicRankHtml
(
page
,
htmlBody
,
type
));
topicList
.
addAll
(
parseTopicRankHtml
(
page
,
htmlBody
,
type
));
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
View file @
9ce337bb
...
@@ -134,7 +134,7 @@ public class WeiboTopicCrawler {
...
@@ -134,7 +134,7 @@ public class WeiboTopicCrawler {
//重试三次
//重试三次
for
(
int
retryTimes
=
1
;
retryTimes
<=
5
;
retryTimes
++)
{
for
(
int
retryTimes
=
1
;
retryTimes
<=
5
;
retryTimes
++)
{
try
{
try
{
log
.
info
(
"pageUrl::{}"
,
pageUrl
);
//
log.info("pageUrl::{}", pageUrl);
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
pageUrl
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
pageUrl
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"top_mark_text"
))
{
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"top_mark_text"
))
{
topicList
.
addAll
(
parseTopicHtml
(
htmlBody
));
topicList
.
addAll
(
parseTopicHtml
(
htmlBody
));
...
@@ -202,7 +202,7 @@ public class WeiboTopicCrawler {
...
@@ -202,7 +202,7 @@ public class WeiboTopicCrawler {
}
}
return
topicList
;
return
topicList
;
}
else
{
}
else
{
log
.
info
(
"html:{}"
,
htmlBody
);
//
log.info("html:{}",htmlBody);
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析榜单列表页面时出现错误,错误为:{}"
,
e
);
log
.
error
(
"解析榜单列表页面时出现错误,错误为:{}"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
0 → 100644
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.mongodb.client.MongoCollection
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.config.DBConfig
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.Objects
;
/**
* 热搜基础结果表
*/
@Log4j2
public
class
HotSearchCacheDAO
{
private
static
MongoCollection
collection
=
MongoDBTemplate
.
getCollection
(
DBConfig
.
dbName
,
DBConfig
.
searchCacheCollName
);
/**
* 添加及更新相应数据表中的数据
* @param document
*/
public
void
addAndUpdateData
(
Document
document
){
try
{
String
name
=
document
.
getString
(
"name"
);
String
type
=
document
.
getString
(
"type"
);
int
lastRank
=
document
.
getInteger
(
"rank"
)!=
null
?
document
.
getInteger
(
"rank"
):
-
1
;
int
lastCount
=
document
.
getInteger
(
"count"
)!=
null
?
document
.
getInteger
(
"count"
):
-
1
;
Date
startTime
=
document
.
getDate
(
"time"
);
Date
endTime
=
new
Date
(
startTime
.
getTime
()
+
(
60
*
1000
));
String
topicLead
=
document
.
getString
(
"topic_lead"
)!=
null
?
document
.
getString
(
"topic_lead"
):
null
;
boolean
hot
=
document
.
getBoolean
(
"hot"
)!=
null
?
document
.
getBoolean
(
"hot"
):
true
;
String
url
=
document
.
getString
(
"url"
)!=
null
?
document
.
getString
(
"url"
):
null
;
String
id
=
name
+
"_"
+
type
;
Document
query
=
new
Document
(
"_id"
,
id
);
Document
nowDoc
=
(
Document
)
collection
.
find
(
query
).
first
();
if
(
Objects
.
nonNull
(
nowDoc
))
{
int
highestRank
=
nowDoc
.
getInteger
(
"highestRank"
);
int
highestCount
=
nowDoc
.
getInteger
(
"highestCount"
);
//判断最大热度值
if
(
lastCount
>
highestCount
)
{
highestCount
=
lastCount
;
}
//判断最高排名
if
(
lastRank
<
highestRank
)
{
highestRank
=
lastRank
;
}
//计算热搜时长
int
duration
=
nowDoc
.
getInteger
(
"duration"
);
int
durationNow
=
getDuration
(
type
,
duration
);
endTime
=
new
Date
(
new
Date
().
getTime
()
+
(
60
*
1000
));
//更新相应信息
nowDoc
.
put
(
"endTime"
,
endTime
);
nowDoc
.
put
(
"lastRank"
,
lastRank
);
nowDoc
.
put
(
"lastCount"
,
lastCount
);
nowDoc
.
put
(
"highestRank"
,
highestRank
);
nowDoc
.
put
(
"highestCount"
,
highestCount
);
nowDoc
.
put
(
"duration"
,
durationNow
);
collection
.
replaceOne
(
query
,
nowDoc
);
}
else
{
nowDoc
=
new
Document
();
int
durationNow
=
getDuration
(
type
,
0
);
nowDoc
.
put
(
"_id"
,
id
);
nowDoc
.
put
(
"url"
,
url
);
nowDoc
.
put
(
"name"
,
name
);
nowDoc
.
put
(
"hot"
,
hot
);
nowDoc
.
put
(
"topicLead"
,
topicLead
);
nowDoc
.
put
(
"type"
,
type
);
nowDoc
.
put
(
"lastRank"
,
lastRank
);
nowDoc
.
put
(
"highestRank"
,
lastRank
);
nowDoc
.
put
(
"lastCount"
,
lastCount
);
nowDoc
.
put
(
"highestCount"
,
lastCount
);
nowDoc
.
put
(
"startTime"
,
startTime
);
nowDoc
.
put
(
"endTime"
,
endTime
);
nowDoc
.
put
(
"duration"
,
durationNow
);
collection
.
insertOne
(
nowDoc
);
}
}
catch
(
Exception
e
){
log
.
info
(
"数据存储时出错:{}"
,
e
);
}
}
/**
* 计算热搜时长
* @param type
* @param duration
* @return
*/
private
int
getDuration
(
String
type
,
int
duration
){
switch
(
type
){
case
"微博热搜"
:
duration
=
duration
+
1
;
break
;
case
"百度热搜"
:
duration
=
duration
+
5
;
break
;
case
"知乎热搜"
:
duration
=
duration
+
10
;
break
;
case
"抖音热搜"
:
duration
=
duration
+
10
;
break
;
case
"搜狗微信热搜"
:
duration
=
duration
+
5
;
break
;
case
"微博话题"
:
duration
=
duration
+
3
;
break
;
default
:
duration
=
duration
+
1
;
}
return
duration
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchListDAO.java
View file @
9ce337bb
...
@@ -6,7 +6,12 @@ import java.util.Date;
...
@@ -6,7 +6,12 @@ import java.util.Date;
import
java.util.List
;
import
java.util.List
;
import
java.util.Objects
;
import
java.util.Objects
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.mongodb.client.ListIndexesIterable
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoDatabase
;
import
com.mongodb.client.model.IndexOptions
;
import
com.zhiwei.searchhotcrawler.config.DBConfig
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.BasicDBObject
;
...
@@ -14,138 +19,50 @@ import com.mongodb.DBCursor;
...
@@ -14,138 +19,50 @@ import com.mongodb.DBCursor;
import
com.mongodb.DBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.cache.CacheManager
;
import
com.zhiwei.searchhotcrawler.cache.CacheManager
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
org.bson.BsonDocument
;
import
org.bson.Document
;
import
org.bson.conversions.Bson
;
@Log4j2
@Log4j2
public
class
HotSearchListDAO
extends
MongoDBTemplate
{
public
class
HotSearchListDAO
{
public
static
MongoDatabase
mongoDatabase
=
MongoDBTemplate
.
getDB
(
DBConfig
.
dbName
);
public
static
MongoCollection
mongoCollection
;
public
HotSearchListDAO
()
{
public
HotSearchListDAO
()
{
super
();
super
.
setDbName
(
Config
.
dbName
);
String
time
=
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd"
);
String
time
=
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd"
);
String
year
=
time
.
substring
(
0
,
4
);
String
year
=
time
.
substring
(
0
,
4
);
String
month
=
time
.
substring
(
5
,
7
);
String
month
=
time
.
substring
(
5
,
7
);
String
collName
=
Config
.
searchCollName
+
year
+
"_"
+
month
;
String
collName
=
DB
Config
.
searchCollName
+
year
+
"_"
+
month
;
super
.
setCollName
(
collName
);
mongoCollection
=
mongoDatabase
.
getCollection
(
collName
);
//给数据表创建索引
//给数据表创建索引
createIndex
(
);
MongoDBTemplate
.
createIndex
(
DBConfig
.
dbName
,
collName
);
}
}
/**
* 初次创建表及创建相应的索引
*/
private
void
createIndex
(){
List
<
DBObject
>
indexList
=
this
.
getReadColl
().
getIndexInfo
();
if
(
Objects
.
isNull
(
indexList
)
&&
indexList
.
isEmpty
()){
DBObject
countIndexDoc
=
new
BasicDBObject
();
countIndexDoc
.
put
(
"count"
,
-
1
);
DBObject
timeIndexDoc
=
new
BasicDBObject
();
timeIndexDoc
.
put
(
"time"
,
-
1
);
DBObject
rankIndexDoc
=
new
BasicDBObject
();
rankIndexDoc
.
put
(
"rank"
,
-
1
);
DBObject
nameIndexDoc
=
new
BasicDBObject
();
nameIndexDoc
.
put
(
"name"
,
-
1
);
DBObject
typeIndexDoc
=
new
BasicDBObject
();
typeIndexDoc
.
put
(
"type"
,
-
1
);
try
{
super
.
getReadColl
().
createIndex
(
countIndexDoc
,
new
BasicDBObject
(
"name"
,
"count_desc"
));
super
.
getReadColl
().
createIndex
(
timeIndexDoc
,
new
BasicDBObject
(
"name"
,
"time_desc"
));
super
.
getReadColl
().
createIndex
(
rankIndexDoc
,
new
BasicDBObject
(
"name"
,
"rank_desc"
));
super
.
getReadColl
().
createIndex
(
nameIndexDoc
,
new
BasicDBObject
(
"name"
,
"name_desc"
));
super
.
getReadColl
().
createIndex
(
typeIndexDoc
,
new
BasicDBObject
(
"name"
,
"type_desc"
));
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
}
}
/**
/**
* 添加数据入库
* 添加数据入库
* @param list
* @param list
*/
*/
public
void
addHotSearchList
(
List
<
DBObject
>
list
){
public
void
addHotSearchList
(
List
<
Document
>
list
){
try
{
this
.
getReadColl
().
insert
(
list
);
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
public
void
addHotSearch
(
DBObject
doc
){
try
{
this
.
getReadColl
().
insert
(
doc
);
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
/**
* 查询据上次变化量
* @Title: getChangeCount
* @author hero
* @param @param weiboHotSearch
* @param @return 设定文件
* @return int 返回类型
*/
public
int
getChangeCount
(
HotSearchList
weiboHotSearch
){
int
result
=
0
;
DBObject
query
=
new
BasicDBObject
();
query
.
put
(
"name"
,
weiboHotSearch
.
getName
());
DBObject
sort
=
new
BasicDBObject
();
sort
.
put
(
"time"
,
-
1
);
try
{
try
{
DBCursor
cur
=
this
.
getReadColl
().
find
(
query
).
sort
(
sort
).
limit
(
1
);
mongoCollection
.
insertMany
(
list
);
while
(
cur
.
hasNext
()){
DBObject
doc
=
cur
.
next
();
if
(
doc
.
get
(
"count"
)!=
null
)
{
result
=
weiboHotSearch
.
getCount
()
-
Integer
.
valueOf
(
doc
.
get
(
"count"
).
toString
());
break
;
}
}
cur
.
close
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
return
result
;
}
}
return
result
;
}
}
public
void
addHotSearch
(
Document
doc
){
/**
* @Title: getWeiboHotOneHour
* @author hero
* @Description: 查询最近1小时内新增的微博热搜
* @param @return 设定文件
* @return List<DBObject> 返回类型
*/
public
List
<
DBObject
>
getHotOneHour
(
String
type
){
List
<
DBObject
>
list
=
new
ArrayList
<>();
Date
date
=
new
Date
((
new
Date
().
getTime
()-
60
*
60
*
1000
));
DBObject
query
=
new
BasicDBObject
();
query
.
put
(
"time"
,
new
BasicDBObject
(
"$gte"
,
date
));
query
.
put
(
"changeCount"
,
0
);
query
.
put
(
"type"
,
type
);
try
{
try
{
DBCursor
cur
=
this
.
getReadColl
().
find
(
query
);
mongoCollection
.
insertOne
(
doc
);
while
(
cur
.
hasNext
()){
DBObject
doc
=
cur
.
next
();
String
name
=
doc
.
get
(
"name"
).
toString
();
if
(
CacheManager
.
getCacheByKey
(
name
)==
null
){
CacheManager
.
putCache
(
name
,
doc
,
48
*
60
*
60
*
1000
);
list
.
add
(
doc
);
}
}
cur
.
close
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
return
list
;
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/WechatUserDao.java
deleted
100644 → 0
View file @
dd95f27b
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
java.util.Collections
;
import
java.util.List
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
lombok.extern.log4j.Log4j2
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
@Log4j2
public
class
WechatUserDao
extends
MongoDBTemplate
{
public
WechatUserDao
()
{
super
();
super
.
setDbName
(
Config
.
dbName
);
super
.
setCollName
(
Config
.
collWechatUserName
);
}
/**
* 添加分组用户
* @param userlist
* @param groupName
* @param groupId
*/
public
void
addWechatUser
(
List
<
String
>
userlist
,
String
groupName
,
Integer
groupId
){
for
(
int
i
=
0
;
i
<
3
;
i
++){
try
{
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
groupId
+
"-"
+
groupName
);
doc
.
put
(
"groupId"
,
groupId
);
doc
.
put
(
"groupName"
,
groupName
);
doc
.
put
(
"user"
,
userlist
);
this
.
getReadColl
().
save
(
doc
);
break
;
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
}
/**
* 根据分组名称查询分组用户
* @param group
* @return
*/
@SuppressWarnings
(
"unchecked"
)
public
List
<
String
>
getWechatUserByGroup
(
String
group
){
try
{
DBObject
query
=
new
BasicDBObject
();
query
.
put
(
"groupName"
,
group
);
DBObject
doc
=
this
.
getReadColl
().
findOne
(
query
);
if
(
doc
!=
null
){
return
(
List
<
String
>)
doc
.
get
(
"user"
);
}
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
return
Collections
.
emptyList
();
}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiboSuperTopicDAO.java
View file @
9ce337bb
...
@@ -3,76 +3,46 @@ package com.zhiwei.searchhotcrawler.dao;
...
@@ -3,76 +3,46 @@ package com.zhiwei.searchhotcrawler.dao;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Objects
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoDatabase
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.searchhotcrawler.config.DBConfig
;
import
lombok.extern.log4j.Log4j2
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
lombok.extern.log4j.Log4j2
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
org.bson.Document
;
@Log4j2
@Log4j2
public
class
WeiboSuperTopicDAO
extends
MongoDBTemplate
{
public
class
WeiboSuperTopicDAO
{
public
static
MongoDatabase
mongoDatabase
=
MongoDBTemplate
.
getDB
(
DBConfig
.
dbName
);
public
static
MongoCollection
mongoCollection
;
public
WeiboSuperTopicDAO
()
{
public
WeiboSuperTopicDAO
()
{
super
();
super
.
setDbName
(
Config
.
dbName
);
String
time
=
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd"
);
String
time
=
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd"
);
String
year
=
time
.
substring
(
0
,
4
);
String
year
=
time
.
substring
(
0
,
4
);
String
month
=
time
.
substring
(
5
,
7
);
String
month
=
time
.
substring
(
5
,
7
);
String
collName
=
Config
.
topic
CollName
+
year
+
"_"
+
month
;
String
collName
=
DBConfig
.
search
CollName
+
year
+
"_"
+
month
;
super
.
setCollName
(
collName
);
mongoCollection
=
mongoDatabase
.
getCollection
(
collName
);
createIndex
();
//给数据表创建索引
MongoDBTemplate
.
createIndex
(
DBConfig
.
dbName
,
collName
);
}
}
/**
* 初次创建表及创建相应的索引
*/
private
void
createIndex
(){
List
<
DBObject
>
indexList
=
this
.
getReadColl
().
getIndexInfo
();
if
(
Objects
.
isNull
(
indexList
)
&&
indexList
.
isEmpty
()){
DBObject
countIndexDoc
=
new
BasicDBObject
();
countIndexDoc
.
put
(
"score_num"
,
-
1
);
DBObject
timeIndexDoc
=
new
BasicDBObject
();
timeIndexDoc
.
put
(
"time"
,
-
1
);
DBObject
rankIndexDoc
=
new
BasicDBObject
();
rankIndexDoc
.
put
(
"rank"
,
-
1
);
DBObject
nameIndexDoc
=
new
BasicDBObject
();
nameIndexDoc
.
put
(
"name"
,
-
1
);
DBObject
typeIndexDoc
=
new
BasicDBObject
();
typeIndexDoc
.
put
(
"type"
,
-
1
);
try
{
super
.
getReadColl
().
createIndex
(
countIndexDoc
,
new
BasicDBObject
(
"name"
,
"score_desc"
));
super
.
getReadColl
().
createIndex
(
timeIndexDoc
,
new
BasicDBObject
(
"name"
,
"time_desc"
));
super
.
getReadColl
().
createIndex
(
rankIndexDoc
,
new
BasicDBObject
(
"name"
,
"rank_desc"
));
super
.
getReadColl
().
createIndex
(
nameIndexDoc
,
new
BasicDBObject
(
"name"
,
"name_desc"
));
super
.
getReadColl
().
createIndex
(
typeIndexDoc
,
new
BasicDBObject
(
"name"
,
"type_desc"
));
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
}
}
/**
/**
* 添加数据入库
* 添加数据入库
* @param list
* @param list
*/
*/
public
void
addTopicList
(
List
<
D
BObjec
t
>
list
){
public
void
addTopicList
(
List
<
D
ocumen
t
>
list
){
try
{
try
{
this
.
getReadColl
().
insert
(
list
);
mongoCollection
.
insertMany
(
list
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
}
}
public
void
addTopic
(
D
BObjec
t
doc
){
public
void
addTopic
(
D
ocumen
t
doc
){
try
{
try
{
this
.
getReadColl
().
insert
(
doc
);
mongoCollection
.
insertOne
(
doc
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/dbtemplate/MongoDBTemplate.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
dbtemplate
;
package
com
.
zhiwei
.
searchhotcrawler
.
dbtemplate
;
import
java.util.Arrays
;
import
com.mongodb.DB
;
import
com.mongodb.DBCollection
;
import
com.mongodb.Mongo
;
import
com.mongodb.MongoClient
;
import
com.mongodb.MongoClient
;
import
com.mongodb.MongoCredential
;
import
com.mongodb.MongoClientOptions
;
import
com.mongodb.MongoException
;
import
com.mongodb.MongoClientURI
;
import
com.mongodb.ServerAddress
;
import
com.mongodb.WriteConcern
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.mongodb.client.ListIndexesIterable
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoDatabase
;
import
com.mongodb.client.model.IndexOptions
;
import
com.zhiwei.searchhotcrawler.config.DBConfig
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
java.util.Objects
;
@Log4j2
public
enum
MongoDBTemplate
{
instance
;
private
MongoClient
mongoClient
;
static
{
MongoClientOptions
options
=
new
MongoClientOptions
.
Builder
()
.
connectionsPerHost
(
300
)
//连接池设置为300个连接,默认为100
.
connectTimeout
(
15000
)
//连接超时,推荐>3000毫秒
.
maxWaitTime
(
5000
)
.
socketTimeout
(
0
)
// 套接字超时时间,0无限制
.
threadsAllowedToBlockForConnectionMultiplier
(
5000
)
// 线程队列数,如果连接线程排满了队列就会抛出“Out of semaphores to get db”错误。
.
writeConcern
(
WriteConcern
.
W1
)
//
.
build
();
log
.
info
(
"MongoDBTemplate.static initializer : {}"
,
DBConfig
.
mongoUri
);
MongoClientURI
mongoClientURI
=
new
MongoClientURI
(
DBConfig
.
mongoUri
);
instance
.
mongoClient
=
new
MongoClient
(
mongoClientURI
);
}
/**
/**
* 获取DB实例 - 指定DB
*
*
* @Description: MongoDB模板类
* @param databaseName
* @author Tou Tang
* @return
* @date 2014-11-14 下午3:24:40
*/
*/
public
class
MongoDBTemplate
{
public
static
MongoDatabase
getDB
(
String
databaseName
)
{
protected
static
Mongo
reader
;
return
instance
.
mongoClient
.
getDatabase
(
databaseName
);
protected
static
Mongo
writer
;
protected
String
collName
;
protected
String
dbName
;
@SuppressWarnings
(
"deprecation"
)
public
MongoDBTemplate
()
{
try
{
MongoCredential
credential
=
MongoCredential
.
createCredential
(
Config
.
userName
,
Config
.
authDB
,
Config
.
userPwd
.
toCharArray
());
ServerAddress
address
=
new
ServerAddress
(
Config
.
mongoIp
,
Config
.
mongoPort
);
if
(
reader
==
null
)
{
reader
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credential
));
// reader = new MongoClient(address);
}
if
(
writer
==
null
)
{
writer
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credential
));
// writer = new MongoClient(address);
}
}
catch
(
MongoException
e
)
{
e
.
printStackTrace
();
}
}
public
DBCollection
getReadColl
()
{
@SuppressWarnings
(
"deprecation"
)
final
DB
db
=
getReader
().
getDB
(
dbName
);
final
DBCollection
coll
=
db
.
getCollection
(
collName
);
return
coll
;
}
protected
Mongo
getReader
()
{
return
reader
;
}
}
public
DBCollection
getWriteColl
()
{
/**
@SuppressWarnings
(
"deprecation"
)
* 获取collection对象 - 指定Collection
final
DB
db
=
getWriter
().
getDB
(
dbName
);
*
final
DBCollection
coll
=
db
.
getCollection
(
collName
);
* @param databaseName
return
coll
;
* @param collectionName
* @return
}
*/
public
static
MongoCollection
<
Document
>
getCollection
(
String
databaseName
,
String
collectionName
)
{
protected
Mongo
getWriter
()
{
MongoDatabase
db
=
instance
.
mongoClient
.
getDatabase
(
databaseName
);
return
writer
;
return
db
.
getCollection
(
collectionName
)
;
}
}
protected
void
setCollName
(
final
String
collName
)
{
this
.
collName
=
collName
;
}
protected
void
setDbName
(
final
String
dbName
)
{
/**
this
.
dbName
=
dbName
;
* 创建索引
}
* @param databaseName
* @param collectionName
@SuppressWarnings
(
"static-access"
)
*/
protected
void
setReader
(
final
Mongo
reader
)
{
public
static
void
createIndex
(
String
databaseName
,
String
collectionName
){
this
.
reader
=
reader
;
MongoDatabase
db
=
instance
.
mongoClient
.
getDatabase
(
databaseName
);
MongoCollection
mongoCollection
=
db
.
getCollection
(
collectionName
);
ListIndexesIterable
<
Document
>
indexList
=
mongoCollection
.
listIndexes
();
if
(
Objects
.
isNull
(
indexList
)){
Document
countIndexDoc
=
new
Document
();
countIndexDoc
.
put
(
"score_num"
,
-
1
);
Document
timeIndexDoc
=
new
Document
();
timeIndexDoc
.
put
(
"time"
,
-
1
);
Document
rankIndexDoc
=
new
Document
();
rankIndexDoc
.
put
(
"rank"
,
-
1
);
Document
nameIndexDoc
=
new
Document
();
nameIndexDoc
.
put
(
"name"
,
-
1
);
Document
typeIndexDoc
=
new
Document
();
typeIndexDoc
.
put
(
"type"
,
-
1
);
try
{
mongoCollection
.
createIndex
(
countIndexDoc
,
new
IndexOptions
().
name
(
"count_desc"
));
mongoCollection
.
createIndex
(
timeIndexDoc
,
new
IndexOptions
().
name
(
"time_desc"
));
mongoCollection
.
createIndex
(
rankIndexDoc
,
new
IndexOptions
().
name
(
"rank_desc"
));
mongoCollection
.
createIndex
(
nameIndexDoc
,
new
IndexOptions
().
name
(
"name_desc"
));
mongoCollection
.
createIndex
(
typeIndexDoc
,
new
IndexOptions
().
name
(
"type_desc"
));
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
}
@SuppressWarnings
(
"static-access"
)
protected
void
setWriter
(
final
Mongo
writer
)
{
this
.
writer
=
writer
;
}
}
public
static
void
main
(
String
[]
args
)
{
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
9ce337bb
...
@@ -20,9 +20,12 @@ public class HotSearchRun {
...
@@ -20,9 +20,12 @@ public class HotSearchRun {
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"hotsearch"
).
build
();
ProxyFactory
.
init
(
simpleConfig
);
ProxyFactory
.
init
(
simpleConfig
);
new
UpdateWechatUserRun
().
start
();
// new UpdateWechatUserRun().start();
ZhiWeiTools
.
sleep
(
10000
);
// ZhiWeiTools.sleep(10000);
new
CacheListener
().
startListen
();
// new CacheListener().startListen();
//推送程序启动
// new SendWeiboHotSearchRun().start();
// new SendZhihuHotSearchRun().start();
// ScheduledExecutorService scheduledThreadPool = Executors.newScheduledThreadPool(6);
// ScheduledExecutorService scheduledThreadPool = Executors.newScheduledThreadPool(6);
//
//
...
@@ -47,8 +50,6 @@ public class HotSearchRun {
...
@@ -47,8 +50,6 @@ public class HotSearchRun {
new
ZhihuHotSearchRun
().
start
();
new
ZhihuHotSearchRun
().
start
();
new
WeiboSuperTopicRun
().
start
();
new
WeiboSuperTopicRun
().
start
();
new
WeiboTopicRun
().
start
();
new
WeiboTopicRun
().
start
();
//推送程序启动
new
SendWeiboHotSearchRun
().
start
();
new
SendZhihuHotSearchRun
().
start
();
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchListTest.java
deleted
100644 → 0
View file @
dd95f27b
package
com
.
zhiwei
.
searchhotcrawler
.
test
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
public
class
HotSearchListTest
{
public
static
void
main
(
String
[]
args
)
{
SimpleConfig
simpleConfig
=
SimpleConfig
.
builder
().
registry
(
ProxyConfig
.
registry
)
.
group
(
ProxyConfig
.
group
).
appId
(
10000013
).
appName
(
"zzw"
).
build
();
ProxyFactory
.
init
(
simpleConfig
);
String
url
=
"http://app.myzaker.com/news/app.php?f="
;
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
Elements
elements
=
Jsoup
.
parse
(
htmlBody
).
select
(
"div.titlebar>a"
);
for
(
Element
element
:
elements
){
String
lableUrl
=
"http://app.myzaker.com/news/app.php"
+
element
.
attr
(
"href"
);
System
.
out
.
println
(
"lableUrl========="
+
lableUrl
);
String
htmlBodyLable
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
lableUrl
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
Elements
elementsLable
=
Jsoup
.
parse
(
htmlBodyLable
).
select
(
"div#infinite_scroll>a"
);
for
(
Element
elementLable
:
elementsLable
){
System
.
out
.
println
(
elementLable
.
attr
(
"href"
)
+
"============="
+
elementLable
.
text
());
}
}
}
catch
(
Exception
e
){
e
.
printStackTrace
();
}
// MongoCredential credential = MongoCredential.createCredential(Config.userName, Config.authDB, Config.userPwd.toCharArray());
// ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);
// Mongo mongo = new MongoClient(address, Arrays.asList(credential));
//
// DB db = mongo.getDB("hot_search_list");
// DBCollection coll = db.getCollection("hot_search_list2019_09");
//
//// MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());
//// ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort);
//// Mongo mongoNew = new MongoClient(address, Arrays.asList(credentialNew));
//// DB dbNew = mongoNew.getDB("hot_search_list");
//
// Map<String,String> timLine = TimeParse.getTimeMap("2019-10-01 00:00:00", "2019-10-09 23:59:59", "dd", 1);
//
// timLine.forEach((start, end) ->{
//
// String year = end.substring(0,4);
// String month = end.substring(5,7);
// Date startDate = TimeParse.stringFormartDate(start);
// Date endDate = TimeParse.stringFormartDate(end);
//
// String collName = "hot_search_list"+year+"_"+month;
// System.out.println("collName=========="+collName);
//// DBCollection collNew = dbNew.getCollection(collName);
//// DBObject countIndexDoc = new BasicDBObject();
//// countIndexDoc.put("count", -1);
//// DBObject timeIndexDoc = new BasicDBObject();
//// timeIndexDoc.put("time", -1);
//// DBObject rankIndexDoc = new BasicDBObject();
//// rankIndexDoc.put("rank", -1);
//// DBObject nameIndexDoc = new BasicDBObject();
//// nameIndexDoc.put("name", -1);
//// DBObject typeIndexDoc = new BasicDBObject();
//// typeIndexDoc.put("type", -1);
//// try {
//// collNew.createIndex(countIndexDoc, new BasicDBObject("name", "count_desc"));
//// collNew.createIndex(timeIndexDoc, new BasicDBObject("name", "time_desc"));
//// collNew.createIndex(rankIndexDoc, new BasicDBObject("name", "rank_desc"));
//// collNew.createIndex(nameIndexDoc, new BasicDBObject("name", "name_desc"));
//// collNew.createIndex(typeIndexDoc, new BasicDBObject("name", "type_desc"));
//// } catch (Exception e) {
//// e.printStackTrace();
//// }
//
// DBObject query = new BasicDBObject(new BasicDBObject("time",
// new BasicDBObject("$gte",startDate).append("$lte", endDate)));
// System.out.println(query);
// WriteResult wr = coll.remove(query);
// System.out.println("========"+wr.getN());
//// int i = 0;
//// DBCursor cur = coll.remove(query);
//// System.out.println(query +"======="+ cur.count());
//// List<DBObject> dataList = new ArrayList<>();
//// while(cur.hasNext()) {
//// DBObject doc = cur.next();
//// try {
////// collNew.save(doc);
//// i++;
//// coll.remove(doc);
//// } catch (Exception e2) {
//// e2.printStackTrace();
//// }
//// dataList.add(doc);
//// }
//// System.out.println(collName +"数据量大小" +dataList.size());
//// cur.close();
//// if(!dataList.isEmpty()) {
//// try {
//// collNew.insert(dataList);
//// } catch (Exception e) {
//// e.printStackTrace();
//// }
//// }
// });
// mongo.close();
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
View file @
9ce337bb
...
@@ -6,7 +6,9 @@ import java.util.List;
...
@@ -6,7 +6,9 @@ import java.util.List;
import
java.util.Objects
;
import
java.util.Objects
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
...
@@ -39,23 +41,24 @@ public class BaiduHotSearchRun extends Thread{
...
@@ -39,23 +41,24 @@ public class BaiduHotSearchRun extends Thread{
private
void
getHotList
()
{
private
void
getHotList
()
{
log
.
info
(
"百度风云榜采集开始........"
);
log
.
info
(
"百度风云榜采集开始........"
);
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
List
<
HotSearchList
>
list
=
BaiDuHotSearchCrawler
.
baiduHotSearch
();
List
<
HotSearchList
>
list
=
BaiDuHotSearchCrawler
.
baiduHotSearch
();
log
.
info
(
"{}, 此轮百度风云榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
log
.
info
(
"{}, 此轮百度风云榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
saveDataList
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
saveDataList
=
new
ArrayList
<>();
if
(
Objects
.
nonNull
(
list
)
&&
!
list
.
isEmpty
())
{
if
(
Objects
.
nonNull
(
list
)
&&
!
list
.
isEmpty
())
{
list
.
forEach
(
baiduHotSearch
->{
list
.
forEach
(
baiduHotSearch
->{
int
changeCount
=
hotSearchDAO
.
getChangeCount
(
baiduHotSearch
);
Document
doc
=
new
Document
();
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
baiduHotSearch
.
getId
());
doc
.
put
(
"_id"
,
baiduHotSearch
.
getId
());
doc
.
put
(
"name"
,
baiduHotSearch
.
getName
());
doc
.
put
(
"name"
,
baiduHotSearch
.
getName
());
doc
.
put
(
"url"
,
baiduHotSearch
.
getUrl
());
doc
.
put
(
"url"
,
baiduHotSearch
.
getUrl
());
doc
.
put
(
"count"
,
baiduHotSearch
.
getCount
());
doc
.
put
(
"count"
,
baiduHotSearch
.
getCount
());
doc
.
put
(
"day"
,
baiduHotSearch
.
getDay
());
doc
.
put
(
"day"
,
baiduHotSearch
.
getDay
());
doc
.
put
(
"time"
,
baiduHotSearch
.
getTime
());
doc
.
put
(
"time"
,
baiduHotSearch
.
getTime
());
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"rank"
,
baiduHotSearch
.
getRank
());
doc
.
put
(
"rank"
,
baiduHotSearch
.
getRank
());
doc
.
put
(
"type"
,
baiduHotSearch
.
getType
());
doc
.
put
(
"type"
,
baiduHotSearch
.
getType
());
saveDataList
.
add
(
doc
);
saveDataList
.
add
(
doc
);
hotSearchCacheDAO
.
addAndUpdateData
(
doc
);
});
});
}
}
hotSearchDAO
.
addHotSearchList
(
saveDataList
);
hotSearchDAO
.
addHotSearchList
(
saveDataList
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
View file @
9ce337bb
...
@@ -5,7 +5,9 @@ import java.util.Date;
...
@@ -5,7 +5,9 @@ import java.util.Date;
import
java.util.List
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
...
@@ -42,23 +44,24 @@ public class DouyinHotSearchRun extends Thread{
...
@@ -42,23 +44,24 @@ public class DouyinHotSearchRun extends Thread{
private
void
getHotList
()
{
private
void
getHotList
()
{
log
.
info
(
"抖音热搜榜采集开始........"
);
log
.
info
(
"抖音热搜榜采集开始........"
);
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
List
<
HotSearchList
>
list
=
DouyinHotSearchCrawler
.
getMobileDouyinHotList
();
List
<
HotSearchList
>
list
=
DouyinHotSearchCrawler
.
getMobileDouyinHotList
();
log
.
info
(
"{}, 抖音热搜榜此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
log
.
info
(
"{}, 抖音热搜榜此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
data
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
douyinHotSearch
:
list
){
for
(
HotSearchList
douyinHotSearch
:
list
){
int
changeCount
=
hotSearchDAO
.
getChangeCount
(
douyinHotSearch
);
Document
douyin
=
new
Document
();
DBObject
douyin
=
new
BasicDBObject
();
douyin
.
put
(
"_id"
,
douyinHotSearch
.
getId
());
douyin
.
put
(
"_id"
,
douyinHotSearch
.
getId
());
douyin
.
put
(
"name"
,
douyinHotSearch
.
getName
());
douyin
.
put
(
"name"
,
douyinHotSearch
.
getName
());
douyin
.
put
(
"rank"
,
douyinHotSearch
.
getRank
());
douyin
.
put
(
"rank"
,
douyinHotSearch
.
getRank
());
douyin
.
put
(
"count"
,
douyinHotSearch
.
getCount
());
douyin
.
put
(
"count"
,
douyinHotSearch
.
getCount
());
douyin
.
put
(
"hot"
,
douyinHotSearch
.
getHot
());
douyin
.
put
(
"day"
,
douyinHotSearch
.
getDay
());
douyin
.
put
(
"day"
,
douyinHotSearch
.
getDay
());
douyin
.
put
(
"time"
,
douyinHotSearch
.
getTime
());
douyin
.
put
(
"time"
,
douyinHotSearch
.
getTime
());
douyin
.
put
(
"changeCount"
,
changeCount
);
douyin
.
put
(
"url"
,
null
);
douyin
.
put
(
"url"
,
null
);
douyin
.
put
(
"type"
,
douyinHotSearch
.
getType
());
douyin
.
put
(
"type"
,
douyinHotSearch
.
getType
());
data
.
add
(
douyin
);
data
.
add
(
douyin
);
hotSearchDAO
.
addHotSearch
(
douyin
);
hotSearchDAO
.
addHotSearch
(
douyin
);
hotSearchCacheDAO
.
addAndUpdateData
(
douyin
);
}
}
log
.
info
(
"抖音热搜榜采集结束........"
);
log
.
info
(
"抖音热搜榜采集结束........"
);
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
//package com.zhiwei.searchhotcrawler.timer;
//
import
java.util.Calendar
;
//import java.util.Calendar;
import
java.util.Date
;
//import java.util.Date;
import
java.util.HashMap
;
//import java.util.HashMap;
import
java.util.List
;
//import java.util.List;
import
java.util.Map
;
//import java.util.Map;
//
import
lombok.extern.log4j.Log4j2
;
//import lombok.extern.log4j.Log4j2;
import
org.slf4j.Logger
;
//
import
org.slf4j.LoggerFactory
;
//import com.alibaba.fastjson.JSONObject;
//import com.mongodb.DBObject;
import
com.alibaba.fastjson.JSONObject
;
//import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import
com.mongodb.DBObject
;
//import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import
com.zhiwei.searchhotcrawler.dao.WechatUserDao
;
//import com.zhiwei.searchhotcrawler.util.Template;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
//import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
//import com.zhiwei.searchhotcrawler.util.WechatConstant;
import
com.zhiwei.searchhotcrawler.util.Template
;
//import com.zhiwei.tools.timeparse.TimeParse;
import
com.zhiwei.searchhotcrawler.util.WechatCodeUtil
;
//import com.zhiwei.tools.tools.ZhiWeiTools;
import
com.zhiwei.searchhotcrawler.util.WechatConstant
;
//
import
com.zhiwei.tools.timeparse.TimeParse
;
//@Log4j2
import
com.zhiwei.tools.tools.ZhiWeiTools
;
//public class SendWeiboHotSearchRun extends Thread {
// private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Log4j2
// private static WechatUserDao wechatUserDao = new WechatUserDao();
public
class
SendWeiboHotSearchRun
extends
Thread
{
// @Override
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
// public void run() {
private
static
WechatUserDao
wechatUserDao
=
new
WechatUserDao
();
// while (true) {
@Override
// try {
public
void
run
()
{
// Calendar calendar = Calendar.getInstance();
while
(
true
)
{
// int hour = calendar.get(Calendar.HOUR_OF_DAY);
try
{
// log.info("微博推送,当前系统时间为:" + hour);
Calendar
calendar
=
Calendar
.
getInstance
();
// if (hour > 6 && hour < 23) {
int
hour
=
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
);
// List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.微博热搜.name());
log
.
info
(
"微博推送,当前系统时间为:"
+
hour
);
// if (list != null && !list.isEmpty()) {
if
(
hour
>
6
&&
hour
<
23
)
{
// for (DBObject weibo : list) {
List
<
DBObject
>
list
=
hotSearchDAO
.
getHotOneHour
(
HotSearchType
.
微博热搜
.
name
());
// String title = weibo.get("name").toString();
if
(
list
!=
null
&&
!
list
.
isEmpty
())
{
// String time = TimeParse.dateFormartString((Date) weibo.get("time"), "yyyy-MM-dd HH:mm:ss");
for
(
DBObject
weibo
:
list
)
{
// String url = weibo.get("url").toString();
String
title
=
weibo
.
get
(
"name"
).
toString
();
// sendTemplateByUserIds(title, time, url);
String
time
=
TimeParse
.
dateFormartString
((
Date
)
weibo
.
get
(
"time"
),
"yyyy-MM-dd HH:mm:ss"
);
// }
String
url
=
weibo
.
get
(
"url"
).
toString
();
// } else {
sendTemplateByUserIds
(
title
,
time
,
url
);
// log.info("微博最近一小时无数据");
}
// sendTemplateByUserIds("最近一小时无数据",
}
else
{
// TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
log
.
info
(
"微博最近一小时无数据"
);
// }
sendTemplateByUserIds
(
"最近一小时无数据"
,
// }
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd HH:mm:ss"
),
null
);
// ZhiWeiTools.sleep(1 * 60 * 60 * 1000);
}
// } catch (Exception e) {
}
// log.debug("微博热搜推送出现问题,问题为:::{}", e.fillInStackTrace());
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
// ZhiWeiTools.sleep(1 * 60 * 60 * 1000);
}
catch
(
Exception
e
)
{
// continue;
log
.
debug
(
"微博热搜推送出现问题,问题为:::{}"
,
e
.
fillInStackTrace
());
// }
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
// }
continue
;
// }
}
//
}
// /**
}
// * @Title: sendTemplateByUserIds
// * @author hero
/**
// * @Description: 发送模版消息
* @Title: sendTemplateByUserIds
// * @param @param
* @author hero
// * microTouTiao
* @Description: 发送模版消息
// * @param @param
* @param @param
// * userList 设定文件
* microTouTiao
// * @return void 返回类型
* @param @param
// */
* userList 设定文件
// public static void sendTemplateByUserIds(String title, String time, String url) {
* @return void 返回类型
// Map<String, Object> dataMap = new HashMap<String, Object>();
*/
// JSONObject first = new JSONObject();
public
static
void
sendTemplateByUserIds
(
String
title
,
String
time
,
String
url
)
{
// first.put("value", "您好,有一条来自微博热搜榜的预警通知。");
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
// dataMap.put("first", first);
JSONObject
first
=
new
JSONObject
();
// JSONObject keyword1 = new JSONObject();
first
.
put
(
"value"
,
"您好,有一条来自微博热搜榜的预警通知。"
);
// keyword1.put("value", title);
dataMap
.
put
(
"first"
,
first
);
// keyword1.put("color", "#173177");
JSONObject
keyword1
=
new
JSONObject
();
// dataMap.put("keyword1", keyword1);
keyword1
.
put
(
"value"
,
title
);
// JSONObject keyword2 = new JSONObject();
keyword1
.
put
(
"color"
,
"#173177"
);
// keyword2.put("value", "微博热搜榜");
dataMap
.
put
(
"keyword1"
,
keyword1
);
// keyword2.put("color", "#173177");
JSONObject
keyword2
=
new
JSONObject
();
// dataMap.put("keyword2", keyword2);
keyword2
.
put
(
"value"
,
"微博热搜榜"
);
// JSONObject keyword3 = new JSONObject();
keyword2
.
put
(
"color"
,
"#173177"
);
// keyword3.put("value", time);
dataMap
.
put
(
"keyword2"
,
keyword2
);
// keyword3.put("color", "#173177");
JSONObject
keyword3
=
new
JSONObject
();
// dataMap.put("keyword3", keyword3);
keyword3
.
put
(
"value"
,
time
);
// JSONObject remark = new JSONObject();
keyword3
.
put
(
"color"
,
"#173177"
);
// remark.put("value", "知微情报监测服务");
dataMap
.
put
(
"keyword3"
,
keyword3
);
// dataMap.put("remark", remark);
JSONObject
remark
=
new
JSONObject
();
// List<String> userList = getUserList();
remark
.
put
(
"value"
,
"知微情报监测服务"
);
// if (userList != null && userList.size() > 0) {
dataMap
.
put
(
"remark"
,
remark
);
// for (String openId : userList) {
List
<
String
>
userList
=
getUserList
();
// Template template = new Template();
if
(
userList
!=
null
&&
userList
.
size
()
>
0
)
{
// template.setTouser(openId);
for
(
String
openId
:
userList
)
{
// if (url != null) {
Template
template
=
new
Template
();
// template.setUrl(url);
template
.
setTouser
(
openId
);
// }
if
(
url
!=
null
)
{
// template.setTemplate_id(WechatConstant.WECHAT_TEMPLATEID_EARLY_IT);
template
.
setUrl
(
url
);
// template.setData(dataMap);
}
//
template
.
setTemplate_id
(
WechatConstant
.
WECHAT_TEMPLATEID_EARLY_IT
);
// JSONObject templateJson = (JSONObject) JSONObject.toJSON(template);
template
.
setData
(
dataMap
);
// WechatCodeUtil.sendDataJson(templateJson);
// }
JSONObject
templateJson
=
(
JSONObject
)
JSONObject
.
toJSON
(
template
);
// } else {
WechatCodeUtil
.
sendDataJson
(
templateJson
);
// log.info("拉取微博用户列表失败");
}
// }
}
else
{
// }
log
.
info
(
"拉取微博用户列表失败"
);
//
}
// /**
}
// * @Title: getUserList
// * @author hero
/**
// * @Description: 用户列表
* @Title: getUserList
// * @param @param
* @author hero
// * projectName
* @Description: 用户列表
// * @param @return
* @param @param
// * 设定文件
* projectName
// * @return List<String> 返回类型
* @param @return
// */
* 设定文件
// public static List<String> getUserList() {
* @return List<String> 返回类型
// List<String> userList = wechatUserDao.getWechatUserByGroup("weibohot");
*/
// if(userList==null){
public
static
List
<
String
>
getUserList
()
{
// userList = WechatCodeUtil.getUserListByGroupName("weibohot");
List
<
String
>
userList
=
wechatUserDao
.
getWechatUserByGroup
(
"weibohot"
);
// }
if
(
userList
==
null
){
// return userList;
userList
=
WechatCodeUtil
.
getUserListByGroupName
(
"weibohot"
);
// }
}
//}
return
userList
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
//package com.zhiwei.searchhotcrawler.timer;
//
import
java.util.Calendar
;
//import java.util.Calendar;
import
java.util.Date
;
//import java.util.Date;
import
java.util.HashMap
;
//import java.util.HashMap;
import
java.util.List
;
//import java.util.List;
import
java.util.Map
;
//import java.util.Map;
//
import
lombok.extern.log4j.Log4j2
;
//import lombok.extern.log4j.Log4j2;
import
org.slf4j.Logger
;
//
import
org.slf4j.LoggerFactory
;
//import com.alibaba.fastjson.JSONObject;
//import com.mongodb.DBObject;
import
com.alibaba.fastjson.JSONObject
;
//import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import
com.mongodb.DBObject
;
//import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
//import com.zhiwei.searchhotcrawler.util.Template;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
//import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import
com.zhiwei.searchhotcrawler.dao.WechatUserDao
;
//import com.zhiwei.searchhotcrawler.util.WechatConstant;
import
com.zhiwei.searchhotcrawler.util.Template
;
//import com.zhiwei.tools.timeparse.TimeParse;
import
com.zhiwei.searchhotcrawler.util.WechatCodeUtil
;
//import com.zhiwei.tools.tools.ZhiWeiTools;
import
com.zhiwei.searchhotcrawler.util.WechatConstant
;
//
import
com.zhiwei.tools.timeparse.TimeParse
;
//@Log4j2
import
com.zhiwei.tools.tools.ZhiWeiTools
;
//public class SendZhihuHotSearchRun extends Thread{
// private HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
@Log4j2
// private static WechatUserDao wechatUserDao = new WechatUserDao();
public
class
SendZhihuHotSearchRun
extends
Thread
{
// @Override
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
// public void run() {
private
static
WechatUserDao
wechatUserDao
=
new
WechatUserDao
();
//
@Override
// while(true) {
public
void
run
()
{
// try {
// Calendar calendar = Calendar.getInstance();
while
(
true
)
{
// int hour = calendar.get(Calendar.HOUR_OF_DAY);
try
{
// log.info("知乎推送,当前系统时间为:"+hour);
Calendar
calendar
=
Calendar
.
getInstance
();
// if(hour > 6 && hour <23){
int
hour
=
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
);
// List<DBObject> list = hotSearchDAO.getHotOneHour(HotSearchType.知乎热搜.name());
log
.
info
(
"知乎推送,当前系统时间为:"
+
hour
);
// if(list!=null && !list.isEmpty()){
if
(
hour
>
6
&&
hour
<
23
){
// for(DBObject zhihu : list){
List
<
DBObject
>
list
=
hotSearchDAO
.
getHotOneHour
(
HotSearchType
.
知乎热搜
.
name
());
// String title = zhihu.get("display_query").toString();
if
(
list
!=
null
&&
!
list
.
isEmpty
()){
// String time = TimeParse.dateFormartString((Date)zhihu.get("time"), "yyyy-MM-dd HH:mm:ss");
for
(
DBObject
zhihu
:
list
){
// String url = zhihu.get("_id").toString();
String
title
=
zhihu
.
get
(
"display_query"
).
toString
();
// if(calendar.get(Calendar.HOUR_OF_DAY) > 6 && calendar.get(Calendar.HOUR_OF_DAY) < 23){
String
time
=
TimeParse
.
dateFormartString
((
Date
)
zhihu
.
get
(
"time"
),
"yyyy-MM-dd HH:mm:ss"
);
// sendTemplateByUserIds(title, time, url);
String
url
=
zhihu
.
get
(
"_id"
).
toString
();
// }
if
(
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
)
>
6
&&
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
)
<
23
){
// }
sendTemplateByUserIds
(
title
,
time
,
url
);
// }else{
}
// log.info("知乎最近一小时无数据");
}
// sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
}
else
{
// }
log
.
info
(
"知乎最近一小时无数据"
);
// }
sendTemplateByUserIds
(
"最近一小时无数据"
,
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd HH:mm:ss"
),
null
);
// ZhiWeiTools.sleep(1*60*60*1000);
}
// } catch (Exception e) {
}
// log.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
// ZhiWeiTools.sleep(1*60*60*1000);
}
catch
(
Exception
e
)
{
// }
log
.
debug
(
"知乎热搜推送出现问题,问题为:::{}"
,
e
.
fillInStackTrace
());
// }
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
// }
}
//
}
// /**
}
// * @Title: sendTemplateByUserIds
// * @author hero
/**
// * @Description: 发送模版消息
* @Title: sendTemplateByUserIds
// * @param @param microTouTiao
* @author hero
// * @param @param userList 设定文件
* @Description: 发送模版消息
// * @return void 返回类型
* @param @param microTouTiao
// */
* @param @param userList 设定文件
// public static void sendTemplateByUserIds(String title,String time, String url) {
* @return void 返回类型
//
*/
// Map<String, Object> dataMap = new HashMap<>();
public
static
void
sendTemplateByUserIds
(
String
title
,
String
time
,
String
url
)
{
// JSONObject first = new JSONObject();
// first.put("value", "您好,有一条来自知乎热搜榜的预警通知。");
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<>();
// dataMap.put("first", first);
JSONObject
first
=
new
JSONObject
();
// JSONObject keyword1 = new JSONObject();
first
.
put
(
"value"
,
"您好,有一条来自知乎热搜榜的预警通知。"
);
// keyword1.put("value", title);
dataMap
.
put
(
"first"
,
first
);
// keyword1.put("color", "#173177");
JSONObject
keyword1
=
new
JSONObject
();
// dataMap.put("keyword1", keyword1);
keyword1
.
put
(
"value"
,
title
);
// JSONObject keyword2 = new JSONObject();
keyword1
.
put
(
"color"
,
"#173177"
);
// keyword2.put("value", "知乎热搜榜");
dataMap
.
put
(
"keyword1"
,
keyword1
);
// keyword2.put("color", "#173177");
JSONObject
keyword2
=
new
JSONObject
();
// dataMap.put("keyword2", keyword2);
keyword2
.
put
(
"value"
,
"知乎热搜榜"
);
// JSONObject keyword3 = new JSONObject();
keyword2
.
put
(
"color"
,
"#173177"
);
// keyword3.put("value", time);
dataMap
.
put
(
"keyword2"
,
keyword2
);
// keyword3.put("color", "#173177");
JSONObject
keyword3
=
new
JSONObject
();
// dataMap.put("keyword3", keyword3);
keyword3
.
put
(
"value"
,
time
);
// JSONObject remark = new JSONObject();
keyword3
.
put
(
"color"
,
"#173177"
);
// remark.put("value", "知微情报监测服务");
dataMap
.
put
(
"keyword3"
,
keyword3
);
// dataMap.put("remark", remark);
JSONObject
remark
=
new
JSONObject
();
//
remark
.
put
(
"value"
,
"知微情报监测服务"
);
// List<String> userList = getUserList();
dataMap
.
put
(
"remark"
,
remark
);
// if(userList!=null && !userList.isEmpty()) {
// for (String openId : userList) {
List
<
String
>
userList
=
getUserList
();
// Template template = new Template();
if
(
userList
!=
null
&&
!
userList
.
isEmpty
())
{
// template.setTouser(openId);
for
(
String
openId
:
userList
)
{
// if(url!=null){
Template
template
=
new
Template
();
// template.setUrl(url);
template
.
setTouser
(
openId
);
// }
if
(
url
!=
null
){
// template.setTemplate_id(WechatConstant.WECHAT_TEMPLATEID_EARLY_IT);
template
.
setUrl
(
url
);
// template.setData(dataMap);
}
//
template
.
setTemplate_id
(
WechatConstant
.
WECHAT_TEMPLATEID_EARLY_IT
);
// JSONObject templateJson = (JSONObject)JSONObject.toJSON(template);
template
.
setData
(
dataMap
);
// WechatCodeUtil.sendDataJson(templateJson);
// }
JSONObject
templateJson
=
(
JSONObject
)
JSONObject
.
toJSON
(
template
);
// }else {
WechatCodeUtil
.
sendDataJson
(
templateJson
);
// log.info("知乎推送拉取用户列表失败");
}
// }
}
else
{
//
log
.
info
(
"知乎推送拉取用户列表失败"
);
// }
}
//
// /**
}
// * @Title: getUserList
// * @author hero
/**
// * @Description: 用户列表
* @Title: getUserList
// * @param @param projectName
* @author hero
// * @param @return 设定文件
* @Description: 用户列表
// * @return List<String> 返回类型
* @param @param projectName
// */
* @param @return 设定文件
//// private static List<String> getUserList()
* @return List<String> 返回类型
//// {
*/
//// List<String> userList = wechatUserDao.getWechatUserByGroup("LP组");
private
static
List
<
String
>
getUserList
()
//// if(userList==null){
{
//// userList = WechatCodeUtil.getUserListByGroupName("LP组");
List
<
String
>
userList
=
wechatUserDao
.
getWechatUserByGroup
(
"LP组"
);
//// }
if
(
userList
==
null
){
//// return userList;
userList
=
WechatCodeUtil
.
getUserListByGroupName
(
"LP组"
);
//// }
}
//
return
userList
;
//}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
View file @
9ce337bb
...
@@ -5,7 +5,9 @@ import java.util.Date;
...
@@ -5,7 +5,9 @@ import java.util.Date;
import
java.util.List
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
...
@@ -37,12 +39,13 @@ public class SougoHotSearchRun extends Thread {
...
@@ -37,12 +39,13 @@ public class SougoHotSearchRun extends Thread {
private
void
getHotList
()
{
private
void
getHotList
()
{
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
log
.
info
(
"搜狗微信采集开始........"
);
log
.
info
(
"搜狗微信采集开始........"
);
List
<
HotSearchList
>
list
=
SougoHotSearchCrawler
.
sougoHotSearch
();
List
<
HotSearchList
>
list
=
SougoHotSearchCrawler
.
sougoHotSearch
();
log
.
info
(
"{}, 此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
log
.
info
(
"{}, 此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
data
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
sougoHotSearch
:
list
){
for
(
HotSearchList
sougoHotSearch
:
list
){
D
BObject
doc
=
new
BasicDBObjec
t
();
D
ocument
doc
=
new
Documen
t
();
doc
.
put
(
"_id"
,
sougoHotSearch
.
getId
());
doc
.
put
(
"_id"
,
sougoHotSearch
.
getId
());
doc
.
put
(
"name"
,
sougoHotSearch
.
getName
());
doc
.
put
(
"name"
,
sougoHotSearch
.
getName
());
doc
.
put
(
"url"
,
sougoHotSearch
.
getUrl
());
doc
.
put
(
"url"
,
sougoHotSearch
.
getUrl
());
...
@@ -51,6 +54,7 @@ public class SougoHotSearchRun extends Thread {
...
@@ -51,6 +54,7 @@ public class SougoHotSearchRun extends Thread {
doc
.
put
(
"rank"
,
sougoHotSearch
.
getRank
());
doc
.
put
(
"rank"
,
sougoHotSearch
.
getRank
());
doc
.
put
(
"type"
,
sougoHotSearch
.
getType
());
doc
.
put
(
"type"
,
sougoHotSearch
.
getType
());
data
.
add
(
doc
);
data
.
add
(
doc
);
hotSearchCacheDAO
.
addAndUpdateData
(
doc
);
}
}
hotSearchDAO
.
addHotSearchList
(
data
);
hotSearchDAO
.
addHotSearchList
(
data
);
log
.
info
(
"搜狗微信采集结束........"
);
log
.
info
(
"搜狗微信采集结束........"
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/UpdateWechatUserRun.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
//package com.zhiwei.searchhotcrawler.timer;
//
import
java.util.Calendar
;
//import java.util.Calendar;
import
java.util.List
;
//import java.util.List;
import
java.util.Map
;
//import java.util.Map;
import
java.util.Map.Entry
;
//import java.util.Map.Entry;
//
import
lombok.extern.log4j.Log4j2
;
//import lombok.extern.log4j.Log4j2;
import
org.slf4j.Logger
;
//
import
org.slf4j.LoggerFactory
;
//import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
//import com.zhiwei.tools.tools.ZhiWeiTools;
import
com.zhiwei.searchhotcrawler.dao.WechatUserDao
;
//
import
com.zhiwei.searchhotcrawler.util.WechatCodeUtil
;
//@Log4j2
import
com.zhiwei.tools.tools.ZhiWeiTools
;
//public class UpdateWechatUserRun extends Thread{
// private WechatUserDao wechatUserDao = new WechatUserDao();
@Log4j2
// @Override
public
class
UpdateWechatUserRun
extends
Thread
{
// public void run() {
private
WechatUserDao
wechatUserDao
=
new
WechatUserDao
();
// log.info("开始更新用户数据");
@Override
// while(true) {
public
void
run
()
{
// try {
log
.
info
(
"开始更新用户数据"
);
// Calendar calendar = Calendar.getInstance();
while
(
true
)
{
// int hour = calendar.get(Calendar.HOUR_OF_DAY);
try
{
// if(hour > 6 ){
Calendar
calendar
=
Calendar
.
getInstance
();
// Map<String,Integer> groupMap = WechatCodeUtil.getAllGroupIp();
int
hour
=
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
);
// log.info("此公众号的分组数量为:::{}", groupMap.size());
if
(
hour
>
6
){
// if(!groupMap.isEmpty() && groupMap!=null){
Map
<
String
,
Integer
>
groupMap
=
WechatCodeUtil
.
getAllGroupIp
();
// for(Entry<String,Integer> group : groupMap.entrySet()){
log
.
info
(
"此公众号的分组数量为:::{}"
,
groupMap
.
size
());
// log.info("此公众号的分组名称及IP为:::{},{}", group.getKey(), group.getValue());
if
(!
groupMap
.
isEmpty
()
&&
groupMap
!=
null
){
// List<String> userList = WechatCodeUtil.getUserListByGroupId(group.getValue());
for
(
Entry
<
String
,
Integer
>
group
:
groupMap
.
entrySet
()){
// log.info("{},此分组下的用户数量为::{}", group.getKey(), userList.size());
log
.
info
(
"此公众号的分组名称及IP为:::{},{}"
,
group
.
getKey
(),
group
.
getValue
());
// if(userList!=null && !userList.isEmpty()){
List
<
String
>
userList
=
WechatCodeUtil
.
getUserListByGroupId
(
group
.
getValue
());
// wechatUserDao.addWechatUser(userList, group.getKey(), group.getValue());
log
.
info
(
"{},此分组下的用户数量为::{}"
,
group
.
getKey
(),
userList
.
size
());
// }
if
(
userList
!=
null
&&
!
userList
.
isEmpty
()){
// }
wechatUserDao
.
addWechatUser
(
userList
,
group
.
getKey
(),
group
.
getValue
());
// }
}
// }
}
// ZhiWeiTools.sleep(1*60*60*1000);
}
// } catch (Exception e) {
}
// log.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
// ZhiWeiTools.sleep(1*60*60*1000);
}
catch
(
Exception
e
)
{
// continue;
log
.
debug
(
"知乎热搜推送出现问题,问题为:::{}"
,
e
.
fillInStackTrace
());
// }
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
// }
continue
;
// }
}
//
}
//
}
//}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
View file @
9ce337bb
...
@@ -5,16 +5,13 @@ import java.util.Date;
...
@@ -5,16 +5,13 @@ import java.util.Date;
import
java.util.List
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
org.bson.Document
;
@Log4j2
@Log4j2
public
class
WeiboHotSearchRun
extends
Thread
{
public
class
WeiboHotSearchRun
extends
Thread
{
...
@@ -38,13 +35,13 @@ public class WeiboHotSearchRun extends Thread{
...
@@ -38,13 +35,13 @@ public class WeiboHotSearchRun extends Thread{
private
void
getHotList
()
{
private
void
getHotList
()
{
log
.
info
(
"微博话题采集开始........"
);
log
.
info
(
"微博话题采集开始........"
);
HotSearchListDAO
weiboHotSearchDAO
=
new
HotSearchListDAO
();
HotSearchListDAO
weiboHotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
List
<
HotSearchList
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearchByPhone
();
List
<
HotSearchList
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearchByPhone
();
log
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
log
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
data
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
weiboHotSearch
:
list
){
for
(
HotSearchList
weiboHotSearch
:
list
){
int
changeCount
=
weiboHotSearchDAO
.
getChangeCount
(
weiboHotSearch
);
Document
doc
=
new
Document
();
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
weiboHotSearch
.
getId
());
doc
.
put
(
"_id"
,
weiboHotSearch
.
getId
());
doc
.
put
(
"name"
,
weiboHotSearch
.
getName
());
doc
.
put
(
"name"
,
weiboHotSearch
.
getName
());
doc
.
put
(
"url"
,
weiboHotSearch
.
getUrl
());
doc
.
put
(
"url"
,
weiboHotSearch
.
getUrl
());
...
@@ -52,11 +49,11 @@ public class WeiboHotSearchRun extends Thread{
...
@@ -52,11 +49,11 @@ public class WeiboHotSearchRun extends Thread{
doc
.
put
(
"hot"
,
weiboHotSearch
.
getHot
());
doc
.
put
(
"hot"
,
weiboHotSearch
.
getHot
());
doc
.
put
(
"day"
,
weiboHotSearch
.
getDay
());
doc
.
put
(
"day"
,
weiboHotSearch
.
getDay
());
doc
.
put
(
"time"
,
weiboHotSearch
.
getTime
());
doc
.
put
(
"time"
,
weiboHotSearch
.
getTime
());
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"rank"
,
weiboHotSearch
.
getRank
());
doc
.
put
(
"rank"
,
weiboHotSearch
.
getRank
());
doc
.
put
(
"type"
,
weiboHotSearch
.
getType
());
doc
.
put
(
"type"
,
weiboHotSearch
.
getType
());
doc
.
put
(
"icon"
,
weiboHotSearch
.
getIcon
());
doc
.
put
(
"icon"
,
weiboHotSearch
.
getIcon
());
data
.
add
(
doc
);
data
.
add
(
doc
);
hotSearchCacheDAO
.
addAndUpdateData
(
doc
);
}
}
weiboHotSearchDAO
.
addHotSearchList
(
data
);
weiboHotSearchDAO
.
addHotSearchList
(
data
);
log
.
info
(
"微博话题采集结束........"
);
log
.
info
(
"微博话题采集结束........"
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboSuperTopicRun.java
View file @
9ce337bb
...
@@ -9,11 +9,7 @@ import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
...
@@ -9,11 +9,7 @@ import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
import
com.zhiwei.searchhotcrawler.crawler.WeiboSuperTopicCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboSuperTopicCrawler
;
import
com.zhiwei.searchhotcrawler.dao.WeiboSuperTopicDAO
;
import
com.zhiwei.searchhotcrawler.dao.WeiboSuperTopicDAO
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
org.slf4j.Logger
;
import
org.bson.Document
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
@Log4j2
@Log4j2
...
@@ -40,10 +36,10 @@ public class WeiboSuperTopicRun extends Thread{
...
@@ -40,10 +36,10 @@ public class WeiboSuperTopicRun extends Thread{
log
.
info
(
"微博超话采集开始........"
);
log
.
info
(
"微博超话采集开始........"
);
List
<
WeiboSuperTopic
>
list
=
WeiboSuperTopicCrawler
.
startCrawler
();
List
<
WeiboSuperTopic
>
list
=
WeiboSuperTopicCrawler
.
startCrawler
();
log
.
info
(
"{}, 微博超话此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
log
.
info
(
"{}, 微博超话此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
data
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
data
=
new
ArrayList
<>();
for
(
WeiboSuperTopic
topic
:
list
){
for
(
WeiboSuperTopic
topic
:
list
){
log
.
info
(
"topic::::{}"
,
topic
);
log
.
info
(
"topic::::{}"
,
topic
);
D
BObject
doc
=
new
BasicDBObjec
t
();
D
ocument
doc
=
new
Documen
t
();
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"name"
,
topic
.
getTopicName
());
doc
.
put
(
"name"
,
topic
.
getTopicName
());
doc
.
put
(
"rank"
,
topic
.
getRank
());
doc
.
put
(
"rank"
,
topic
.
getRank
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboTopicRun.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboTopicCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboTopicCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
...
@@ -32,12 +33,13 @@ public class WeiboTopicRun extends Thread{
...
@@ -32,12 +33,13 @@ public class WeiboTopicRun extends Thread{
private
void
getTopicList
()
{
private
void
getTopicList
()
{
HotSearchListDAO
weiboHotSearchDAO
=
new
HotSearchListDAO
();
HotSearchListDAO
weiboHotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
log
.
info
(
"微博话题采集开始........"
);
log
.
info
(
"微博话题采集开始........"
);
List
<
HotSearchList
>
list
=
WeiboTopicCrawler
.
startCrawlerByPhone
();
List
<
HotSearchList
>
list
=
WeiboTopicCrawler
.
startCrawlerByPhone
();
log
.
info
(
"{}, 微博话题此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
log
.
info
(
"{}, 微博话题此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
D
BObjec
t
>
data
=
new
ArrayList
<>();
List
<
D
ocumen
t
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
topic
:
list
){
for
(
HotSearchList
topic
:
list
){
D
BObject
doc
=
new
BasicDBObjec
t
();
D
ocument
doc
=
new
Documen
t
();
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"_id"
,
topic
.
getId
());
doc
.
put
(
"name"
,
topic
.
getName
());
doc
.
put
(
"name"
,
topic
.
getName
());
doc
.
put
(
"url"
,
topic
.
getUrl
());
doc
.
put
(
"url"
,
topic
.
getUrl
());
...
@@ -50,6 +52,7 @@ public class WeiboTopicRun extends Thread{
...
@@ -50,6 +52,7 @@ public class WeiboTopicRun extends Thread{
doc
.
put
(
"topic_lead"
,
topic
.
getTopicLead
());
doc
.
put
(
"topic_lead"
,
topic
.
getTopicLead
());
doc
.
put
(
"comment_count"
,
topic
.
getCommentCount
());
doc
.
put
(
"comment_count"
,
topic
.
getCommentCount
());
data
.
add
(
doc
);
data
.
add
(
doc
);
hotSearchCacheDAO
.
addAndUpdateData
(
doc
);
}
}
weiboHotSearchDAO
.
addHotSearchList
(
data
);
weiboHotSearchDAO
.
addHotSearchList
(
data
);
log
.
info
(
"微博话题采集结束........"
);
log
.
info
(
"微博话题采集结束........"
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
View file @
9ce337bb
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
lombok.extern.log4j.Log4j2
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
...
@@ -36,13 +39,15 @@ public class ZhihuHotSearchRun extends Thread{
...
@@ -36,13 +39,15 @@ public class ZhihuHotSearchRun extends Thread{
private
void
getHotList
()
{
private
void
getHotList
()
{
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
log
.
info
(
"知乎话题采集开始...,当前线程名字:{}"
,
Thread
.
currentThread
().
getName
());
log
.
info
(
"知乎话题采集开始...,当前线程名字:{}"
,
Thread
.
currentThread
().
getName
());
List
<
HotSearchList
>
list
=
ZhihuHotSearchCrawler
.
getZhihuHotList
();
List
<
HotSearchList
>
list
=
ZhihuHotSearchCrawler
.
getZhihuHotList
();
List
<
HotSearchList
>
mobilelist
=
ZhihuHotSearchCrawler
.
getMobileZhihuHotList
();
List
<
HotSearchList
>
mobilelist
=
ZhihuHotSearchCrawler
.
getMobileZhihuHotList
();
list
.
addAll
(
mobilelist
);
list
.
addAll
(
mobilelist
);
log
.
info
(
"{}, 知乎此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
log
.
info
(
"{}, 知乎此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
Document
>
dataList
=
new
ArrayList
<>();
for
(
HotSearchList
zhihuHotSearch
:
list
){
for
(
HotSearchList
zhihuHotSearch
:
list
){
D
BObject
zhihu
=
new
BasicDBObjec
t
();
D
ocument
zhihu
=
new
Documen
t
();
zhihu
.
put
(
"_id"
,
zhihuHotSearch
.
getId
());
zhihu
.
put
(
"_id"
,
zhihuHotSearch
.
getId
());
zhihu
.
put
(
"name"
,
zhihuHotSearch
.
getName
());
zhihu
.
put
(
"name"
,
zhihuHotSearch
.
getName
());
zhihu
.
put
(
"url"
,
zhihuHotSearch
.
getUrl
());
zhihu
.
put
(
"url"
,
zhihuHotSearch
.
getUrl
());
...
@@ -50,11 +55,12 @@ public class ZhihuHotSearchRun extends Thread{
...
@@ -50,11 +55,12 @@ public class ZhihuHotSearchRun extends Thread{
zhihu
.
put
(
"hot"
,
zhihuHotSearch
.
getHot
());
zhihu
.
put
(
"hot"
,
zhihuHotSearch
.
getHot
());
zhihu
.
put
(
"day"
,
zhihuHotSearch
.
getDay
());
zhihu
.
put
(
"day"
,
zhihuHotSearch
.
getDay
());
zhihu
.
put
(
"time"
,
zhihuHotSearch
.
getTime
());
zhihu
.
put
(
"time"
,
zhihuHotSearch
.
getTime
());
zhihu
.
put
(
"changeCount"
,
0
);
zhihu
.
put
(
"rank"
,
zhihuHotSearch
.
getRank
());
zhihu
.
put
(
"rank"
,
zhihuHotSearch
.
getRank
());
zhihu
.
put
(
"type"
,
zhihuHotSearch
.
getType
());
zhihu
.
put
(
"type"
,
zhihuHotSearch
.
getType
());
hotSearchDAO
.
addHotSearch
(
zhihu
);
dataList
.
add
(
zhihu
);
hotSearchCacheDAO
.
addAndUpdateData
(
zhihu
);
}
}
hotSearchDAO
.
addHotSearchList
(
dataList
);
log
.
info
(
"知乎话题采集结束........"
);
log
.
info
(
"知乎话题采集结束........"
);
}
}
...
...
src/main/resources/db.properties
View file @
9ce337bb
#mongoIp=202.107.192.94
#local service
mongoIp
=
192.168.0.101
#mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@202.107.192.94:30000/istarshine_data?authSource=admin&authMechanism=SCRAM-SHA-1
mongoPort
=
30000
#local
#mongoIp=192.168.0.81
#mongoUri=mongodb://192.168.0.81:27017/istarshine_data
#mongoPort=27017
#service
db.username
=
searchhotcrawleruser
mongoUri
=
mongodb://istarshineuser:istarshine1q2w3e4r@192.168.0.101:30000,192.168.0.106:30000,192.168.0.108:30000/istarshine_data?authSource=admin&authMechanism=SCRAM-SHA-1
db.paasword
=
searchhotcrawler1q2w3e4r
db.certifiedDB
=
admin
dbName
=
hot_search_list
dbName
=
hot_search_list
searchCollName
=
hot_search_list
searchCollName
=
hot_search_list
searchCacheCollName
=
hot_search_cache
topicCollName
=
topic_list
topicCollName
=
topic_list
collWechatUserName
=
wechat_user
collWechatUserName
=
wechat_user
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment