Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
ca20b119
Commit
ca20b119
authored
Jul 12, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
将百度热搜、微信热搜、抖音热搜合并到master
parent
a65b651d
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
1161 additions
and
8 deletions
+1161
-8
src/main/java/com/zhiwei/searchhotcrawler/bean/BaiDuHotSearch.java
+187
-0
src/main/java/com/zhiwei/searchhotcrawler/bean/DouyinHotSearch.java
+107
-0
src/main/java/com/zhiwei/searchhotcrawler/bean/SougoHotSearch.java
+124
-0
src/main/java/com/zhiwei/searchhotcrawler/config/Config.java
+6
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
+121
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
+90
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
+93
-0
src/main/java/com/zhiwei/searchhotcrawler/dao/BaiduHotSearchDAO.java
+114
-0
src/main/java/com/zhiwei/searchhotcrawler/dao/DouyinHotSearchDAO.java
+94
-0
src/main/java/com/zhiwei/searchhotcrawler/dao/SougoHotSearchDAO.java
+74
-0
src/main/java/com/zhiwei/searchhotcrawler/dbtemplate/MongoDBTemplate.java
+4
-2
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+6
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+45
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+43
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
+44
-0
src/main/resources/db.properties
+7
-3
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/BaiDuHotSearch.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
import
java.io.Serializable
;
import
java.util.Date
;
import
com.zhiwei.tools.timeparse.TimeParse
;
/**
 * One entry of the Baidu hot-search list as captured by the crawler.
 *
 * <p>The convenience constructor derives {@code id}, {@code time} and {@code day}
 * from a single timestamp so the three values always describe the same instant.
 */
public class BaiDuHotSearch implements Serializable {

    private static final long serialVersionUID = 2076919584659821600L;

    /** Primary key: keyword + "_" + capture time in epoch milliseconds. */
    private String id;
    /** Main link. */
    private String url;
    /** Related link for the keyword. */
    private String everurl;
    /** Keyword. */
    private String kw;
    /** Search index. */
    private int count;
    /** Capture day, formatted as {@code yyyy-MM-dd}. */
    private String day;
    /** Capture time. */
    private Date time;
    /** Change of the search index compared with the previous sample. */
    private int changeCount;
    /** Rank in the list; may be {@code null} when not set. */
    private Integer rank;

    /** Creates an empty entry; every field keeps its default value. */
    public BaiDuHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param rank    rank in the list
     * @param kw      keyword
     * @param everurl related link for the keyword
     * @param count   search index
     */
    public BaiDuHotSearch(Integer rank, String kw, String everurl, int count) {
        // Take the clock once: id, time and day must refer to the same instant.
        Date now = new Date();
        this.id = kw + "_" + now.getTime();
        this.rank = rank;
        this.kw = kw;
        this.count = count;
        this.everurl = everurl;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new BaiDuHotSearch["
                + "id = " + id
                + ", url = " + url
                + ", everurl = " + everurl
                + ", kw = " + kw
                + ", count = " + count
                + ", day = " + day
                + ", time = " + time
                + ", rank = " + rank
                + ", changeCount = " + changeCount
                + "]";
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getEverurl() {
        return everurl;
    }

    public void setEverurl(String everurl) {
        this.everurl = everurl;
    }

    public String getKw() {
        return kw;
    }

    public void setKw(String kw) {
        this.kw = kw;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public int getChangeCount() {
        return changeCount;
    }

    public void setChangeCount(int changeCount) {
        this.changeCount = changeCount;
    }

    /**
     * Returns the rank as a primitive.
     *
     * @return the rank
     * @throws NullPointerException if the rank was never set, because the boxed
     *         field is unboxed on return
     */
    public int getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/bean/DouyinHotSearch.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
import
java.io.Serializable
;
import
java.util.Date
;
/**
 * One entry of the Douyin hot-search list as captured by the crawler.
 *
 * <p>The convenience constructor derives {@code id} and {@code time} from a single
 * timestamp so the key always matches the stored capture time.
 */
public class DouyinHotSearch implements Serializable {

    private static final long serialVersionUID = -7707110236217797510L;

    /** Primary key: word + "_" + capture time in epoch milliseconds. */
    private String id;
    /** Rank in the list. */
    private Integer position;
    /** Hot-search keyword. */
    private String word;
    /** Heat value. */
    private int hot_value;
    /** Capture time. */
    private Date time;
    /** Change of the heat value compared with the previous sample. */
    private int changeCount;

    /** Creates an empty entry; every field keeps its default value. */
    public DouyinHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param position  rank in the list
     * @param word      hot-search keyword
     * @param hot_value heat value; {@code null} is stored as 0 instead of failing on unboxing
     */
    public DouyinHotSearch(Integer position, String word, Integer hot_value) {
        // Take the clock once: id and time must refer to the same instant.
        Date now = new Date();
        this.id = word + "_" + now.getTime();
        this.position = position;
        this.word = word;
        this.hot_value = hot_value != null ? hot_value.intValue() : 0;
        this.time = now;
    }

    @Override
    public String toString() {
        return "new DouyinHotSearch["
                + "id = " + id
                + ", position = " + position
                + ", word = " + word
                + ", hot_value = " + hot_value
                + ", time = " + time
                + ", changeCount = " + changeCount
                + "]";
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Integer getPosition() {
        return position;
    }

    public void setPosition(Integer position) {
        this.position = position;
    }

    public String getWord() {
        return word;
    }

    public void setWord(String word) {
        this.word = word;
    }

    public int getHot_value() {
        return hot_value;
    }

    public void setHot_value(int hot_value) {
        this.hot_value = hot_value;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public int getChangeCount() {
        return changeCount;
    }

    public void setChangeCount(int changeCount) {
        this.changeCount = changeCount;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/bean/SougoHotSearch.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
import
java.io.Serializable
;
import
java.util.Date
;
import
com.zhiwei.tools.timeparse.TimeParse
;
/**
 * One entry of the Sogou WeChat hot-keyword list as captured by the crawler.
 *
 * <p>The convenience constructor derives {@code id}, {@code time} and {@code day}
 * from a single timestamp so the three values always describe the same instant.
 */
public class SougoHotSearch implements Serializable {

    private static final long serialVersionUID = 2076919584659821600L;

    /** Primary key: keyword + "_" + capture time in epoch milliseconds. */
    private String id;
    /** Main link. */
    private String url;
    /** Related link for the keyword. */
    private String everurl;
    /** Keyword. */
    private String kw;
    /** Capture day, formatted as {@code yyyy-MM-dd}. */
    private String day;
    /** Capture time. */
    private Date time;
    /** Rank in the list; may be {@code null} when not set. */
    private Integer rank;

    /** Creates an empty entry; every field keeps its default value. */
    public SougoHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param rank    rank in the list
     * @param kw      keyword
     * @param everurl related link for the keyword
     */
    public SougoHotSearch(Integer rank, String kw, String everurl) {
        // Take the clock once: id, time and day must refer to the same instant.
        Date now = new Date();
        this.id = kw + "_" + now.getTime();
        this.rank = rank;
        this.kw = kw;
        this.everurl = everurl;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new SougoHotSearch["
                + "id = " + id
                + ", url = " + url
                + ", everurl = " + everurl
                + ", kw = " + kw
                + ", day = " + day
                + ", time = " + time
                + ", rank = " + rank
                + "]";
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getEverurl() {
        return everurl;
    }

    public void setEverurl(String everurl) {
        this.everurl = everurl;
    }

    public String getKw() {
        return kw;
    }

    public void setKw(String kw) {
        this.kw = kw;
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public Integer getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/config/Config.java
View file @
ca20b119
...
...
@@ -20,6 +20,9 @@ public class Config {
dbName
=
conf
.
getProperty
(
"dbName"
);
collWeiboName
=
conf
.
getProperty
(
"collWeiboName"
);
collZhihuName
=
conf
.
getProperty
(
"collZhihuName"
);
collBaiduName
=
conf
.
getProperty
(
"collBaiduName"
);
collSougoName
=
conf
.
getProperty
(
"collSougoName"
);
collDouyinName
=
conf
.
getProperty
(
"collDouyinName"
);
collWechatUserName
=
conf
.
getProperty
(
"collWechatUserName"
);
}
catch
(
Exception
e
)
{
...
...
@@ -35,7 +38,9 @@ public class Config {
public
static
String
authDB
;
public
static
String
dbName
;
public
static
String
collWeiboName
;
public
static
String
collBaiduName
;
public
static
String
collZhihuName
;
public
static
String
collWechatUserName
;
public
static
String
collSougoName
;
public
static
String
collDouyinName
;
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Objects
;
import
org.apache.commons.lang3.StringUtils
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch
;
/**
 * Crawler for the Baidu hot-search list page.
 *
 * @author hero
 */
public class BaiDuHotSearchCrawler {

    private static final Logger logger = LoggerFactory.getLogger(BaiDuHotSearchCrawler.class);

    private static HttpBoot httpBoot = new HttpBoot();

    /**
     * Downloads the PC list page (up to three attempts) and extracts one entry per
     * table row that carries a rank.
     *
     * @return the parsed entries; an empty list (never {@code null}) when the page
     *         could not be fetched or parsed
     */
    public static List<BaiDuHotSearch> baiduHotSearch() {
        String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
        List<BaiDuHotSearch> list = new ArrayList<BaiDuHotSearch>();
        for (int i = 0; i < 3; i++) {
            try {
                String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
                if (htmlBody != null && htmlBody.contains("mainBody")) {
                    try {
                        Document document = Jsoup.parse(htmlBody);
                        Elements elements = document.select("table.list-table").select("tr");
                        for (Element element : elements) {
                            try {
                                BaiDuHotSearch hotSearch = parseRow(element);
                                if (hotSearch != null) {
                                    list.add(hotSearch);
                                }
                            } catch (Exception e) {
                                // A malformed row must not abort the remaining rows.
                                logger.error("解析百度风云榜时出现解析错误", e);
                            }
                        }
                    } catch (Exception e) {
                        // Pass the exception itself: fillInStackTrace() would overwrite
                        // the original stack trace with this location.
                        logger.error("解析百度风云榜时出现解析错误,数据不是json结构", e);
                        return list;
                    }
                } else {
                    logger.info("解析百度风云榜时出现解析错误,页面结构有问题");
                }
                // A response was received; only failed requests are retried.
                break;
            } catch (Exception e) {
                logger.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
            }
        }
        logger.info("此轮采集的数据量为:{}", list.size());
        return list;
    }

    /**
     * Converts one table row into an entry.
     *
     * @param element a {@code tr} of the list table
     * @return the entry, or {@code null} when the row carries no rank
     * @throws NumberFormatException if the rank or the search index is not an integer
     */
    private static BaiDuHotSearch parseRow(Element element) {
        // The rank is rendered in either a "num-top" or a "num-normal" span.
        String rankStr = firstPresentText(element, "td.first", "span.num-top", "span.num-normal");
        Integer rank = null;
        if (StringUtils.isNotBlank(rankStr)) {
            rank = Integer.valueOf(rankStr);
        }

        Elements titleLink = element.select("td.keyword").select("a.list-title");
        String kw = titleLink.text();
        logger.info("关键词:{}", kw);
        String everurl = titleLink.attr("href");

        // The search index is rendered in either an "icon-fall" or an "icon-rise" span.
        String hot = firstPresentText(element, "td.last", "span.icon-fall", "span.icon-rise");
        int count = 0;
        if (StringUtils.isNotBlank(hot)) {
            count = Integer.parseInt(hot);
        }

        if (rank == null) {
            return null;
        }
        return new BaiDuHotSearch(rank, kw, everurl, count);
    }

    /**
     * Returns the text of the first of two span selectors that matches inside a cell.
     *
     * @return the matched text, or {@code null} when neither selector matches
     */
    private static String firstPresentText(Element row, String cell, String first, String second) {
        Elements scope = row.select(cell);
        Elements primary = scope.select(first);
        if (!primary.isEmpty()) {
            return primary.text();
        }
        Elements fallback = scope.select(second);
        if (!fallback.isEmpty()) {
            return fallback.text();
        }
        return null;
    }
}
\ No newline at end of file
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.List
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.DouyinHotSearch
;
/**
 * Crawler for the Douyin hot-search list.
 */
public class DouyinHotSearchCrawler {

    // Bound to this class so log lines are attributed to the Douyin crawler.
    private static final Logger logger = LoggerFactory.getLogger(DouyinHotSearchCrawler.class);

    private static HttpBoot httpBoot = new HttpBoot();

    /**
     * Requests the hot-search endpoint (up to three attempts) and converts every
     * element of {@code data.word_list} into an entry.
     *
     * @return the parsed entries; an empty list (never {@code null}) when no attempt
     *         produced a usable response
     */
    public static List<DouyinHotSearch> getMobileDouyinHotList() {
        String url = "https://api.amemv.com/aweme/v1/hot/search/list/";
        for (int j = 0; j < 3; j++) {
            try {
                String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
                if (htmlBody != null && htmlBody.contains("word_list")) {
                    return parseWordList(htmlBody);
                }
                logger.warn("抖音热搜榜响应中不包含word_list,准备重试");
            } catch (IOException e) {
                logger.error("获取抖音热搜榜时出现问题", e);
            } catch (RuntimeException e) {
                // Malformed JSON or non-numeric fields: retrying the same payload
                // shape is pointless, and letting the exception escape would end
                // a periodically scheduled caller.
                logger.error("解析抖音热搜榜时出现解析错误", e);
                break;
            }
        }
        return new ArrayList<DouyinHotSearch>();
    }

    /**
     * Parses the response body into entries.
     *
     * @param htmlBody raw JSON text of the response
     * @return one entry per element of {@code data.word_list}
     * @throws RuntimeException if the JSON is malformed or a numeric field is missing
     *         or not an integer
     */
    private static List<DouyinHotSearch> parseWordList(String htmlBody) {
        List<DouyinHotSearch> list = new ArrayList<DouyinHotSearch>();
        JSONObject data = JSONObject.parseObject(htmlBody);
        JSONArray word_list = data.getJSONObject("data").getJSONArray("word_list");
        for (int i = 0; i < word_list.size(); i++) {
            JSONObject wl = word_list.getJSONObject(i);
            Integer position = Integer.valueOf(wl.getString("position"));
            String word = wl.getString("word");
            Integer hot_value = Integer.valueOf(wl.getString("hot_value"));
            logger.info("热度为:::{}", hot_value);
            list.add(new DouyinHotSearch(position, word, hot_value));
        }
        return list;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Objects
;
import
org.apache.commons.lang3.StringUtils
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.SougoHotSearch
;
/**
 * Crawler for the hot-keyword list on the Sogou WeChat home page.
 *
 * @author hero
 */
public class SougoHotSearchCrawler {

    private static final Logger logger = LoggerFactory.getLogger(SougoHotSearchCrawler.class);

    private static HttpBoot httpBoot = new HttpBoot();

    /**
     * Downloads the home page (up to three attempts) and extracts one entry per
     * {@code li} of {@code ol#topwords} that carries a rank.
     *
     * @return the parsed entries; an empty list (never {@code null}) when the page
     *         could not be fetched or parsed
     */
    public static List<SougoHotSearch> sougoHotSearch() {
        String url = "https://weixin.sogou.com";
        List<SougoHotSearch> list = new ArrayList<SougoHotSearch>();
        for (int i = 0; i < 3; i++) {
            try {
                String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
                if (htmlBody != null && htmlBody.contains("topwords")) {
                    try {
                        Document document = Jsoup.parse(htmlBody);
                        Elements elements = document.select("ol#topwords").select("li");
                        for (Element element : elements) {
                            try {
                                SougoHotSearch hotSearch = parseItem(element);
                                if (hotSearch != null) {
                                    list.add(hotSearch);
                                }
                            } catch (Exception e) {
                                // A malformed item must not abort the remaining items.
                                logger.error("解析搜狗微信时出现解析错误", e);
                            }
                        }
                    } catch (Exception e) {
                        // Pass the exception itself: fillInStackTrace() would overwrite
                        // the original stack trace with this location.
                        logger.error("解析搜狗微信时出现解析错误,数据不是json结构", e);
                        return list;
                    }
                } else {
                    logger.info("解析搜狗微信时出现解析错误,页面结构有问题");
                }
                // A response was received; only failed requests are retried.
                break;
            } catch (Exception e) {
                logger.error("解析搜狗微信时出现解析错误,页面结构有问题", e);
            }
        }
        logger.info("此轮采集的数据量为:{}", list.size());
        return list;
    }

    /**
     * Converts one list item into an entry.
     *
     * @param element an {@code li} of the keyword list
     * @return the entry, or {@code null} when the item carries no rank
     * @throws NumberFormatException if the rank text is not an integer
     */
    private static SougoHotSearch parseItem(Element element) {
        Elements item = element.select("li");

        String rankStr = null;
        Elements rankNode = item.select("i");
        if (!rankNode.isEmpty()) {
            rankStr = rankNode.text();
        }
        Integer rank = null;
        if (StringUtils.isNotBlank(rankStr)) {
            rank = Integer.valueOf(rankStr);
        }

        Elements link = item.select("a");
        String kw = link.text();
        logger.info("关键词:{}", kw);
        String everurl = link.attr("href");

        if (rank == null) {
            return null;
        }
        return new SougoHotSearch(rank, kw, everurl);
    }
}
src/main/java/com/zhiwei/searchhotcrawler/dao/BaiduHotSearchDAO.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBCursor
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch
;
import
com.zhiwei.searchhotcrawler.cache.CacheManager
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
 * MongoDB access for Baidu hot-search entries, bound to the database and
 * collection named by {@code Config.dbName} and {@code Config.collBaiduName}.
 */
public class BaiduHotSearchDAO extends MongoDBTemplate {

    public BaiduHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collBaiduName);
    }

    /**
     * Inserts a batch of documents, trying up to three times.
     *
     * @param list documents to insert; a {@code null} or empty batch is ignored
     */
    public void addBaiduSearch(List<DBObject> list) {
        // Nothing to write: skip the call instead of sending (and retrying) an empty batch.
        if (list == null || list.isEmpty()) {
            return;
        }
        for (int i = 0; i < 3; i++) {
            try {
                this.getReadColl().insert(list);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Computes how much the search index changed since the most recent stored
     * sample of the same keyword.
     *
     * @param baiduHotSearch the freshly crawled entry
     * @return current count minus the latest stored count; 0 when no earlier sample
     *         exists or the lookup fails
     */
    public int getChangeCount(BaiDuHotSearch baiduHotSearch) {
        int result = 0;
        DBObject query = new BasicDBObject();
        query.put("kw", baiduHotSearch.getKw());
        DBObject sort = new BasicDBObject();
        sort.put("time", -1);
        DBCursor cur = null;
        try {
            // Newest document for this keyword only.
            cur = this.getReadColl().find(query).sort(sort).limit(1);
            if (cur.hasNext()) {
                DBObject doc = cur.next();
                result = baiduHotSearch.getCount() - Integer.valueOf(doc.get("count").toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the cursor on the failure path as well.
            if (cur != null) {
                cur.close();
            }
        }
        return result;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/dao/DouyinHotSearchDAO.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBCursor
;
import
com.mongodb.DBObject
;
import
com.mongodb.WriteConcern
;
import
com.zhiwei.searchhotcrawler.bean.DouyinHotSearch
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
 * MongoDB access for Douyin hot-search entries, bound to the database and
 * collection named by {@code Config.dbName} and {@code Config.collDouyinName}.
 */
public class DouyinHotSearchDAO extends MongoDBTemplate {

    public DouyinHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collDouyinName);
    }

    /**
     * Inserts one document, trying up to three times.
     *
     * @param douyin document to insert
     */
    @SuppressWarnings("deprecation")
    public void addDouyinHotSearch(DBObject douyin) {
        for (int i = 0; i < 3; i++) {
            try {
                this.getReadColl().insert(douyin, WriteConcern.SAFE);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                // Report the failure before retrying instead of discarding it silently.
                e.printStackTrace();
            }
        }
    }

    /**
     * Computes how much the heat value changed since the most recent stored
     * sample of the same word.
     *
     * @param douyinHotSearch the freshly crawled entry
     * @return current heat value minus the latest stored one; 0 when no earlier
     *         sample exists or the lookup fails
     */
    public int getChangeCount(DouyinHotSearch douyinHotSearch) {
        int result = 0;
        DBObject query = new BasicDBObject();
        query.put("word", douyinHotSearch.getWord());
        DBObject sort = new BasicDBObject();
        sort.put("time", -1);
        DBCursor cur = null;
        try {
            // Newest document for this word only.
            cur = this.getReadColl().find(query).sort(sort).limit(1);
            if (cur.hasNext()) {
                DBObject doc = cur.next();
                result = douyinHotSearch.getHot_value() - Integer.valueOf(doc.get("hot_value").toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the cursor on the failure path as well.
            if (cur != null) {
                cur.close();
            }
        }
        return result;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/dao/SougoHotSearchDAO.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
java.util.List
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
 * MongoDB access for Sogou WeChat hot-keyword entries, bound to the database and
 * collection named by {@code Config.dbName} and {@code Config.collSougoName}.
 */
public class SougoHotSearchDAO extends MongoDBTemplate {

    public SougoHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collSougoName);
    }

    /**
     * Inserts a batch of documents, trying up to three times.
     *
     * @param list documents to insert; a {@code null} or empty batch is ignored
     */
    public void addSougoSearch(List<DBObject> list) {
        // Nothing to write: skip the call instead of sending (and retrying) an empty batch.
        if (list == null || list.isEmpty()) {
            return;
        }
        for (int i = 0; i < 3; i++) {
            try {
                this.getReadColl().insert(list);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
src/main/java/com/zhiwei/searchhotcrawler/dbtemplate/MongoDBTemplate.java
View file @
ca20b119
...
...
@@ -29,11 +29,13 @@ public class MongoDBTemplate {
ServerAddress
address
=
new
ServerAddress
(
Config
.
mongoIp
,
Config
.
mongoPort
);
if
(
reader
==
null
)
{
reader
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credential
));
// reader = new MongoClient(address, Arrays.asList(credential));
reader
=
new
MongoClient
(
address
);
}
if
(
writer
==
null
)
{
writer
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credential
));
// writer = new MongoClient(address, Arrays.asList(credential));
writer
=
new
MongoClient
(
address
);
}
}
catch
(
MongoException
e
)
{
e
.
printStackTrace
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
ca20b119
...
...
@@ -5,8 +5,11 @@ import java.util.concurrent.ScheduledExecutorService;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.cache.CacheListener
;
import
com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.DouyinHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SougoHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun
;
import
com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun
;
...
...
@@ -24,6 +27,9 @@ public class HotSearchRun {
public
void
showTimer
()
{
scheduExec
.
scheduleAtFixedRate
(
new
WeiboHotSearchRun
(),
0
,
1
,
TimeUnit
.
MINUTES
);
scheduExec
.
scheduleAtFixedRate
(
new
ZhihuHotSearchRun
(),
0
,
1
,
TimeUnit
.
MINUTES
);
scheduExec
.
scheduleAtFixedRate
(
new
BaiduHotSearchRun
(),
0
,
5
,
TimeUnit
.
MINUTES
);
scheduExec
.
scheduleAtFixedRate
(
new
SougoHotSearchRun
(),
0
,
1
,
TimeUnit
.
MINUTES
);
scheduExec
.
scheduleAtFixedRate
(
new
DouyinHotSearchRun
(),
0
,
10
,
TimeUnit
.
MINUTES
);
}
public
static
void
main
(
String
[]
args
)
{
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch
;
import
com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.BaiduHotSearchDAO
;
/**
 * Periodic task: crawls the Baidu hot-search list, computes the change of each
 * keyword's search index against the stored data and saves the batch.
 */
public class BaiduHotSearchRun extends Thread {

    private static final Logger logger = LoggerFactory.getLogger(BaiduHotSearchRun.class);

    private BaiduHotSearchDAO baiduHotSearchDAO = new BaiduHotSearchDAO();

    @Override
    public void run() {
        logger.info("百度风云榜采集开始........");
        try {
            List<BaiDuHotSearch> list = BaiDuHotSearchCrawler.baiduHotSearch();
            int size = list != null ? list.size() : 0;
            logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(size));
            // The crawler may hand back null or nothing at all; only store real data.
            if (size > 0) {
                List<DBObject> data = new ArrayList<DBObject>(size);
                for (BaiDuHotSearch baiduHotSearch : list) {
                    int changeCount = baiduHotSearchDAO.getChangeCount(baiduHotSearch);
                    DBObject doc = new BasicDBObject();
                    doc.put("_id", baiduHotSearch.getId());
                    doc.put("kw", baiduHotSearch.getKw());
                    doc.put("everurl", baiduHotSearch.getEverurl());
                    doc.put("count", baiduHotSearch.getCount());
                    doc.put("day", baiduHotSearch.getDay());
                    doc.put("time", baiduHotSearch.getTime());
                    doc.put("changeCount", changeCount);
                    doc.put("rank", baiduHotSearch.getRank());
                    data.add(doc);
                }
                baiduHotSearchDAO.addBaiduSearch(data);
            }
        } catch (RuntimeException e) {
            // When run through scheduleAtFixedRate, an exception escaping run()
            // suppresses every later execution, so it is logged here instead.
            logger.error("百度风云榜采集出现异常", e);
        }
        logger.info("百度风云榜采集结束........");
    }
}
\ No newline at end of file
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.DouyinHotSearch
;
import
com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.DouyinHotSearchDAO
;
/**
 * Periodic task: crawls the Douyin hot-search list, computes the change of each
 * word's heat value against the stored data and saves the entries one by one.
 */
public class DouyinHotSearchRun extends Thread {

    private static final Logger logger = LoggerFactory.getLogger(DouyinHotSearchRun.class);

    private DouyinHotSearchDAO douyinHotSearchDAO = new DouyinHotSearchDAO();

    @Override
    public void run() {
        logger.info("抖音热搜榜采集开始........");
        try {
            List<DouyinHotSearch> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
            int size = list != null ? list.size() : 0;
            logger.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(size));
            // The crawler may hand back null when every attempt failed.
            if (list != null) {
                for (DouyinHotSearch douyinHotSearch : list) {
                    int changeCount = douyinHotSearchDAO.getChangeCount(douyinHotSearch);
                    DBObject douyin = new BasicDBObject();
                    douyin.put("_id", douyinHotSearch.getId());
                    douyin.put("word", douyinHotSearch.getWord());
                    douyin.put("position", douyinHotSearch.getPosition());
                    douyin.put("hot_value", douyinHotSearch.getHot_value());
                    douyin.put("time", douyinHotSearch.getTime());
                    douyin.put("changeCount", changeCount);
                    douyinHotSearchDAO.addDouyinHotSearch(douyin);
                }
            }
        } catch (RuntimeException e) {
            // When run through scheduleAtFixedRate, an exception escaping run()
            // suppresses every later execution, so it is logged here instead.
            logger.error("抖音热搜榜采集出现异常", e);
        }
        logger.info("抖音热搜榜采集结束........");
    }
}
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
View file @
ca20b119
...
...
@@ -66,7 +66,7 @@ public class SendWeiboHotSearchRun extends Thread {
* @return void 返回类型
*/
public
static
void
sendTemplateByUserIds
(
String
title
,
String
time
,
String
url
)
{
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<>();
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
JSONObject
first
=
new
JSONObject
();
first
.
put
(
"value"
,
"您好,有一条来自微博热搜榜的预警通知。"
);
dataMap
.
put
(
"first"
,
first
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
View file @
ca20b119
...
...
@@ -66,7 +66,7 @@ public class SendZhihuHotSearchRun extends Thread{
*/
public
static
void
sendTemplateByUserIds
(
String
title
,
String
time
,
String
url
)
{
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<>();
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
JSONObject
first
=
new
JSONObject
();
first
.
put
(
"value"
,
"您好,有一条来自知乎热搜榜的预警通知。"
);
dataMap
.
put
(
"first"
,
first
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
0 → 100644
View file @
ca20b119
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.SougoHotSearch
;
import
com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.SougoHotSearchDAO
;
/**
 * Periodic task: crawls the Sogou WeChat hot-keyword list and saves the batch.
 */
public class SougoHotSearchRun extends Thread {

    private static final Logger logger = LoggerFactory.getLogger(SougoHotSearchRun.class);

    private SougoHotSearchDAO sougoHotSearchDAO = new SougoHotSearchDAO();

    @Override
    public void run() {
        logger.info("搜狗微信采集开始........");
        try {
            List<SougoHotSearch> list = SougoHotSearchCrawler.sougoHotSearch();
            int size = list != null ? list.size() : 0;
            logger.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(size));
            // The crawler may hand back null or nothing at all; only store real data.
            if (size > 0) {
                List<DBObject> data = new ArrayList<DBObject>(size);
                for (SougoHotSearch sougoHotSearch : list) {
                    DBObject doc = new BasicDBObject();
                    doc.put("_id", sougoHotSearch.getId());
                    doc.put("kw", sougoHotSearch.getKw());
                    doc.put("everurl", sougoHotSearch.getEverurl());
                    doc.put("day", sougoHotSearch.getDay());
                    doc.put("time", sougoHotSearch.getTime());
                    doc.put("rank", sougoHotSearch.getRank());
                    data.add(doc);
                }
                sougoHotSearchDAO.addSougoSearch(data);
            }
        } catch (RuntimeException e) {
            // When run through scheduleAtFixedRate, an exception escaping run()
            // suppresses every later execution, so it is logged here instead.
            logger.error("搜狗微信采集出现异常", e);
        }
        logger.info("搜狗微信采集结束........");
    }
}
src/main/resources/db.properties
View file @
ca20b119
#mongoIp=202.107.192.94
mongoIp
=
192.168.0.
101
mongoPort
=
30000
mongoIp
=
192.168.0.
247
mongoPort
=
27017
db.username
=
zzwno
db.paasword
=
zzwno1q2w3e4r
db.certifiedDB
=
admin
db.certifiedDB
=
oneDB
dbName
=
NetWork
collWeiboName
=
weibo_hotsearch2018_10
collZhihuName
=
zhihu_hotsearch2018_10
collWechatUserName
=
wechat_user
collBaiduName
=
baidu_hotsearch2019_07
collSougoName
=
sougo_hotsearch2019_07
collDouyinName
=
douyin_hotsearch2019_07
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment