Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
41dee457
Commit
41dee457
authored
Jul 17, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加抖音、微信、百度热搜采集
parent
b528f200
Show whitespace changes
Inline
Side-by-side
Showing
28 changed files
with
326 additions
and
1036 deletions
+326
-1036
src/main/java/com/zhiwei/searchhotcrawler/bean/BaiDuHotSearch.java
+0
-187
src/main/java/com/zhiwei/searchhotcrawler/bean/DouyinHotSearch.java
+0
-121
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchList.java
+28
-7
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchType.java
+9
-0
src/main/java/com/zhiwei/searchhotcrawler/bean/SougoHotSearch.java
+0
-124
src/main/java/com/zhiwei/searchhotcrawler/bean/ZhihuHotSearch.java
+0
-69
src/main/java/com/zhiwei/searchhotcrawler/config/Config.java
+2
-10
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
+6
-6
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
+6
-5
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
+32
-32
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+8
-7
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+15
-23
src/main/java/com/zhiwei/searchhotcrawler/dao/BaiduHotSearchDAO.java
+0
-110
src/main/java/com/zhiwei/searchhotcrawler/dao/DouyinHotSearchDAO.java
+0
-102
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchListDAO.java
+39
-24
src/main/java/com/zhiwei/searchhotcrawler/dao/SougoHotSearchDAO.java
+0
-65
src/main/java/com/zhiwei/searchhotcrawler/dao/WechatUserDao.java
+11
-4
src/main/java/com/zhiwei/searchhotcrawler/dao/ZhihuHotSearchDAO.java
+0
-66
src/main/java/com/zhiwei/searchhotcrawler/dbtemplate/MongoDBTemplate.java
+4
-4
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchListTest.java
+87
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+10
-8
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+14
-12
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
+5
-5
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
+7
-7
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
+10
-8
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
+8
-6
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
+17
-13
src/main/resources/db.properties
+8
-11
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/BaiDuHotSearch.java
deleted
100644 → 0
View file @
b528f200
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
import
java.io.Serializable
;
import
java.util.Date
;
import
com.zhiwei.tools.timeparse.TimeParse
;
/**
 * One Baidu hot-search entry as captured by a single crawl.
 *
 * <p>The four-argument constructor stamps the entry with the creation instant:
 * {@code id}, {@code time} and {@code day} are all derived from the same clock reading.
 */
public class BaiDuHotSearch implements Serializable {

    private static final long serialVersionUID = 2076919584659821600L;

    /** Primary key: keyword + "_" + creation time in epoch milliseconds. */
    private String id;
    /** Main link. Not populated by the constructors; set it through {@link #setUrl(String)}. */
    private String url;
    /** Related link. */
    private String everurl;
    /** Keyword. */
    private String kw;
    /** Search index. */
    private int count;
    /** Creation day formatted with the pattern "yyyy-MM-dd". */
    private String day;
    /** Creation time. */
    private Date time;
    /** Change in the count compared with the previous minute. */
    private int changeCount;
    /** Rank; may be null when the entry was built without one. */
    private Integer rank;

    /** Creates an empty entry; every field keeps its Java default value. */
    public BaiDuHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param rank    rank of the keyword, may be null
     * @param kw      keyword, also used as the prefix of the generated id
     * @param everurl related link
     * @param count   search index
     */
    public BaiDuHotSearch(Integer rank, String kw, String everurl, int count) {
        // Read the clock once: separate new Date() calls could straddle a millisecond
        // (or midnight) boundary and leave id, time and day inconsistent with each other.
        final Date now = new Date();
        this.id = kw + "_" + now.getTime();
        this.rank = rank;
        this.kw = kw;
        this.count = count;
        this.everurl = everurl;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new BaiDuHotSearch["
                + "id = " + id
                + ", url = " + url
                + ", everurl = " + everurl
                + ", kw = " + kw
                + ", count = " + count
                + ", day = " + day
                + ", time = " + time
                + ", rank = " + rank
                + ", changeCount = " + changeCount
                + "]";
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getEverurl() {
        return everurl;
    }

    public void setEverurl(String everurl) {
        this.everurl = everurl;
    }

    public String getKw() {
        return kw;
    }

    public void setKw(String kw) {
        this.kw = kw;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public int getChangeCount() {
        return changeCount;
    }

    public void setChangeCount(int changeCount) {
        this.changeCount = changeCount;
    }

    /**
     * Returns the rank as a primitive.
     *
     * @return the rank
     * @throws NullPointerException if the rank is null, because the boxed field is unboxed here
     */
    public int getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/bean/DouyinHotSearch.java
deleted
100644 → 0
View file @
b528f200
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
import
java.io.Serializable
;
import
java.util.Date
;
import
com.zhiwei.tools.timeparse.TimeParse
;
/**
 * One Douyin hot-search entry as captured by a single crawl.
 *
 * <p>The three-argument constructor stamps the entry with the creation instant:
 * {@code id}, {@code time} and {@code day} are all derived from the same clock reading.
 */
public class DouyinHotSearch implements Serializable {

    private static final long serialVersionUID = -7707110236217797510L;

    /** Primary key: word + "_" + creation time in epoch milliseconds. */
    private String id;
    // private String url; // message link
    /** Rank. */
    private Integer position;
    /** Hot-search keyword. */
    private String word;
    /** Heat value. */
    private int hot_value;
    /** Creation time. */
    private Date time;
    /** Change in the heat value compared with the previous minute. */
    private int changeCount;
    /** Creation day formatted with the pattern "yyyy-MM-dd". */
    private String day;

    /** Creates an empty entry; every field keeps its Java default value. */
    public DouyinHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param position  rank of the keyword, may be null
     * @param word      hot-search keyword, also used as the prefix of the generated id
     * @param hot_value heat value; must not be null because it is unboxed into an int field
     * @throws NullPointerException if {@code hot_value} is null
     */
    public DouyinHotSearch(Integer position, String word, Integer hot_value) {
        // Read the clock once: separate new Date() calls could straddle a millisecond
        // (or midnight) boundary and leave id, time and day inconsistent with each other.
        final Date now = new Date();
        this.id = word + "_" + now.getTime();
        this.position = position;
        this.word = word;
        this.hot_value = hot_value;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new DouyinHotSearch["
                + "id = " + id
                + ", position = " + position
                + ", word = " + word
                + ", hot_value = " + hot_value
                + ", time = " + time
                + ", changeCount = " + changeCount
                + "]";
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Integer getPosition() {
        return position;
    }

    public void setPosition(Integer position) {
        this.position = position;
    }

    public String getWord() {
        return word;
    }

    public void setWord(String word) {
        this.word = word;
    }

    public int getHot_value() {
        return hot_value;
    }

    public void setHot_value(int hot_value) {
        this.hot_value = hot_value;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public int getChangeCount() {
        return changeCount;
    }

    public void setChangeCount(int changeCount) {
        this.changeCount = changeCount;
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/bean/
WeiboHotSearch
.java
→
src/main/java/com/zhiwei/searchhotcrawler/bean/
HotSearchList
.java
View file @
41dee457
...
...
@@ -12,7 +12,7 @@ import java.util.Date;
import
com.zhiwei.tools.timeparse.TimeParse
;
public
class
WeiboHotSearch
implements
Serializable
{
public
class
HotSearchList
implements
Serializable
{
private
static
final
long
serialVersionUID
=
2076919584659821600L
;
...
...
@@ -34,10 +34,13 @@ public class WeiboHotSearch implements Serializable{
private
int
rank
;
//排名
private
String
type
;
//分类
public
WeiboHotSearch
(){}
public
WeiboHotSearch
(
String
url
,
String
name
,
int
count
,
boolean
hot
,
int
rank
){
public
HotSearchList
(){}
public
HotSearchList
(
String
url
,
String
name
,
int
count
,
boolean
hot
,
int
rank
,
String
type
){
this
.
id
=
name
+
"_"
+
new
Date
().
getTime
();
this
.
url
=
url
;
this
.
name
=
name
;
...
...
@@ -46,12 +49,26 @@ public class WeiboHotSearch implements Serializable{
this
.
rank
=
rank
;
this
.
time
=
new
Date
();
this
.
day
=
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd"
);
this
.
type
=
type
;
}
public
HotSearchList
(
String
url
,
String
name
,
Integer
count
,
int
rank
,
String
type
){
this
.
id
=
name
+
"_"
+
new
Date
().
getTime
();
this
.
url
=
url
;
this
.
name
=
name
;
this
.
count
=
count
;
this
.
hot
=
true
;
this
.
rank
=
rank
;
this
.
time
=
new
Date
();
this
.
day
=
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd"
);
this
.
type
=
type
;
}
@Override
public
String
toString
(){
return
"new
WeiboHotSearch
["
return
"new
HotSearchList
["
+
"id = "
+
id
+
", url = "
+
url
+
", name = "
+
name
...
...
@@ -61,6 +78,7 @@ public class WeiboHotSearch implements Serializable{
+
", rank = "
+
rank
+
", day = "
+
day
+
", changeCount = "
+
changeCount
+
", type = "
+
type
+
"]"
;
}
...
...
@@ -141,9 +159,12 @@ public class WeiboHotSearch implements Serializable{
this
.
rank
=
rank
;
}
public
String
getType
()
{
return
type
;
}
public
void
setType
(
String
type
)
{
this
.
type
=
type
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchType.java
0 → 100644
View file @
41dee457
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
/**
 * Sources a hot-search entry can come from.
 *
 * <p>The crawlers pass {@code name()} of a constant as the {@code type} string of an entry,
 * so the constant identifiers themselves are data: renaming one changes the value that is
 * handed on. Declaration order is kept stable as well.
 */
public enum HotSearchType {

    /** Baidu hot search. */
    百度热搜,

    /** Weibo hot search. */
    微博热搜,

    /** Zhihu hot search. */
    知乎热搜,

    /** Douyin hot search. */
    抖音热搜,

    /** Sogou WeChat hot search. */
    搜狗微信热搜;
}
src/main/java/com/zhiwei/searchhotcrawler/bean/SougoHotSearch.java
deleted
100644 → 0
View file @
b528f200
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
import
java.io.Serializable
;
import
java.util.Date
;
import
com.zhiwei.tools.timeparse.TimeParse
;
/**
 * One Sogou WeChat hot-search entry as captured by a single crawl.
 *
 * <p>The three-argument constructor stamps the entry with the creation instant:
 * {@code id}, {@code time} and {@code day} are all derived from the same clock reading.
 */
public class SougoHotSearch implements Serializable {

    private static final long serialVersionUID = 2076919584659821600L;

    /** Primary key: keyword + "_" + creation time in epoch milliseconds. */
    private String id;
    /** Main link. Not populated by the constructors; set it through {@link #setUrl(String)}. */
    private String url;
    /** Related link. */
    private String everurl;
    /** Keyword. */
    private String kw;
    /** Creation day formatted with the pattern "yyyy-MM-dd". */
    private String day;
    /** Creation time. */
    private Date time;
    /** Rank; may be null. */
    private Integer rank;

    /** Creates an empty entry; every field keeps its Java default value. */
    public SougoHotSearch() {
    }

    /**
     * Creates an entry stamped with the current time.
     *
     * @param rank    rank of the keyword, may be null
     * @param kw      keyword, also used as the prefix of the generated id
     * @param everurl related link
     */
    public SougoHotSearch(Integer rank, String kw, String everurl) {
        // Read the clock once: separate new Date() calls could straddle a millisecond
        // (or midnight) boundary and leave id, time and day inconsistent with each other.
        final Date now = new Date();
        this.id = kw + "_" + now.getTime();
        this.rank = rank;
        this.kw = kw;
        this.everurl = everurl;
        this.time = now;
        this.day = TimeParse.dateFormartString(now, "yyyy-MM-dd");
    }

    @Override
    public String toString() {
        return "new SougoHotSearch["
                + "id = " + id
                + ", url = " + url
                + ", everurl = " + everurl
                + ", kw = " + kw
                + ", day = " + day
                + ", time = " + time
                + ", rank = " + rank
                + "]";
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getEverurl() {
        return everurl;
    }

    public void setEverurl(String everurl) {
        this.everurl = everurl;
    }

    public String getKw() {
        return kw;
    }

    public void setKw(String kw) {
        this.kw = kw;
    }

    public String getDay() {
        return day;
    }

    public void setDay(String day) {
        this.day = day;
    }

    public Date getTime() {
        return time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    public Integer getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/bean/ZhihuHotSearch.java
deleted
100644 → 0
View file @
b528f200
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
import
java.io.Serializable
;
import
java.util.Date
;
/**
 * Plain serializable holder for one Zhihu hot-search entry: the link, the raw query,
 * the query text shown to users, and the time attached to the entry.
 */
public class ZhihuHotSearch implements Serializable {

    private static final long serialVersionUID = -7707110236217797510L;

    /** Message link. */
    private String url;
    /** Hot-search keyword. */
    private String query;
    /** Hot-search keyword in its display form. */
    private String displayQuery;
    /** Time of the entry. */
    private Date time;

    /** Creates an entry with every field left null. */
    public ZhihuHotSearch() {
    }

    /**
     * Creates a fully populated entry. The given {@code time} instance is stored as-is.
     *
     * @param url          message link
     * @param query        hot-search keyword
     * @param displayQuery display form of the keyword
     * @param time         time of the entry
     */
    public ZhihuHotSearch(String url, String query, String displayQuery, Date time) {
        this.url = url;
        this.query = query;
        this.displayQuery = displayQuery;
        this.time = time;
    }

    // ---- accessors -------------------------------------------------------

    public String getUrl() {
        return this.url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getQuery() {
        return this.query;
    }

    public void setQuery(String query) {
        this.query = query;
    }

    public String getDisplayQuery() {
        return this.displayQuery;
    }

    public void setDisplayQuery(String displayQuery) {
        this.displayQuery = displayQuery;
    }

    public Date getTime() {
        return this.time;
    }

    public void setTime(Date time) {
        this.time = time;
    }

    /** Renders all four fields; null fields are printed as "null". */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("new ZhihuHotSearch[");
        text.append("url = ").append(url);
        text.append(", query = ").append(query);
        text.append(", displayQuery = ").append(displayQuery);
        text.append(", time = ").append(time);
        text.append("]");
        return text.toString();
    }
}
src/main/java/com/zhiwei/searchhotcrawler/config/Config.java
View file @
41dee457
...
...
@@ -18,11 +18,7 @@ public class Config {
userPwd
=
conf
.
getProperty
(
"db.paasword"
);
authDB
=
conf
.
getProperty
(
"db.certifiedDB"
);
dbName
=
conf
.
getProperty
(
"dbName"
);
collWeiboName
=
conf
.
getProperty
(
"collWeiboName"
);
collZhihuName
=
conf
.
getProperty
(
"collZhihuName"
);
collBaiduName
=
conf
.
getProperty
(
"collBaiduName"
);
collSougoName
=
conf
.
getProperty
(
"collSougoName"
);
collDouyinName
=
conf
.
getProperty
(
"collDouyinName"
);
collName
=
conf
.
getProperty
(
"collName"
);
collWechatUserName
=
conf
.
getProperty
(
"collWechatUserName"
);
}
catch
(
Exception
e
)
{
...
...
@@ -37,10 +33,6 @@ public class Config {
public
static
String
userPwd
;
public
static
String
authDB
;
public
static
String
dbName
;
public
static
String
collWeiboName
;
public
static
String
collBaiduName
;
public
static
String
collZhihuName
;
public
static
String
collName
;
public
static
String
collWechatUserName
;
public
static
String
collSougoName
;
public
static
String
collDouyinName
;
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
View file @
41dee457
...
...
@@ -14,7 +14,8 @@ import org.slf4j.LoggerFactory;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
/**
* @ClassName:BaiDuHotSearch
...
...
@@ -34,7 +35,7 @@ public class BaiDuHotSearchCrawler {
* @param 设定文件
* @return void 返回类型
*/
public
static
List
<
BaiDuHotSearch
>
baiduHotSearch
()
{
public
static
List
<
HotSearchList
>
baiduHotSearch
()
{
String
url
=
"http://top.baidu.com/buzz?b=1&fr=topindex"
;
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
...
...
@@ -55,8 +56,8 @@ public class BaiDuHotSearchCrawler {
* @param htmlBody
* @return
*/
private
static
List
<
BaiDuHotSearch
>
ansysData
(
String
htmlBody
){
List
<
BaiDuHotSearch
>
list
=
new
ArrayList
<>();
private
static
List
<
HotSearchList
>
ansysData
(
String
htmlBody
){
List
<
HotSearchList
>
list
=
new
ArrayList
<>();
try
{
Document
document
=
Jsoup
.
parse
(
htmlBody
);
Elements
elements
=
document
.
select
(
"table.list-table"
).
select
(
"tr"
);
...
...
@@ -94,8 +95,7 @@ public class BaiDuHotSearchCrawler {
if
(
StringUtils
.
isNotBlank
(
hot
))
{
count
=
Integer
.
valueOf
(
hot
);
}
BaiDuHotSearch
hotSearch
=
new
BaiDuHotSearch
(
rank
,
kw
,
everurl
,
count
);
HotSearchList
hotSearch
=
new
HotSearchList
(
everurl
,
kw
,
count
,
rank
,
HotSearchType
.
百度热搜
.
name
());
if
(
Objects
.
nonNull
(
rank
))
{
list
.
add
(
hotSearch
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
View file @
41dee457
...
...
@@ -12,7 +12,8 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.DouyinHotSearch
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
...
...
@@ -34,13 +35,13 @@ public class DouyinHotSearchCrawler {
* @param @return 设定文件
* @return List<ZhihuHotSearch> 返回类型
*/
public
static
List
<
DouyinHotSearch
>
getMobileDouyinHotList
(){
List
<
DouyinHotSearch
>
list
=
null
;
public
static
List
<
HotSearchList
>
getMobileDouyinHotList
(){
List
<
HotSearchList
>
list
=
null
;
String
url
=
"https://api.amemv.com/aweme/v1/hot/search/list/"
;
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"word_list"
)){
list
=
new
ArrayList
<
DouyinHotSearch
>();
list
=
new
ArrayList
<>();
JSONObject
data
=
JSONObject
.
parseObject
(
htmlBody
);
JSONArray
wordList
=
data
.
getJSONObject
(
"data"
).
getJSONArray
(
"word_list"
);
String
positionStr
=
null
;
...
...
@@ -59,7 +60,7 @@ public class DouyinHotSearchCrawler {
Integer
hotValue
=
null
;
hotValue
=
Integer
.
valueOf
(
hotValueStr
);
// logger.info("热度为:::{}", hot_value);
DouyinHotSearch
douyin
=
new
DouyinHotSearch
(
position
,
word
,
hotValue
);
HotSearchList
douyin
=
new
HotSearchList
(
null
,
word
,
hotValue
,
position
,
HotSearchType
.
抖音热搜
.
name
()
);
list
.
add
(
douyin
);
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
View file @
41dee457
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Objects
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.Objects
;
import
org.apache.commons.lang3.StringUtils
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.apache.commons.lang3.StringUtils
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.SougoHotSearch
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
/**
/**
* @ClassName:SougoHotSearch
* @Description: TODO(搜狗微信关键词采集)
* @author hero
...
...
@@ -26,6 +28,7 @@ public class SougoHotSearchCrawler {
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
SougoHotSearchCrawler
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
/**
* @Title: SougoHotSearchTest
* @author hero
...
...
@@ -33,52 +36,51 @@ public class SougoHotSearchCrawler {
* @param 设定文件
* @return void 返回类型
*/
public
static
List
<
SougoHotSearch
>
sougoHotSearch
()
{
public
static
List
<
HotSearchList
>
sougoHotSearch
()
{
String
url
=
"https://weixin.sogou.com"
;
List
<
SougoHotSearch
>
list
=
new
ArrayList
<
SougoHotSearch
>();
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
List
<
HotSearchList
>
list
=
new
ArrayList
<
>();
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
try
{
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"topwords"
))
{
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"topwords"
))
{
try
{
Document
document
=
Jsoup
.
parse
(
htmlBody
);
Elements
elements
=
document
.
select
(
"ol#topwords"
).
select
(
"li"
);
for
(
Element
element
:
elements
)
{
try
{
//获取排名rank
//
获取排名rank
String
rankStr
=
null
;
if
(!
element
.
select
(
"li"
).
select
(
"i"
).
isEmpty
())
{
if
(!
element
.
select
(
"li"
).
select
(
"i"
).
isEmpty
())
{
rankStr
=
element
.
select
(
"li"
).
select
(
"i"
).
text
();
}
Integer
rank
=
null
;
if
(
StringUtils
.
isNoneBlank
(
rankStr
))
{
if
(
StringUtils
.
isNoneBlank
(
rankStr
))
{
rank
=
Integer
.
valueOf
(
rankStr
);
}
//获取关键词(String)
//
获取关键词(String)
String
kw
=
element
.
select
(
"li"
).
select
(
"a"
).
text
();
logger
.
info
(
"关键词:{}"
,
kw
);
//获取关键词相关链接everurl(String)
//
获取关键词相关链接everurl(String)
String
everurl
=
element
.
select
(
"li"
).
select
(
"a"
).
attr
(
"href"
);
SougoHotSearch
hotSearch
=
new
SougoHotSearch
(
rank
,
kw
,
everurl
);
HotSearchList
hotSearch
=
new
HotSearchList
(
everurl
,
kw
,
null
,
rank
,
HotSearchType
.
搜狗微信热搜
.
name
()
);
if
(
Objects
.
nonNull
(
rank
))
{
if
(
Objects
.
nonNull
(
rank
))
{
list
.
add
(
hotSearch
);
}
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析搜狗微信时出现解析错误"
,
e
);
continue
;
}
}
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析搜狗微信时出现解析错误,数据不是json结构"
,
e
.
fillInStackTrace
());
return
null
;
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析搜狗微信时出现解析错误,数据不是json结构"
,
e
.
fillInStackTrace
());
return
Collections
.
emptyList
()
;
}
}
else
{
}
else
{
logger
.
info
(
"解析搜狗微信时出现解析错误,页面结构有问题"
);
}
break
;
...
...
@@ -86,8 +88,6 @@ public class SougoHotSearchCrawler {
logger
.
error
(
"解析搜狗微信时出现解析错误,页面结构有问题"
,
e
);
}
}
logger
.
info
(
"此轮采集的数据量为:"
,
list
.
size
());
return
list
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
41dee457
...
...
@@ -17,7 +17,8 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.WeiboHotSearch
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.mail.SendMailWeibo
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
...
...
@@ -38,10 +39,10 @@ public class WeiboHotSearchCrawler {
* @param 设定文件
* @return void 返回类型
*/
public
static
List
<
WeiboHotSearch
>
weiboHotSearch
(){
public
static
List
<
HotSearchList
>
weiboHotSearch
(){
String
url
=
"https://s.weibo.com/top/summary?cate=realtimehot"
;
List
<
WeiboHotSearch
>
list
=
new
ArrayList
<
WeiboHotSearch
>();
List
<
HotSearchList
>
list
=
new
ArrayList
<
HotSearchList
>();
for
(
int
i
=
0
;
i
<
3
;
i
++){
String
htmlBody
=
null
;
try
{
...
...
@@ -63,7 +64,7 @@ public class WeiboHotSearchCrawler {
int
hotCount
=
Integer
.
valueOf
(
num
);
int
rankCount
=
Integer
.
valueOf
(
rank
);
WeiboHotSearch
hotSearch
=
new
WeiboHotSearch
(
id
,
name
,
hotCount
,
true
,
rankCount
);
HotSearchList
hotSearch
=
new
HotSearchList
(
id
,
name
,
hotCount
,
true
,
rankCount
,
HotSearchType
.
微博热搜
.
name
()
);
list
.
add
(
hotSearch
);
}
catch
(
Exception
e
)
{
SendMailWeibo
.
sendMail
(
"微博热搜采集出现问题"
,
"859548429@qq.com"
);
...
...
@@ -103,13 +104,13 @@ public class WeiboHotSearchCrawler {
* @param 设定文件
* @return void 返回类型
*/
public
static
List
<
WeiboHotSearch
>
weiboHotSearchByPhone
(){
public
static
List
<
HotSearchList
>
weiboHotSearchByPhone
(){
String
url
=
""
;
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
headerMap
.
put
(
"Host"
,
"mapi.weibo.com"
);
headerMap
.
put
(
"User-Agent"
,
"Weibo/8789 (iPhone; iOS 10.3.3; Scale/2.00)"
);
List
<
WeiboHotSearch
>
result
=
new
ArrayList
<
WeiboHotSearch
>();
List
<
HotSearchList
>
result
=
new
ArrayList
<
HotSearchList
>();
String
htmlBody
;
try
{
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
();
...
...
@@ -133,7 +134,7 @@ public class WeiboHotSearchCrawler {
int
rankCount
=
cardInfo
.
getIntValue
(
"desc_extr"
);
String
id
=
"http://s.weibo.com/weibo/"
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
)
+
"&Refer=top"
;
WeiboHotSearch
hotSearch
=
new
WeiboHotSearch
(
id
,
name
,
hotCount
,
hot
,
rankCount
);
HotSearchList
hotSearch
=
new
HotSearchList
(
id
,
name
,
hotCount
,
hot
,
rankCount
,
HotSearchType
.
微博热搜
.
name
()
);
logger
.
info
(
"采集到的数据:::{}"
,
hotSearch
);
result
.
add
(
hotSearch
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
41dee457
...
...
@@ -2,7 +2,6 @@ package com.zhiwei.searchhotcrawler.crawler;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.Map
;
...
...
@@ -13,7 +12,8 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.ZhihuHotSearch
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
...
...
@@ -34,8 +34,8 @@ public class ZhihuHotSearchCrawler {
* @param 设定文件
* @return void 返回类型
*/
public
static
List
<
ZhihuHotSearch
>
getZhihuHotList
(){
List
<
ZhihuHotSearch
>
list
=
null
;
public
static
List
<
HotSearchList
>
getZhihuHotList
(){
List
<
HotSearchList
>
list
=
null
;
String
url
=
"https://www.zhihu.com/api/v4/search/top_search"
;
String
rerferer
=
"https://www.zhihu.com/search?type=content&q=%E5%BF%AB%E6%89%8B"
;
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
...
...
@@ -47,8 +47,7 @@ public class ZhihuHotSearchCrawler {
headerMap
.
put
(
"Referer"
,
rerferer
);
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
();
if
(
htmlBody
!=
null
){
if
(
htmlBody
.
contains
(
"words"
)){
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"words"
)){
list
=
new
ArrayList
<>();
JSONObject
topSearch
=
JSONObject
.
parseObject
(
htmlBody
);
JSONArray
words
=
topSearch
.
getJSONObject
(
"top_search"
).
getJSONArray
(
"words"
);
...
...
@@ -60,11 +59,10 @@ public class ZhihuHotSearchCrawler {
query
=
word
.
getString
(
"query"
);
displayQuery
=
word
.
getString
(
"display_query"
);
link
=
"https://www.zhihu.com/search?q="
+
URLCodeUtil
.
getURLEncode
(
query
,
"utf-8"
)+
"&utm_content=search_hot&utm_medium=organic&utm_source=zhihu&type=content"
;
ZhihuHotSearch
zhihu
=
new
ZhihuHotSearch
(
link
,
query
,
displayQuery
,
new
Dat
e
());
HotSearchList
zhihu
=
new
HotSearchList
(
link
,
displayQuery
,
null
,
i
,
HotSearchType
.
知乎热搜
.
nam
e
());
list
.
add
(
zhihu
);
}
}
}
}
catch
(
IOException
e
)
{
logger
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
.
fillInStackTrace
());
}
...
...
@@ -80,8 +78,8 @@ public class ZhihuHotSearchCrawler {
* @param @return 设定文件
* @return List<ZhihuHotSearch> 返回类型
*/
public
static
List
<
ZhihuHotSearch
>
getMobileZhihuHotList
(){
List
<
ZhihuHotSearch
>
list
=
null
;
public
static
List
<
HotSearchList
>
getMobileZhihuHotList
(){
List
<
HotSearchList
>
list
=
null
;
String
url
=
"https://api.zhihu.com/topstory/hot-list?limit=40&reverse_order=0"
;
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
headerMap
.
put
(
"Host"
,
"api.zhihu.com"
);
...
...
@@ -93,26 +91,20 @@ public class ZhihuHotSearchCrawler {
for
(
int
j
=
0
;
j
<
3
;
j
++){
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
();
if
(
htmlBody
!=
null
){
if
(
htmlBody
.
contains
(
"author"
)){
list
=
new
ArrayList
<
ZhihuHotSearch
>();
JSONObject
top_search
=
JSONObject
.
parseObject
(
htmlBody
);
JSONArray
words
=
top_search
.
getJSONArray
(
"data"
);
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"author"
)){
list
=
new
ArrayList
<>();
JSONObject
topSearch
=
JSONObject
.
parseObject
(
htmlBody
);
JSONArray
words
=
topSearch
.
getJSONArray
(
"data"
);
String
link
=
null
;
String
display_query
=
null
;
String
query
=
null
;
String
displayQuery
=
null
;
for
(
int
i
=
0
;
i
<
words
.
size
();
i
++)
{
JSONObject
word
=
words
.
getJSONObject
(
i
).
getJSONObject
(
"target"
);
query
=
word
.
getString
(
"title"
);
display_query
=
word
.
getString
(
"title"
);
displayQuery
=
word
.
getString
(
"title"
);
link
=
"https://www.zhihu.com/question/"
+
word
.
getLongValue
(
"id"
);
ZhihuHotSearch
zhihu
=
new
ZhihuHotSearch
(
link
,
query
,
display_query
,
new
Dat
e
());
HotSearchList
zhihu
=
new
HotSearchList
(
link
,
displayQuery
,
null
,
i
,
HotSearchType
.
知乎热搜
.
nam
e
());
list
.
add
(
zhihu
);
}
break
;
}
else
{
System
.
out
.
println
(
"---------------"
);
}
}
}
catch
(
IOException
e
)
{
logger
.
debug
(
"获取知乎热搜时出现问题:{}"
,
e
.
fillInStackTrace
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/BaiduHotSearchDAO.java
deleted
100644 → 0
View file @
b528f200
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
java.util.Calendar
;
import
java.util.List
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBCursor
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
 * Data-access object for the Baidu hot-search collection.
 *
 * <p>The collection name is {@code Config.collBaiduName} followed by the current year and a
 * half-year suffix: "_01" while {@code Calendar.MONTH} is below 6 (January to June), "_06"
 * otherwise.
 */
public class BaiduHotSearchDAO extends MongoDBTemplate {

    public BaiduHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        // Calendar.MONTH and Calendar.YEAR are field-index constants (2 and 1), not date values.
        // Using them directly always produced the suffix "1_01"; read the real month and year
        // from a Calendar instance instead.
        // NOTE(review): data written before this fix lives in the "<collBaiduName>1_01"
        // collection - confirm whether it has to be migrated.
        Calendar now = Calendar.getInstance();
        String halfYearSuffix = now.get(Calendar.MONTH) < 6 ? "_01" : "_06";
        String collWeiboName = Config.collBaiduName + now.get(Calendar.YEAR) + halfYearSuffix;
        super.setCollName(collWeiboName);
    }

    /**
     * Inserts a batch of documents, making up to three attempts.
     *
     * <p>A failed attempt prints its stack trace and is retried; after the third failure the
     * method returns without signalling the error. A null or empty batch is ignored.
     *
     * @author hero
     * @param list documents to insert
     */
    public void addBaiduSearch(List<DBObject> list) {
        if (list == null || list.isEmpty()) {
            // Nothing to write; avoids three pointless attempts on an empty batch.
            return;
        }
        for (int i = 0; i < 3; i++) {
            try {
                this.getReadColl().insert(list);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                e.printStackTrace();
                continue;
            }
        }
    }

    /**
     * Computes how much the count changed compared with the most recent stored document
     * that has the same keyword.
     *
     * @author hero
     * @param baiduHotSearch entry whose keyword and count are compared
     * @return current count minus the stored "count" of the newest matching document, or 0
     *         when no document matches or the lookup fails
     */
    public int getChangeCount(BaiDuHotSearch baiduHotSearch) {
        int result = 0;
        DBObject query = new BasicDBObject();
        query.put("kw", baiduHotSearch.getKw());
        DBObject sort = new BasicDBObject();
        sort.put("time", -1);
        DBCursor cur = null;
        try {
            // Newest document first, at most one result.
            cur = this.getReadColl().find(query).sort(sort).limit(1);
            if (cur.hasNext()) {
                DBObject doc = cur.next();
                result = baiduHotSearch.getCount() - Integer.valueOf(doc.get("count").toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the cursor on the failure path as well, not only after a successful read.
            if (cur != null) {
                cur.close();
            }
        }
        return result;
    }

    // Disabled: getWeiboHotOneHour (author hero) - queried the Weibo hot searches newly added
    // within the last hour and returned them as List<DBObject>.
//	public List<DBObject> getWeiboHotOneHour(){
//		List<DBObject> list = new ArrayList<DBObject>();
//		Date date = new Date((new Date().getTime()-60*60*1000));
//		DBObject query = new BasicDBObject();
//		query.put("time", new BasicDBObject("$gte", date));
//		query.put("changeCount", 0);
//
//		try {
//			DBCursor cur = this.getReadColl().find(query);
//			while(cur.hasNext()){
//				DBObject doc = cur.next();
//				String name = doc.get("name").toString();
//				if(CacheManager.getCacheByKey(name)==null){
//					CacheManager.putCache(name, doc, 48*60*60*1000);
//					list.add(doc);
//				}
//			}
//			cur.close();
//		} catch (Exception e) {
//			return null;
//		}
//		return list;
//	}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/DouyinHotSearchDAO.java
deleted
100644 → 0
View file @
b528f200
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
java.util.Calendar
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBCursor
;
import
com.mongodb.DBObject
;
import
com.mongodb.WriteConcern
;
import
com.zhiwei.searchhotcrawler.bean.DouyinHotSearch
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
 * Data-access object for the Douyin hot-search collection.
 *
 * <p>The collection name is {@code Config.collDouyinName} followed by the current year and a
 * half-year suffix: "_01" while {@code Calendar.MONTH} is below 6 (January to June), "_06"
 * otherwise.
 */
public class DouyinHotSearchDAO extends MongoDBTemplate {

    public DouyinHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        // Calendar.MONTH and Calendar.YEAR are field-index constants (2 and 1), not date values.
        // Using them directly always produced the suffix "1_01"; read the real month and year
        // from a Calendar instance instead.
        // NOTE(review): data written before this fix lives in the "<collDouyinName>1_01"
        // collection - confirm whether it has to be migrated.
        Calendar now = Calendar.getInstance();
        String halfYearSuffix = now.get(Calendar.MONTH) < 6 ? "_01" : "_06";
        String collWeiboName = Config.collDouyinName + now.get(Calendar.YEAR) + halfYearSuffix;
        super.setCollName(collWeiboName);
    }

    /**
     * Inserts one document with {@code WriteConcern.SAFE}, making up to three attempts.
     *
     * <p>A failed attempt prints its stack trace and is retried; after the third failure the
     * method returns without signalling the error.
     *
     * @param douyin document to insert
     */
    @SuppressWarnings("deprecation")
    public void addDouyinHotSearch(DBObject douyin) {
        for (int i = 0; i < 3; i++) {
            try {
                this.getReadColl().insert(douyin, WriteConcern.SAFE);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                // Previously swallowed without a trace; report it so that failed writes are
                // visible, then retry.
                e.printStackTrace();
                continue;
            }
        }
    }

    /**
     * Computes how much the heat value changed compared with the most recent stored document
     * that has the same word.
     *
     * @author hero
     * @param douyinHotSearch entry whose word and heat value are compared
     * @return current heat value minus the stored "hot_value" of the newest matching document,
     *         or 0 when no document matches or the lookup fails
     */
    public int getChangeCount(DouyinHotSearch douyinHotSearch) {
        int result = 0;
        DBObject query = new BasicDBObject();
        query.put("word", douyinHotSearch.getWord());
        DBObject sort = new BasicDBObject();
        sort.put("time", -1);
        DBCursor cur = null;
        try {
            // Newest document first, at most one result.
            cur = this.getReadColl().find(query).sort(sort).limit(1);
            if (cur.hasNext()) {
                DBObject doc = cur.next();
                result = douyinHotSearch.getHot_value() - Integer.valueOf(doc.get("hot_value").toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the cursor on the failure path as well, not only after a successful read.
            if (cur != null) {
                cur.close();
            }
        }
        return result;
    }

    // Disabled: getDouyinHotSearch (author hero) - returned the documents of the last hour
    // as List<DBObject>.
//	public List<DBObject> getDouyinHotSearch(){
//		List<DBObject> list = null;
//		try {
//			Date date = new Date((new Date().getTime()-60*60*1000));
//			DBObject query = new BasicDBObject();
//			query.put("time", new BasicDBObject("$gte", date));
//
//			long count = this.getReadColl().count(query);
//			if(count>0){
//				list = new ArrayList<DBObject>();
//				DBCursor cur = this.getReadColl().find(query);
//				while(cur.hasNext()){
//					DBObject doc = cur.next();
//					list.add(doc);
//				}
//				cur.close();
//			}
//			return list;
//		} catch (Exception e) {
//			e.printStackTrace();
//			return list;
//		}
//	}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/
WeiboHotSearch
DAO.java
→
src/main/java/com/zhiwei/searchhotcrawler/dao/
HotSearchList
DAO.java
View file @
41dee457
...
...
@@ -6,58 +6,72 @@ import java.util.Calendar;
import
java.util.Date
;
import
java.util.List
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBCursor
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.
WeiboHotSearch
;
import
com.zhiwei.searchhotcrawler.bean.
HotSearchList
;
import
com.zhiwei.searchhotcrawler.cache.CacheManager
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
WeiboHotSearchDAO
extends
MongoDBTemplate
{
public
class
HotSearchListDAO
extends
MongoDBTemplate
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
BaiDuHotSearchCrawler
.
class
);
public
WeiboHotSearchDAO
()
{
@SuppressWarnings
(
"unused"
)
public
HotSearchListDAO
()
{
super
();
super
.
setDbName
(
Config
.
dbName
);
String
collWeiboName
;
if
(
Calendar
.
MONTH
<
6
){
collWeiboName
=
Config
.
collWeiboName
+
Calendar
.
YEAR
+
"_01"
;
}
else
{
collWeiboName
=
Config
.
collWeiboName
+
Calendar
.
YEAR
+
"_06"
;
}
super
.
setCollName
(
collWeiboName
);
String
time
=
TimeParse
.
dateFormartString
(
new
Date
(),
"yyyy-MM-dd"
);
String
year
=
time
.
substring
(
0
,
4
);
String
month
=
time
.
substring
(
5
,
7
);
String
collName
=
Config
.
collName
+
year
+
"_"
+
month
;
super
.
setCollName
(
collName
);
}
/**
* @Title: addWeiboHotSearch
* @author hero
* @Description: TODO(这里用一句话描述这个方法的作用)
* @param @param doc 设定文件
* @return void 返回类型
* 添加数据入库
* @param list
*/
public
void
add
WeiboHotSearch
(
List
<
DBObject
>
list
){
public
void
add
HotSearchList
(
List
<
DBObject
>
list
){
for
(
int
i
=
0
;
i
<
3
;
i
++){
try
{
this
.
getReadColl
().
insert
(
list
);
ZhiWeiTools
.
sleep
(
200
);
break
;
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
continue
;
logger
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
}
public
void
addHotSearch
(
DBObject
doc
){
for
(
int
i
=
0
;
i
<
3
;
i
++){
try
{
this
.
getReadColl
().
save
(
doc
);
ZhiWeiTools
.
sleep
(
200
);
break
;
}
catch
(
Exception
e
)
{
logger
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
}
/**
* 查询据上次变化量
* @Title: getChangeCount
* @author hero
* @Description: TODO(查询据上次变化量)
* @param @param weiboHotSearch
* @param @return 设定文件
* @return int 返回类型
*/
public
int
getChangeCount
(
WeiboHotSearch
weiboHotSearch
){
public
int
getChangeCount
(
HotSearchList
weiboHotSearch
){
int
result
=
0
;
DBObject
query
=
new
BasicDBObject
();
query
.
put
(
"name"
,
weiboHotSearch
.
getName
());
...
...
@@ -72,7 +86,7 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
}
cur
.
close
();
}
catch
(
Exception
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
return
result
;
}
return
result
;
...
...
@@ -86,12 +100,13 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
* @param @return 设定文件
* @return List<DBObject> 返回类型
*/
public
List
<
DBObject
>
get
WeiboHotOneHour
(
){
List
<
DBObject
>
list
=
new
ArrayList
<
DBObject
>();
public
List
<
DBObject
>
get
HotOneHour
(
String
type
){
List
<
DBObject
>
list
=
new
ArrayList
<>();
Date
date
=
new
Date
((
new
Date
().
getTime
()-
60
*
60
*
1000
));
DBObject
query
=
new
BasicDBObject
();
query
.
put
(
"time"
,
new
BasicDBObject
(
"$gte"
,
date
));
query
.
put
(
"changeCount"
,
0
);
query
.
put
(
"type"
,
type
);
try
{
DBCursor
cur
=
this
.
getReadColl
().
find
(
query
);
...
...
@@ -105,7 +120,7 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
}
cur
.
close
();
}
catch
(
Exception
e
)
{
return
null
;
logger
.
error
(
"存储数据时出错,错误为:{}"
,
e
)
;
}
return
list
;
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/SougoHotSearchDAO.java
deleted
100644 → 0
View file @
b528f200
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
java.util.List
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
 * Data-access object for Sogou hot-search records, stored in the collection
 * named by {@code Config.collSougoName} of the database {@code Config.dbName}.
 */
public class SougoHotSearchDAO extends MongoDBTemplate {

    /** Points this DAO at the configured database and Sogou collection. */
    public SougoHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collSougoName);
    }

    /**
     * Inserts the given documents in one batch, making at most three attempts.
     *
     * <p>A successful insert is followed by a 200 ms sleep, after which the
     * method returns. A failed attempt prints its stack trace and is retried.
     *
     * @param list the documents to insert
     */
    public void addSougoSearch(List<DBObject> list) {
        int attemptsLeft = 3;
        while (attemptsLeft > 0) {
            attemptsLeft--;
            try {
                this.getReadColl().insert(list);
                ZhiWeiTools.sleep(200);
                return;
            } catch (Exception insertFailure) {
                insertFailure.printStackTrace();
            }
        }
    }

    // Disabled method kept from the original source: computed the change in
    // count relative to the most recent stored record with the same keyword.
    // public int getChangeCount(SougoHotSearch sougoHotSearch){
    //     int result = 0;
    //     DBObject query = new BasicDBObject();
    //     query.put("kw", sougoHotSearch.getKw());
    //     DBObject sort = new BasicDBObject();
    //     sort.put("time", -1);
    //     try {
    //         DBCursor cur = this.getReadColl().find(query).sort(sort).limit(1);
    //         while(cur.hasNext()){
    //             DBObject doc = cur.next();
    //             result = sougoHotSearch.getCount() - Integer.valueOf(doc.get("count").toString());
    //             break;
    //         }
    //         cur.close();
    //     } catch (Exception e) {
    //         e.printStackTrace();
    //         return result;
    //     }
    //     return result;
    // }
}
src/main/java/com/zhiwei/searchhotcrawler/dao/WechatUserDao.java
View file @
41dee457
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
java.util.Collections
;
import
java.util.List
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
public
class
WechatUserDao
extends
MongoDBTemplate
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
BaiDuHotSearchCrawler
.
class
);
public
WechatUserDao
()
{
super
();
super
.
setDbName
(
Config
.
dbName
);
...
...
@@ -31,8 +39,7 @@ public class WechatUserDao extends MongoDBTemplate{
this
.
getReadColl
().
save
(
doc
);
break
;
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
continue
;
logger
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
}
...
...
@@ -54,9 +61,9 @@ public class WechatUserDao extends MongoDBTemplate{
return
(
List
<
String
>)
doc
.
get
(
"user"
);
}
}
catch
(
Exception
e
)
{
return
null
;
logger
.
error
(
"存储数据时出错,错误为:{}"
,
e
)
;
}
return
null
;
return
Collections
.
emptyList
()
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/ZhihuHotSearchDAO.java
deleted
100644 → 0
View file @
b528f200
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBCursor
;
import
com.mongodb.DBObject
;
import
com.mongodb.WriteConcern
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
 * Data-access object for Zhihu hot-search records, stored in the collection
 * named by {@code Config.collZhihuName} of the database {@code Config.dbName}.
 */
public class ZhihuHotSearchDAO extends MongoDBTemplate {

    /** Maximum number of insert attempts before giving up. */
    private static final int MAX_INSERT_ATTEMPTS = 3;

    /** Points this DAO at the configured database and Zhihu collection. */
    public ZhihuHotSearchDAO() {
        super();
        super.setDbName(Config.dbName);
        super.setCollName(Config.collZhihuName);
    }

    /**
     * Inserts one Zhihu hot-search document, retrying up to three times.
     *
     * <p>After a successful insert the method sleeps 200 ms and returns.
     * A failed attempt is reported via {@code printStackTrace()} (matching
     * the error reporting used elsewhere in this class) and then retried;
     * no exception is propagated to the caller.
     *
     * @param zhihu the document to insert
     */
    @SuppressWarnings("deprecation")
    public void addZhiHuHotSearch(DBObject zhihu) {
        for (int attempt = 0; attempt < MAX_INSERT_ATTEMPTS; attempt++) {
            try {
                this.getReadColl().insert(zhihu, WriteConcern.SAFE);
                ZhiWeiTools.sleep(200);
                break;
            } catch (Exception e) {
                // Previously swallowed silently; surface the failure, then retry.
                e.printStackTrace();
            }
        }
    }

    /**
     * Returns the documents whose {@code time} field falls within the last hour.
     *
     * @return the matching documents; {@code null} when none match or when the
     *         query fails before any result list was created. If iteration
     *         fails part-way, the documents read so far are returned.
     */
    public List<DBObject> getZhiHuHotSearch() {
        List<DBObject> list = null;
        DBCursor cur = null;
        try {
            Date date = new Date(new Date().getTime() - 60 * 60 * 1000);
            DBObject query = new BasicDBObject();
            query.put("time", new BasicDBObject("$gte", date));

            long count = this.getReadColl().count(query);
            if (count > 0) {
                list = new ArrayList<DBObject>();
                cur = this.getReadColl().find(query);
                while (cur.hasNext()) {
                    list.add(cur.next());
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the cursor is released even when iteration throws.
            closeQuietly(cur);
        }
        return list;
    }

    /** Closes the cursor if it was opened, never propagating a failure. */
    private static void closeQuietly(DBCursor cursor) {
        if (cursor == null) {
            return;
        }
        try {
            cursor.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
src/main/java/com/zhiwei/searchhotcrawler/dbtemplate/MongoDBTemplate.java
View file @
41dee457
...
...
@@ -29,13 +29,13 @@ public class MongoDBTemplate {
ServerAddress
address
=
new
ServerAddress
(
Config
.
mongoIp
,
Config
.
mongoPort
);
if
(
reader
==
null
)
{
//
reader = new MongoClient(address, Arrays.asList(credential));
reader
=
new
MongoClient
(
address
);
reader
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credential
));
//
reader = new MongoClient(address);
}
if
(
writer
==
null
)
{
//
writer = new MongoClient(address, Arrays.asList(credential));
writer
=
new
MongoClient
(
address
);
writer
=
new
MongoClient
(
address
,
Arrays
.
asList
(
credential
));
//
writer = new MongoClient(address);
}
}
catch
(
MongoException
e
)
{
e
.
printStackTrace
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearchListTest.java
0 → 100644
View file @
41dee457
package
com
.
zhiwei
.
searchhotcrawler
.
test
;
import
java.util.ArrayList
;
import
java.util.Arrays
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.Map
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DB
;
import
com.mongodb.DBCollection
;
import
com.mongodb.DBCursor
;
import
com.mongodb.DBObject
;
import
com.mongodb.Mongo
;
import
com.mongodb.MongoClient
;
import
com.mongodb.MongoCredential
;
import
com.mongodb.ServerAddress
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.config.Config
;
import
com.zhiwei.tools.timeparse.TimeParse
;
/**
 * One-off migration utility: copies Weibo hot-search documents from the
 * legacy {@code NetWork.weibo_hotsearch2018_10} collection into the monthly
 * {@code hot_search_list<yyyy>_<MM>} collections of the {@code hot_search_list}
 * database, tagging each copy with the Weibo hot-search type.
 */
public class HotSearchListTest {

    /**
     * Walks the time windows produced by {@code TimeParse.getTimeMap} for
     * 2019-07-16 and migrates the documents of each window.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        MongoCredential credential = MongoCredential.createCredential(Config.userName, Config.authDB, Config.userPwd.toCharArray());
        ServerAddress address = new ServerAddress(Config.mongoIp, Config.mongoPort);
        Mongo mongo = new MongoClient(address, Arrays.asList(credential));
        try {
            DB db = mongo.getDB("NetWork");
            DBCollection coll = db.getCollection("weibo_hotsearch2018_10");

            // NOTE(review): credentials are hard-coded in source; they should be
            // moved to configuration (as done for the source connection above).
            MongoCredential credentialNew = MongoCredential.createCredential("datapush", "admin", "4d8ce5c42073c".toCharArray());
            ServerAddress addressNew = new ServerAddress(Config.mongoIp, Config.mongoPort);
            Mongo mongoNew = new MongoClient(addressNew, Arrays.asList(credentialNew));
            try {
                DB dbNew = mongoNew.getDB("hot_search_list");
                Map<String, String> timLine = TimeParse.getTimeMap("2019-07-16 00:00:00", "2019-07-16 23:59:59", "HH", 1);
                timLine.forEach((start, end) -> migrateWindow(coll, dbNew, start, end));
            } finally {
                // The target client was never closed before.
                mongoNew.close();
            }
        } finally {
            mongo.close();
        }
    }

    /**
     * Copies every source document whose {@code time} lies in
     * {@code [start, end]} into the monthly target collection derived from
     * {@code end}.
     *
     * <p>Documents are saved one at a time; the batch-insert variant present
     * in the original source was disabled.
     *
     * @param coll  source collection
     * @param dbNew target database
     * @param start window start, in the format accepted by {@code TimeParse.stringFormartDate}
     * @param end   window end, same format; its year and month select the target collection
     */
    private static void migrateWindow(DBCollection coll, DB dbNew, String start, String end) {
        String year = end.substring(0, 4);
        String month = end.substring(5, 7);
        Date startDate = TimeParse.stringFormartDate(start);
        Date endDate = TimeParse.stringFormartDate(end);
        String collName = "hot_search_list" + year + "_" + month;
        System.out.println("collName==========" + collName);
        DBCollection collNew = dbNew.getCollection(collName);
        DBObject query = new BasicDBObject(new BasicDBObject("time", new BasicDBObject("$gte", startDate).append("$lte", endDate)));
        DBCursor cur = coll.find(query);
        try {
            System.out.println(query + "=======" + cur.count());
            while (cur.hasNext()) {
                DBObject doc = cur.next();
                DBObject target = new BasicDBObject();
                target.put("_id", doc.get("_id"));
                target.put("name", doc.get("name"));
                target.put("url", doc.get("url"));
                target.put("count", doc.get("count"));
                target.put("hot", doc.get("hot"));
                target.put("day", doc.get("day"));
                target.put("time", doc.get("time"));
                target.put("changeCount", doc.get("changeCount"));
                target.put("rank", doc.get("rank"));
                target.put("type", HotSearchType.微博热搜.name());
                collNew.save(target);
            }
        } finally {
            // The cursor was never closed before; release it per window.
            cur.close();
        }
    }
}
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
View file @
41dee457
...
...
@@ -10,37 +10,39 @@ import org.slf4j.LoggerFactory;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.BaiDuHotSearch
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.
BaiduHotSearch
DAO
;
import
com.zhiwei.searchhotcrawler.dao.
HotSearchList
DAO
;
public
class
BaiduHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
BaiduHotSearchRun
.
class
);
private
BaiduHotSearchDAO
baiduHotSearchDAO
=
new
BaiduHotSearch
DAO
();
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchList
DAO
();
@Override
public
void
run
()
{
logger
.
info
(
"百度风云榜采集开始........"
);
List
<
BaiDuHotSearch
>
list
=
BaiDuHotSearchCrawler
.
baiduHotSearch
();
List
<
HotSearchList
>
list
=
BaiDuHotSearchCrawler
.
baiduHotSearch
();
logger
.
info
(
"{}, 此轮百度风云榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
saveDataList
=
new
ArrayList
<>();
if
(
Objects
.
nonNull
(
list
)
&&
!
list
.
isEmpty
())
{
list
.
forEach
(
baiduHotSearch
->{
int
changeCount
=
baiduH
otSearchDAO
.
getChangeCount
(
baiduHotSearch
);
int
changeCount
=
h
otSearchDAO
.
getChangeCount
(
baiduHotSearch
);
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
baiduHotSearch
.
getId
());
doc
.
put
(
"name"
,
baiduHotSearch
.
get
Kw
());
doc
.
put
(
"url"
,
baiduHotSearch
.
get
Everu
rl
());
doc
.
put
(
"name"
,
baiduHotSearch
.
get
Name
());
doc
.
put
(
"url"
,
baiduHotSearch
.
get
U
rl
());
doc
.
put
(
"count"
,
baiduHotSearch
.
getCount
());
doc
.
put
(
"day"
,
baiduHotSearch
.
getDay
());
doc
.
put
(
"time"
,
baiduHotSearch
.
getTime
());
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"rank"
,
baiduHotSearch
.
getRank
());
doc
.
put
(
"type"
,
HotSearchType
.
百度热搜
.
name
());
saveDataList
.
add
(
doc
);
});
}
baiduHotSearchDAO
.
addBaiduSearch
(
saveDataList
);
hotSearchDAO
.
addHotSearchList
(
saveDataList
);
logger
.
info
(
"百度风云榜采集结束........"
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
View file @
41dee457
...
...
@@ -9,34 +9,36 @@ import org.slf4j.LoggerFactory;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.DouyinHotSearch
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.crawler.DouyinHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.
DouyinHotSearch
DAO
;
import
com.zhiwei.searchhotcrawler.dao.
HotSearchList
DAO
;
public
class
DouyinHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
DouyinHotSearchRun
.
class
);
private
DouyinHotSearchDAO
douyinHotSearchDAO
=
new
DouyinHotSearch
DAO
();
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchList
DAO
();
@Override
public
void
run
()
{
logger
.
info
(
"抖音热搜榜采集开始........"
);
List
<
DouyinHotSearch
>
list
=
DouyinHotSearchCrawler
.
getMobileDouyinHotList
();
List
<
HotSearchList
>
list
=
DouyinHotSearchCrawler
.
getMobileDouyinHotList
();
logger
.
info
(
"{}, 抖音热搜榜此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<
DBObject
>();
for
(
DouyinHotSearch
douyinHotSearch
:
list
){
int
changeCount
=
douyinH
otSearchDAO
.
getChangeCount
(
douyinHotSearch
);
List
<
DBObject
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
douyinHotSearch
:
list
){
int
changeCount
=
h
otSearchDAO
.
getChangeCount
(
douyinHotSearch
);
DBObject
douyin
=
new
BasicDBObject
();
douyin
.
put
(
"_id"
,
douyinHotSearch
.
getId
());
douyin
.
put
(
"name"
,
douyinHotSearch
.
getWord
());
douyin
.
put
(
"rank"
,
douyinHotSearch
.
getPosition
());
douyin
.
put
(
"count"
,
douyinHotSearch
.
getHot_value
());
// douyin.put("url", douyinHotSearch.getUrl());
douyin
.
put
(
"name"
,
douyinHotSearch
.
getName
());
douyin
.
put
(
"rank"
,
douyinHotSearch
.
getRank
());
douyin
.
put
(
"count"
,
douyinHotSearch
.
getCount
());
douyin
.
put
(
"day"
,
douyinHotSearch
.
getDay
());
douyin
.
put
(
"time"
,
douyinHotSearch
.
getTime
());
douyin
.
put
(
"changeCount"
,
changeCount
);
douyin
.
put
(
"url"
,
null
);
douyin
.
put
(
"type"
,
HotSearchType
.
抖音热搜
.
name
());
data
.
add
(
douyin
);
douyinHotSearchDAO
.
addDouyin
HotSearch
(
douyin
);
hotSearchDAO
.
add
HotSearch
(
douyin
);
}
logger
.
info
(
"抖音热搜榜采集结束........"
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
View file @
41dee457
...
...
@@ -12,7 +12,8 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.dao.WechatUserDao
;
import
com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.util.Template
;
import
com.zhiwei.searchhotcrawler.util.WechatCodeUtil
;
import
com.zhiwei.searchhotcrawler.util.WechatConstant
;
...
...
@@ -20,10 +21,9 @@ import com.zhiwei.tools.timeparse.TimeParse;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
SendWeiboHotSearchRun
extends
Thread
{
private
WeiboHotSearchDAO
weiboHotSearchDAO
=
new
WeiboHotSearch
DAO
();
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchList
DAO
();
private
static
WechatUserDao
wechatUserDao
=
new
WechatUserDao
();
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
SendWeiboHotSearchRun
.
class
);
@Override
public
void
run
()
{
while
(
true
)
{
...
...
@@ -32,8 +32,8 @@ public class SendWeiboHotSearchRun extends Thread {
int
hour
=
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
);
logger
.
info
(
"微博推送,当前系统时间为:"
+
hour
);
if
(
hour
>
6
&&
hour
<
23
)
{
List
<
DBObject
>
list
=
weiboHotSearchDAO
.
getWeiboHotOneHour
(
);
if
(
list
!=
null
&&
list
.
size
()
>
0
)
{
List
<
DBObject
>
list
=
hotSearchDAO
.
getHotOneHour
(
HotSearchType
.
微博热搜
.
name
()
);
if
(
list
!=
null
&&
!
list
.
isEmpty
()
)
{
for
(
DBObject
weibo
:
list
)
{
String
title
=
weibo
.
get
(
"name"
).
toString
();
String
time
=
TimeParse
.
dateFormartString
((
Date
)
weibo
.
get
(
"time"
),
"yyyy-MM-dd HH:mm:ss"
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
View file @
41dee457
...
...
@@ -11,8 +11,9 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.dao.WechatUserDao
;
import
com.zhiwei.searchhotcrawler.dao.ZhihuHotSearchDAO
;
import
com.zhiwei.searchhotcrawler.util.Template
;
import
com.zhiwei.searchhotcrawler.util.WechatCodeUtil
;
import
com.zhiwei.searchhotcrawler.util.WechatConstant
;
...
...
@@ -20,7 +21,7 @@ import com.zhiwei.tools.timeparse.TimeParse;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
SendZhihuHotSearchRun
extends
Thread
{
private
ZhihuHotSearchDAO
zhihuHotSearchDAO
=
new
ZhihuHotSearch
DAO
();
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchList
DAO
();
private
static
WechatUserDao
wechatUserDao
=
new
WechatUserDao
();
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
SendZhihuHotSearchRun
.
class
);
@Override
...
...
@@ -32,8 +33,8 @@ public class SendZhihuHotSearchRun extends Thread{
int
hour
=
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
);
logger
.
info
(
"知乎推送,当前系统时间为:"
+
hour
);
if
(
hour
>
6
&&
hour
<
23
){
List
<
DBObject
>
list
=
zhihuHotSearchDAO
.
getZhiHuHotSearch
(
);
if
(
list
!=
null
&&
list
.
size
()>
0
){
List
<
DBObject
>
list
=
hotSearchDAO
.
getHotOneHour
(
HotSearchType
.
知乎热搜
.
name
()
);
if
(
list
!=
null
&&
!
list
.
isEmpty
()
){
for
(
DBObject
zhihu
:
list
){
String
title
=
zhihu
.
get
(
"display_query"
).
toString
();
String
time
=
TimeParse
.
dateFormartString
((
Date
)
zhihu
.
get
(
"time"
),
"yyyy-MM-dd HH:mm:ss"
);
...
...
@@ -51,7 +52,6 @@ public class SendZhihuHotSearchRun extends Thread{
}
catch
(
Exception
e
)
{
logger
.
debug
(
"知乎热搜推送出现问题,问题为:::{}"
,
e
.
fillInStackTrace
());
ZhiWeiTools
.
sleep
(
1
*
60
*
60
*
1000
);
continue
;
}
}
}
...
...
@@ -66,7 +66,7 @@ public class SendZhihuHotSearchRun extends Thread{
*/
public
static
void
sendTemplateByUserIds
(
String
title
,
String
time
,
String
url
)
{
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<>();
JSONObject
first
=
new
JSONObject
();
first
.
put
(
"value"
,
"您好,有一条来自知乎热搜榜的预警通知。"
);
dataMap
.
put
(
"first"
,
first
);
...
...
@@ -87,7 +87,7 @@ public class SendZhihuHotSearchRun extends Thread{
dataMap
.
put
(
"remark"
,
remark
);
List
<
String
>
userList
=
getUserList
();
if
(
userList
!=
null
&&
userList
.
size
()>
0
)
{
if
(
userList
!=
null
&&
!
userList
.
isEmpty
()
)
{
for
(
String
openId
:
userList
)
{
Template
template
=
new
Template
();
template
.
setTouser
(
openId
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
View file @
41dee457
...
...
@@ -9,32 +9,34 @@ import org.slf4j.LoggerFactory;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.SougoHotSearch
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.crawler.SougoHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.
SougoHotSearch
DAO
;
import
com.zhiwei.searchhotcrawler.dao.
HotSearchList
DAO
;
public
class
SougoHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
SougoHotSearchRun
.
class
);
private
SougoHotSearchDAO
sougoHotSearchDAO
=
new
SougoHotSearch
DAO
();
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchList
DAO
();
@Override
public
void
run
()
{
logger
.
info
(
"搜狗微信采集开始........"
);
List
<
SougoHotSearch
>
list
=
SougoHotSearchCrawler
.
sougoHotSearch
();
List
<
HotSearchList
>
list
=
SougoHotSearchCrawler
.
sougoHotSearch
();
logger
.
info
(
"{}, 此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<>();
for
(
SougoHotSearch
sougoHotSearch
:
list
){
for
(
HotSearchList
sougoHotSearch
:
list
){
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
sougoHotSearch
.
getId
());
doc
.
put
(
"name"
,
sougoHotSearch
.
get
Kw
());
doc
.
put
(
"url"
,
sougoHotSearch
.
get
Everu
rl
());
doc
.
put
(
"name"
,
sougoHotSearch
.
get
Name
());
doc
.
put
(
"url"
,
sougoHotSearch
.
get
U
rl
());
doc
.
put
(
"day"
,
sougoHotSearch
.
getDay
());
doc
.
put
(
"time"
,
sougoHotSearch
.
getTime
());
doc
.
put
(
"rank"
,
sougoHotSearch
.
getRank
());
doc
.
put
(
"type"
,
HotSearchType
.
搜狗微信热搜
.
name
());
data
.
add
(
doc
);
}
sougoHotSearchDAO
.
addSougoSearch
(
data
);
hotSearchDAO
.
addHotSearchList
(
data
);
logger
.
info
(
"搜狗微信采集结束........"
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
View file @
41dee457
...
...
@@ -9,22 +9,23 @@ import org.slf4j.LoggerFactory;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.WeiboHotSearch
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.
WeiboHotSearch
DAO
;
import
com.zhiwei.searchhotcrawler.dao.
HotSearchList
DAO
;
public
class
WeiboHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WeiboHotSearchRun
.
class
);
private
WeiboHotSearchDAO
weiboHotSearchDAO
=
new
WeiboHotSearch
DAO
();
private
HotSearchListDAO
weiboHotSearchDAO
=
new
HotSearchList
DAO
();
@Override
public
void
run
()
{
logger
.
info
(
"微博话题采集开始........"
);
List
<
WeiboHotSearch
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearch
();
List
<
HotSearchList
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearch
();
logger
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<>();
for
(
WeiboHotSearch
weiboHotSearch
:
list
){
for
(
HotSearchList
weiboHotSearch
:
list
){
int
changeCount
=
weiboHotSearchDAO
.
getChangeCount
(
weiboHotSearch
);
DBObject
doc
=
new
BasicDBObject
();
doc
.
put
(
"_id"
,
weiboHotSearch
.
getId
());
...
...
@@ -36,9 +37,10 @@ public class WeiboHotSearchRun extends Thread{
doc
.
put
(
"time"
,
weiboHotSearch
.
getTime
());
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"rank"
,
weiboHotSearch
.
getRank
());
doc
.
put
(
"type"
,
HotSearchType
.
微博热搜
.
name
());
data
.
add
(
doc
);
}
weiboHotSearchDAO
.
add
WeiboHotSearch
(
data
);
weiboHotSearchDAO
.
add
HotSearchList
(
data
);
logger
.
info
(
"微博话题采集结束........"
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
View file @
41dee457
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
...
...
@@ -9,31 +8,36 @@ import org.slf4j.LoggerFactory;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.ZhihuHotSearch
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.crawler.ZhihuHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.
ZhihuHotSearch
DAO
;
import
com.zhiwei.searchhotcrawler.dao.
HotSearchList
DAO
;
public
class
ZhihuHotSearchRun
extends
Thread
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
ZhihuHotSearchRun
.
class
);
private
ZhihuHotSearchDAO
zhihuHotSearchDAO
=
new
ZhihuHotSearch
DAO
();
private
HotSearchListDAO
hotSearchDAO
=
new
HotSearchList
DAO
();
@Override
public
void
run
()
{
logger
.
info
(
"知乎话题采集开始........"
);
List
<
ZhihuHotSearch
>
list
=
ZhihuHotSearchCrawler
.
getZhihuHotList
();
List
<
ZhihuHotSearch
>
mobilelist
=
ZhihuHotSearchCrawler
.
getMobileZhihuHotList
();
List
<
HotSearchList
>
list
=
ZhihuHotSearchCrawler
.
getZhihuHotList
();
List
<
HotSearchList
>
mobilelist
=
ZhihuHotSearchCrawler
.
getMobileZhihuHotList
();
list
.
addAll
(
mobilelist
);
logger
.
info
(
"{}, 知乎此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<
DBObject
>();
for
(
ZhihuHotSearch
zhihuHotSearch
:
list
){
for
(
HotSearchList
zhihuHotSearch
:
list
){
DBObject
zhihu
=
new
BasicDBObject
();
zhihu
.
put
(
"_id"
,
zhihuHotSearch
.
getUrl
());
zhihu
.
put
(
"query"
,
zhihuHotSearch
.
getQuery
());
zhihu
.
put
(
"display_query"
,
zhihuHotSearch
.
getDisplayQuery
());
zhihu
.
put
(
"_id"
,
zhihuHotSearch
.
getId
());
zhihu
.
put
(
"name"
,
zhihuHotSearch
.
getName
());
zhihu
.
put
(
"url"
,
zhihuHotSearch
.
getUrl
());
zhihu
.
put
(
"count"
,
zhihuHotSearch
.
getCount
());
zhihu
.
put
(
"hot"
,
zhihuHotSearch
.
isHot
());
zhihu
.
put
(
"day"
,
zhihuHotSearch
.
getDay
());
zhihu
.
put
(
"time"
,
zhihuHotSearch
.
getTime
());
data
.
add
(
zhihu
);
zhihuHotSearchDAO
.
addZhiHuHotSearch
(
zhihu
);
zhihu
.
put
(
"changeCount"
,
0
);
zhihu
.
put
(
"rank"
,
zhihuHotSearch
.
getRank
());
zhihu
.
put
(
"type"
,
HotSearchType
.
知乎热搜
.
name
());
hotSearchDAO
.
addHotSearch
(
zhihu
);
}
logger
.
info
(
"知乎话题采集结束........"
);
}
...
...
src/main/resources/db.properties
View file @
41dee457
#mongoIp=202.107.192.94
mongoIp
=
192.168.0.81
mongoPort
=
27017
mongoIp
=
192.168.0.101
mongoPort
=
30000
#mongoIp=192.168.0.81
#mongoPort=27017
db.username
=
zzwno
db.paasword
=
zzwno1q2w3e4r
db.certifiedDB
=
oneDB
dbName
=
NetWork
collWeiboName
=
weibo_hotsearch
collZhihuName
=
zhihu_hotsearch
collWechatUserName
=
wechat_user
collBaiduName
=
baidu_hotsearch
collSougoName
=
sougo_hotsearch
collDouyinName
=
douyin_hotsearch
\ No newline at end of file
db.certifiedDB
=
admin
dbName
=
hot_search_list
collName
=
hot_search_list
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment