Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
d4cb16b0
Commit
d4cb16b0
authored
Jul 17, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加索引
parent
41dee457
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
80 additions
and
31 deletions
+80
-31
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchList.java
+14
-14
src/main/java/com/zhiwei/searchhotcrawler/config/ProxyConfig.java
+26
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
+4
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
+6
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+2
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+3
-2
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchListDAO.java
+2
-0
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+6
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
+1
-1
src/main/resources/db.properties
+4
-2
src/main/resources/proxyip.properties
+6
-0
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchList.java
View file @
d4cb16b0
...
@@ -22,17 +22,17 @@ public class HotSearchList implements Serializable{
...
@@ -22,17 +22,17 @@ public class HotSearchList implements Serializable{
private
String
name
;
//热搜关键词
private
String
name
;
//热搜关键词
private
int
count
;
//时时热搜量
private
Integer
count
;
//时时热搜量
private
b
oolean
hot
;
//状态(true 为热搜; false为时时上升)
private
B
oolean
hot
;
//状态(true 为热搜; false为时时上升)
private
String
day
;
//天
private
String
day
;
//天
private
Date
time
;
//时间
private
Date
time
;
//时间
private
int
changeCount
;
//据上分钟变化量
private
Integer
changeCount
;
//据上分钟变化量
private
int
rank
;
//排名
private
Integer
rank
;
//排名
private
String
type
;
//分类
private
String
type
;
//分类
...
@@ -40,7 +40,7 @@ public class HotSearchList implements Serializable{
...
@@ -40,7 +40,7 @@ public class HotSearchList implements Serializable{
public
HotSearchList
(){}
public
HotSearchList
(){}
public
HotSearchList
(
String
url
,
String
name
,
int
count
,
boolean
hot
,
int
rank
,
String
type
){
public
HotSearchList
(
String
url
,
String
name
,
Integer
count
,
Boolean
hot
,
Integer
rank
,
String
type
){
this
.
id
=
name
+
"_"
+
new
Date
().
getTime
();
this
.
id
=
name
+
"_"
+
new
Date
().
getTime
();
this
.
url
=
url
;
this
.
url
=
url
;
this
.
name
=
name
;
this
.
name
=
name
;
...
@@ -53,7 +53,7 @@ public class HotSearchList implements Serializable{
...
@@ -53,7 +53,7 @@ public class HotSearchList implements Serializable{
}
}
public
HotSearchList
(
String
url
,
String
name
,
Integer
count
,
int
rank
,
String
type
){
public
HotSearchList
(
String
url
,
String
name
,
Integer
count
,
Integer
rank
,
String
type
){
this
.
id
=
name
+
"_"
+
new
Date
().
getTime
();
this
.
id
=
name
+
"_"
+
new
Date
().
getTime
();
this
.
url
=
url
;
this
.
url
=
url
;
this
.
name
=
name
;
this
.
name
=
name
;
...
@@ -107,11 +107,11 @@ public class HotSearchList implements Serializable{
...
@@ -107,11 +107,11 @@ public class HotSearchList implements Serializable{
this
.
name
=
name
;
this
.
name
=
name
;
}
}
public
int
getCount
()
{
public
Integer
getCount
()
{
return
count
;
return
count
;
}
}
public
void
setCount
(
int
count
)
{
public
void
setCount
(
Integer
count
)
{
this
.
count
=
count
;
this
.
count
=
count
;
}
}
...
@@ -123,11 +123,11 @@ public class HotSearchList implements Serializable{
...
@@ -123,11 +123,11 @@ public class HotSearchList implements Serializable{
this
.
time
=
time
;
this
.
time
=
time
;
}
}
public
int
getChangeCount
()
{
public
Integer
getChangeCount
()
{
return
changeCount
;
return
changeCount
;
}
}
public
void
setChangeCount
(
int
changeCount
)
{
public
void
setChangeCount
(
Integer
changeCount
)
{
this
.
changeCount
=
changeCount
;
this
.
changeCount
=
changeCount
;
}
}
...
@@ -135,11 +135,11 @@ public class HotSearchList implements Serializable{
...
@@ -135,11 +135,11 @@ public class HotSearchList implements Serializable{
return
serialVersionUID
;
return
serialVersionUID
;
}
}
public
b
oolean
isHot
()
{
public
B
oolean
isHot
()
{
return
hot
;
return
hot
;
}
}
public
void
setHot
(
b
oolean
hot
)
{
public
void
setHot
(
B
oolean
hot
)
{
this
.
hot
=
hot
;
this
.
hot
=
hot
;
}
}
...
@@ -151,11 +151,11 @@ public class HotSearchList implements Serializable{
...
@@ -151,11 +151,11 @@ public class HotSearchList implements Serializable{
this
.
day
=
day
;
this
.
day
=
day
;
}
}
public
int
getRank
()
{
public
Integer
getRank
()
{
return
rank
;
return
rank
;
}
}
public
void
setRank
(
int
rank
)
{
public
void
setRank
(
Integer
rank
)
{
this
.
rank
=
rank
;
this
.
rank
=
rank
;
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/config/ProxyConfig.java
0 → 100644
View file @
d4cb16b0
package
com
.
zhiwei
.
searchhotcrawler
.
config
;
import
java.io.InputStream
;
import
java.util.Properties
;
/**
 * Proxy registry settings, loaded once at class-initialization time from the
 * {@code proxyip.properties} resource on the context class loader's classpath.
 *
 * <p>Loading is best-effort: if the resource is missing or cannot be read, the
 * problem is reported on standard error and {@link #registry} and {@link #group}
 * are left {@code null}. Class initialization itself never fails.
 */
public class ProxyConfig {

    /** Name of the classpath resource the settings are read from. */
    private static final String RESOURCE_NAME = "proxyip.properties";

    /** Value of the {@code registry} property; {@code null} if absent or not loaded. */
    public static String registry;

    /** Value of the {@code group} property; {@code null} if absent or not loaded. */
    public static String group;

    static {
        // try-with-resources closes the stream even when load() throws; a null
        // resource (file not on the classpath) is permitted and simply not closed.
        try (InputStream is = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream(RESOURCE_NAME)) {
            if (is == null) {
                // Report the real cause instead of relying on an NPE from load(null).
                System.err.println("ProxyConfig: classpath resource not found: " + RESOURCE_NAME);
            } else {
                Properties conf = new Properties();
                conf.load(is);
                registry = conf.getProperty("registry");
                group = conf.getProperty("group");
            }
        } catch (Exception e) {
            // Deliberately best-effort: keep the class usable with null settings.
            e.printStackTrace();
        }
    }
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
View file @
d4cb16b0
...
@@ -13,6 +13,7 @@ import org.slf4j.Logger;
...
@@ -13,6 +13,7 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
...
@@ -38,7 +39,7 @@ public class BaiDuHotSearchCrawler {
...
@@ -38,7 +39,7 @@ public class BaiDuHotSearchCrawler {
public
static
List
<
HotSearchList
>
baiduHotSearch
()
{
public
static
List
<
HotSearchList
>
baiduHotSearch
()
{
String
url
=
"http://top.baidu.com/buzz?b=1&fr=topindex"
;
String
url
=
"http://top.baidu.com/buzz?b=1&fr=topindex"
;
try
{
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)
,
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"mainBody"
))
{
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"mainBody"
))
{
return
ansysData
(
htmlBody
);
return
ansysData
(
htmlBody
);
}
else
{
}
else
{
...
@@ -79,7 +80,7 @@ public class BaiDuHotSearchCrawler {
...
@@ -79,7 +80,7 @@ public class BaiDuHotSearchCrawler {
}
}
// 获取关键词(String)
// 获取关键词(String)
String
kw
=
element
.
select
(
"td.keyword"
).
select
(
"a.list-title"
).
text
();
String
kw
=
element
.
select
(
"td.keyword"
).
select
(
"a.list-title"
).
text
();
logger
.
info
(
"关键词:{}"
,
kw
);
//
logger.info("关键词:{}", kw);
// 获取关键词相关链接everurl(String)
// 获取关键词相关链接everurl(String)
String
everurl
=
element
.
select
(
"td.keyword"
).
select
(
"a.list-title"
).
attr
(
"href"
);
String
everurl
=
element
.
select
(
"td.keyword"
).
select
(
"a.list-title"
).
attr
(
"href"
);
// 获取搜索指数count(int)
// 获取搜索指数count(int)
...
@@ -95,8 +96,8 @@ public class BaiDuHotSearchCrawler {
...
@@ -95,8 +96,8 @@ public class BaiDuHotSearchCrawler {
if
(
StringUtils
.
isNotBlank
(
hot
))
{
if
(
StringUtils
.
isNotBlank
(
hot
))
{
count
=
Integer
.
valueOf
(
hot
);
count
=
Integer
.
valueOf
(
hot
);
}
}
HotSearchList
hotSearch
=
new
HotSearchList
(
everurl
,
kw
,
count
,
rank
,
HotSearchType
.
百度热搜
.
name
());
if
(
Objects
.
nonNull
(
rank
))
{
if
(
Objects
.
nonNull
(
rank
))
{
HotSearchList
hotSearch
=
new
HotSearchList
(
everurl
,
kw
,
count
,
rank
,
HotSearchType
.
百度热搜
.
name
());
list
.
add
(
hotSearch
);
list
.
add
(
hotSearch
);
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
View file @
d4cb16b0
...
@@ -11,6 +11,7 @@ import org.slf4j.LoggerFactory;
...
@@ -11,6 +11,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
...
@@ -39,7 +40,7 @@ public class DouyinHotSearchCrawler {
...
@@ -39,7 +40,7 @@ public class DouyinHotSearchCrawler {
List
<
HotSearchList
>
list
=
null
;
List
<
HotSearchList
>
list
=
null
;
String
url
=
"https://api.amemv.com/aweme/v1/hot/search/list/"
;
String
url
=
"https://api.amemv.com/aweme/v1/hot/search/list/"
;
try
{
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)
,
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"word_list"
)){
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"word_list"
)){
list
=
new
ArrayList
<>();
list
=
new
ArrayList
<>();
JSONObject
data
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
data
=
JSONObject
.
parseObject
(
htmlBody
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
View file @
d4cb16b0
...
@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.crawler;
...
@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.crawler;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Objects
;
import
java.util.Objects
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
...
@@ -14,9 +15,11 @@ import org.slf4j.Logger;
...
@@ -14,9 +15,11 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
/**
/**
* @ClassName:SougoHotSearch
* @ClassName:SougoHotSearch
...
@@ -43,7 +46,8 @@ public class SougoHotSearchCrawler {
...
@@ -43,7 +46,8 @@ public class SougoHotSearchCrawler {
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
String
htmlBody
=
null
;
String
htmlBody
=
null
;
try
{
try
{
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
Map
<
String
,
String
>
headMap
=
HeaderTool
.
getCommonHead
();
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headMap
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"topwords"
))
{
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"topwords"
))
{
try
{
try
{
Document
document
=
Jsoup
.
parse
(
htmlBody
);
Document
document
=
Jsoup
.
parse
(
htmlBody
);
...
@@ -62,9 +66,8 @@ public class SougoHotSearchCrawler {
...
@@ -62,9 +66,8 @@ public class SougoHotSearchCrawler {
// 获取关键词(String)
// 获取关键词(String)
String
kw
=
element
.
select
(
"li"
).
select
(
"a"
).
text
();
String
kw
=
element
.
select
(
"li"
).
select
(
"a"
).
text
();
logger
.
info
(
"关键词:{}"
,
kw
);
//
logger.info("关键词:{}", kw);
// 获取关键词相关链接everurl(String)
String
everurl
=
element
.
select
(
"li"
).
select
(
"a"
).
attr
(
"href"
);
String
everurl
=
element
.
select
(
"li"
).
select
(
"a"
).
attr
(
"href"
);
HotSearchList
hotSearch
=
new
HotSearchList
(
everurl
,
kw
,
null
,
rank
,
HotSearchType
.
搜狗微信热搜
.
name
());
HotSearchList
hotSearch
=
new
HotSearchList
(
everurl
,
kw
,
null
,
rank
,
HotSearchType
.
搜狗微信热搜
.
name
());
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
d4cb16b0
...
@@ -16,6 +16,7 @@ import org.slf4j.LoggerFactory;
...
@@ -16,6 +16,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
...
@@ -46,7 +47,7 @@ public class WeiboHotSearchCrawler {
...
@@ -46,7 +47,7 @@ public class WeiboHotSearchCrawler {
for
(
int
i
=
0
;
i
<
3
;
i
++){
for
(
int
i
=
0
;
i
<
3
;
i
++){
String
htmlBody
=
null
;
String
htmlBody
=
null
;
try
{
try
{
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)
,
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"pl_top_realtimehot"
)){
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"pl_top_realtimehot"
)){
try
{
try
{
// String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0];
// String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0];
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
d4cb16b0
...
@@ -11,6 +11,7 @@ import org.slf4j.LoggerFactory;
...
@@ -11,6 +11,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
...
@@ -46,7 +47,7 @@ public class ZhihuHotSearchCrawler {
...
@@ -46,7 +47,7 @@ public class ZhihuHotSearchCrawler {
headerMap
.
put
(
"authorization"
,
"oauth c3cef7c66a1843f8b3a9e6a1e3160e20"
);
headerMap
.
put
(
"authorization"
,
"oauth c3cef7c66a1843f8b3a9e6a1e3160e20"
);
headerMap
.
put
(
"Referer"
,
rerferer
);
headerMap
.
put
(
"Referer"
,
rerferer
);
try
{
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)
,
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"words"
)){
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"words"
)){
list
=
new
ArrayList
<>();
list
=
new
ArrayList
<>();
JSONObject
topSearch
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
topSearch
=
JSONObject
.
parseObject
(
htmlBody
);
...
@@ -90,7 +91,7 @@ public class ZhihuHotSearchCrawler {
...
@@ -90,7 +91,7 @@ public class ZhihuHotSearchCrawler {
for
(
int
j
=
0
;
j
<
3
;
j
++){
for
(
int
j
=
0
;
j
<
3
;
j
++){
try
{
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)
,
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"author"
)){
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"author"
)){
list
=
new
ArrayList
<>();
list
=
new
ArrayList
<>();
JSONObject
topSearch
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
topSearch
=
JSONObject
.
parseObject
(
htmlBody
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchListDAO.java
View file @
d4cb16b0
...
@@ -81,9 +81,11 @@ public class HotSearchListDAO extends MongoDBTemplate{
...
@@ -81,9 +81,11 @@ public class HotSearchListDAO extends MongoDBTemplate{
DBCursor
cur
=
this
.
getReadColl
().
find
(
query
).
sort
(
sort
).
limit
(
1
);
DBCursor
cur
=
this
.
getReadColl
().
find
(
query
).
sort
(
sort
).
limit
(
1
);
while
(
cur
.
hasNext
()){
while
(
cur
.
hasNext
()){
DBObject
doc
=
cur
.
next
();
DBObject
doc
=
cur
.
next
();
if
(
doc
.
get
(
"count"
)!=
null
)
{
result
=
weiboHotSearch
.
getCount
()
-
Integer
.
valueOf
(
doc
.
get
(
"count"
).
toString
());
result
=
weiboHotSearch
.
getCount
()
-
Integer
.
valueOf
(
doc
.
get
(
"count"
).
toString
());
break
;
break
;
}
}
}
cur
.
close
();
cur
.
close
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
logger
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
d4cb16b0
...
@@ -4,7 +4,10 @@ import java.util.concurrent.Executors;
...
@@ -4,7 +4,10 @@ import java.util.concurrent.Executors;
import
java.util.concurrent.ScheduledExecutorService
;
import
java.util.concurrent.ScheduledExecutorService
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.searchhotcrawler.cache.CacheListener
;
import
com.zhiwei.searchhotcrawler.cache.CacheListener
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.DouyinHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.DouyinHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun
;
...
@@ -33,6 +36,9 @@ public class HotSearchRun {
...
@@ -33,6 +36,9 @@ public class HotSearchRun {
}
}
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
ProxyFactory
.
init
(
ProxyConfig
.
registry
,
ProxyConfig
.
group
,
GroupType
.
PROVIDER
);
new
UpdateWechatUserRun
().
start
();
new
UpdateWechatUserRun
().
start
();
ZhiWeiTools
.
sleep
(
10000
);
ZhiWeiTools
.
sleep
(
10000
);
new
HotSearchRun
().
showTimer
();
new
HotSearchRun
().
showTimer
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
View file @
d4cb16b0
...
@@ -38,7 +38,7 @@ public class BaiduHotSearchRun extends Thread{
...
@@ -38,7 +38,7 @@ public class BaiduHotSearchRun extends Thread{
doc
.
put
(
"time"
,
baiduHotSearch
.
getTime
());
doc
.
put
(
"time"
,
baiduHotSearch
.
getTime
());
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"rank"
,
baiduHotSearch
.
getRank
());
doc
.
put
(
"rank"
,
baiduHotSearch
.
getRank
());
doc
.
put
(
"type"
,
HotSearchType
.
百度热搜
.
nam
e
());
doc
.
put
(
"type"
,
baiduHotSearch
.
getTyp
e
());
saveDataList
.
add
(
doc
);
saveDataList
.
add
(
doc
);
});
});
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
View file @
d4cb16b0
...
@@ -36,7 +36,7 @@ public class DouyinHotSearchRun extends Thread{
...
@@ -36,7 +36,7 @@ public class DouyinHotSearchRun extends Thread{
douyin
.
put
(
"time"
,
douyinHotSearch
.
getTime
());
douyin
.
put
(
"time"
,
douyinHotSearch
.
getTime
());
douyin
.
put
(
"changeCount"
,
changeCount
);
douyin
.
put
(
"changeCount"
,
changeCount
);
douyin
.
put
(
"url"
,
null
);
douyin
.
put
(
"url"
,
null
);
douyin
.
put
(
"type"
,
HotSearchType
.
抖音热搜
.
nam
e
());
douyin
.
put
(
"type"
,
douyinHotSearch
.
getTyp
e
());
data
.
add
(
douyin
);
data
.
add
(
douyin
);
hotSearchDAO
.
addHotSearch
(
douyin
);
hotSearchDAO
.
addHotSearch
(
douyin
);
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
View file @
d4cb16b0
...
@@ -33,7 +33,7 @@ public class SougoHotSearchRun extends Thread {
...
@@ -33,7 +33,7 @@ public class SougoHotSearchRun extends Thread {
doc
.
put
(
"day"
,
sougoHotSearch
.
getDay
());
doc
.
put
(
"day"
,
sougoHotSearch
.
getDay
());
doc
.
put
(
"time"
,
sougoHotSearch
.
getTime
());
doc
.
put
(
"time"
,
sougoHotSearch
.
getTime
());
doc
.
put
(
"rank"
,
sougoHotSearch
.
getRank
());
doc
.
put
(
"rank"
,
sougoHotSearch
.
getRank
());
doc
.
put
(
"type"
,
HotSearchType
.
搜狗微信热搜
.
nam
e
());
doc
.
put
(
"type"
,
sougoHotSearch
.
getTyp
e
());
data
.
add
(
doc
);
data
.
add
(
doc
);
}
}
hotSearchDAO
.
addHotSearchList
(
data
);
hotSearchDAO
.
addHotSearchList
(
data
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
View file @
d4cb16b0
...
@@ -37,7 +37,7 @@ public class WeiboHotSearchRun extends Thread{
...
@@ -37,7 +37,7 @@ public class WeiboHotSearchRun extends Thread{
doc
.
put
(
"time"
,
weiboHotSearch
.
getTime
());
doc
.
put
(
"time"
,
weiboHotSearch
.
getTime
());
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"changeCount"
,
changeCount
);
doc
.
put
(
"rank"
,
weiboHotSearch
.
getRank
());
doc
.
put
(
"rank"
,
weiboHotSearch
.
getRank
());
doc
.
put
(
"type"
,
HotSearchType
.
微博热搜
.
nam
e
());
doc
.
put
(
"type"
,
weiboHotSearch
.
getTyp
e
());
data
.
add
(
doc
);
data
.
add
(
doc
);
}
}
weiboHotSearchDAO
.
addHotSearchList
(
data
);
weiboHotSearchDAO
.
addHotSearchList
(
data
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
View file @
d4cb16b0
...
@@ -36,7 +36,7 @@ public class ZhihuHotSearchRun extends Thread{
...
@@ -36,7 +36,7 @@ public class ZhihuHotSearchRun extends Thread{
zhihu
.
put
(
"time"
,
zhihuHotSearch
.
getTime
());
zhihu
.
put
(
"time"
,
zhihuHotSearch
.
getTime
());
zhihu
.
put
(
"changeCount"
,
0
);
zhihu
.
put
(
"changeCount"
,
0
);
zhihu
.
put
(
"rank"
,
zhihuHotSearch
.
getRank
());
zhihu
.
put
(
"rank"
,
zhihuHotSearch
.
getRank
());
zhihu
.
put
(
"type"
,
HotSearchType
.
知乎热搜
.
nam
e
());
zhihu
.
put
(
"type"
,
zhihuHotSearch
.
getTyp
e
());
hotSearchDAO
.
addHotSearch
(
zhihu
);
hotSearchDAO
.
addHotSearch
(
zhihu
);
}
}
logger
.
info
(
"知乎话题采集结束........"
);
logger
.
info
(
"知乎话题采集结束........"
);
...
...
src/main/resources/db.properties
View file @
d4cb16b0
...
@@ -3,8 +3,9 @@ mongoIp=192.168.0.101
...
@@ -3,8 +3,9 @@ mongoIp=192.168.0.101
mongoPort
=
30000
mongoPort
=
30000
#mongoIp=192.168.0.81
#mongoIp=192.168.0.81
#mongoPort=27017
#mongoPort=27017
db.username
=
zzwno
db.username
=
datapush
db.paasword
=
zzwno1q2w3e4r
db.paasword
=
4d8ce5c42073c
db.certifiedDB
=
admin
db.certifiedDB
=
admin
dbName
=
hot_search_list
dbName
=
hot_search_list
collName
=
hot_search_list
collName
=
hot_search_list
collWechatUserName
=
wechat_user
\ No newline at end of file
src/main/resources/proxyip.properties
0 → 100644
View file @
d4cb16b0
registry
=
zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181
group
=
hangzhou
########################################################
#registry=zookeeper://192.168.0.36:2181
#
group
=
local
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment