Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
a78bc0f3
Commit
a78bc0f3
authored
Oct 16, 2018
by
[zhangzhiwei]
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修正热搜采集
parent
f87165c0
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
91 additions
and
30 deletions
+91
-30
dependency-reduced-pom.xml
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/config/Config.java
+2
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiboHotSearchDAO.java
+1
-3
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+7
-3
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
+6
-8
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
+5
-10
src/main/java/com/zhiwei/searchhotcrawler/util/WechatCodeUtil.java
+63
-1
src/main/resources/db.properties
+4
-2
No files found.
dependency-reduced-pom.xml
View file @
a78bc0f3
...
...
@@ -4,7 +4,7 @@
<groupId>
com.zhiwei
</groupId>
<artifactId>
searchhotcrawler
</artifactId>
<name>
各平台热搜榜单采集程序
</name>
<version>
0.0.
3
-SNAPSHOT
</version>
<version>
0.0.
6
-SNAPSHOT
</version>
<description>
各平台热搜榜单采集程序
目前包含:1.微博时时热搜采集程序、2.知乎热搜采集程序
</description>
<developers>
...
...
src/main/java/com/zhiwei/searchhotcrawler/config/Config.java
View file @
a78bc0f3
...
...
@@ -20,6 +20,7 @@ public class Config {
dbName
=
conf
.
getProperty
(
"dbName"
);
collWeiboName
=
conf
.
getProperty
(
"collWeiboName"
);
collZhihuName
=
conf
.
getProperty
(
"collZhihuName"
);
collWechatUserName
=
conf
.
getProperty
(
"collWechatUserName"
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
...
...
@@ -35,5 +36,6 @@ public class Config {
public
static
String
dbName
;
public
static
String
collWeiboName
;
public
static
String
collZhihuName
;
public
static
String
collWechatUserName
;
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
a78bc0f3
...
...
@@ -64,13 +64,13 @@ public class WeiboHotSearchCrawler {
try
{
String
id
=
"http://s.weibo.com"
+
element
.
select
(
"td.td-02"
).
select
(
"a"
).
attr
(
"href"
);
String
name
=
element
.
select
(
"td.td-02"
).
select
(
"a"
).
text
();
String
num
=
!
element
.
select
(
"td.td-0
3"
).
text
().
equals
(
""
)?
element
.
select
(
"td.td-03
"
).
text
():
"0"
;
String
num
=
!
element
.
select
(
"td.td-0
2"
).
select
(
"span"
).
text
().
equals
(
""
)?
element
.
select
(
"td.td-02"
).
select
(
"span
"
).
text
():
"0"
;
int
hotCount
=
Integer
.
valueOf
(
num
);
WeiboHotSearch
hotSearch
=
new
WeiboHotSearch
(
id
,
name
,
hotCount
,
true
);
list
.
add
(
hotSearch
);
}
catch
(
Exception
e
)
{
SendMailWeibo
.
sendMail
(
"微博热搜采集出现问题"
,
"859548429@qq.com"
);
logger
.
error
(
"解析微博时时热搜时出现解析错误"
,
e
.
fillInStackTrace
()
);
logger
.
error
(
"解析微博时时热搜时出现解析错误"
,
e
);
continue
;
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiboHotSearchDAO.java
View file @
a78bc0f3
...
...
@@ -30,11 +30,10 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
* @param @param doc 设定文件
* @return void 返回类型
*/
@SuppressWarnings
(
"deprecation"
)
public
void
addWeiboHotSearch
(
List
<
DBObject
>
list
){
for
(
int
i
=
0
;
i
<
3
;
i
++){
try
{
this
.
getReadColl
().
insert
(
list
,
WriteConcern
.
SAFE
);
this
.
getReadColl
().
insert
(
list
);
ZhiWeiTools
.
sleep
(
200
);
break
;
}
catch
(
Exception
e
)
{
...
...
@@ -44,7 +43,6 @@ public class WeiboHotSearchDAO extends MongoDBTemplate{
}
}
/**
* @Title: getChangeCount
* @author hero
...
...
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
a78bc0f3
...
...
@@ -7,8 +7,10 @@ import java.util.concurrent.TimeUnit;
import
com.zhiwei.searchhotcrawler.cache.CacheListener
;
import
com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.UpdateWechatUserRun
;
import
com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun
;
import
com.zhiwei.zhiweiTools.tools.ZhiWeiTools
;
public
class
HotSearchRun
{
...
...
@@ -25,9 +27,11 @@ public class HotSearchRun {
}
public
static
void
main
(
String
[]
args
)
{
// new HotSearchRun().showTimer();
// new CacheListener().startListen();
new
UpdateWechatUserRun
().
start
();
ZhiWeiTools
.
sleep
(
10000
);
new
HotSearchRun
().
showTimer
();
new
CacheListener
().
startListen
();
new
SendWeiboHotSearchRun
().
start
();
//
new SendZhihuHotSearchRun().start();
new
SendZhihuHotSearchRun
().
start
();
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
View file @
a78bc0f3
...
...
@@ -9,6 +9,7 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.dao.WechatUserDao
;
import
com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO
;
import
com.zhiwei.searchhotcrawler.util.Template
;
import
com.zhiwei.searchhotcrawler.util.WechatCodeUtil
;
...
...
@@ -18,6 +19,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public
class
SendWeiboHotSearchRun
extends
Thread
{
private
WeiboHotSearchDAO
weiboHotSearchDAO
=
new
WeiboHotSearchDAO
();
private
static
WechatUserDao
wechatUserDao
=
new
WechatUserDao
();
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
SendWeiboHotSearchRun
.
class
);
@Override
...
...
@@ -111,14 +113,10 @@ public class SendWeiboHotSearchRun extends Thread {
* @return List<String> 返回类型
*/
public
static
List
<
String
>
getUserList
()
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
List
<
String
>
userList
=
WechatCodeUtil
.
getUserList
(
"weibohot"
);
if
(
userList
!=
null
)
{
return
userList
;
}
else
{
continue
;
List
<
String
>
userList
=
wechatUserDao
.
getWechatUserByGroup
(
"weibohot"
);
if
(
userList
==
null
){
userList
=
WechatCodeUtil
.
getUserListByGroupName
(
"weibohot"
);
}
}
return
null
;
return
userList
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
View file @
a78bc0f3
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.Calendar
;
import
java.util.Date
;
import
java.util.HashMap
;
...
...
@@ -12,6 +11,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.dao.WechatUserDao
;
import
com.zhiwei.searchhotcrawler.dao.ZhihuHotSearchDAO
;
import
com.zhiwei.searchhotcrawler.util.Template
;
import
com.zhiwei.searchhotcrawler.util.WechatCodeUtil
;
...
...
@@ -21,6 +21,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public
class
SendZhihuHotSearchRun
extends
Thread
{
private
ZhihuHotSearchDAO
zhihuHotSearchDAO
=
new
ZhihuHotSearchDAO
();
private
static
WechatUserDao
wechatUserDao
=
new
WechatUserDao
();
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
SendZhihuHotSearchRun
.
class
);
@Override
public
void
run
()
{
...
...
@@ -115,15 +116,9 @@ public class SendZhihuHotSearchRun extends Thread{
*/
private
static
List
<
String
>
getUserList
()
{
List
<
String
>
userList
=
new
ArrayList
<
String
>();
for
(
int
i
=
0
;
i
<
3
;
i
++){
List
<
String
>
lpUserList
=
WechatCodeUtil
.
getUserList
(
"LP组"
);
if
(
lpUserList
!=
null
){
userList
.
addAll
(
lpUserList
);
break
;
}
else
{
continue
;
}
List
<
String
>
userList
=
wechatUserDao
.
getWechatUserByGroup
(
"LP组"
);
if
(
userList
==
null
){
userList
=
WechatCodeUtil
.
getUserListByGroupName
(
"LP组"
);
}
return
userList
;
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/util/WechatCodeUtil.java
View file @
a78bc0f3
package
com
.
zhiwei
.
searchhotcrawler
.
util
;
import
java.io.IOException
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
...
...
@@ -87,7 +88,7 @@ public class WechatCodeUtil {
* @return List<String> 返回类型
*/
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
String
>
getUserList
(
String
groupName
)
{
public
static
List
<
String
>
getUserList
ByGroupName
(
String
groupName
)
{
try
{
String
token
=
getToken
();
if
(
token
!=
null
){
...
...
@@ -99,6 +100,37 @@ public class WechatCodeUtil {
if
(
null
!=
jsonObject
)
{
if
(
jsonObject
.
containsKey
(
"data"
))
{
return
(
List
<
String
>)
jsonObject
.
getJSONObject
(
"data"
).
getObject
(
"openid"
,
List
.
class
);
}
else
{
logger
.
info
(
"拉取用户列表时,出现问题{}"
,
jsonObject
);
}
}
}
else
{
logger
.
info
(
"token 获取失败"
);
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
null
;
}
return
null
;
}
public
static
List
<
String
>
getUserListByGroupId
(
Integer
groupId
)
{
try
{
String
token
=
getToken
();
if
(
token
!=
null
){
String
url
=
"https://api.weixin.qq.com/cgi-bin/user/tag/get?access_token="
+
token
;
JSONObject
postData
=
new
JSONObject
();
postData
.
put
(
"tagid"
,
groupId
);
postData
.
put
(
"next_openid"
,
""
);
JSONObject
jsonObject
=
HttpRequest
.
httpRequest
(
url
,
"GET"
,
postData
.
toString
());
if
(
null
!=
jsonObject
)
{
if
(
jsonObject
.
containsKey
(
"data"
))
{
return
(
List
<
String
>)
jsonObject
.
getJSONObject
(
"data"
).
getObject
(
"openid"
,
List
.
class
);
}
else
{
logger
.
info
(
"拉取用户列表时,出现问题{}"
,
jsonObject
);
}
}
}
else
{
...
...
@@ -150,4 +182,34 @@ public class WechatCodeUtil {
return
groupId
;
}
/**
 * Queries all user groups (tags) defined under the WeChat official account.
 *
 * <p>Sends a GET request to the {@code cgi-bin/tags/get} endpoint using the current access
 * token and reads the {@code tags} array of the JSON response, mapping each entry's
 * {@code name} to its {@code id}.
 *
 * @return a map from group name to group id; an empty map when no access token is
 *         available, the response body is {@code null}, or the body carries no
 *         {@code tags} array; {@code null} when the HTTP request fails with an
 *         {@link IOException}
 */
public static Map<String, Integer> getAllGroupIp() {
    Map<String, Integer> resultMap = new HashMap<String, Integer>();

    String token = getToken();
    if (token == null) {
        // Same guard as the other token-based lookups in this class: without a token the
        // request could only come back as an error payload, so skip the round trip.
        logger.info("token 获取失败");
        return resultMap;
    }

    String url = "https://api.weixin.qq.com/cgi-bin/tags/get?access_token=" + token;
    Map<String, String> headerMap = HeaderTool.getCommonHead();
    try {
        String htmlBody = HttpClientTemplateOK.get(url, null, headerMap);
        if (htmlBody == null) {
            return resultMap;
        }
        if (!htmlBody.contains("tags")) {
            logger.info("获取分组id时出现错误,数据为:::{}", htmlBody);
            return resultMap;
        }

        JSONArray jsonArry = JSONObject.parseObject(htmlBody).getJSONArray("tags");
        if (jsonArry == null) {
            // The substring check above can match text other than a top-level "tags" key
            // (for example inside an error message), so the array may still be absent.
            logger.info("获取分组id时出现错误,数据为:::{}", htmlBody);
            return resultMap;
        }
        for (int i = 0; i < jsonArry.size(); i++) {
            JSONObject data = jsonArry.getJSONObject(i);
            Integer id = data.getInteger("id");
            String name = data.getString("name");
            resultMap.put(name, id);
        }
    } catch (IOException e) {
        // Log the exception itself: fillInStackTrace() would overwrite the original
        // stack trace with the location of this catch block.
        logger.error("获取分组id时出现错误", e);
        return null;
    }
    return resultMap;
}
}
src/main/resources/db.properties
View file @
a78bc0f3
#
mongoIp=202.107.192.94
mongoIp
=
192.168.0.101
mongoIp
=
202.107.192.94
#
mongoIp=192.168.0.101
mongoPort
=
30000
db.username
=
zzwno
db.paasword
=
zzwno1q2w3e4r
...
...
@@ -7,3 +7,4 @@ db.certifiedDB=admin
dbName
=
NetWork
collWeiboName
=
weibo_hotsearch2018_10
collZhihuName
=
zhihu_hotsearch2018_10
collWechatUserName
=
wechat_user
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment