Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
2918236d
Commit
2918236d
authored
Jul 17, 2018
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
知乎采集添加客户端
推送改为指定分组推送
parent
c6199334
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
126 additions
and
82 deletions
+126
-82
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+0
-3
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
+5
-6
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
+4
-11
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
+7
-24
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
+3
-1
src/main/java/com/zhiwei/searchhotcrawler/util/WechatCodeUtil.java
+106
-36
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
2918236d
...
...
@@ -95,9 +95,7 @@ public class ZhihuHotSearchCrawler {
for
(
int
j
=
0
;
j
<
3
;
j
++){
try
{
System
.
out
.
println
(
url
);
String
htmlBody
=
HttpClientTemplateOK
.
get
(
url
,
null
,
headerMap
);
System
.
out
.
println
(
htmlBody
);
if
(
htmlBody
!=
null
){
if
(
htmlBody
.
contains
(
"author"
)){
list
=
new
ArrayList
<
ZhihuHotSearch
>();
...
...
@@ -112,7 +110,6 @@ public class ZhihuHotSearchCrawler {
display_query
=
word
.
getString
(
"title"
);
link
=
"https://www.zhihu.com/question/"
+
word
.
getLongValue
(
"id"
);
ZhihuHotSearch
zhihu
=
new
ZhihuHotSearch
(
link
,
query
,
display_query
,
new
Date
());
System
.
out
.
println
(
zhihu
);
list
.
add
(
zhihu
);
}
break
;
...
...
src/main/java/com/zhiwei/searchhotcrawler/run/HotSearchRun.java
View file @
2918236d
...
...
@@ -5,7 +5,6 @@ import java.util.concurrent.ScheduledExecutorService;
import
java.util.concurrent.TimeUnit
;
import
com.zhiwei.searchhotcrawler.cache.CacheListener
;
import
com.zhiwei.searchhotcrawler.timer.SendWeiboHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.SendZhihuHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun
;
import
com.zhiwei.searchhotcrawler.timer.ZhihuHotSearchRun
;
...
...
@@ -16,14 +15,14 @@ public class HotSearchRun {
private
ScheduledExecutorService
scheduExec
;
public
HotSearchRun
()
{
this
.
scheduExec
=
Executors
.
newScheduledThreadPool
(
1
);
this
.
scheduExec
=
Executors
.
newScheduledThreadPool
(
3
);
}
public
void
showTimer
()
{
scheduExec
.
scheduleAtFixedRate
(
new
WeiboHotSearchRun
(),
1000
,
60
*
1000
,
TimeUnit
.
MILLISECOND
S
);
scheduExec
.
scheduleAtFixedRate
(
new
ZhihuHotSearchRun
(),
1000
,
60
*
1000
,
TimeUnit
.
MILLISECOND
S
);
scheduExec
.
scheduleAtFixedRate
(
new
SendZhihuHotSearchRun
(),
1000
,
60
*
60
*
1000
,
TimeUnit
.
MILLISECOND
S
);
scheduExec
.
scheduleAtFixedRate
(
new
SendWeiboHotSearchRun
(),
1000
,
60
*
60
*
1000
,
TimeUnit
.
MILLISECOND
S
);
scheduExec
.
scheduleAtFixedRate
(
new
WeiboHotSearchRun
(),
0
,
1
,
TimeUnit
.
MINUTE
S
);
scheduExec
.
scheduleAtFixedRate
(
new
ZhihuHotSearchRun
(),
0
,
1
,
TimeUnit
.
MINUTE
S
);
scheduExec
.
scheduleAtFixedRate
(
new
SendZhihuHotSearchRun
(),
0
,
60
,
TimeUnit
.
MINUTE
S
);
// scheduExec.scheduleAtFixedRate(new SendWeiboHotSearchRun(), 0, 60 , TimeUnit.MINUTE
S);
}
public
static
void
main
(
String
[]
args
)
{
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SendWeiboHotSearchRun.java
View file @
2918236d
package
com
.
zhiwei
.
searchhotcrawler
.
timer
;
import
java.util.ArrayList
;
import
java.util.Calendar
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.dao.WeiboHotSearchDAO
;
...
...
@@ -87,8 +84,6 @@ public class SendWeiboHotSearchRun extends Thread{
}
}
/**
* @Title: getUserList
* @author hero
...
...
@@ -97,11 +92,9 @@ public class SendWeiboHotSearchRun extends Thread{
* @param @return 设定文件
* @return List<String> 返回类型
*/
public
static
List
<
String
>
getUserList
(){
List
<
String
>
userList
=
new
ArrayList
<
String
>();
userList
.
add
(
"o_J5m0ZXyC5MBb2hiwFzyUYpo1Fw"
);
return
userList
;
/**
 * Returns the WeChat openids that should receive the Weibo hot-search push.
 *
 * <p>The recipients are whatever {@code WechatCodeUtil.getUserList} yields for the
 * tag group named {@code "weibohot"}; the result is handed back unchanged.
 *
 * @return the openid list produced by {@code WechatCodeUtil.getUserList("weibohot")}
 */
public static List<String> getUserList() {
    return WechatCodeUtil.getUserList("weibohot");
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/SendZhihuHotSearchRun.java
View file @
2918236d
...
...
@@ -88,8 +88,6 @@ public class SendZhihuHotSearchRun extends Thread{
}
}
/**
* @Title: getUserList
* @author hero
...
...
@@ -98,27 +96,12 @@ public class SendZhihuHotSearchRun extends Thread{
* @param @return 设定文件
* @return List<String> 返回类型
*/
public
static
List
<
String
>
getUserList
(){
List
<
String
>
userList
=
new
ArrayList
<
String
>();
userList
.
add
(
"o_J5m0ZXyC5MBb2hiwFzyUYpo1Fw"
);
//mine
//JD组
userList
.
add
(
"o_J5m0Ypn3DtKCum3tViKqDNFSN8"
);
//孙熠
userList
.
add
(
"o_J5m0YgHuKvrbhxut4oukyi6nAE"
);
//demon波仔
userList
.
add
(
"o_J5m0e5KQVI3D4InxWgJizW8LHY"
);
//R
userList
.
add
(
"o_J5m0QslOcghBKzodrixE2yXbnk"
);
//尘埃眠于光年
//LP组
userList
.
add
(
"o_J5m0USUBdmXsq-z_sgMwVpYvE4"
);
//胡芸莹
userList
.
add
(
"o_J5m0a7SRz9C4wnzuZZqlY84s4A"
);
//汪宏帅
userList
.
add
(
"o_J5m0ejlbqnO3nUNQPec_gooB_w"
);
//兢兢จุ๊บ
userList
.
add
(
"o_J5m0Y8-cAjslRAz7HQXMg1vn3I"
);
//荠萸
userList
.
add
(
"o_J5m0SwoXFU9itzq3s6WTDYD-rE"
);
//mogu
userList
.
add
(
"o_J5m0UDO0qIVvo7EN7AM5oldycM"
);
//Judy
userList
.
add
(
"o_J5m0XDzVVM4Lb_CT8utIazdVRo"
);
//Booming
userList
.
add
(
"o_J5m0cy46a49Ta1h5z-jTGLfxg4"
);
//拾壹.
userList
.
add
(
"o_J5m0XomZ0MtCHNPfaiVY-CJCpY"
);
//以陌
return
userList
;
}
/**
 * Builds the list of WeChat openids that should receive the Zhihu hot-search push.
 *
 * <p>Recipients are the members of the tag group named {@code "LP组"}, looked up through
 * {@code WechatCodeUtil.getUserList}. The lookup result is copied into a fresh list so
 * further groups can be appended later without touching the list returned by the helper.
 *
 * @return a new mutable list of openids; empty when the group lookup yields nothing
 */
private static List<String> getUserList() {
    List<String> userList = new ArrayList<String>();
    List<String> lpUserList = WechatCodeUtil.getUserList("LP组");
    // The lookup may hand back null (e.g. when the request fails); addAll(null) would
    // throw a NullPointerException, so only merge a real list.
    if (lpUserList != null) {
        userList.addAll(lpUserList);
    }
    return userList;
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
View file @
2918236d
...
...
@@ -22,6 +22,7 @@ public class WeiboHotSearchRun extends Thread{
public
void
run
()
{
logger
.
info
(
"微博话题采集开始........"
);
List
<
WeiboHotSearch
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearch
();
logger
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<
DBObject
>();
for
(
WeiboHotSearch
weiboHotSearch
:
list
){
int
changeCount
=
weiboHotSearchDAO
.
getChangeCount
(
weiboHotSearch
);
...
...
@@ -34,7 +35,6 @@ public class WeiboHotSearchRun extends Thread{
doc
.
put
(
"day"
,
weiboHotSearch
.
getDay
());
doc
.
put
(
"time"
,
weiboHotSearch
.
getTime
());
doc
.
put
(
"changeCount"
,
changeCount
);
logger
.
info
(
"{}, 话题名字:{}"
,
new
Date
(),
weiboHotSearch
.
getName
());
data
.
add
(
doc
);
}
weiboHotSearchDAO
.
addWeiboHotSearch
(
data
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
View file @
2918236d
...
...
@@ -22,6 +22,9 @@ public class ZhihuHotSearchRun extends Thread{
public
void
run
()
{
logger
.
info
(
"知乎话题采集开始........"
);
List
<
ZhihuHotSearch
>
list
=
ZhihuHotSearchCrawler
.
getZhihuHotList
();
List
<
ZhihuHotSearch
>
mobilelist
=
ZhihuHotSearchCrawler
.
getMobileZhihuHotList
();
list
.
addAll
(
mobilelist
);
logger
.
info
(
"{}, 知乎此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<
DBObject
>();
for
(
ZhihuHotSearch
zhihuHotSearch
:
list
){
DBObject
zhihu
=
new
BasicDBObject
();
...
...
@@ -29,7 +32,6 @@ public class ZhihuHotSearchRun extends Thread{
zhihu
.
put
(
"query"
,
zhihuHotSearch
.
getQuery
());
zhihu
.
put
(
"display_query"
,
zhihuHotSearch
.
getDisplayQuery
());
zhihu
.
put
(
"time"
,
zhihuHotSearch
.
getTime
());
logger
.
info
(
"{}, 知乎话题名字:{}"
,
new
Date
(),
zhihuHotSearch
.
getQuery
());
data
.
add
(
zhihu
);
zhihuHotSearchDAO
.
addZhiHuHotSearch
(
zhihu
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/util/WechatCodeUtil.java
View file @
2918236d
package
com
.
zhiwei
.
searchhotcrawler
.
util
;
import
java.io.BufferedReader
;
import
java.io.IOException
;
import
java.io.InputStreamReader
;
import
java.net.URL
;
import
java.net.URLConnection
;
import
com.alibaba.fastjson.JSONObject
;
import
java.util.List
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.zhiweiTools.httpClient.HeaderTool
;
import
com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK
;
public
class
WechatCodeUtil
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WechatCodeUtil
.
class
);
/**
* @Title: getToken
* @author hero
* @Description: 获取token
* @param @return
* 设定文件
* @return String 返回类型
*/
private
static
String
getToken
()
{
String
token
=
""
;
String
appId
=
"wx2f555218d66e5948"
;
String
jmAppId
=
AESUtils
.
encrypt
(
"wechat"
,
appId
);
String
path
=
"http://yuqing.zhiweidata.com/WechatPublic/common/getToken?appId="
+
jmAppId
;
BufferedReader
in
=
null
;
String
token
=
""
;
String
appId
=
"wx2f555218d66e5948"
;
String
jmAppId
=
AESUtils
.
encrypt
(
"wechat"
,
appId
);
String
path
=
"http://yuqing.zhiweidata.com/WechatPublic/common/getToken?appId="
+
jmAppId
;
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
()
;
try
{
String
result
=
""
;
URL
url
=
new
URL
(
path
);
URLConnection
connection
=
url
.
openConnection
();
connection
.
setConnectTimeout
(
3000
);
// 建立连接
connection
.
connect
();
// 定义 BufferedReader输入流来读取URL的响应
in
=
new
BufferedReader
(
new
InputStreamReader
(
connection
.
getInputStream
()));
String
line
;
while
((
line
=
in
.
readLine
())
!=
null
)
{
result
+=
line
;
String
result
=
HttpClientTemplateOK
.
get
(
path
,
null
,
headerMap
);
if
(
result
!=
null
)
{
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
result
);
JSONObject
inJson
=
JSONObject
.
parseObject
(
jsonObject
.
getString
(
"data"
));
token
=
inJson
.
getString
(
"accessToken"
);
}
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
result
);
JSONObject
inJson
=
JSONObject
.
parseObject
(
jsonObject
.
getString
(
"data"
));
token
=
inJson
.
getString
(
"accessToken"
);
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
}
finally
{
try
{
in
.
close
();
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
}
logger
.
error
(
"获取微信公众号推送token失败,问题为:::{}"
,
e
.
fillInStackTrace
());
}
return
token
;
}
/**
* @Title: sendDataJson
* @author hero
* @Description: t推送模版消息数据
* @param @param
* templateJson
* @param @return
* 设定文件
* @return int 返回类型
*/
public
static
int
sendDataJson
(
JSONObject
templateJson
)
{
int
msgid
=
0
;
String
url
=
WechatConstant
.
WECHAT_TEMPLET_SEND_URL
.
replace
(
"ACCESS_TOKEN"
,
getToken
());
...
...
@@ -62,6 +69,69 @@ public class WechatCodeUtil {
}
return
msgid
;
}
/**
 * Fetches the openids of the WeChat users that belong to the named tag (user group).
 *
 * <p>The group name is first resolved to a tag id with {@link #getGroupIp(String)}. The
 * tag id is then sent, together with an empty {@code next_openid}, to the
 * {@code cgi-bin/user/tag/get} endpoint, and the {@code data.openid} array of the
 * response is returned. Only this single request is made; no follow-up request with a
 * non-empty {@code next_openid} is issued.
 *
 * @param groupName name of the tag/group whose members are wanted
 * @return the openids found under {@code data.openid}; an empty mutable list when the
 *         group name cannot be resolved, the response carries no such data, or the
 *         request fails (never {@code null})
 */
@SuppressWarnings("unchecked")
public static List<String> getUserList(String groupName) {
    // Resolve the tag first: without a tag id the request below cannot succeed, so skip it.
    Integer tagId = getGroupIp(groupName);
    if (tagId == null) {
        return new java.util.ArrayList<String>();
    }

    String url = "https://api.weixin.qq.com/cgi-bin/user/tag/get?access_token=" + getToken();
    JSONObject postData = new JSONObject();
    postData.put("tagid", tagId);
    postData.put("next_openid", "");

    try {
        JSONObject jsonObject = HttpRequest.httpRequest(url, "GET", postData.toString());
        // A response without a "data" object or "openid" array is treated like
        // "no users" instead of being dereferenced blindly.
        JSONObject data = (jsonObject == null) ? null : jsonObject.getJSONObject("data");
        List<String> openIds = (data == null) ? null : (List<String>) data.getObject("openid", List.class);
        if (openIds != null) {
            return openIds;
        }
    } catch (Exception e) {
        // NOTE(review): consider routing this through the class logger instead of stderr.
        e.printStackTrace();
    }
    // Fully-qualified so no extra import is required; mutable so callers may append to it.
    return new java.util.ArrayList<String>();
}
/**
 * Resolves a WeChat tag (user group) name to its numeric tag id.
 *
 * <p>Requests the tag list from the {@code cgi-bin/tags/get} endpoint and scans the
 * {@code tags} array of the response for the first entry whose {@code name} equals
 * {@code groupName}.
 *
 * @param groupName tag name to match exactly against each entry's {@code name} field
 * @return the {@code id} of the first matching tag, or {@code null} when no tag matches,
 *         the response has no body or no {@code tags} array, or the request fails with
 *         an {@link IOException}
 */
public static Integer getGroupIp(String groupName) {
    String url = "https://api.weixin.qq.com/cgi-bin/tags/get?access_token=" + getToken();
    Map<String, String> headerMap = HeaderTool.getCommonHead();
    try {
        String htmlBody = HttpClientTemplateOK.get(url, null, headerMap);
        if (htmlBody == null) {
            return null;
        }
        JSONObject body = JSONObject.parseObject(htmlBody);
        JSONArray tags = (body == null) ? null : body.getJSONArray("tags");
        if (tags == null) {
            // Without this guard a response lacking "tags" caused a NullPointerException
            // that the IOException handler below does not catch.
            logger.error("tags/get response contains no \"tags\" array: {}", htmlBody);
            return null;
        }
        for (int i = 0; i < tags.size(); i++) {
            JSONObject tag = tags.getJSONObject(i);
            if (tag == null) {
                continue;
            }
            String name = tag.getString("name");
            // Null-safe comparison: an entry without a name simply does not match.
            if (name != null && name.equals(groupName)) {
                return tag.getInteger("id");
            }
        }
    } catch (IOException e) {
        // Pass the exception itself: fillInStackTrace() would overwrite the original
        // stack trace with the location of this handler.
        logger.error("获取分组id时出现错误", e);
        return null;
    }
    return null;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment