Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
Z
zhiwei-baike
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zengguosheng
zhiwei-baike
Commits
698618cf
Commit
698618cf
authored
Jun 08, 2021
by
曾国盛
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
更新包名
parent
70551d64
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
193 additions
and
194 deletions
+193
-194
src/main/java/com/zhiwei/crawler/compare/BaiDuBaiKe.java
+1
-1
src/main/java/com/zhiwei/crawler/compare/Baike360.java
+1
-1
src/main/java/com/zhiwei/crawler/compare/SouGouBaiKeMonitor.java
+163
-19
src/main/java/com/zhiwei/crawler/main/Main.java
+2
-2
src/main/java/com/zhiwei/crawler/monitor/BaiDuBaiKeMonitor.java
+1
-1
src/main/java/com/zhiwei/crawler/monitor/BaiKe360Monitor.java
+2
-2
src/main/java/com/zhiwei/crawler/monitor/SouGouBaiKeMonitor.java
+19
-163
src/test/java/com/zhiwei/crawler/BaiKe360MonitorTest.java
+1
-2
src/test/java/com/zhiwei/crawler/MainTest.java
+2
-2
src/test/java/com/zhiwei/crawler/SouGouBaiKeMonitorTest.java
+1
-1
No files found.
src/main/java/com/zhiwei/crawler/
monitor
/BaiDuBaiKe.java
→
src/main/java/com/zhiwei/crawler/
compare
/BaiDuBaiKe.java
View file @
698618cf
package
com
.
zhiwei
.
crawler
.
monitor
;
package
com
.
zhiwei
.
crawler
.
compare
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONPath
;
import
com.alibaba.fastjson.JSONPath
;
...
...
src/main/java/com/zhiwei/crawler/
monitor
/Baike360.java
→
src/main/java/com/zhiwei/crawler/
compare
/Baike360.java
View file @
698618cf
package
com
.
zhiwei
.
crawler
.
monitor
;
package
com
.
zhiwei
.
crawler
.
compare
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoCollection
;
...
...
src/main/java/com/zhiwei/crawler/compare/SouGouBaiKeMonitor.java
View file @
698618cf
package
com
.
zhiwei
.
crawler
.
compare
;
package
com
.
zhiwei
.
crawler
.
compare
;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoCursor
;
import
com.mongodb.client.MongoCursor
;
import
com.mongodb.client.MongoDatabase
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
org.apache.commons.lang3.time.FastDateFormat
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
okhttp3.MediaType
;
import
okhttp3.RequestBody
;
import
okhttp3.Response
;
import
org.apache.logging.log4j.util.Strings
;
import
org.apache.logging.log4j.util.Strings
;
import
org.bson.Document
;
import
org.bson.Document
;
import
org.jsoup.Jsoup
;
import
org.seimicrawler.xpath.JXDocument
;
import
java.util.concurrent.Executors
;
import
java.util.*
;
import
java.util.concurrent.ScheduledExecutorService
;
import
java.util.concurrent.TimeUnit
;
/**
* 搜狗百科信息监控
* @author 朝花夕誓
*/
public
class
SouGouBaiKeMonitor
{
public
class
SouGouBaiKeMonitor
{
public
static
void
start
()
{
com
.
zhiwei
.
crawler
.
monitor
.
SouGouBaiKeMonitor
souGouBaiKeMonitor
=
new
com
.
zhiwei
.
crawler
.
monitor
.
SouGouBaiKeMonitor
();
ScheduledExecutorService
scheduledExecutorService
=
Executors
.
newScheduledThreadPool
(
1
);
/**
* 查找出数据需要的集合
* @return
*/
public
List
<
Map
<
String
,
Object
>>
findKeyWordAddress
(){
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
MongoUtils
mongoUtils
=
new
MongoUtils
();
MongoUtils
mongoUtils
=
new
MongoUtils
();
scheduledExecutorService
.
scheduleAtFixedRate
(()->{
MongoCursor
<
Document
>
allData
=
mongoUtils
.
findAllData
();
MongoCursor
<
Document
>
allData
=
mongoUtils
.
findAllData
();
String
timeStamp
=
FastDateFormat
.
getInstance
(
"yyyy-MM-dd HH:mm:ss"
).
format
(
System
.
currentTimeMillis
());
System
.
out
.
println
(
"\n\n正在访问... "
+
timeStamp
+
"\n\n"
);
while
(
allData
.
hasNext
()){
while
(
allData
.
hasNext
()){
Document
next
=
allData
.
next
();
Document
next
=
allData
.
next
();
String
url
=
next
.
getString
(
"sougoubaike_url"
);
String
keyword
=
null
;
String
keyword
=
next
.
getString
(
"keyword"
);
String
pushAddress
=
null
;
String
pushAddress
=
next
.
getString
(
"push_address"
);
String
souGouBaiKeUrl
=
null
;
if
(
Strings
.
isNotBlank
(
url
)
&&
Strings
.
isNotBlank
(
keyword
)
&&
Strings
.
isNotBlank
(
pushAddress
)){
try
{
keyword
=
next
.
getString
(
"keyword"
);
pushAddress
=
next
.
getString
(
"push_address"
);
souGouBaiKeUrl
=
next
.
getString
(
"sougoubaike_url"
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
if
(
Strings
.
isNotBlank
(
keyword
)
&&
Strings
.
isNotBlank
(
pushAddress
)
&&
Strings
.
isNotBlank
(
souGouBaiKeUrl
)){
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<>();
dataMap
.
put
(
"keyword"
,
keyword
);
dataMap
.
put
(
"pushAddress"
,
pushAddress
);
dataMap
.
put
(
"souGouBaiKeUrl"
,
souGouBaiKeUrl
);
dataList
.
add
(
dataMap
);
}
}
return
dataList
;
}
public
Document
findInDataBaseData
(
String
keyword
){
MongoUtils
mongoUtils
=
new
MongoUtils
();
MongoDatabase
mongoDataBase
=
mongoUtils
.
getMongoDataBase
();
MongoCollection
<
Document
>
souGouBaiKe
=
mongoDataBase
.
getCollection
(
"souGouBaiKe"
);
Document
query
=
new
Document
();
query
.
put
(
"keyword"
,
keyword
);
Document
createTime
=
new
Document
();
createTime
.
put
(
"create_time"
,
-
1
);
Document
first
=
souGouBaiKe
.
find
(
query
).
sort
(
createTime
).
first
();
return
first
;
}
public
void
saveData
(
Map
<
String
,
Object
>
dataMap
){
MongoUtils
mongoUtils
=
new
MongoUtils
();
MongoDatabase
mongoDataBase
=
mongoUtils
.
getMongoDataBase
();
MongoCollection
<
Document
>
souGouBaiKe
=
mongoDataBase
.
getCollection
(
"souGouBaiKe"
);
Document
document
=
new
Document
();
document
.
put
(
"create_time"
,
(
int
)(
System
.
currentTimeMillis
()/
1000
));
document
.
append
(
"keyword"
,
dataMap
.
get
(
"keyword"
))
.
append
(
"creator"
,
dataMap
.
get
(
"creator"
))
.
append
(
"edit_number"
,
dataMap
.
get
(
"editNumber"
))
.
append
(
"recent_update"
,
dataMap
.
get
(
"recentUpdate"
));
souGouBaiKe
.
insertOne
(
document
);
}
public
void
dataCompare
(
String
keyword
,
String
monitorUrl
,
String
pushAddress
){
String
htmlBody
=
getHtmlBody
(
monitorUrl
);
org
.
jsoup
.
nodes
.
Document
parse
=
Jsoup
.
parse
(
htmlBody
);
JXDocument
jxDocument
=
JXDocument
.
create
(
parse
);
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<>();
try
{
// 创建者
String
creator
=
jxDocument
.
selNOne
(
"//ul[@class='lemma_data']/li[1]"
).
asElement
().
text
();
souGouBaiKeMonitor
.
dataCompare
(
keyword
,
url
,
pushAddress
);
// 编辑次数
String
editNumber
=
jxDocument
.
selNOne
(
"//ul[@class='lemma_data']/li[3]"
).
asElement
().
text
();
// 最近更新
String
recentUpdate
=
jxDocument
.
selNOne
(
"//ul[@class='lemma_data']/li[5]"
).
asElement
().
text
();
// 对比地址
String
compareUrl
=
"https://baike.sogou.com"
+
jxDocument
.
selNOne
(
"//ul[@class='lemma_data']/li[3]//a"
).
asElement
().
attr
(
"href"
);
dataMap
.
put
(
"keyword"
,
keyword
);
dataMap
.
put
(
"creator"
,
creator
);
dataMap
.
put
(
"editNumber"
,
editNumber
);
dataMap
.
put
(
"recentUpdate"
,
recentUpdate
);
dataMap
.
put
(
"compareUrl"
,
compareUrl
);
dataMap
.
put
(
"pushAddress"
,
pushAddress
);
}
catch
(
Exception
e
)
{
new
Throwable
(
"数据解析错误"
).
printStackTrace
();
}
Document
inDataBaseData
=
findInDataBaseData
(
keyword
);
if
(
Objects
.
nonNull
(
inDataBaseData
)){
String
editNumber
=
inDataBaseData
.
getString
(
"edit_number"
);
if
(!
editNumber
.
equals
(
dataMap
.
get
(
"editNumber"
))){
System
.
out
.
println
(
keyword
+
" \t数据有更新"
);
System
.
out
.
println
(
"\n\n开始推送......\n\n"
);
// 推送数据
setHotSearchDataAndPushContent
(
dataMap
);
// 存放数据
saveData
(
dataMap
);
}
else
{
System
.
out
.
println
(
keyword
+
" 在mongo中的数据: "
+
inDataBaseData
.
toString
());
System
.
out
.
println
(
keyword
+
" 无数据更新."
);
}
}
else
{
System
.
out
.
println
(
"第一次访问:"
+
keyword
);
// 存放数据
saveData
(
dataMap
);
}
}
private
static
void
sendWorkWechatByMarkdown
(
List
<
Map
<
String
,
String
>>
content
,
String
sendUrl
)
{
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
Map
<
String
,
Object
>
newsMap
=
new
HashMap
<>();
newsMap
.
put
(
"articles"
,
content
);
Map
<
String
,
Object
>
params
=
new
HashMap
<>();
params
.
put
(
"msgtype"
,
"news"
);
params
.
put
(
"news"
,
newsMap
);
String
data
=
JSONObject
.
toJSONString
(
params
);
try
{
try
{
Thread
.
sleep
(
3000
);
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
sendUrl
,
RequestBody
.
create
(
MediaType
.
parse
(
"application/json"
),
data
))).
body
()
}
catch
(
InterruptedException
e
)
{
.
string
();
if
(
htmlBody
.
contains
(
"ok"
))
{
System
.
out
.
println
(
"----------企业微信账号数据推送成功-----------"
);
}
else
{
System
.
out
.
println
(
"----------企业微信账号数据推送失败-----------"
);
}
}
catch
(
Exception
e
)
{
System
.
out
.
println
(
"----------企业微信账号数据推送失败,出现错误-----------"
);
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
}
}
public
static
void
setHotSearchDataAndPushContent
(
Map
<
String
,
Object
>
dataMap
)
{
List
<
Map
<
String
,
String
>>
listContent
=
new
ArrayList
<>();
Map
<
String
,
String
>
map
=
new
HashMap
<>(
4
);
map
.
put
(
"title"
,
(
String
)
dataMap
.
get
(
"keyword"
));
map
.
put
(
"description"
,
"数据有更新\n"
+
dataMap
.
get
(
"recentUpdate"
)
+
" "
+
dataMap
.
get
(
"editNumber"
)
+
"\n"
);
map
.
put
(
"url"
,
(
String
)
dataMap
.
get
(
"compareUrl"
));
map
.
put
(
"picurl"
,
"https://login.zhiweidata.com/plogin/img/cat.8de03170.png"
);
listContent
.
add
(
map
);
sendWorkWechatByMarkdown
(
listContent
,
(
String
)
dataMap
.
get
(
"pushAddress"
));
}
public
String
getHtmlBody
(
String
url
){
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
))){
return
response
.
body
().
string
();
}
catch
(
Exception
e
){
e
.
printStackTrace
();
}
}
}
},
0
,
1
,
TimeUnit
.
HOURS
)
;
return
null
;
}
}
}
}
src/main/java/com/zhiwei/crawler/main/Main.java
View file @
698618cf
package
com
.
zhiwei
.
crawler
.
main
;
package
com
.
zhiwei
.
crawler
.
main
;
import
com.zhiwei.crawler.
compare
.BaiDuBaiKeMonitor
;
import
com.zhiwei.crawler.
monitor
.BaiDuBaiKeMonitor
;
import
com.zhiwei.crawler.
compare
.SouGouBaiKeMonitor
;
import
com.zhiwei.crawler.
monitor
.SouGouBaiKeMonitor
;
/**
/**
* @Author: 朝花夕誓
* @Author: 朝花夕誓
...
...
src/main/java/com/zhiwei/crawler/
compare
/BaiDuBaiKeMonitor.java
→
src/main/java/com/zhiwei/crawler/
monitor
/BaiDuBaiKeMonitor.java
View file @
698618cf
package
com
.
zhiwei
.
crawler
.
compare
;
package
com
.
zhiwei
.
crawler
.
monitor
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
org.apache.commons.lang3.time.FastDateFormat
;
import
org.apache.commons.lang3.time.FastDateFormat
;
...
...
src/main/java/com/zhiwei/crawler/
compare
/BaiKe360Monitor.java
→
src/main/java/com/zhiwei/crawler/
monitor
/BaiKe360Monitor.java
View file @
698618cf
package
com
.
zhiwei
.
crawler
.
compare
;
package
com
.
zhiwei
.
crawler
.
monitor
;
import
com.mongodb.client.MongoCursor
;
import
com.mongodb.client.MongoCursor
;
import
com.zhiwei.crawler.
monitor
.Baike360
;
import
com.zhiwei.crawler.
compare
.Baike360
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
org.apache.commons.lang3.time.FastDateFormat
;
import
org.apache.commons.lang3.time.FastDateFormat
;
import
org.apache.logging.log4j.util.Strings
;
import
org.apache.logging.log4j.util.Strings
;
...
...
src/main/java/com/zhiwei/crawler/monitor/SouGouBaiKeMonitor.java
View file @
698618cf
package
com
.
zhiwei
.
crawler
.
monitor
;
package
com
.
zhiwei
.
crawler
.
monitor
;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoCursor
;
import
com.mongodb.client.MongoCursor
;
import
com.mongodb.client.MongoDatabase
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
org.apache.commons.lang3.time.FastDateFormat
;
import
okhttp3.MediaType
;
import
okhttp3.RequestBody
;
import
okhttp3.Response
;
import
org.apache.logging.log4j.util.Strings
;
import
org.apache.logging.log4j.util.Strings
;
import
org.bson.Document
;
import
org.bson.Document
;
import
org.jsoup.Jsoup
;
import
org.seimicrawler.xpath.JXDocument
;
import
java.util.*
;
import
java.util.concurrent.Executors
;
import
java.util.concurrent.ScheduledExecutorService
;
import
java.util.concurrent.TimeUnit
;
/**
* 搜狗百科信息监控
* @author 朝花夕誓
*/
public
class
SouGouBaiKeMonitor
{
public
class
SouGouBaiKeMonitor
{
public
static
void
start
()
{
com
.
zhiwei
.
crawler
.
compare
.
SouGouBaiKeMonitor
souGouBaiKeMonitor
=
new
com
.
zhiwei
.
crawler
.
compare
.
SouGouBaiKeMonitor
();
ScheduledExecutorService
scheduledExecutorService
=
Executors
.
newScheduledThreadPool
(
1
);
/**
* 查找出数据需要的集合
* @return
*/
public
List
<
Map
<
String
,
Object
>>
findKeyWordAddress
(){
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
MongoUtils
mongoUtils
=
new
MongoUtils
();
MongoUtils
mongoUtils
=
new
MongoUtils
();
scheduledExecutorService
.
scheduleAtFixedRate
(()->{
MongoCursor
<
Document
>
allData
=
mongoUtils
.
findAllData
();
MongoCursor
<
Document
>
allData
=
mongoUtils
.
findAllData
();
String
timeStamp
=
FastDateFormat
.
getInstance
(
"yyyy-MM-dd HH:mm:ss"
).
format
(
System
.
currentTimeMillis
());
System
.
out
.
println
(
"\n\n正在访问... "
+
timeStamp
+
"\n\n"
);
while
(
allData
.
hasNext
()){
while
(
allData
.
hasNext
()){
Document
next
=
allData
.
next
();
Document
next
=
allData
.
next
();
String
keyword
=
null
;
String
url
=
next
.
getString
(
"sougoubaike_url"
);
String
pushAddress
=
null
;
String
keyword
=
next
.
getString
(
"keyword"
);
String
souGouBaiKeUrl
=
null
;
String
pushAddress
=
next
.
getString
(
"push_address"
);
try
{
if
(
Strings
.
isNotBlank
(
url
)
&&
Strings
.
isNotBlank
(
keyword
)
&&
Strings
.
isNotBlank
(
pushAddress
)){
keyword
=
next
.
getString
(
"keyword"
);
pushAddress
=
next
.
getString
(
"push_address"
);
souGouBaiKeUrl
=
next
.
getString
(
"sougoubaike_url"
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
if
(
Strings
.
isNotBlank
(
keyword
)
&&
Strings
.
isNotBlank
(
pushAddress
)
&&
Strings
.
isNotBlank
(
souGouBaiKeUrl
)){
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<>();
dataMap
.
put
(
"keyword"
,
keyword
);
dataMap
.
put
(
"pushAddress"
,
pushAddress
);
dataMap
.
put
(
"souGouBaiKeUrl"
,
souGouBaiKeUrl
);
dataList
.
add
(
dataMap
);
}
}
return
dataList
;
}
public
Document
findInDataBaseData
(
String
keyword
){
MongoUtils
mongoUtils
=
new
MongoUtils
();
MongoDatabase
mongoDataBase
=
mongoUtils
.
getMongoDataBase
();
MongoCollection
<
Document
>
souGouBaiKe
=
mongoDataBase
.
getCollection
(
"souGouBaiKe"
);
Document
query
=
new
Document
();
query
.
put
(
"keyword"
,
keyword
);
Document
createTime
=
new
Document
();
createTime
.
put
(
"create_time"
,
-
1
);
Document
first
=
souGouBaiKe
.
find
(
query
).
sort
(
createTime
).
first
();
return
first
;
}
public
void
saveData
(
Map
<
String
,
Object
>
dataMap
){
MongoUtils
mongoUtils
=
new
MongoUtils
();
MongoDatabase
mongoDataBase
=
mongoUtils
.
getMongoDataBase
();
MongoCollection
<
Document
>
souGouBaiKe
=
mongoDataBase
.
getCollection
(
"souGouBaiKe"
);
Document
document
=
new
Document
();
document
.
put
(
"create_time"
,
(
int
)(
System
.
currentTimeMillis
()/
1000
));
document
.
append
(
"keyword"
,
dataMap
.
get
(
"keyword"
))
.
append
(
"creator"
,
dataMap
.
get
(
"creator"
))
.
append
(
"edit_number"
,
dataMap
.
get
(
"editNumber"
))
.
append
(
"recent_update"
,
dataMap
.
get
(
"recentUpdate"
));
souGouBaiKe
.
insertOne
(
document
);
}
public
void
dataCompare
(
String
keyword
,
String
monitorUrl
,
String
pushAddress
){
String
htmlBody
=
getHtmlBody
(
monitorUrl
);
org
.
jsoup
.
nodes
.
Document
parse
=
Jsoup
.
parse
(
htmlBody
);
JXDocument
jxDocument
=
JXDocument
.
create
(
parse
);
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<>();
try
{
// 创建者
String
creator
=
jxDocument
.
selNOne
(
"//ul[@class='lemma_data']/li[1]"
).
asElement
().
text
();
// 编辑次数
souGouBaiKeMonitor
.
dataCompare
(
keyword
,
url
,
pushAddress
);
String
editNumber
=
jxDocument
.
selNOne
(
"//ul[@class='lemma_data']/li[3]"
).
asElement
().
text
();
// 最近更新
String
recentUpdate
=
jxDocument
.
selNOne
(
"//ul[@class='lemma_data']/li[5]"
).
asElement
().
text
();
// 对比地址
String
compareUrl
=
"https://baike.sogou.com"
+
jxDocument
.
selNOne
(
"//ul[@class='lemma_data']/li[3]//a"
).
asElement
().
attr
(
"href"
);
dataMap
.
put
(
"keyword"
,
keyword
);
dataMap
.
put
(
"creator"
,
creator
);
dataMap
.
put
(
"editNumber"
,
editNumber
);
dataMap
.
put
(
"recentUpdate"
,
recentUpdate
);
dataMap
.
put
(
"compareUrl"
,
compareUrl
);
dataMap
.
put
(
"pushAddress"
,
pushAddress
);
}
catch
(
Exception
e
)
{
new
Throwable
(
"数据解析错误"
).
printStackTrace
();
}
Document
inDataBaseData
=
findInDataBaseData
(
keyword
);
if
(
Objects
.
nonNull
(
inDataBaseData
)){
String
editNumber
=
inDataBaseData
.
getString
(
"edit_number"
);
if
(!
editNumber
.
equals
(
dataMap
.
get
(
"editNumber"
))){
System
.
out
.
println
(
keyword
+
" \t数据有更新"
);
System
.
out
.
println
(
"\n\n开始推送......\n\n"
);
// 推送数据
setHotSearchDataAndPushContent
(
dataMap
);
// 存放数据
saveData
(
dataMap
);
}
else
{
System
.
out
.
println
(
keyword
+
" 在mongo中的数据: "
+
inDataBaseData
.
toString
());
System
.
out
.
println
(
keyword
+
" 无数据更新."
);
}
}
else
{
System
.
out
.
println
(
"第一次访问:"
+
keyword
);
// 存放数据
saveData
(
dataMap
);
}
}
private
static
void
sendWorkWechatByMarkdown
(
List
<
Map
<
String
,
String
>>
content
,
String
sendUrl
)
{
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
Map
<
String
,
Object
>
newsMap
=
new
HashMap
<>();
newsMap
.
put
(
"articles"
,
content
);
Map
<
String
,
Object
>
params
=
new
HashMap
<>();
params
.
put
(
"msgtype"
,
"news"
);
params
.
put
(
"news"
,
newsMap
);
String
data
=
JSONObject
.
toJSONString
(
params
);
try
{
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
sendUrl
,
RequestBody
.
create
(
MediaType
.
parse
(
"application/json"
),
data
))).
body
()
Thread
.
sleep
(
3000
);
.
string
();
}
catch
(
InterruptedException
e
)
{
if
(
htmlBody
.
contains
(
"ok"
))
{
System
.
out
.
println
(
"----------企业微信账号数据推送成功-----------"
);
}
else
{
System
.
out
.
println
(
"----------企业微信账号数据推送失败-----------"
);
}
}
catch
(
Exception
e
)
{
System
.
out
.
println
(
"----------企业微信账号数据推送失败,出现错误-----------"
);
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
}
}
public
static
void
setHotSearchDataAndPushContent
(
Map
<
String
,
Object
>
dataMap
)
{
List
<
Map
<
String
,
String
>>
listContent
=
new
ArrayList
<>();
Map
<
String
,
String
>
map
=
new
HashMap
<>(
4
);
map
.
put
(
"title"
,
(
String
)
dataMap
.
get
(
"keyword"
));
map
.
put
(
"description"
,
"数据有更新\n"
+
dataMap
.
get
(
"recentUpdate"
)
+
" "
+
dataMap
.
get
(
"editNumber"
)
+
"\n"
);
map
.
put
(
"url"
,
(
String
)
dataMap
.
get
(
"compareUrl"
));
map
.
put
(
"picurl"
,
"https://login.zhiweidata.com/plogin/img/cat.8de03170.png"
);
listContent
.
add
(
map
);
sendWorkWechatByMarkdown
(
listContent
,
(
String
)
dataMap
.
get
(
"pushAddress"
));
}
public
String
getHtmlBody
(
String
url
){
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
))){
return
response
.
body
().
string
();
}
catch
(
Exception
e
){
e
.
printStackTrace
();
}
}
}
return
null
;
},
0
,
1
,
TimeUnit
.
HOURS
)
;
}
}
}
}
src/test/java/com/zhiwei/crawler/BaiKe360MonitorTest.java
View file @
698618cf
package
com
.
zhiwei
.
crawler
;
package
com
.
zhiwei
.
crawler
;
import
com.mongodb.client.MongoCursor
;
import
com.mongodb.client.MongoCursor
;
import
com.zhiwei.crawler.
monitor
.Baike360
;
import
com.zhiwei.crawler.
compare
.Baike360
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
org.apache.commons.lang3.time.FastDateFormat
;
import
org.apache.commons.lang3.time.FastDateFormat
;
import
org.apache.logging.log4j.util.Strings
;
import
org.apache.logging.log4j.util.Strings
;
import
org.bson.Document
;
import
org.bson.Document
;
...
...
src/test/java/com/zhiwei/crawler/MainTest.java
View file @
698618cf
package
com
.
zhiwei
.
crawler
;
package
com
.
zhiwei
.
crawler
;
import
com.zhiwei.crawler.
compare
.BaiDuBaiKeMonitor
;
import
com.zhiwei.crawler.
monitor
.BaiDuBaiKeMonitor
;
import
com.zhiwei.crawler.
compare
.SouGouBaiKeMonitor
;
import
com.zhiwei.crawler.
monitor
.SouGouBaiKeMonitor
;
import
org.junit.Test
;
import
org.junit.Test
;
/**
/**
...
...
src/test/java/com/zhiwei/crawler/SouGouBaiKeMonitorTest.java
View file @
698618cf
package
com
.
zhiwei
.
crawler
;
package
com
.
zhiwei
.
crawler
;
import
com.mongodb.client.MongoCursor
;
import
com.mongodb.client.MongoCursor
;
import
com.zhiwei.crawler.
monitor
.SouGouBaiKeMonitor
;
import
com.zhiwei.crawler.
compare
.SouGouBaiKeMonitor
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
com.zhiwei.crawler.transfertest.mongo.MongoUtils
;
import
org.apache.commons.lang3.time.FastDateFormat
;
import
org.apache.commons.lang3.time.FastDateFormat
;
import
org.apache.logging.log4j.util.Strings
;
import
org.apache.logging.log4j.util.Strings
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment