Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
116eb891
Commit
116eb891
authored
Jun 07, 2021
by
leiliangliang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
更新微博话题解析
parent
7bbc8832
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
60 additions
and
58 deletions
+60
-58
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+23
-22
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoMassageDao.java
+4
-5
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoUserDao.java
+4
-7
src/test/java/weiboTest/WeiboHotSearchTest.java
+29
-24
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
116eb891
...
@@ -18,6 +18,7 @@ import okhttp3.Request;
...
@@ -18,6 +18,7 @@ import okhttp3.Request;
import
okhttp3.Response
;
import
okhttp3.Response
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.bson.Document
;
import
org.bson.Document
;
import
org.checkerframework.checker.units.qual.C
;
import
org.jsoup.Jsoup
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
org.jsoup.select.Elements
;
...
@@ -47,6 +48,9 @@ public class WeiboHotSearchCrawler {
...
@@ -47,6 +48,9 @@ public class WeiboHotSearchCrawler {
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
private
static
RedisDao
redisDao
=
new
RedisDao
();
private
static
RedisDao
redisDao
=
new
RedisDao
();
static
WeiBoUserDao
weiBoUserDao
=
new
WeiBoUserDao
();
static
WeiBoMassageDao
weiBoMassageDao
=
new
WeiBoMassageDao
();
/**
/**
* @Title: weiboHotSearchTest
* @Title: weiboHotSearchTest
* @author hero
* @author hero
...
@@ -274,9 +278,13 @@ public class WeiboHotSearchCrawler {
...
@@ -274,9 +278,13 @@ public class WeiboHotSearchCrawler {
}
}
try
{
try
{
//调用weiBoMassageDao添加数据
WeiBoMassageDao
weiBoMassageDao
=
new
WeiBoMassageDao
();
//解析cards,获取热门微博、人物
//解析cards,获取热门微博、人物
if
(
Objects
.
isNull
(
weiBoMassageDao
)){
weiBoMassageDao
=
new
WeiBoMassageDao
();
}
if
(
Objects
.
isNull
(
weiBoUserDao
)){
weiBoUserDao
=
new
WeiBoUserDao
();
}
for
(
JSONObject
jsonObject
:
cardsJsons
)
{
for
(
JSONObject
jsonObject
:
cardsJsons
)
{
if
(
nonNull
(
jsonObject
)
&&
!
jsonObject
.
isEmpty
())
{
if
(
nonNull
(
jsonObject
)
&&
!
jsonObject
.
isEmpty
())
{
if
(
jsonObject
.
containsKey
(
"mblog"
))
{
if
(
jsonObject
.
containsKey
(
"mblog"
))
{
...
@@ -292,14 +300,18 @@ public class WeiboHotSearchCrawler {
...
@@ -292,14 +300,18 @@ public class WeiboHotSearchCrawler {
if
(
Objects
.
nonNull
(
weiBoMassage
))
{
if
(
Objects
.
nonNull
(
weiBoMassage
))
{
weiBoMassageDao
.
addWeiBoMassage
(
weiBoMassage
);
weiBoMassageDao
.
addWeiBoMassage
(
weiBoMassage
);
}
}
analysisWeiBoUsers
(
cardGroup
,
document
.
getString
(
"name"
));
List
<
WeiBoUser
>
weiBoUserList
=
analysisWeiBoUsers
(
cardGroup
,
document
.
getString
(
"name"
));
if
(!
weiBoUserList
.
isEmpty
()){
for
(
int
i
=
0
;
i
<
weiBoUserList
.
size
();
i
++)
{
weiBoUserDao
.
addWeiBoUser
(
weiBoUserList
.
get
(
i
));
}
}
}
}
}
}
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析cards失败,未获得热门微博,人物信息"
,
e
);
log
.
error
(
"解析cards失败,未获得热门微博,人物信息"
,
e
);
}
}
return
document
;
return
document
;
}
}
}
}
...
@@ -333,10 +345,9 @@ public class WeiboHotSearchCrawler {
...
@@ -333,10 +345,9 @@ public class WeiboHotSearchCrawler {
* @param topic
* @param topic
* @return
* @return
*/
*/
public
static
void
analysisWeiBoUsers
(
JSONArray
cardGroup
,
String
topic
)
{
public
static
List
<
WeiBoUser
>
analysisWeiBoUsers
(
JSONArray
cardGroup
,
String
topic
)
{
List
<
WeiBoUser
>
weiBoUserList
=
new
ArrayList
();
//解析weibo人物信息
//解析weibo人物信息
//创建weiBoUserDao
WeiBoUserDao
weiBoUserDao
=
new
WeiBoUserDao
();
Date
date
=
new
Date
();
Date
date
=
new
Date
();
for
(
int
i
=
0
;
i
<
cardGroup
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
cardGroup
.
size
();
i
++)
{
if
(
3
==
Integer
.
valueOf
(
cardGroup
.
getJSONObject
(
i
).
getString
(
"card_type"
)))
{
if
(
3
==
Integer
.
valueOf
(
cardGroup
.
getJSONObject
(
i
).
getString
(
"card_type"
)))
{
...
@@ -361,15 +372,10 @@ public class WeiboHotSearchCrawler {
...
@@ -361,15 +372,10 @@ public class WeiboHotSearchCrawler {
}
}
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
//判断weiBoUser是否为空添加数据
weiBoUserList
.
add
(
weiBoUser
);
if
(
weiBoUser
!=
null
)
{
//调用weiBoUserDao中的方法添加数据
weiBoUserDao
.
addWeiBoUser
(
weiBoUser
);
}
else
{
log
.
info
(
"未采集到用户信息"
);
}
}
}
}
}
return
weiBoUserList
;
}
else
if
(
10
==
Integer
.
valueOf
(
cardGroup
.
getJSONObject
(
i
).
getString
(
"card_type"
)))
{
}
else
if
(
10
==
Integer
.
valueOf
(
cardGroup
.
getJSONObject
(
i
).
getString
(
"card_type"
)))
{
if
(
cardGroup
.
getJSONObject
(
i
).
containsKey
(
"user"
)){
if
(
cardGroup
.
getJSONObject
(
i
).
containsKey
(
"user"
)){
JSONObject
user
=
cardGroup
.
getJSONObject
(
i
).
getJSONObject
(
"user"
);
JSONObject
user
=
cardGroup
.
getJSONObject
(
i
).
getJSONObject
(
"user"
);
...
@@ -379,7 +385,6 @@ public class WeiboHotSearchCrawler {
...
@@ -379,7 +385,6 @@ public class WeiboHotSearchCrawler {
String
userName
=
user
.
getString
(
"screen_name"
);
String
userName
=
user
.
getString
(
"screen_name"
);
//获取认证信息
//获取认证信息
String
attestationMassage
=
user
.
getString
(
"verified_reason"
);
String
attestationMassage
=
user
.
getString
(
"verified_reason"
);
//获取粉丝数
//获取粉丝数
String
followers_count
=
user
.
getString
(
"followers_count"
);
String
followers_count
=
user
.
getString
(
"followers_count"
);
Long
followerCount
=
null
;
Long
followerCount
=
null
;
...
@@ -391,16 +396,12 @@ public class WeiboHotSearchCrawler {
...
@@ -391,16 +396,12 @@ public class WeiboHotSearchCrawler {
}
}
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
//判断weiBoUser是否为空添加数据
weiBoUserList
.
add
(
weiBoUser
);
if
(
weiBoUser
!=
null
)
{
//调用weiBoUserDao中的方法添加数据
weiBoUserDao
.
addWeiBoUser
(
weiBoUser
);
}
else
{
log
.
info
(
"未采集到用户信息"
);
}
}
}
return
weiBoUserList
;
}
}
}
}
return
Collections
.
emptyList
();
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoMassageDao.java
View file @
116eb891
...
@@ -33,8 +33,7 @@ public class WeiBoMassageDao {
...
@@ -33,8 +33,7 @@ public class WeiBoMassageDao {
* @param weiBoMassage
* @param weiBoMassage
*/
*/
public
void
addWeiBoMassage
(
WeiBoMassage
weiBoMassage
){
public
void
addWeiBoMassage
(
WeiBoMassage
weiBoMassage
){
try
{
log
.
info
(
"weiBoMassage对象开始转document对象"
);
Document
document
=
new
Document
();
Document
document
=
new
Document
();
document
.
put
(
"_id"
,
weiBoMassage
.
getId
());
document
.
put
(
"_id"
,
weiBoMassage
.
getId
());
document
.
put
(
"userId"
,
weiBoMassage
.
getUserId
());
document
.
put
(
"userId"
,
weiBoMassage
.
getUserId
());
...
@@ -67,14 +66,14 @@ public class WeiBoMassageDao {
...
@@ -67,14 +66,14 @@ public class WeiBoMassageDao {
document
.
put
(
"root_text"
,
weiBoMassage
.
getRoot_text
());
document
.
put
(
"root_text"
,
weiBoMassage
.
getRoot_text
());
document
.
put
(
"root_source"
,
weiBoMassage
.
getRoot_source
());
document
.
put
(
"root_source"
,
weiBoMassage
.
getRoot_source
());
}
}
log
.
info
(
"weiBoMassage对象转document对象完成"
);
try
{
try
{
mongoCollection
.
insertOne
(
document
);
mongoCollection
.
insertOne
(
document
);
log
.
info
(
"数据插入成功"
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
}
catch
(
Exception
e
)
{
log
.
error
(
"WeiBoMassage对象转Document对象异常"
,
e
);
}
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoUserDao.java
View file @
116eb891
...
@@ -23,7 +23,6 @@ public class WeiBoUserDao {
...
@@ -23,7 +23,6 @@ public class WeiBoUserDao {
public
WeiBoUserDao
()
{
public
WeiBoUserDao
()
{
String
collName
=
DBConfig
.
weiBoUserCollName
;
String
collName
=
DBConfig
.
weiBoUserCollName
;
mongoCollection
=
mongoDatabase
.
getCollection
(
collName
);
mongoCollection
=
mongoDatabase
.
getCollection
(
collName
);
//给数据表创建索引
//给数据表创建索引
MongoDBTemplate
.
createIndex
(
DBConfig
.
dbName
,
collName
);
MongoDBTemplate
.
createIndex
(
DBConfig
.
dbName
,
collName
);
}
}
...
@@ -33,9 +32,7 @@ public class WeiBoUserDao {
...
@@ -33,9 +32,7 @@ public class WeiBoUserDao {
* @param weiBoUser
* @param weiBoUser
*/
*/
public
void
addWeiBoUser
(
WeiBoUser
weiBoUser
){
public
void
addWeiBoUser
(
WeiBoUser
weiBoUser
){
try
{
log
.
info
(
"WeiBoUser对象开始转document对象"
);
Document
document
=
new
Document
();
Document
document
=
new
Document
();
document
.
put
(
"_id"
,
weiBoUser
.
getId
());
document
.
put
(
"_id"
,
weiBoUser
.
getId
());
document
.
put
(
"userId"
,
weiBoUser
.
getUserId
());
document
.
put
(
"userId"
,
weiBoUser
.
getUserId
());
...
@@ -46,14 +43,14 @@ public class WeiBoUserDao {
...
@@ -46,14 +43,14 @@ public class WeiBoUserDao {
document
.
put
(
"topic"
,
weiBoUser
.
getTopic
());
document
.
put
(
"topic"
,
weiBoUser
.
getTopic
());
document
.
put
(
"time"
,
weiBoUser
.
getTime
());
document
.
put
(
"time"
,
weiBoUser
.
getTime
());
document
.
put
(
"followerCount"
,
weiBoUser
.
getFollowerCount
());
document
.
put
(
"followerCount"
,
weiBoUser
.
getFollowerCount
());
log
.
info
(
"WeiBoUser对象转document对象完成"
);
try
{
try
{
mongoCollection
.
insertOne
(
document
);
mongoCollection
.
insertOne
(
document
);
log
.
info
(
"数据插入成功"
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
}
catch
(
Exception
e
)
{
log
.
error
(
"WeiBoUser对象转Document对象异常"
,
e
);
}
}
}
}
}
src/test/java/weiboTest/WeiboHotSearchTest.java
View file @
116eb891
...
@@ -46,7 +46,10 @@ import static java.util.Objects.nonNull;
...
@@ -46,7 +46,10 @@ import static java.util.Objects.nonNull;
{
"classpath:applicationContext.xml"
})
{
"classpath:applicationContext.xml"
})
public
class
WeiboHotSearchTest
{
public
class
WeiboHotSearchTest
{
//调用weiBoMassageDao添加数据
static
WeiBoMassageDao
weiBoMassageDao
=
new
WeiBoMassageDao
();
//调用weiBoUserDao添加数据
static
WeiBoUserDao
weiBoUserDao
=
new
WeiBoUserDao
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
@Test
@Test
...
@@ -154,8 +157,14 @@ public class WeiboHotSearchTest {
...
@@ -154,8 +157,14 @@ public class WeiboHotSearchTest {
}
}
}
}
//调用weiBoMassageDao添加数据
try
{
WeiBoMassageDao
weiBoMassageDao
=
new
WeiBoMassageDao
();
if
(
Objects
.
isNull
(
weiBoMassageDao
)){
weiBoMassageDao
=
new
WeiBoMassageDao
();
}
if
(
Objects
.
isNull
(
weiBoUserDao
)){
weiBoUserDao
=
new
WeiBoUserDao
();
}
//解析cards,获取热门微博、人物
//解析cards,获取热门微博、人物
for
(
JSONObject
jsonObject
:
cardsJsons
)
{
for
(
JSONObject
jsonObject
:
cardsJsons
)
{
if
(
nonNull
(
jsonObject
)
&&
!
jsonObject
.
isEmpty
())
{
if
(
nonNull
(
jsonObject
)
&&
!
jsonObject
.
isEmpty
())
{
...
@@ -172,12 +181,18 @@ public class WeiboHotSearchTest {
...
@@ -172,12 +181,18 @@ public class WeiboHotSearchTest {
if
(
Objects
.
nonNull
(
weiBoMassage
))
{
if
(
Objects
.
nonNull
(
weiBoMassage
))
{
weiBoMassageDao
.
addWeiBoMassage
(
weiBoMassage
);
weiBoMassageDao
.
addWeiBoMassage
(
weiBoMassage
);
}
}
analysisWeiBoUsers
(
cardGroup
,
document
.
getString
(
"name"
));
List
<
WeiBoUser
>
weiBoUserList
=
analysisWeiBoUsers
(
cardGroup
,
document
.
getString
(
"name"
));
if
(!
weiBoUserList
.
isEmpty
()){
for
(
int
i
=
0
;
i
<
weiBoUserList
.
size
();
i
++)
{
weiBoUserDao
.
addWeiBoUser
(
weiBoUserList
.
get
(
i
));
}
}
}
else
{
log
.
info
(
"获取数据失败"
);
}
}
}
}
}
}
}
catch
(
Exception
e
)
{
log
.
error
(
"解析cards失败,未获取热门微博、人物信息"
,
e
);
}
break
;
break
;
}
}
...
@@ -294,10 +309,9 @@ public class WeiboHotSearchTest {
...
@@ -294,10 +309,9 @@ public class WeiboHotSearchTest {
* @param topic
* @param topic
* @return
* @return
*/
*/
public
static
void
analysisWeiBoUsers
(
JSONArray
cardGroup
,
String
topic
)
{
public
static
List
<
WeiBoUser
>
analysisWeiBoUsers
(
JSONArray
cardGroup
,
String
topic
)
{
List
<
WeiBoUser
>
weiBoUserList
=
new
ArrayList
<>();
//解析weibo人物信息
//解析weibo人物信息
//创建weiBoUserDao
WeiBoUserDao
weiBoUserDao
=
new
WeiBoUserDao
();
Date
date
=
new
Date
();
Date
date
=
new
Date
();
for
(
int
i
=
0
;
i
<
cardGroup
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
cardGroup
.
size
();
i
++)
{
if
(
3
==
Integer
.
valueOf
(
cardGroup
.
getJSONObject
(
i
).
getString
(
"card_type"
)))
{
if
(
3
==
Integer
.
valueOf
(
cardGroup
.
getJSONObject
(
i
).
getString
(
"card_type"
)))
{
...
@@ -320,15 +334,10 @@ public class WeiboHotSearchTest {
...
@@ -320,15 +334,10 @@ public class WeiboHotSearchTest {
followerCount
=
Long
.
valueOf
(
split
[
0
])*
10000
;
followerCount
=
Long
.
valueOf
(
split
[
0
])*
10000
;
}
}
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
//判断weiBoUser是否为空添加数据
weiBoUserList
.
add
(
weiBoUser
);
if
(
weiBoUser
!=
null
)
{
//调用weiBoUserDao中的方法添加数据
weiBoUserDao
.
addWeiBoUser
(
weiBoUser
);
}
else
{
log
.
info
(
"未采集到用户信息"
);
}
}
}
}
}
return
weiBoUserList
;
}
else
if
(
10
==
Integer
.
valueOf
(
cardGroup
.
getJSONObject
(
i
).
getString
(
"card_type"
)))
{
}
else
if
(
10
==
Integer
.
valueOf
(
cardGroup
.
getJSONObject
(
i
).
getString
(
"card_type"
)))
{
if
(
cardGroup
.
getJSONObject
(
i
).
containsKey
(
"user"
)){
if
(
cardGroup
.
getJSONObject
(
i
).
containsKey
(
"user"
)){
JSONObject
user
=
cardGroup
.
getJSONObject
(
i
).
getJSONObject
(
"user"
);
JSONObject
user
=
cardGroup
.
getJSONObject
(
i
).
getJSONObject
(
"user"
);
...
@@ -348,18 +357,14 @@ public class WeiboHotSearchTest {
...
@@ -348,18 +357,14 @@ public class WeiboHotSearchTest {
followerCount
=
Long
.
valueOf
(
followers_count
);
followerCount
=
Long
.
valueOf
(
followers_count
);
}
}
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
//判断weiBoUser是否为空添加数据
weiBoUserList
.
add
(
weiBoUser
);
if
(
weiBoUser
!=
null
)
{
//调用weiBoUserDao中的方法添加数据
weiBoUserDao
.
addWeiBoUser
(
weiBoUser
);
}
else
{
log
.
info
(
"未采集到用户信息"
);
}
}
}
return
weiBoUserList
;
}
}
}
}
return
Collections
.
emptyList
();
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment