Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
d2e5b1cc
Commit
d2e5b1cc
authored
Jun 10, 2021
by
leiliangliang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
更新微博话题解析,新增用户头像地址
parent
b8f83e22
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
34 additions
and
12 deletions
+34
-12
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoMassage.java
+7
-2
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
+7
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+9
-5
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoMassageDao.java
+1
-0
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoUserDao.java
+1
-0
src/test/java/weiboTest/WeiboHotSearchTest.java
+9
-4
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoMassage.java
View file @
d2e5b1cc
...
@@ -88,7 +88,10 @@ public class WeiBoMassage implements Serializable {
...
@@ -88,7 +88,10 @@ public class WeiBoMassage implements Serializable {
* 话题
* 话题
*/
*/
private
String
topic
;
private
String
topic
;
/**
* 头像地址
*/
private
String
profileImageUrl
;
//是否转发
//是否转发
private
Integer
forward
;
private
Integer
forward
;
//转发 源微博mid
//转发 源微博mid
...
@@ -110,7 +113,7 @@ public class WeiBoMassage implements Serializable {
...
@@ -110,7 +113,7 @@ public class WeiBoMassage implements Serializable {
public
WeiBoMassage
(
String
userId
,
String
text
,
String
userName
,
String
mid
,
public
WeiBoMassage
(
String
userId
,
String
text
,
String
userName
,
String
mid
,
Date
creatTime
,
Date
editTime
,
Integer
cardType
,
Integer
showType
,
Long
repostCount
,
Date
creatTime
,
Date
editTime
,
Integer
cardType
,
Integer
showType
,
Long
repostCount
,
Long
commentCount
,
Long
attitudeCount
,
String
source
,
String
type
,
String
topic
)
{
Long
commentCount
,
Long
attitudeCount
,
String
source
,
String
type
,
String
topic
,
String
profileImageUrl
)
{
this
.
id
=
mid
+
"_"
+
HotSearchType
.
微博热搜
.
name
()+
"_"
+
topic
;
this
.
id
=
mid
+
"_"
+
HotSearchType
.
微博热搜
.
name
()+
"_"
+
topic
;
this
.
userId
=
userId
;
this
.
userId
=
userId
;
this
.
text
=
text
;
this
.
text
=
text
;
...
@@ -126,6 +129,8 @@ public class WeiBoMassage implements Serializable {
...
@@ -126,6 +129,8 @@ public class WeiBoMassage implements Serializable {
this
.
source
=
source
;
this
.
source
=
source
;
this
.
type
=
type
;
this
.
type
=
type
;
this
.
topic
=
topic
;
this
.
topic
=
topic
;
this
.
profileImageUrl
=
profileImageUrl
;
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
View file @
d2e5b1cc
...
@@ -48,10 +48,15 @@ public class WeiBoUser implements Serializable {
...
@@ -48,10 +48,15 @@ public class WeiBoUser implements Serializable {
* 粉丝数
* 粉丝数
*/
*/
private
Long
followerCount
;
private
Long
followerCount
;
/**
* 头像地址
*/
private
String
profileImageUrl
;
public
WeiBoUser
()
{
public
WeiBoUser
()
{
}
}
public
WeiBoUser
(
String
userId
,
String
attestationMassage
,
String
userName
,
String
topic
,
Date
time
,
Long
followerCount
)
{
public
WeiBoUser
(
String
userId
,
String
attestationMassage
,
String
userName
,
String
topic
,
Date
time
,
Long
followerCount
,
String
profileImageUrl
)
{
this
.
id
=
userId
+
"_"
+
HotSearchType
.
微博热搜
.
name
()+
"_"
+
topic
;
this
.
id
=
userId
+
"_"
+
HotSearchType
.
微博热搜
.
name
()+
"_"
+
topic
;
this
.
userId
=
userId
;
this
.
userId
=
userId
;
...
@@ -60,6 +65,7 @@ public class WeiBoUser implements Serializable {
...
@@ -60,6 +65,7 @@ public class WeiBoUser implements Serializable {
this
.
topic
=
topic
;
this
.
topic
=
topic
;
this
.
time
=
time
;
this
.
time
=
time
;
this
.
followerCount
=
followerCount
;
this
.
followerCount
=
followerCount
;
this
.
profileImageUrl
=
profileImageUrl
;
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
d2e5b1cc
...
@@ -370,8 +370,9 @@ public class WeiboHotSearchCrawler {
...
@@ -370,8 +370,9 @@ public class WeiboHotSearchCrawler {
String
[]
split
=
followers_count
.
split
(
"万"
);
String
[]
split
=
followers_count
.
split
(
"万"
);
followerCount
=
Long
.
valueOf
(
split
[
0
])*
10000
;
followerCount
=
Long
.
valueOf
(
split
[
0
])*
10000
;
}
}
//用户头像地址
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
String
profileImageUrl
=
users
.
getJSONObject
(
i1
).
getString
(
"profile_image_url"
);
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
,
profileImageUrl
);
weiBoUserList
.
add
(
weiBoUser
);
weiBoUserList
.
add
(
weiBoUser
);
}
}
}
}
...
@@ -394,8 +395,9 @@ public class WeiboHotSearchCrawler {
...
@@ -394,8 +395,9 @@ public class WeiboHotSearchCrawler {
}
else
{
}
else
{
followerCount
=
Long
.
valueOf
(
followers_count
);
followerCount
=
Long
.
valueOf
(
followers_count
);
}
}
//用户头像地址
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
String
profileImageUrl
=
user
.
getString
(
"profile_image_url"
);
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
,
profileImageUrl
);
weiBoUserList
.
add
(
weiBoUser
);
weiBoUserList
.
add
(
weiBoUser
);
}
}
return
weiBoUserList
;
return
weiBoUserList
;
...
@@ -473,6 +475,8 @@ public class WeiboHotSearchCrawler {
...
@@ -473,6 +475,8 @@ public class WeiboHotSearchCrawler {
String
userName
=
mblog
.
getJSONObject
(
"user"
).
getString
(
"screen_name"
);
String
userName
=
mblog
.
getJSONObject
(
"user"
).
getString
(
"screen_name"
);
//来源
//来源
String
source
=
mblog
.
getString
(
"source"
);
String
source
=
mblog
.
getString
(
"source"
);
//用户头像地址
String
profileImageUrl
=
mblog
.
getJSONObject
(
"user"
).
getString
(
"profile_image_url"
);
//内容
//内容
String
content
=
null
;
String
content
=
null
;
if
(
mblog
.
getString
(
"text"
).
contains
(
"<"
))
{
if
(
mblog
.
getString
(
"text"
).
contains
(
"<"
))
{
...
@@ -485,7 +489,7 @@ public class WeiboHotSearchCrawler {
...
@@ -485,7 +489,7 @@ public class WeiboHotSearchCrawler {
}
}
WeiBoMassage
weiBoMassage
=
new
WeiBoMassage
(
userId
,
content
,
userName
,
mid
,
createTime
,
editTime
,
cardType
,
showType
,
WeiBoMassage
weiBoMassage
=
new
WeiBoMassage
(
userId
,
content
,
userName
,
mid
,
createTime
,
editTime
,
cardType
,
showType
,
repostCount
,
commentCount
,
attitudeCount
,
source
,
type
,
topic
);
repostCount
,
commentCount
,
attitudeCount
,
source
,
type
,
topic
,
profileImageUrl
);
//默认不转发为0
//默认不转发为0
weiBoMassage
.
setForward
(
0
);
weiBoMassage
.
setForward
(
0
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoMassageDao.java
View file @
d2e5b1cc
...
@@ -49,6 +49,7 @@ public class WeiBoMassageDao {
...
@@ -49,6 +49,7 @@ public class WeiBoMassageDao {
document
.
put
(
"repostCount"
,
weiBoMassage
.
getRepostCount
());
document
.
put
(
"repostCount"
,
weiBoMassage
.
getRepostCount
());
document
.
put
(
"commentCount"
,
weiBoMassage
.
getCommentCount
());
document
.
put
(
"commentCount"
,
weiBoMassage
.
getCommentCount
());
document
.
put
(
"attitudeCount"
,
weiBoMassage
.
getAttitudeCount
());
document
.
put
(
"attitudeCount"
,
weiBoMassage
.
getAttitudeCount
());
document
.
put
(
"profileImageUrl"
,
weiBoMassage
.
getProfileImageUrl
());
if
(
Objects
.
nonNull
(
weiBoMassage
.
getPlayCount
())){
if
(
Objects
.
nonNull
(
weiBoMassage
.
getPlayCount
())){
document
.
put
(
"playCount"
,
weiBoMassage
.
getPlayCount
());
document
.
put
(
"playCount"
,
weiBoMassage
.
getPlayCount
());
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoUserDao.java
View file @
d2e5b1cc
...
@@ -43,6 +43,7 @@ public class WeiBoUserDao {
...
@@ -43,6 +43,7 @@ public class WeiBoUserDao {
document
.
put
(
"topic"
,
weiBoUser
.
getTopic
());
document
.
put
(
"topic"
,
weiBoUser
.
getTopic
());
document
.
put
(
"time"
,
weiBoUser
.
getTime
());
document
.
put
(
"time"
,
weiBoUser
.
getTime
());
document
.
put
(
"followerCount"
,
weiBoUser
.
getFollowerCount
());
document
.
put
(
"followerCount"
,
weiBoUser
.
getFollowerCount
());
document
.
put
(
"profileImageUrl"
,
weiBoUser
.
getProfileImageUrl
());
try
{
try
{
mongoCollection
.
insertOne
(
document
);
mongoCollection
.
insertOne
(
document
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/test/java/weiboTest/WeiboHotSearchTest.java
View file @
d2e5b1cc
...
@@ -333,7 +333,9 @@ public class WeiboHotSearchTest {
...
@@ -333,7 +333,9 @@ public class WeiboHotSearchTest {
String
[]
split
=
followers_count
.
split
(
"万"
);
String
[]
split
=
followers_count
.
split
(
"万"
);
followerCount
=
Long
.
valueOf
(
split
[
0
])*
10000
;
followerCount
=
Long
.
valueOf
(
split
[
0
])*
10000
;
}
}
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
//用户头像地址
String
profileImageUrl
=
users
.
getJSONObject
(
i1
).
getString
(
"profile_image_url"
);
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
,
profileImageUrl
);
weiBoUserList
.
add
(
weiBoUser
);
weiBoUserList
.
add
(
weiBoUser
);
}
}
}
}
...
@@ -356,8 +358,9 @@ public class WeiboHotSearchTest {
...
@@ -356,8 +358,9 @@ public class WeiboHotSearchTest {
}
else
{
}
else
{
followerCount
=
Long
.
valueOf
(
followers_count
);
followerCount
=
Long
.
valueOf
(
followers_count
);
}
}
//用户头像地址
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
);
String
profileImageUrl
=
user
.
getString
(
"profile_image_url"
);
WeiBoUser
weiBoUser
=
new
WeiBoUser
(
userId
,
attestationMassage
,
userName
,
topic
,
date
,
followerCount
,
profileImageUrl
);
weiBoUserList
.
add
(
weiBoUser
);
weiBoUserList
.
add
(
weiBoUser
);
}
}
...
@@ -436,6 +439,8 @@ public class WeiboHotSearchTest {
...
@@ -436,6 +439,8 @@ public class WeiboHotSearchTest {
String
userName
=
mblog
.
getJSONObject
(
"user"
).
getString
(
"screen_name"
);
String
userName
=
mblog
.
getJSONObject
(
"user"
).
getString
(
"screen_name"
);
//来源
//来源
String
source
=
mblog
.
getString
(
"source"
);
String
source
=
mblog
.
getString
(
"source"
);
//用户头像地址
String
profileImageUrl
=
mblog
.
getJSONObject
(
"user"
).
getString
(
"profile_image_url"
);
//内容
//内容
String
content
=
null
;
String
content
=
null
;
if
(
mblog
.
getString
(
"text"
).
contains
(
"<"
))
{
if
(
mblog
.
getString
(
"text"
).
contains
(
"<"
))
{
...
@@ -448,7 +453,7 @@ public class WeiboHotSearchTest {
...
@@ -448,7 +453,7 @@ public class WeiboHotSearchTest {
}
}
WeiBoMassage
weiBoMassage
=
new
WeiBoMassage
(
userId
,
content
,
userName
,
mid
,
createTime
,
editTime
,
cardType
,
showType
,
WeiBoMassage
weiBoMassage
=
new
WeiBoMassage
(
userId
,
content
,
userName
,
mid
,
createTime
,
editTime
,
cardType
,
showType
,
repostCount
,
commentCount
,
attitudeCount
,
source
,
type
,
topic
);
repostCount
,
commentCount
,
attitudeCount
,
source
,
type
,
topic
,
profileImageUrl
);
//默认不转发为0
//默认不转发为0
weiBoMassage
.
setForward
(
0
);
weiBoMassage
.
setForward
(
0
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment