Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
600d1086
Commit
600d1086
authored
Jul 12, 2021
by
chenweitao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Revert "Revert "新增微博话题采集话题贡献者,关于功能""
This reverts commit
37d43810
parent
9e1b3d9f
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
59 additions
and
26 deletions
+59
-26
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
+6
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+53
-26
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
+0
-0
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
View file @
600d1086
...
@@ -73,4 +73,10 @@ public class WeiBoUser implements Serializable {
...
@@ -73,4 +73,10 @@ public class WeiBoUser implements Serializable {
this
.
profileImageUrl
=
profileImageUrl
;
this
.
profileImageUrl
=
profileImageUrl
;
}
}
/**
 * Creates a user record with its identity, topic and timestamp set.
 *
 * @param userId   value stored in {@code userId}
 * @param userName value stored in {@code userName}
 * @param topic    value stored in {@code topic}
 * @param time     value stored in {@code time}
 */
public WeiBoUser(String userId, String userName, String topic, Date time) {
    this.time = time;
    this.topic = topic;
    this.userName = userName;
    this.userId = userId;
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
600d1086
...
@@ -328,15 +328,16 @@ public class WeiboHotSearchCrawler {
...
@@ -328,15 +328,16 @@ public class WeiboHotSearchCrawler {
}
}
/**
/**
* 微博热搜数据更新话题贡献者排行,阅读量,讨论量,关于
* 微博热搜数据更新话题贡献者排行,关于
*
* @param document
* @param document
* @return
* @return
*/
*/
public
static
Document
weiboUpdatePC
(
Document
document
)
{
public
static
Document
weiboUpdatePC
(
Document
document
)
{
document
.
getString
(
"name"
);
document
.
getString
(
"name"
);
String
name
=
document
.
getString
(
"name"
);
String
topic
=
document
.
getString
(
"name"
);
String
gb
=
"#"
+
name
+
"#"
;
String
gb
=
"#"
+
topic
+
"#"
;
String
encode
=
null
;
String
encode
=
null
;
try
{
try
{
encode
=
URLEncoder
.
encode
(
gb
,
"utf-8"
);
encode
=
URLEncoder
.
encode
(
gb
,
"utf-8"
);
}
catch
(
UnsupportedEncodingException
e
)
{
}
catch
(
UnsupportedEncodingException
e
)
{
...
@@ -356,35 +357,37 @@ public class WeiboHotSearchCrawler {
...
@@ -356,35 +357,37 @@ public class WeiboHotSearchCrawler {
org
.
jsoup
.
nodes
.
Document
documen
=
Jsoup
.
parse
(
htmlBody
);
org
.
jsoup
.
nodes
.
Document
documen
=
Jsoup
.
parse
(
htmlBody
);
//获取贡献者信息
//获取贡献者信息
try
{
try
{
Elements
li
=
documen
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
);
if
(
Objects
.
isNull
(
weiBoUserDao
))
{
Elements
cardUser
=
documen
.
select
(
"div.card-user"
);
weiBoUserDao
=
new
WeiBoUserDao
();
for
(
Element
element
:
cardUser
)
{
}
if
(!
element
.
select
(
"div.card-head"
).
text
().
isEmpty
())
{
Elements
li
=
element
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
);
if
(
Objects
.
nonNull
(
li
))
{
if
(
Objects
.
nonNull
(
li
))
{
Date
date
=
new
Date
();
//循环获取话题贡献者相关信息
for
(
Element
element
:
li
)
{
for
(
Element
eleme
:
li
)
{
WeiBoUser
weiBoUser
=
new
WeiBoUser
();
//获取用户名
String
userName
=
element
.
select
(
"a.name"
).
text
();
//获取用户id
String
attr
=
element
.
select
(
"span.avator"
).
select
(
"a"
).
first
().
attr
(
"href"
);
String
userId
=
attr
.
substring
(
14
);
String
type
=
"话题贡献者"
;
String
type
=
"话题贡献者"
;
String
id
=
userId
+
"_"
+
type
+
"_"
+
name
;
writeUser
(
eleme
,
type
,
topic
);
weiBoUser
.
setType
(
type
);
}
weiBoUser
.
setId
(
id
);
}
weiBoUser
.
setUserName
(
userName
);
}
else
{
weiBoUser
.
setUserId
(
userId
);
Elements
li
=
element
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
);
weiBoUser
.
setTopic
(
name
);
if
(
Objects
.
nonNull
(
li
))
{
weiBoUser
.
setTime
(
date
);
weiBoUserDao
.
addWeiBoUser
(
weiBoUser
);
//循环获取话题贡献者相关信息
for
(
Element
eleme
:
li
)
{
String
type
=
"当事人"
;
writeUser
(
eleme
,
type
,
topic
);
}
}
}
}
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"话题贡献者排行采集异常"
,
e
);
log
.
error
(
"话题贡献者排行采集异常"
,
e
);
}
}
Elements
dt
=
documen
.
select
(
"div.card-about"
).
select
(
"dt"
);
Elements
dt
=
documen
.
select
(
"div.card-about"
).
select
(
"dt"
);
if
(
Objects
.
nonNull
(
dt
))
{
if
(
Objects
.
nonNull
(
dt
))
{
//获取微博关于的相关信息
Elements
dd
=
documen
.
select
(
"div.card-about"
).
select
(
"dd"
);
Elements
dd
=
documen
.
select
(
"div.card-about"
).
select
(
"dd"
);
Document
dtDocument
=
new
Document
();
Document
dtDocument
=
new
Document
();
Document
ddDocument
=
new
Document
();
Document
ddDocument
=
new
Document
();
...
@@ -407,12 +410,36 @@ public class WeiboHotSearchCrawler {
...
@@ -407,12 +410,36 @@ public class WeiboHotSearchCrawler {
}
}
return
docm
;
return
docm
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析微博话题时出现解析错误"
,
e
);
log
.
error
(
"解析微博话题时出现解析错误"
,
e
);
}
}
}
}
return
document
;
return
document
;
}
}
/**
 * Builds a {@link WeiBoUser} from one user list item and saves it through {@code weiBoUserDao}.
 *
 * <p>The user name is the text of {@code a.name}. The user id is the {@code href} of the first
 * link inside {@code span.avator} with its first 14 characters removed. The record id is
 * {@code userId + "_" + type + "_" + topic}.
 *
 * <p>An item that has no avatar link, or whose {@code href} yields no user id, is logged and
 * skipped instead of throwing, so a single malformed item does not end the caller's iteration.
 *
 * @param eleme list item element describing one user
 * @param type  user category stored on the record and used in the record id
 * @param topic topic name stored on the record and used in the record id
 */
private static void writeUser(Element eleme, String type, String topic) {
    // NOTE(review): 14 presumably equals the length of a "//weibo.com/u/" prefix — confirm.
    final int userIdOffset = 14;

    // Lazily create the shared DAO on first use.
    if (Objects.isNull(weiBoUserDao)) {
        weiBoUserDao = new WeiBoUserDao();
    }

    // User name.
    String userName = eleme.select("a.name").text();

    // first() yields null when nothing matched; guard before dereferencing.
    Element avatarLink = eleme.select("span.avator").select("a").first();
    if (Objects.isNull(avatarLink)) {
        log.warn("Skipping weibo user without avatar link, type=" + type + ", topic=" + topic);
        return;
    }

    // User id: the part of the profile link after the fixed-length prefix.
    String attr = avatarLink.attr("href");
    if (attr.length() <= userIdOffset) {
        log.warn("Skipping weibo user with unexpected profile link '" + attr
                + "', type=" + type + ", topic=" + topic);
        return;
    }
    String userId = attr.substring(userIdOffset);

    String id = userId + "_" + type + "_" + topic;
    Date date = new Date();

    // Argument order must follow WeiBoUser(userId, userName, topic, time); the previous call
    // passed userName first, which stored the name as the id and the id as the name.
    WeiBoUser weiBoUser = new WeiBoUser(userId, userName, topic, date);
    weiBoUser.setType(type);
    weiBoUser.setId(id);
    weiBoUserDao.addWeiBoUser(weiBoUser);
}
/**
/**
* 解析微博信息
* 解析微博信息
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
View file @
600d1086
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment