Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
7e156432
Commit
7e156432
authored
Jul 12, 2021
by
leiliangliang
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
新增微博话题采集话题贡献者,关于功能
parent
f986b5c8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
62 additions
and
29 deletions
+62
-29
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
+6
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+56
-29
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
+0
-0
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
View file @
7e156432
...
@@ -73,4 +73,10 @@ public class WeiBoUser implements Serializable {
...
@@ -73,4 +73,10 @@ public class WeiBoUser implements Serializable {
this
.
profileImageUrl
=
profileImageUrl
;
this
.
profileImageUrl
=
profileImageUrl
;
}
}
/**
 * Creates a user record from its identity, the topic it belongs to, and a timestamp.
 * Only these four fields are assigned here; every other field keeps its default value.
 *
 * @param userId   identifier of the Weibo user
 * @param userName display name of the Weibo user
 * @param topic    name of the topic this user is associated with
 * @param time     timestamp stored on the record
 */
public WeiBoUser(String userId, String userName, String topic, Date time) {
    this.time = time;
    this.topic = topic;
    this.userName = userName;
    this.userId = userId;
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
7e156432
...
@@ -328,17 +328,18 @@ public class WeiboHotSearchCrawler {
...
@@ -328,17 +328,18 @@ public class WeiboHotSearchCrawler {
}
}
/**
/**
* 微博热搜数据更新话题贡献者排行,阅读量,讨论量,关于
* 微博热搜数据更新话题贡献者排行,关于
*
* @param document
* @param document
* @return
* @return
*/
*/
public
static
Document
weiboUpdatePC
(
Document
document
)
{
public
static
Document
weiboUpdatePC
(
Document
document
)
{
document
.
getString
(
"name"
);
document
.
getString
(
"name"
);
String
name
=
document
.
getString
(
"name"
);
String
topic
=
document
.
getString
(
"name"
);
String
gb
=
"#"
+
name
+
"#"
;
String
gb
=
"#"
+
topic
+
"#"
;
String
encode
=
null
;
String
encode
=
null
;
try
{
try
{
encode
=
URLEncoder
.
encode
(
gb
,
"utf-8"
);
encode
=
URLEncoder
.
encode
(
gb
,
"utf-8"
);
}
catch
(
UnsupportedEncodingException
e
)
{
}
catch
(
UnsupportedEncodingException
e
)
{
log
.
error
(
"字符解析成URl模式异常"
,
e
);
log
.
error
(
"字符解析成URl模式异常"
,
e
);
}
}
...
@@ -356,35 +357,37 @@ public class WeiboHotSearchCrawler {
...
@@ -356,35 +357,37 @@ public class WeiboHotSearchCrawler {
org
.
jsoup
.
nodes
.
Document
documen
=
Jsoup
.
parse
(
htmlBody
);
org
.
jsoup
.
nodes
.
Document
documen
=
Jsoup
.
parse
(
htmlBody
);
//获取贡献者信息
//获取贡献者信息
try
{
try
{
Elements
li
=
documen
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
);
if
(
Objects
.
isNull
(
weiBoUserDao
))
{
Elements
cardUser
=
documen
.
select
(
"div.card-user"
);
weiBoUserDao
=
new
WeiBoUserDao
();
for
(
Element
element
:
cardUser
)
{
}
if
(!
element
.
select
(
"div.card-head"
).
text
().
isEmpty
())
{
if
(
Objects
.
nonNull
(
li
))
{
Elements
li
=
element
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
);
Date
date
=
new
Date
();
if
(
Objects
.
nonNull
(
li
))
{
for
(
Element
element
:
li
)
{
//循环获取话题贡献者相关信息
WeiBoUser
weiBoUser
=
new
WeiBoUser
();
for
(
Element
eleme
:
li
)
{
//获取用户名
String
type
=
"话题贡献者"
;
String
userName
=
element
.
select
(
"a.name"
).
text
(
);
writeUser
(
eleme
,
type
,
topic
);
//获取用户id
}
String
attr
=
element
.
select
(
"span.avator"
).
select
(
"a"
).
first
().
attr
(
"href"
);
}
String
userId
=
attr
.
substring
(
14
);
}
else
{
String
type
=
"话题贡献者"
;
Elements
li
=
element
.
select
(
"ul.card-user-list-a"
).
select
(
"li"
)
;
String
id
=
userId
+
"_"
+
type
+
"_"
+
name
;
if
(
Objects
.
nonNull
(
li
))
{
weiBoUser
.
setType
(
type
);
weiBoUser
.
setId
(
id
);
//循环获取话题贡献者相关信息
weiBoUser
.
setUserName
(
userName
);
for
(
Element
eleme
:
li
)
{
weiBoUser
.
setUserId
(
userId
)
;
String
type
=
"当事人"
;
weiBoUser
.
setTopic
(
name
);
writeUser
(
eleme
,
type
,
topic
);
weiBoUser
.
setTime
(
date
);
}
weiBoUserDao
.
addWeiBoUser
(
weiBoUser
);
}
}
}
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"话题贡献者排行采集异常"
,
e
);
log
.
error
(
"话题贡献者排行采集异常"
,
e
);
}
}
Elements
dt
=
documen
.
select
(
"div.card-about"
).
select
(
"dt"
);
Elements
dt
=
documen
.
select
(
"div.card-about"
).
select
(
"dt"
);
if
(
Objects
.
nonNull
(
dt
))
{
if
(
Objects
.
nonNull
(
dt
))
{
//获取微博关于的相关信息
Elements
dd
=
documen
.
select
(
"div.card-about"
).
select
(
"dd"
);
Elements
dd
=
documen
.
select
(
"div.card-about"
).
select
(
"dd"
);
Document
dtDocument
=
new
Document
();
Document
dtDocument
=
new
Document
();
Document
ddDocument
=
new
Document
();
Document
ddDocument
=
new
Document
();
...
@@ -407,12 +410,36 @@ public class WeiboHotSearchCrawler {
...
@@ -407,12 +410,36 @@ public class WeiboHotSearchCrawler {
}
}
return
docm
;
return
docm
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析微博话题时出现解析错误"
,
e
);
log
.
error
(
"解析微博话题时出现解析错误"
,
e
);
}
}
}
}
return
document
;
return
document
;
}
}
/**
 * Extracts one user from a user-list item and persists it as a {@code WeiBoUser}.
 *
 * <p>The user name is read from {@code a.name}; the user id is taken from the
 * {@code href} of the first link under {@code span.avator}. The stored record id
 * has the form {@code userId_type_topic}.
 *
 * @param eleme the list item describing a single user
 * @param type  role label stored on the record and embedded in its id
 *              (callers in this file pass "话题贡献者" or "当事人")
 * @param topic name of the topic the user is associated with
 * @throws IllegalStateException if the item has no avatar link, or the link's
 *                               {@code href} is too short to contain a user id
 */
private static void writeUser(Element eleme, String type, String topic) {
    // Number of leading href characters discarded to obtain the user id.
    // NOTE(review): presumably the length of a "//weibo.com/u/" prefix — confirm.
    final int userIdOffset = 14;

    Date date = new Date();
    // Lazily create the DAO the first time a user is written.
    if (Objects.isNull(weiBoUserDao)) {
        weiBoUserDao = new WeiBoUserDao();
    }

    // User name
    String userName = eleme.select("a.name").text();

    // first() yields null when nothing matched; fail with a clear message
    // instead of an anonymous NullPointerException.
    Element profileLink = eleme.select("span.avator").select("a").first();
    if (profileLink == null) {
        throw new IllegalStateException(
                "No avatar link found for user '" + userName + "' in topic '" + topic + "'");
    }
    String attr = profileLink.attr("href");
    if (attr.length() < userIdOffset) {
        throw new IllegalStateException(
                "Unexpected avatar href '" + attr + "' for user '" + userName + "'");
    }

    // User id
    String userId = attr.substring(userIdOffset);
    String id = userId + "_" + type + "_" + topic;

    // Argument order must match WeiBoUser(userId, userName, topic, time);
    // passing (userName, userId, ...) would silently swap the two fields.
    WeiBoUser weiBoUser = new WeiBoUser(userId, userName, topic, date);
    weiBoUser.setType(type);
    weiBoUser.setId(id);
    weiBoUserDao.addWeiBoUser(weiBoUser);
}
/**
/**
* 解析微博信息
* 解析微博信息
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
View file @
7e156432
This diff is collapsed.
Click to expand it.
chenweitao
@chenweitao
mentioned in commit
37d43810
Jul 12, 2021
mentioned in commit
37d43810
mentioned in commit 37d43810c159c654a5fafab5c08695a669389c77
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment