Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
live-crawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
live-crawler
Commits
2a8c7a4d
Commit
2a8c7a4d
authored
Jan 30, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
斗鱼、虎牙、熊猫TV数据采集
parent
7c521742
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
163 additions
and
110 deletions
+163
-110
src/main/java/com/zhiwei/live/bean/RoomInfo.java
+33
-27
src/main/java/com/zhiwei/live/danmu/bilibili/BilibiliMessage.java
+13
-1
src/main/java/com/zhiwei/live/danmu/bilibili/BilibiliMessageHandler.java
+2
-0
src/main/java/com/zhiwei/live/danmu/douyu/DouYuMessage.java
+1
-1
src/main/java/com/zhiwei/live/roominfo/BilibiliRoomInfoCrawler.java
+6
-5
src/main/java/com/zhiwei/live/roominfo/DouYuRoomInfoCrawler.java
+19
-2
src/main/java/com/zhiwei/live/roominfo/HuYaRoomInfoCrawler.java
+14
-2
src/main/java/com/zhiwei/live/roominfo/PandamTVRoomInfoCrawler.java
+9
-6
src/test/java/com/zhiwei/live/test/roomInfo/RoomInfoCrawlerTest.java
+66
-66
No files found.
src/main/java/com/zhiwei/live/bean/RoomInfo.java
View file @
2a8c7a4d
package
com
.
zhiwei
.
live
.
bean
;
package
com
.
zhiwei
.
live
.
bean
;
public
class
RoomInfo
{
public
class
RoomInfo
{
String
pt
;
//平台类型
String
pt
;
// 平台类型
String
roomId
;
//房间号
String
roomId
;
// 房间号
String
nickName
;
//主播昵称
String
nickName
;
// 主播昵称
String
roomName
;
//房间名称
String
roomName
;
// 房间名称
Integer
hotNum
;
//直播间热度
Integer
hotNum
;
// 直播间热度
Integer
fans
;
// 订阅数
public
RoomInfo
(){}
public
RoomInfo
()
{
}
public
RoomInfo
(
String
pt
,
String
roomId
,
String
nickName
,
String
roomName
,
Integer
hotNum
){
public
RoomInfo
(
String
pt
,
String
roomId
,
String
nickName
,
String
roomName
,
Integer
hotNum
,
Integer
fans
)
{
this
.
pt
=
pt
;
this
.
pt
=
pt
;
this
.
roomId
=
roomId
;
this
.
roomId
=
roomId
;
this
.
nickName
=
nickName
;
this
.
nickName
=
nickName
;
this
.
roomName
=
roomName
;
this
.
roomName
=
roomName
;
this
.
hotNum
=
hotNum
;
this
.
hotNum
=
hotNum
;
this
.
fans
=
fans
;
}
}
@Override
@Override
public
String
toString
()
{
public
String
toString
()
{
return
"new RoomInfo["
return
"new RoomInfo["
+
"pt = "
+
pt
+
", roomId = "
+
roomId
+
", roomName = "
+
roomName
+
", nickName = "
+
"pt = "
+
pt
+
nickName
+
", hotNum = "
+
hotNum
+
", fans = "
+
fans
+
"]"
;
+
", roomId = "
+
roomId
}
+
", roomName = "
+
roomName
+
", nickName = "
+
nickName
public
Integer
getFans
()
{
+
", hotNum = "
+
hotNum
return
fans
;
+
"]"
;
}
public
void
setHotNum
(
Integer
hotNum
)
{
this
.
hotNum
=
hotNum
;
}
public
void
setFans
(
Integer
fans
)
{
this
.
fans
=
fans
;
}
}
public
String
getPt
()
{
public
String
getPt
()
{
...
@@ -75,7 +83,5 @@ public class RoomInfo {
...
@@ -75,7 +83,5 @@ public class RoomInfo {
public
void
setHotNum
(
int
hotNum
)
{
public
void
setHotNum
(
int
hotNum
)
{
this
.
hotNum
=
hotNum
;
this
.
hotNum
=
hotNum
;
}
}
}
}
src/main/java/com/zhiwei/live/danmu/bilibili/BilibiliMessage.java
View file @
2a8c7a4d
...
@@ -17,6 +17,8 @@ public class BilibiliMessage {
...
@@ -17,6 +17,8 @@ public class BilibiliMessage {
String
content
;
//弹幕内容
String
content
;
//弹幕内容
String
room_id
;
//房间id
public
BilibiliMessage
(
JSONObject
json
)
throws
Exception
{
public
BilibiliMessage
(
JSONObject
json
)
throws
Exception
{
constructJson
(
json
);
constructJson
(
json
);
}
}
...
@@ -26,6 +28,7 @@ public class BilibiliMessage {
...
@@ -26,6 +28,7 @@ public class BilibiliMessage {
private
void
constructJson
(
JSONObject
json
)
throws
Exception
{
private
void
constructJson
(
JSONObject
json
)
throws
Exception
{
try
{
try
{
System
.
out
.
println
(
json
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"info"
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"info"
);
messageType
=
json
.
getString
(
"cmd"
);
messageType
=
json
.
getString
(
"cmd"
);
user_id
=
jsonArray
.
getJSONArray
(
2
).
getString
(
0
);
user_id
=
jsonArray
.
getJSONArray
(
2
).
getString
(
0
);
...
@@ -43,11 +46,12 @@ public class BilibiliMessage {
...
@@ -43,11 +46,12 @@ public class BilibiliMessage {
@Override
@Override
public
String
toString
()
{
public
String
toString
()
{
return
"new BilibiliMessage["
return
"new BilibiliMessage["
+
"user_id = "
+
user_id
+
"user_id = "
+
user_id
+
", nickName = "
+
nickName
+
", nickName = "
+
nickName
+
", messageType = "
+
messageType
+
", messageType = "
+
messageType
+
", time = "
+
time
+
", time = "
+
time
+
", content = "
+
content
+
", content = "
+
content
+
", room_id = "
+
room_id
+
"]"
;
+
"]"
;
}
}
...
@@ -91,5 +95,13 @@ public class BilibiliMessage {
...
@@ -91,5 +95,13 @@ public class BilibiliMessage {
this
.
content
=
content
;
this
.
content
=
content
;
}
}
public
String
getRoom_id
()
{
return
room_id
;
}
public
void
setRoom_id
(
String
room_id
)
{
this
.
room_id
=
room_id
;
}
}
}
src/main/java/com/zhiwei/live/danmu/bilibili/BilibiliMessageHandler.java
View file @
2a8c7a4d
...
@@ -71,6 +71,8 @@ public class BilibiliMessageHandler extends ChannelInboundHandlerAdapter{
...
@@ -71,6 +71,8 @@ public class BilibiliMessageHandler extends ChannelInboundHandlerAdapter{
while
(
matcher
.
find
())
{
while
(
matcher
.
find
())
{
JSONObject
dataJson
=
JSONObject
.
parseObject
(
matcher
.
group
());
JSONObject
dataJson
=
JSONObject
.
parseObject
(
matcher
.
group
());
BilibiliMessage
bilibiliMessage
=
new
BilibiliMessage
(
dataJson
);
BilibiliMessage
bilibiliMessage
=
new
BilibiliMessage
(
dataJson
);
bilibiliMessage
.
setRoom_id
(
roomId
);
dataCallBack
.
onData
(
bilibiliMessage
);
dataCallBack
.
onData
(
bilibiliMessage
);
// System.out.println(bilibiliMessage);
// System.out.println(bilibiliMessage);
}
}
...
...
src/main/java/com/zhiwei/live/danmu/douyu/DouYuMessage.java
View file @
2a8c7a4d
...
@@ -42,7 +42,7 @@ public class DouYuMessage {
...
@@ -42,7 +42,7 @@ public class DouYuMessage {
@Override
@Override
public
String
toString
()
{
public
String
toString
()
{
return
"new BilibiliMessage["
return
"new DouYuMessage["
+
"user_id = "
+
user_id
+
"user_id = "
+
user_id
+
", nickName = "
+
nickName
+
", nickName = "
+
nickName
+
", time = "
+
time
+
", time = "
+
time
...
...
src/main/java/com/zhiwei/live/roominfo/BilibiliRoomInfoCrawler.java
View file @
2a8c7a4d
...
@@ -45,17 +45,18 @@ public class BilibiliRoomInfoCrawler {
...
@@ -45,17 +45,18 @@ public class BilibiliRoomInfoCrawler {
Integer
person_num
=
data
.
getIntValue
(
"online"
);
Integer
person_num
=
data
.
getIntValue
(
"online"
);
String
roomname
=
data
.
getString
(
"title"
);
String
roomname
=
data
.
getString
(
"title"
);
Integer
room_id
=
data
.
getInteger
(
"room_id"
);
Integer
room_id
=
data
.
getInteger
(
"room_id"
);
Integer
fans
=
data
.
getInteger
(
"attention"
);
String
roomId
=
room_id
!=
null
?
room_id
.
toString
():
null
;
String
roomId
=
room_id
!=
null
?
room_id
.
toString
():
null
;
String
username
=
null
;
String
username
=
null
;
//通过房间id获取用户信息
//通过房间id获取用户信息
roomUrl
=
"https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="
+
room_id
;
roomUrl
=
"https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="
+
roomId
;
String
roomBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
)).
body
().
string
();
String
roomBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
)).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
roomBody
))
{
if
(!
StringUtils
.
isBlank
(
roomBody
))
{
JSONObject
roomData
=
JSONObject
.
parseObject
(
roomBody
).
getJSONObject
(
"data"
);
JSONObject
roomData
=
JSONObject
.
parseObject
(
roomBody
).
getJSONObject
(
"data"
);
username
=
roomData
.
getJSONObject
(
"info"
).
getString
(
"uname"
);
username
=
roomData
.
getJSONObject
(
"info"
).
getString
(
"uname"
);
}
}
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
);
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
,
fans
);
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
return
null
;
...
@@ -67,7 +68,7 @@ public class BilibiliRoomInfoCrawler {
...
@@ -67,7 +68,7 @@ public class BilibiliRoomInfoCrawler {
public
static
RoomInfo
getRoomInfoByRoomUrlProxy
(
String
roomUrl
)
throws
Exception
{
public
static
RoomInfo
getRoomInfoProxyByRoomUrl
(
String
roomUrl
)
throws
Exception
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
//判断页面中是否包含房间信息
//判断页面中是否包含房间信息
...
@@ -85,7 +86,7 @@ public class BilibiliRoomInfoCrawler {
...
@@ -85,7 +86,7 @@ public class BilibiliRoomInfoCrawler {
String
roomname
=
data
.
getString
(
"title"
);
String
roomname
=
data
.
getString
(
"title"
);
Integer
room_id
=
data
.
getInteger
(
"room_id"
);
Integer
room_id
=
data
.
getInteger
(
"room_id"
);
String
roomId
=
room_id
!=
null
?
room_id
.
toString
():
null
;
String
roomId
=
room_id
!=
null
?
room_id
.
toString
():
null
;
Integer
fans
=
data
.
getInteger
(
"attention"
);
String
username
=
null
;
String
username
=
null
;
//通过房间id获取用户信息
//通过房间id获取用户信息
roomUrl
=
"https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="
+
room_id
;
roomUrl
=
"https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="
+
room_id
;
...
@@ -94,7 +95,7 @@ public class BilibiliRoomInfoCrawler {
...
@@ -94,7 +95,7 @@ public class BilibiliRoomInfoCrawler {
JSONObject
roomData
=
JSONObject
.
parseObject
(
roomBody
).
getJSONObject
(
"data"
);
JSONObject
roomData
=
JSONObject
.
parseObject
(
roomBody
).
getJSONObject
(
"data"
);
username
=
roomData
.
getJSONObject
(
"info"
).
getString
(
"uname"
);
username
=
roomData
.
getJSONObject
(
"info"
).
getString
(
"uname"
);
}
}
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
);
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
,
fans
);
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
return
null
;
...
...
src/main/java/com/zhiwei/live/roominfo/DouYuRoomInfoCrawler.java
View file @
2a8c7a4d
...
@@ -32,6 +32,7 @@ public class DouYuRoomInfoCrawler {
...
@@ -32,6 +32,7 @@ public class DouYuRoomInfoCrawler {
String
roomBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
)).
body
().
string
();
String
roomBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
)).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
roomBody
)
&&
roomBody
.
contains
(
"ROOM.room_id ="
))
{
if
(!
StringUtils
.
isBlank
(
roomBody
)
&&
roomBody
.
contains
(
"ROOM.room_id ="
))
{
String
roomId
=
roomBody
.
split
(
"ROOM\\.room_id = "
)[
1
].
split
(
"; "
)[
0
].
trim
();
String
roomId
=
roomBody
.
split
(
"ROOM\\.room_id = "
)[
1
].
split
(
"; "
)[
0
].
trim
();
//获取房间信息
String
url
=
"http://open.douyucdn.cn/api/RoomApi/room/"
+
roomId
;
String
url
=
"http://open.douyucdn.cn/api/RoomApi/room/"
+
roomId
;
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
...
@@ -40,7 +41,15 @@ public class DouYuRoomInfoCrawler {
...
@@ -40,7 +41,15 @@ public class DouYuRoomInfoCrawler {
String
user_name
=
data
.
getString
(
"owner_name"
);
String
user_name
=
data
.
getString
(
"owner_name"
);
Integer
hn
=
data
.
getInteger
(
"hn"
);
Integer
hn
=
data
.
getInteger
(
"hn"
);
int
online
=
data
.
getInteger
(
"online"
);
int
online
=
data
.
getInteger
(
"online"
);
return
new
RoomInfo
(
PT
,
roomId
,
room_name
,
user_name
,
hn
);
Integer
fans
=
0
;
//获取用户信息
String
userUrl
=
"https://www.douyu.com/swf_api/h5room/"
+
roomId
;
String
userBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
userUrl
)).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
userBody
))
{
JSONObject
userData
=
JSONObject
.
parseObject
(
userBody
).
getJSONObject
(
"data"
);
fans
=
Integer
.
valueOf
(
userData
.
getString
(
"fans"
));
}
return
new
RoomInfo
(
PT
,
roomId
,
room_name
,
user_name
,
hn
,
fans
);
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
return
null
;
...
@@ -71,7 +80,15 @@ public class DouYuRoomInfoCrawler {
...
@@ -71,7 +80,15 @@ public class DouYuRoomInfoCrawler {
String
user_name
=
data
.
getString
(
"owner_name"
);
String
user_name
=
data
.
getString
(
"owner_name"
);
Integer
hn
=
data
.
getInteger
(
"hn"
);
Integer
hn
=
data
.
getInteger
(
"hn"
);
int
online
=
data
.
getInteger
(
"online"
);
int
online
=
data
.
getInteger
(
"online"
);
return
new
RoomInfo
(
PT
,
roomId
,
room_name
,
user_name
,
hn
);
Integer
fans
=
0
;
//获取用户信息
String
userUrl
=
"https://www.douyu.com/swf_api/h5room/"
+
roomId
;
String
userBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
userUrl
)).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
userBody
))
{
JSONObject
userData
=
JSONObject
.
parseObject
(
userBody
).
getJSONObject
(
"data"
);
fans
=
Integer
.
valueOf
(
userData
.
getString
(
"fans"
));
}
return
new
RoomInfo
(
PT
,
roomId
,
room_name
,
user_name
,
hn
,
fans
);
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
return
null
;
...
...
src/main/java/com/zhiwei/live/roominfo/HuYaRoomInfoCrawler.java
View file @
2a8c7a4d
...
@@ -32,8 +32,14 @@ public class HuYaRoomInfoCrawler {
...
@@ -32,8 +32,14 @@ public class HuYaRoomInfoCrawler {
Integer
liveCount
=
Integer
.
valueOf
(
document
.
select
(
"em#live-count"
).
text
().
replaceAll
(
","
,
""
));
Integer
liveCount
=
Integer
.
valueOf
(
document
.
select
(
"em#live-count"
).
text
().
replaceAll
(
","
,
""
));
String
username
=
document
.
select
(
"h3.host-name"
).
text
();
String
username
=
document
.
select
(
"h3.host-name"
).
text
();
String
activityCount
=
document
.
select
(
"div#activityCount"
).
text
();
String
activityCount
=
document
.
select
(
"div#activityCount"
).
text
();
Integer
fans
=
0
;
try
{
fans
=
Integer
.
valueOf
(
activityCount
.
replaceAll
(
","
,
""
));
}
catch
(
Exception
e
)
{
fans
=
0
;
}
String
room_id
=
document
.
select
(
"span.host-rid"
).
text
();
String
room_id
=
document
.
select
(
"span.host-rid"
).
text
();
return
new
RoomInfo
(
PT
,
room_id
,
roomName
,
username
,
liveCount
);
return
new
RoomInfo
(
PT
,
room_id
,
roomName
,
username
,
liveCount
,
fans
);
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
return
null
;
...
@@ -58,7 +64,13 @@ public class HuYaRoomInfoCrawler {
...
@@ -58,7 +64,13 @@ public class HuYaRoomInfoCrawler {
String
username
=
document
.
select
(
"h3.host-name"
).
text
();
String
username
=
document
.
select
(
"h3.host-name"
).
text
();
String
activityCount
=
document
.
select
(
"div#activityCount"
).
text
();
String
activityCount
=
document
.
select
(
"div#activityCount"
).
text
();
String
room_id
=
document
.
select
(
"span.host-rid"
).
text
();
String
room_id
=
document
.
select
(
"span.host-rid"
).
text
();
return
new
RoomInfo
(
PT
,
room_id
,
roomName
,
username
,
liveCount
);
Integer
fans
=
0
;
try
{
fans
=
Integer
.
valueOf
(
activityCount
.
replaceAll
(
","
,
""
));
}
catch
(
Exception
e
)
{
fans
=
0
;
}
return
new
RoomInfo
(
PT
,
room_id
,
roomName
,
username
,
liveCount
,
fans
);
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
return
null
;
...
...
src/main/java/com/zhiwei/live/roominfo/PandamTVRoomInfoCrawler.java
View file @
2a8c7a4d
...
@@ -47,8 +47,9 @@ public class PandamTVRoomInfoCrawler {
...
@@ -47,8 +47,9 @@ public class PandamTVRoomInfoCrawler {
String
roomname
=
roominfo
.
getString
(
"name"
);
String
roomname
=
roominfo
.
getString
(
"name"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
String
username
=
hostinfo
.
getString
(
"name"
);
String
username
=
hostinfo
.
getString
(
"name"
);
int
fans
=
0
;
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
);
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
,
fans
);
}
}
//判断页面中是否包含房间信息,此为使用手机端直播
//判断页面中是否包含房间信息,此为使用手机端直播
else
if
(
htmlBody
.
contains
(
"window.HOSTINFO="
)){
else
if
(
htmlBody
.
contains
(
"window.HOSTINFO="
)){
...
@@ -65,8 +66,8 @@ public class PandamTVRoomInfoCrawler {
...
@@ -65,8 +66,8 @@ public class PandamTVRoomInfoCrawler {
String
roomname
=
roominfo
.
getString
(
"name"
);
String
roomname
=
roominfo
.
getString
(
"name"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
String
username
=
hostinfo
.
getString
(
"nickName"
);
String
username
=
hostinfo
.
getString
(
"nickName"
);
int
fans
=
0
;
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
);
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
,
fans
);
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
return
null
;
...
@@ -103,7 +104,8 @@ public class PandamTVRoomInfoCrawler {
...
@@ -103,7 +104,8 @@ public class PandamTVRoomInfoCrawler {
String
roomname
=
roominfo
.
getString
(
"name"
);
String
roomname
=
roominfo
.
getString
(
"name"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
String
username
=
hostinfo
.
getString
(
"name"
);
String
username
=
hostinfo
.
getString
(
"name"
);
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
);
int
fans
=
0
;
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
,
fans
);
}
}
//判断页面中是否包含房间信息,此为使用手机端直播
//判断页面中是否包含房间信息,此为使用手机端直播
else
if
(
htmlBody
.
contains
(
"window.HOSTINFO="
)){
else
if
(
htmlBody
.
contains
(
"window.HOSTINFO="
)){
...
@@ -120,7 +122,8 @@ public class PandamTVRoomInfoCrawler {
...
@@ -120,7 +122,8 @@ public class PandamTVRoomInfoCrawler {
String
roomname
=
roominfo
.
getString
(
"name"
);
String
roomname
=
roominfo
.
getString
(
"name"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
String
username
=
hostinfo
.
getString
(
"nickName"
);
String
username
=
hostinfo
.
getString
(
"nickName"
);
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
);
int
fans
=
0
;
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
,
fans
);
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
return
null
;
...
...
src/test/java/com/zhiwei/live/test/roomInfo/RoomInfoCrawlerTest.java
View file @
2a8c7a4d
package
com
.
zhiwei
.
live
.
test
.
roomInfo
;
//
package com.zhiwei.live.test.roomInfo;
//
import
org.junit.jupiter.api.Test
;
//
import org.junit.jupiter.api.Test;
//
import
com.zhiwei.common.config.GroupType
;
//
import com.zhiwei.common.config.GroupType;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.live.bean.RoomInfo
;
//
import com.zhiwei.live.bean.RoomInfo;
import
com.zhiwei.live.roominfo.BilibiliRoomInfoCrawler
;
//
import com.zhiwei.live.roominfo.BilibiliRoomInfoCrawler;
import
com.zhiwei.live.roominfo.DouYuRoomInfoCrawler
;
//
import com.zhiwei.live.roominfo.DouYuRoomInfoCrawler;
import
com.zhiwei.live.roominfo.HuYaRoomInfoCrawler
;
//
import com.zhiwei.live.roominfo.HuYaRoomInfoCrawler;
import
com.zhiwei.live.roominfo.PandamTVRoomInfoCrawler
;
//
import com.zhiwei.live.roominfo.PandamTVRoomInfoCrawler;
//
public
class
RoomInfoCrawlerTest
{
//
public class RoomInfoCrawlerTest {
private
static
final
String
registry
=
"zookeeper://192.168.0.36:2181"
;
//
private static final String registry = "zookeeper://192.168.0.36:2181";
private
static
final
String
group
=
"local"
;
//
private static final String group = "local";
//
static
{
//
static {
ProxyFactory
.
init
(
registry
,
group
,
GroupType
.
PROVIDER
);
//
ProxyFactory.init(registry, group, GroupType.PROVIDER);
}
//
}
//
@Test
//
@Test
public
void
getBilibiliRoomInfoByRoomUrl
()
{
//
public void getBilibiliRoomInfoByRoomUrl() {
String
roomUrl
=
"https://live.bilibili.com/483"
;
//
String roomUrl = "https://live.bilibili.com/483";
try
{
//
try {
RoomInfo
roomInfo
=
BilibiliRoomInfoCrawler
.
getRoomInfoByRoomUrl
(
roomUrl
);
//
RoomInfo roomInfo = BilibiliRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
//
System.out.println("房间信息:::"+ roomInfo);
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
e
.
printStackTrace
();
//
e.printStackTrace();
}
//
}
}
//
}
//
//
@Test
//
@Test
public
void
getDouyuRoomInfoByRoomUrl
()
{
//
public void getDouyuRoomInfoByRoomUrl() {
String
roomUrl
=
"https://www.douyu.com/4372875"
;
//
String roomUrl = "https://www.douyu.com/4372875";
try
{
//
try {
RoomInfo
roomInfo
=
DouYuRoomInfoCrawler
.
getRoomInfoByRoomUrl
(
roomUrl
);
//
RoomInfo roomInfo = DouYuRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
//
System.out.println("房间信息:::"+ roomInfo);
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
e
.
printStackTrace
();
//
e.printStackTrace();
}
//
}
}
//
}
//
//
@Test
//
@Test
public
void
getHuYaRoomInfoByRoomUrl
()
{
//
public void getHuYaRoomInfoByRoomUrl() {
String
roomUrl
=
"https://www.huya.com/baishaling"
;
//
String roomUrl = "https://www.huya.com/baishaling";
try
{
//
try {
RoomInfo
roomInfo
=
HuYaRoomInfoCrawler
.
getRoomInfoByRoomUrl
(
roomUrl
);
//
RoomInfo roomInfo = HuYaRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
//
System.out.println("房间信息:::"+ roomInfo);
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
e
.
printStackTrace
();
//
e.printStackTrace();
}
//
}
}
//
}
//
@Test
//
@Test
public
void
getPandamTVRoomInfoByRoomUrl
()
{
//
public void getPandamTVRoomInfoByRoomUrl() {
String
roomUrl
=
"https://www.panda.tv/337852"
;
//
String roomUrl = "https://www.panda.tv/337852";
try
{
//
try {
RoomInfo
roomInfo
=
PandamTVRoomInfoCrawler
.
getRoomInfoByRoomUrl
(
roomUrl
);
//
RoomInfo roomInfo = PandamTVRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
//
System.out.println("房间信息:::"+ roomInfo);
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
e
.
printStackTrace
();
//
e.printStackTrace();
}
//
}
}
//
}
}
//
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment