Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
live-crawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
live-crawler
Commits
65bd8bb3
Commit
65bd8bb3
authored
Jan 29, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改更新房间信息方式,由房间id修改为地址
parent
325fad04
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
144 additions
and
58 deletions
+144
-58
src/main/java/com/zhiwei/live/danmu/bilibili/BilibiliClient.java
+1
-1
src/main/java/com/zhiwei/live/danmu/douyu/DouyuClient.java
+1
-1
src/main/java/com/zhiwei/live/roominfo/BilibiliRoomInfoCrawler.java
+11
-13
src/main/java/com/zhiwei/live/roominfo/DouYuRoomInfoCrawler.java
+37
-22
src/main/java/com/zhiwei/live/roominfo/HuYaRoomInfoCrawler.java
+26
-4
src/main/java/com/zhiwei/live/roominfo/PandamTVRoomInfoCrawler.java
+56
-5
src/test/java/com/zhiwei/live/test/roomInfo/RoomInfoCrawlerTest.java
+12
-12
No files found.
src/main/java/com/zhiwei/live/danmu/bilibili/BilibiliClient.java
View file @
65bd8bb3
...
@@ -33,7 +33,7 @@ public class BilibiliClient {
...
@@ -33,7 +33,7 @@ public class BilibiliClient {
*/
*/
public
static
void
getDanmu
(
DataCallBack
dataCallBack
,
String
roomId
)
throws
Exception
{
public
static
void
getDanmu
(
DataCallBack
dataCallBack
,
String
roomId
)
throws
Exception
{
//根据房间号获取真实房间号
//根据房间号获取真实房间号
RoomInfo
roomInfo
=
BilibiliRoomInfoCrawler
.
getRoomInfoByRoom
Id
(
roomId
);
RoomInfo
roomInfo
=
BilibiliRoomInfoCrawler
.
getRoomInfoByRoom
Url
(
roomId
);
if
(
Objects
.
nonNull
(
roomInfo
))
{
if
(
Objects
.
nonNull
(
roomInfo
))
{
//获取弹幕服务器地址
//获取弹幕服务器地址
String
url
=
"https://live.bilibili.com/api/player?id=cid:"
+
roomInfo
.
getRoomId
();
String
url
=
"https://live.bilibili.com/api/player?id=cid:"
+
roomInfo
.
getRoomId
();
...
...
src/main/java/com/zhiwei/live/danmu/douyu/DouyuClient.java
View file @
65bd8bb3
...
@@ -33,7 +33,7 @@ public class DouyuClient {
...
@@ -33,7 +33,7 @@ public class DouyuClient {
*/
*/
public
static
void
getDanmu
(
DataCallBack
dataCallBack
,
String
roomId
)
throws
Exception
{
public
static
void
getDanmu
(
DataCallBack
dataCallBack
,
String
roomId
)
throws
Exception
{
// 根据房间号获取真实房间号
// 根据房间号获取真实房间号
RoomInfo
roomInfo
=
DouYuRoomInfoCrawler
.
getRoomInfoByRoom
Id
(
roomId
);
RoomInfo
roomInfo
=
DouYuRoomInfoCrawler
.
getRoomInfoByRoom
Url
(
roomId
);
if
(
Objects
.
nonNull
(
roomInfo
))
{
if
(
Objects
.
nonNull
(
roomInfo
))
{
// 建立弹幕连接
// 建立弹幕连接
Connector
.
asynchronizedTcpConnect
(
new
NioEventLoopGroup
(),
HOST
,
PORT
,
new
IdleStateHandler
(
0
,
30
,
45
),
Connector
.
asynchronizedTcpConnect
(
new
NioEventLoopGroup
(),
HOST
,
PORT
,
new
IdleStateHandler
(
0
,
30
,
45
),
...
...
src/main/java/com/zhiwei/live/roominfo/BilibiliRoomInfoCrawler.java
View file @
65bd8bb3
...
@@ -28,9 +28,8 @@ public class BilibiliRoomInfoCrawler {
...
@@ -28,9 +28,8 @@ public class BilibiliRoomInfoCrawler {
* @return
* @return
* @throws Exception
* @throws Exception
*/
*/
public
static
RoomInfo
getRoomInfoByRoomId
(
String
roomId
)
throws
Exception
{
public
static
RoomInfo
getRoomInfoByRoomUrl
(
String
roomUrl
)
throws
Exception
{
String
url
=
"https://live.bilibili.com/"
+
roomId
;
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
)).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
//判断页面中是否包含房间信息
//判断页面中是否包含房间信息
if
(
htmlBody
.
contains
(
"window.__NEPTUNE_IS_MY_WAIFU__="
))
{
if
(
htmlBody
.
contains
(
"window.__NEPTUNE_IS_MY_WAIFU__="
))
{
...
@@ -46,17 +45,17 @@ public class BilibiliRoomInfoCrawler {
...
@@ -46,17 +45,17 @@ public class BilibiliRoomInfoCrawler {
Integer
person_num
=
data
.
getIntValue
(
"online"
);
Integer
person_num
=
data
.
getIntValue
(
"online"
);
String
roomname
=
data
.
getString
(
"title"
);
String
roomname
=
data
.
getString
(
"title"
);
Integer
room_id
=
data
.
getInteger
(
"room_id"
);
Integer
room_id
=
data
.
getInteger
(
"room_id"
);
String
roomId
s
=
room_id
!=
null
?
room_id
.
toString
():
null
;
String
roomId
=
room_id
!=
null
?
room_id
.
toString
():
null
;
String
username
=
null
;
String
username
=
null
;
//通过房间id获取用户信息
//通过房间id获取用户信息
String
roomUrl
=
"https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="
+
room_id
;
roomUrl
=
"https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="
+
room_id
;
String
roomBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
)).
body
().
string
();
String
roomBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
)).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
roomBody
))
{
if
(!
StringUtils
.
isBlank
(
roomBody
))
{
JSONObject
roomData
=
JSONObject
.
parseObject
(
roomBody
).
getJSONObject
(
"data"
);
JSONObject
roomData
=
JSONObject
.
parseObject
(
roomBody
).
getJSONObject
(
"data"
);
username
=
roomData
.
getJSONObject
(
"info"
).
getString
(
"uname"
);
username
=
roomData
.
getJSONObject
(
"info"
).
getString
(
"uname"
);
}
}
return
new
RoomInfo
(
PT
,
roomId
s
,
roomname
,
username
,
person_num
);
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
);
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
return
null
;
...
@@ -68,9 +67,8 @@ public class BilibiliRoomInfoCrawler {
...
@@ -68,9 +67,8 @@ public class BilibiliRoomInfoCrawler {
public
static
RoomInfo
getRoomInfoByRoomIdProxy
(
String
roomId
)
throws
Exception
{
public
static
RoomInfo
getRoomInfoByRoomUrlProxy
(
String
roomUrl
)
throws
Exception
{
String
url
=
"https://live.bilibili.com/"
+
roomId
;
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
//判断页面中是否包含房间信息
//判断页面中是否包含房间信息
if
(
htmlBody
.
contains
(
"window.__NEPTUNE_IS_MY_WAIFU__="
))
{
if
(
htmlBody
.
contains
(
"window.__NEPTUNE_IS_MY_WAIFU__="
))
{
...
@@ -86,17 +84,17 @@ public class BilibiliRoomInfoCrawler {
...
@@ -86,17 +84,17 @@ public class BilibiliRoomInfoCrawler {
Integer
person_num
=
data
.
getIntValue
(
"online"
);
Integer
person_num
=
data
.
getIntValue
(
"online"
);
String
roomname
=
data
.
getString
(
"title"
);
String
roomname
=
data
.
getString
(
"title"
);
Integer
room_id
=
data
.
getInteger
(
"room_id"
);
Integer
room_id
=
data
.
getInteger
(
"room_id"
);
String
roomId
s
=
room_id
!=
null
?
room_id
.
toString
():
null
;
String
roomId
=
room_id
!=
null
?
room_id
.
toString
():
null
;
String
username
=
null
;
String
username
=
null
;
//通过房间id获取用户信息
//通过房间id获取用户信息
String
roomUrl
=
"https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="
+
room_id
;
roomUrl
=
"https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="
+
room_id
;
String
roomBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
String
roomBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
roomBody
))
{
if
(!
StringUtils
.
isBlank
(
roomBody
))
{
JSONObject
roomData
=
JSONObject
.
parseObject
(
roomBody
).
getJSONObject
(
"data"
);
JSONObject
roomData
=
JSONObject
.
parseObject
(
roomBody
).
getJSONObject
(
"data"
);
username
=
roomData
.
getJSONObject
(
"info"
).
getString
(
"uname"
);
username
=
roomData
.
getJSONObject
(
"info"
).
getString
(
"uname"
);
}
}
return
new
RoomInfo
(
PT
,
roomId
s
,
roomname
,
username
,
person_num
);
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
);
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
return
null
;
...
...
src/main/java/com/zhiwei/live/roominfo/DouYuRoomInfoCrawler.java
View file @
65bd8bb3
...
@@ -27,18 +27,26 @@ public class DouYuRoomInfoCrawler {
...
@@ -27,18 +27,26 @@ public class DouYuRoomInfoCrawler {
* @return
* @return
* @throws Exception
* @throws Exception
*/
*/
public
static
RoomInfo
getRoomInfoByRoomId
(
String
roomId
)
throws
Exception
{
public
static
RoomInfo
getRoomInfoByRoomUrl
(
String
roomUrl
)
throws
Exception
{
String
url
=
"http://open.douyucdn.cn/api/RoomApi/room/"
+
roomId
;
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
String
roomBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
)).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
if
(!
StringUtils
.
isBlank
(
roomBody
)
&&
roomBody
.
contains
(
"ROOM.room_id ="
))
{
JSONObject
data
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONObject
(
"data"
);
String
roomId
=
roomBody
.
split
(
"ROOM\\.room_id = "
)[
1
].
split
(
"; "
)[
0
].
trim
();
String
room_name
=
data
.
getString
(
"room_name"
);
String
url
=
"http://open.douyucdn.cn/api/RoomApi/room/"
+
roomId
;
String
user_name
=
data
.
getString
(
"owner_name"
);
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
Integer
hn
=
data
.
getInteger
(
"hn"
);
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
int
online
=
data
.
getInteger
(
"online"
);
JSONObject
data
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONObject
(
"data"
);
return
new
RoomInfo
(
PT
,
roomId
,
room_name
,
user_name
,
hn
);
String
room_name
=
data
.
getString
(
"room_name"
);
String
user_name
=
data
.
getString
(
"owner_name"
);
Integer
hn
=
data
.
getInteger
(
"hn"
);
int
online
=
data
.
getInteger
(
"online"
);
return
new
RoomInfo
(
PT
,
roomId
,
room_name
,
user_name
,
hn
);
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
}
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
html
Body
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
room
Body
);
return
null
;
return
null
;
}
}
}
}
...
@@ -50,18 +58,25 @@ public class DouYuRoomInfoCrawler {
...
@@ -50,18 +58,25 @@ public class DouYuRoomInfoCrawler {
* @return
* @return
* @throws Exception
* @throws Exception
*/
*/
public
static
RoomInfo
getRoomInfoProxyByRoomId
(
String
roomId
)
throws
Exception
{
public
static
RoomInfo
getRoomInfoProxyByRoomUrl
(
String
roomUrl
)
throws
Exception
{
String
url
=
"http://open.douyucdn.cn/api/RoomApi/room/"
+
roomId
;
String
roomBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
roomBody
)
&&
roomBody
.
contains
(
"ROOM.room_id ="
))
{
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
String
roomId
=
roomBody
.
split
(
"ROOM\\.room_id = "
)[
1
].
split
(
"; "
)[
0
].
trim
();
JSONObject
data
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONObject
(
"data"
);
String
url
=
"http://open.douyucdn.cn/api/RoomApi/room/"
+
roomId
;
String
room_name
=
data
.
getString
(
"room_name"
);
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
String
user_name
=
data
.
getString
(
"owner_name"
);
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
Integer
hn
=
data
.
getInteger
(
"hn"
);
JSONObject
data
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONObject
(
"data"
);
int
online
=
data
.
getInteger
(
"online"
);
String
room_name
=
data
.
getString
(
"room_name"
);
return
new
RoomInfo
(
PT
,
roomId
,
room_name
,
user_name
,
hn
);
String
user_name
=
data
.
getString
(
"owner_name"
);
Integer
hn
=
data
.
getInteger
(
"hn"
);
int
online
=
data
.
getInteger
(
"online"
);
return
new
RoomInfo
(
PT
,
roomId
,
room_name
,
user_name
,
hn
);
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
}
}
else
{
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
html
Body
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
room
Body
);
return
null
;
return
null
;
}
}
}
}
...
...
src/main/java/com/zhiwei/live/roominfo/HuYaRoomInfoCrawler.java
View file @
65bd8bb3
...
@@ -24,9 +24,8 @@ public class HuYaRoomInfoCrawler {
...
@@ -24,9 +24,8 @@ public class HuYaRoomInfoCrawler {
* @return
* @return
* @throws Exception
* @throws Exception
*/
*/
public
static
RoomInfo
getRoomInfoByRoomId
(
String
roomId
)
throws
Exception
{
public
static
RoomInfo
getRoomInfoByRoomUrl
(
String
roomUrl
)
throws
Exception
{
String
url
=
"http://www.huya.com/"
+
roomId
;
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
)).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
Document
document
=
Jsoup
.
parse
(
htmlBody
);
Document
document
=
Jsoup
.
parse
(
htmlBody
);
String
roomName
=
document
.
select
(
"h1#J_roomTitle"
).
text
();
String
roomName
=
document
.
select
(
"h1#J_roomTitle"
).
text
();
...
@@ -39,7 +38,30 @@ public class HuYaRoomInfoCrawler {
...
@@ -39,7 +38,30 @@ public class HuYaRoomInfoCrawler {
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
return
null
;
}
}
}
}
/**
* 根据房间id获取房间信息
*
* @param roomId
* @return
* @throws Exception
*/
public
static
RoomInfo
getRoomInfoProxyByRoomUrl
(
String
roomUrl
)
throws
Exception
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
Document
document
=
Jsoup
.
parse
(
htmlBody
);
String
roomName
=
document
.
select
(
"h1#J_roomTitle"
).
text
();
Integer
liveCount
=
Integer
.
valueOf
(
document
.
select
(
"em#live-count"
).
text
().
replaceAll
(
","
,
""
));
String
username
=
document
.
select
(
"h3.host-name"
).
text
();
String
activityCount
=
document
.
select
(
"div#activityCount"
).
text
();
String
room_id
=
document
.
select
(
"span.host-rid"
).
text
();
return
new
RoomInfo
(
PT
,
room_id
,
roomName
,
username
,
liveCount
);
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
}
}
}
}
src/main/java/com/zhiwei/live/roominfo/PandamTVRoomInfoCrawler.java
View file @
65bd8bb3
...
@@ -27,9 +27,8 @@ public class PandamTVRoomInfoCrawler {
...
@@ -27,9 +27,8 @@ public class PandamTVRoomInfoCrawler {
* @return
* @return
* @throws Exception
* @throws Exception
*/
*/
public
static
RoomInfo
getRoomInfoByRoomId
(
String
roomId
)
throws
Exception
{
public
static
RoomInfo
getRoomInfoByRoomUrl
(
String
roomUrl
)
throws
Exception
{
String
url
=
"https://www.panda.tv/"
+
roomId
;
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
)).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
//判断页面中是否包含房间信息,此为pc端直播
//判断页面中是否包含房间信息,此为pc端直播
if
(
htmlBody
.
contains
(
"window._config_roominfo = "
))
{
if
(
htmlBody
.
contains
(
"window._config_roominfo = "
))
{
...
@@ -39,11 +38,11 @@ public class PandamTVRoomInfoCrawler {
...
@@ -39,11 +38,11 @@ public class PandamTVRoomInfoCrawler {
htmlBody
=
ZhiWeiTools
.
delHTMLTag
(
htmlBody
);
htmlBody
=
ZhiWeiTools
.
delHTMLTag
(
htmlBody
);
htmlBody
=
htmlBody
.
replaceAll
(
"\\\\"
,
""
).
replaceAll
(
"'"
,
"\""
)
htmlBody
=
htmlBody
.
replaceAll
(
"\\\\"
,
""
).
replaceAll
(
"'"
,
"\""
)
.
replaceAll
(
"\"param\":\""
,
"\"param\":"
).
replaceAll
(
"}\","
,
"},"
);
//
.
replaceAll
(
"\"param\":\""
,
"\"param\":"
).
replaceAll
(
"}\","
,
"},"
);
//
//解析json数据
//解析json数据
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
callbackParam
=
json
.
getJSONObject
(
"callbackParam"
);
JSONObject
callbackParam
=
json
.
getJSONObject
(
"callbackParam"
);
Integer
person_num
=
callbackParam
.
getJSONObject
(
"param"
).
getIntValue
(
"person_num"
);
Integer
person_num
=
callbackParam
.
getJSONObject
(
"param"
).
getIntValue
(
"person_num"
);
String
roomId
=
callbackParam
.
getJSONObject
(
"param"
).
getString
(
"roomid"
);
JSONObject
roominfo
=
json
.
getJSONObject
(
"roominfo"
);
JSONObject
roominfo
=
json
.
getJSONObject
(
"roominfo"
);
String
roomname
=
roominfo
.
getString
(
"name"
);
String
roomname
=
roominfo
.
getString
(
"name"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
...
@@ -58,11 +57,11 @@ public class PandamTVRoomInfoCrawler {
...
@@ -58,11 +57,11 @@ public class PandamTVRoomInfoCrawler {
htmlBody
=
ZhiWeiTools
.
decodeUnicode
(
htmlBody
);
htmlBody
=
ZhiWeiTools
.
decodeUnicode
(
htmlBody
);
htmlBody
=
ZhiWeiTools
.
delHTMLTag
(
htmlBody
);
htmlBody
=
ZhiWeiTools
.
delHTMLTag
(
htmlBody
);
htmlBody
=
htmlBody
.
replaceAll
(
"\\\\"
,
""
).
replaceAll
(
"'"
,
"\""
);
htmlBody
=
htmlBody
.
replaceAll
(
"\\\\"
,
""
).
replaceAll
(
"'"
,
"\""
);
//解析json数据
//解析json数据
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
roominfo
=
json
.
getJSONObject
(
"roominfo"
);
JSONObject
roominfo
=
json
.
getJSONObject
(
"roominfo"
);
Integer
person_num
=
roominfo
.
getIntValue
(
"personnum"
);
Integer
person_num
=
roominfo
.
getIntValue
(
"personnum"
);
String
roomId
=
roominfo
.
getString
(
"roomid"
);
String
roomname
=
roominfo
.
getString
(
"name"
);
String
roomname
=
roominfo
.
getString
(
"name"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
String
username
=
hostinfo
.
getString
(
"nickName"
);
String
username
=
hostinfo
.
getString
(
"nickName"
);
...
@@ -77,5 +76,57 @@ public class PandamTVRoomInfoCrawler {
...
@@ -77,5 +76,57 @@ public class PandamTVRoomInfoCrawler {
}
}
/**
* 根据房间id获取房间信息
* @param roomId
* @return
* @throws Exception
*/
public
static
RoomInfo
getRoomInfoProxyByRoomUrl
(
String
roomUrl
)
throws
Exception
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
roomUrl
),
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
//判断页面中是否包含房间信息,此为pc端直播
if
(
htmlBody
.
contains
(
"window._config_roominfo = "
))
{
//通过截取获取直播间信息字段,将截取的字段处理为json格式方便解析
htmlBody
=
htmlBody
.
split
(
"window._config_roominfo = "
)[
1
].
split
(
"} };"
)[
0
]+
"} }"
;
htmlBody
=
ZhiWeiTools
.
decodeUnicode
(
htmlBody
);
htmlBody
=
ZhiWeiTools
.
delHTMLTag
(
htmlBody
);
htmlBody
=
htmlBody
.
replaceAll
(
"\\\\"
,
""
).
replaceAll
(
"'"
,
"\""
)
.
replaceAll
(
"\"param\":\""
,
"\"param\":"
).
replaceAll
(
"}\","
,
"},"
);
//
//解析json数据
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
callbackParam
=
json
.
getJSONObject
(
"callbackParam"
);
Integer
person_num
=
callbackParam
.
getJSONObject
(
"param"
).
getIntValue
(
"person_num"
);
String
roomId
=
callbackParam
.
getJSONObject
(
"param"
).
getString
(
"roomid"
);
JSONObject
roominfo
=
json
.
getJSONObject
(
"roominfo"
);
String
roomname
=
roominfo
.
getString
(
"name"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
String
username
=
hostinfo
.
getString
(
"name"
);
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
);
}
//判断页面中是否包含房间信息,此为使用手机端直播
else
if
(
htmlBody
.
contains
(
"window.HOSTINFO="
)){
//通过截取获取直播间信息字段,将截取的字段处理为json格式方便解析
htmlBody
=
htmlBody
.
split
(
"window.HOSTINFO="
)[
1
].
split
(
";</script>"
)[
0
];
htmlBody
=
ZhiWeiTools
.
decodeUnicode
(
htmlBody
);
htmlBody
=
ZhiWeiTools
.
delHTMLTag
(
htmlBody
);
htmlBody
=
htmlBody
.
replaceAll
(
"\\\\"
,
""
).
replaceAll
(
"'"
,
"\""
);
//解析json数据
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
roominfo
=
json
.
getJSONObject
(
"roominfo"
);
Integer
person_num
=
roominfo
.
getIntValue
(
"personnum"
);
String
roomId
=
roominfo
.
getString
(
"roomid"
);
String
roomname
=
roominfo
.
getString
(
"name"
);
JSONObject
hostinfo
=
json
.
getJSONObject
(
"hostinfo"
);
String
username
=
hostinfo
.
getString
(
"nickName"
);
return
new
RoomInfo
(
PT
,
roomId
,
roomname
,
username
,
person_num
);
}
else
{
logger
.
info
(
"此次采集页面中不包含房间信息字段, 此次页面信息为:{}"
,
htmlBody
);
return
null
;
}
}
return
null
;
}
}
}
src/test/java/com/zhiwei/live/test/roomInfo/RoomInfoCrawlerTest.java
View file @
65bd8bb3
...
@@ -19,10 +19,10 @@ public class RoomInfoCrawlerTest {
...
@@ -19,10 +19,10 @@ public class RoomInfoCrawlerTest {
}
}
@Test
@Test
public
void
getBilibiliRoomInfoByRoom
Id
()
{
public
void
getBilibiliRoomInfoByRoom
Url
()
{
String
room
Id
=
"24
"
;
String
room
Url
=
"https://live.bilibili.com/483
"
;
try
{
try
{
RoomInfo
roomInfo
=
BilibiliRoomInfoCrawler
.
getRoomInfoByRoom
Id
(
roomId
);
RoomInfo
roomInfo
=
BilibiliRoomInfoCrawler
.
getRoomInfoByRoom
Url
(
roomUrl
);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
...
@@ -31,10 +31,10 @@ public class RoomInfoCrawlerTest {
...
@@ -31,10 +31,10 @@ public class RoomInfoCrawlerTest {
@Test
@Test
public
void
getDouyuRoomInfoByRoom
Id
()
{
public
void
getDouyuRoomInfoByRoom
Url
()
{
String
room
Id
=
"5723238
"
;
String
room
Url
=
"https://www.douyu.com/topic/LEC
"
;
try
{
try
{
RoomInfo
roomInfo
=
DouYuRoomInfoCrawler
.
getRoomInfoByRoom
Id
(
roomId
);
RoomInfo
roomInfo
=
DouYuRoomInfoCrawler
.
getRoomInfoByRoom
Url
(
roomUrl
);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
...
@@ -43,10 +43,10 @@ public class RoomInfoCrawlerTest {
...
@@ -43,10 +43,10 @@ public class RoomInfoCrawlerTest {
@Test
@Test
public
void
getHuYaRoomInfoByRoom
Id
()
{
public
void
getHuYaRoomInfoByRoom
Url
()
{
String
room
Id
=
"blizzardgame1
"
;
String
room
Url
=
"https://www.huya.com/baishaling
"
;
try
{
try
{
RoomInfo
roomInfo
=
HuYaRoomInfoCrawler
.
getRoomInfoByRoom
Id
(
roomId
);
RoomInfo
roomInfo
=
HuYaRoomInfoCrawler
.
getRoomInfoByRoom
Url
(
roomUrl
);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
...
@@ -54,10 +54,10 @@ public class RoomInfoCrawlerTest {
...
@@ -54,10 +54,10 @@ public class RoomInfoCrawlerTest {
}
}
@Test
@Test
public
void
getPandamTVRoomInfoByRoom
Id
()
{
public
void
getPandamTVRoomInfoByRoom
Url
()
{
String
room
Id
=
"1564821
"
;
String
room
Url
=
"https://www.panda.tv/337852
"
;
try
{
try
{
RoomInfo
roomInfo
=
PandamTVRoomInfoCrawler
.
getRoomInfoByRoom
Id
(
roomId
);
RoomInfo
roomInfo
=
PandamTVRoomInfoCrawler
.
getRoomInfoByRoom
Url
(
roomUrl
);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
System
.
out
.
println
(
"房间信息:::"
+
roomInfo
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment