Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
live-crawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
live-crawler
Commits
c472b5c4
Commit
c472b5c4
authored
Jan 29, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
根据房间链接获取真实房间id
parent
66faa6d8
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
92 additions
and
19 deletions
+92
-19
src/main/java/com/zhiwei/live/danmu/bilibili/BilibiliClient.java
+50
-12
src/main/java/com/zhiwei/live/danmu/bilibili/BilibiliMessage.java
+4
-4
src/main/java/com/zhiwei/live/roominfo/BilibiliRoomInfoCrawler.java
+38
-3
No files found.
src/main/java/com/zhiwei/live/danmu/bilibili/BilibiliClient.java
View file @
c472b5c4
package
com
.
zhiwei
.
live
.
danmu
.
bilibili
;
package
com
.
zhiwei
.
live
.
danmu
.
bilibili
;
import
java.util.List
;
import
java.util.Objects
;
import
java.util.concurrent.TimeUnit
;
import
java.util.concurrent.TimeUnit
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Document
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.live.bean.RoomInfo
;
import
com.zhiwei.live.danmu.util.Connector
;
import
com.zhiwei.live.danmu.util.Connector
;
import
com.zhiwei.live.roominfo.BilibiliRoomInfoCrawler
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
io.netty.channel.nio.NioEventLoopGroup
;
import
io.netty.channel.nio.NioEventLoopGroup
;
import
io.netty.handler.timeout.IdleStateHandler
;
import
io.netty.handler.timeout.IdleStateHandler
;
...
@@ -17,29 +26,58 @@ import okhttp3.Response;
...
@@ -17,29 +26,58 @@ import okhttp3.Response;
public
class
BilibiliClient
{
public
class
BilibiliClient
{
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
Logger
logger
=
LogManager
.
getLogger
(
BilibiliClient
.
class
);
private
static
final
int
PORT
=
2243
;
public
static
void
main
(
String
[]
args
)
throws
InterruptedException
{
public
static
void
main
(
String
[]
args
)
throws
InterruptedException
{
String
roomUrl
=
"https://live.bilibili.com/439"
;
String
roomId
=
roomUrl
.
replaceAll
(
"https://live.bilibili.com/"
,
""
);
try
{
getDanmu
(
roomId
);
List
<
BilibiliMessage
>
messagesList
=
BilibiliMessageListener
.
messages
;
while
(
true
)
{
try
{
BilibiliMessage
messages
=
messagesList
.
remove
(
0
);
System
.
out
.
println
(
messages
);
}
catch
(
Exception
e
)
{
break
;
}
ZhiWeiTools
.
sleep
(
50
);
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
// String address = "62.234.213.204";
// int port = 2243;
String
roomId
=
"1029"
;
}
Request
request
=
RequestUtils
.
wrapGet
(
"https://live.bilibili.com/api/player?id=cid:"
+
roomId
);
String
host
=
null
;
int
port
=
2243
;
/**
* 根据房间号获取弹幕信息
* @param roomId
* @throws Exception
*/
public
static
void
getDanmu
(
String
roomId
)
throws
Exception
{
//根据房间号获取真实房间号
RoomInfo
roomInfo
=
BilibiliRoomInfoCrawler
.
getRoomInfoByRoomId
(
roomId
);
if
(
Objects
.
nonNull
(
roomInfo
))
{
//获取弹幕服务器地址
String
url
=
"https://live.bilibili.com/api/player?id=cid:"
+
roomInfo
.
getRoomId
();
Request
request
=
RequestUtils
.
wrapGet
(
url
);
String
host
=
null
;
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
Document
doc
=
Jsoup
.
parse
(
response
.
body
().
string
());
Document
doc
=
Jsoup
.
parse
(
response
.
body
().
string
());
String
[]
address
=
doc
.
select
(
"dm_server_list"
).
text
().
split
(
","
);
String
[]
address
=
doc
.
select
(
"dm_server_list"
).
text
().
split
(
","
);
host
=
address
[
0
];
host
=
address
[
0
];
System
.
out
.
println
(
host
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
throw
new
IllegalArgumentException
(
"获取聊天服务器地址失败"
,
e
);
throw
new
IllegalArgumentException
(
"获取聊天服务器地址失败"
,
e
);
}
}
Connector
.
asynchronizedTcpConnect
(
new
NioEventLoopGroup
(),
host
,
port
,
//建立弹幕连接
new
IdleStateHandler
(
0
,
30
,
0
,
TimeUnit
.
SECONDS
),
new
BilibiliMessageHandler
(
"1029"
)).
sync
();
Connector
.
asynchronizedTcpConnect
(
new
NioEventLoopGroup
(),
host
,
PORT
,
new
IdleStateHandler
(
0
,
30
,
0
,
TimeUnit
.
SECONDS
),
new
BilibiliMessageHandler
(
roomInfo
.
getRoomId
())).
sync
();
}
else
{
logger
.
info
(
"获取真实房间号出现问题,请及时检查程序"
);
}
}
}
}
}
src/main/java/com/zhiwei/live/danmu/bilibili/Bilibili
Entity
.java
→
src/main/java/com/zhiwei/live/danmu/bilibili/Bilibili
Message
.java
View file @
c472b5c4
...
@@ -5,7 +5,7 @@ import java.util.Date;
...
@@ -5,7 +5,7 @@ import java.util.Date;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
public
class
Bilibili
Entity
{
public
class
Bilibili
Message
{
String
messageType
;
//弹幕消息类型
String
messageType
;
//弹幕消息类型
...
@@ -17,11 +17,11 @@ public class BilibiliEntity {
...
@@ -17,11 +17,11 @@ public class BilibiliEntity {
String
content
;
//弹幕内容
String
content
;
//弹幕内容
public
Bilibili
Entity
(
JSONObject
json
)
throws
Exception
{
public
Bilibili
Message
(
JSONObject
json
)
throws
Exception
{
constructJson
(
json
);
constructJson
(
json
);
}
}
public
Bilibili
Entity
(){
public
Bilibili
Message
(){
}
}
private
void
constructJson
(
JSONObject
json
)
throws
Exception
{
private
void
constructJson
(
JSONObject
json
)
throws
Exception
{
...
@@ -42,7 +42,7 @@ public class BilibiliEntity {
...
@@ -42,7 +42,7 @@ public class BilibiliEntity {
@Override
@Override
public
String
toString
()
{
public
String
toString
()
{
return
"new
Pandam
Message["
return
"new
Bilibili
Message["
+
"user_id = "
+
user_id
+
"user_id = "
+
user_id
+
", nickName = "
+
nickName
+
", nickName = "
+
nickName
+
", messageType = "
+
messageType
+
", messageType = "
+
messageType
...
...
src/main/java/com/zhiwei/live/roominfo/BilibiliRoomInfoCrawler.java
View file @
c472b5c4
package
com
.
zhiwei
.
live
.
roominfo
;
package
com
.
zhiwei
.
live
.
roominfo
;
import
java.io.IOException
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.apache.logging.log4j.Logger
;
...
@@ -32,7 +30,7 @@ public class BilibiliRoomInfoCrawler {
...
@@ -32,7 +30,7 @@ public class BilibiliRoomInfoCrawler {
*/
*/
public
static
RoomInfo
getRoomInfoByRoomId
(
String
roomId
)
throws
Exception
{
public
static
RoomInfo
getRoomInfoByRoomId
(
String
roomId
)
throws
Exception
{
String
url
=
"https://live.bilibili.com/"
+
roomId
;
String
url
=
"https://live.bilibili.com/"
+
roomId
;
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)
,
ProxyHolder
.
NAT_PROXY
).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
if
(!
StringUtils
.
isBlank
(
htmlBody
))
{
//判断页面中是否包含房间信息
//判断页面中是否包含房间信息
if
(
htmlBody
.
contains
(
"window.__NEPTUNE_IS_MY_WAIFU__="
))
{
if
(
htmlBody
.
contains
(
"window.__NEPTUNE_IS_MY_WAIFU__="
))
{
...
@@ -70,5 +68,42 @@ public class BilibiliRoomInfoCrawler {
...
@@ -70,5 +68,42 @@ public class BilibiliRoomInfoCrawler {
/**
 * Fetches information about a Bilibili live room, sending every request through
 * {@code ProxyHolder.NAT_PROXY}.
 *
 * <p>Steps: download {@code https://live.bilibili.com/<roomId>}, cut out the JSON assigned to
 * {@code window.__NEPTUNE_IS_MY_WAIFU__} inside the page, read the online count, title and
 * room id from {@code baseInfoRes.data}, then query the {@code get_anchor_in_room} API for
 * the anchor's user name.
 *
 * @param roomId room id as it appears in the room URL
 * @return the collected room info; {@code null} when the page is blank or does not carry the
 *         expected room data
 * @throws Exception if a request fails or the embedded payload cannot be parsed as JSON
 */
public static RoomInfo getRoomInfoByRoomIdProxy(String roomId) throws Exception {
    final String marker = "<script>window.__NEPTUNE_IS_MY_WAIFU__=";
    final String scriptEnd = "</script>";

    String url = "https://live.bilibili.com/" + roomId;
    String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_PROXY).body().string();
    if (StringUtils.isBlank(htmlBody)) {
        return null;
    }

    // Look for exactly the marker we cut on. Checking a shorter marker and then splitting on
    // the longer one would fail with an index error when only the shorter one is present.
    int markerAt = htmlBody.indexOf(marker);
    if (markerAt < 0) {
        logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
        return null;
    }

    // Take the text between the marker and the closing script tag (or the end of the page).
    int payloadStart = markerAt + marker.length();
    int payloadEnd = htmlBody.indexOf(scriptEnd, payloadStart);
    String payload = payloadEnd < 0
            ? htmlBody.substring(payloadStart)
            : htmlBody.substring(payloadStart, payloadEnd);

    // Normalise the embedded object so it can be parsed as JSON: decode unicode escapes,
    // strip HTML tags, drop backslashes and turn single quotes into double quotes.
    payload = ZhiWeiTools.decodeUnicode(payload);
    payload = ZhiWeiTools.delHTMLTag(payload);
    payload = payload.replace("\\", "").replace("'", "\"");

    JSONObject root = JSONObject.parseObject(payload);
    JSONObject baseInfo = root == null ? null : root.getJSONObject("baseInfoRes");
    JSONObject data = baseInfo == null ? null : baseInfo.getJSONObject("data");
    if (data == null) {
        // The script was present but did not contain the room data node.
        logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", payload);
        return null;
    }

    Integer onlineCount = data.getIntValue("online");
    String roomName = data.getString("title");
    Integer realRoomId = data.getInteger("room_id");
    String roomIdText = realRoomId != null ? realRoomId.toString() : null;

    // Resolve the anchor's user name from the room id. Without a room id there is nothing to
    // query, so the name stays null instead of requesting "roomid=null".
    String username = null;
    if (realRoomId != null) {
        String anchorUrl = "https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="
                + realRoomId;
        String anchorBody = httpBoot.syncCall(RequestUtils.wrapGet(anchorUrl), ProxyHolder.NAT_PROXY)
                .body().string();
        if (!StringUtils.isBlank(anchorBody)) {
            JSONObject anchorRoot = JSONObject.parseObject(anchorBody);
            JSONObject anchorData = anchorRoot == null ? null : anchorRoot.getJSONObject("data");
            JSONObject anchorInfo = anchorData == null ? null : anchorData.getJSONObject("info");
            if (anchorInfo != null) {
                username = anchorInfo.getString("uname");
            }
        }
    }

    return new RoomInfo(PT, roomIdText, roomName, username, onlineCount);
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment