Commit 2a8c7a4d by zhiwei

斗鱼、虎牙、熊猫TV数据采集

parent 7c521742
package com.zhiwei.live.bean; package com.zhiwei.live.bean;
public class RoomInfo { public class RoomInfo {
String pt; //平台类型 String pt; // 平台类型
String roomId; //房间号 String roomId; // 房间号
String nickName; //主播昵称 String nickName; // 主播昵称
String roomName; //房间名称 String roomName; // 房间名称
Integer hotNum; //直播间热度 Integer hotNum; // 直播间热度
Integer fans; // 订阅数
public RoomInfo(){}
public RoomInfo() {
}
public RoomInfo(String pt, String roomId, String nickName, String roomName, Integer hotNum){
public RoomInfo(String pt, String roomId, String nickName, String roomName, Integer hotNum, Integer fans) {
this.pt = pt; this.pt = pt;
this.roomId = roomId; this.roomId = roomId;
this.nickName = nickName; this.nickName = nickName;
this.roomName = roomName; this.roomName = roomName;
this.hotNum = hotNum; this.hotNum = hotNum;
this.fans = fans;
} }
@Override @Override
public String toString() { public String toString() {
return "new RoomInfo[" return "new RoomInfo[" + "pt = " + pt + ", roomId = " + roomId + ", roomName = " + roomName + ", nickName = "
+ "pt = " + pt + nickName + ", hotNum = " + hotNum + ", fans = " + fans + "]";
+ ", roomId = " + roomId }
+ ", roomName = " + roomName
+ ", nickName = " + nickName public Integer getFans() {
+ ", hotNum = " + hotNum return fans;
+ "]"; }
public void setHotNum(Integer hotNum) {
this.hotNum = hotNum;
}
public void setFans(Integer fans) {
this.fans = fans;
} }
public String getPt() { public String getPt() {
...@@ -75,7 +83,5 @@ public class RoomInfo { ...@@ -75,7 +83,5 @@ public class RoomInfo {
public void setHotNum(int hotNum) { public void setHotNum(int hotNum) {
this.hotNum = hotNum; this.hotNum = hotNum;
} }
} }
...@@ -17,6 +17,8 @@ public class BilibiliMessage { ...@@ -17,6 +17,8 @@ public class BilibiliMessage {
String content; //弹幕内容 String content; //弹幕内容
String room_id; //房间id
public BilibiliMessage(JSONObject json) throws Exception { public BilibiliMessage(JSONObject json) throws Exception {
constructJson(json); constructJson(json);
} }
...@@ -26,6 +28,7 @@ public class BilibiliMessage { ...@@ -26,6 +28,7 @@ public class BilibiliMessage {
private void constructJson(JSONObject json) throws Exception{ private void constructJson(JSONObject json) throws Exception{
try { try {
System.out.println(json);
JSONArray jsonArray = json.getJSONArray("info"); JSONArray jsonArray = json.getJSONArray("info");
messageType = json.getString("cmd"); messageType = json.getString("cmd");
user_id = jsonArray.getJSONArray(2).getString(0); user_id = jsonArray.getJSONArray(2).getString(0);
...@@ -43,11 +46,12 @@ public class BilibiliMessage { ...@@ -43,11 +46,12 @@ public class BilibiliMessage {
@Override @Override
public String toString() { public String toString() {
return "new BilibiliMessage[" return "new BilibiliMessage["
+ "user_id = " + user_id + " user_id = " + user_id
+ ", nickName = " + nickName + ", nickName = " + nickName
+ ", messageType = " + messageType + ", messageType = " + messageType
+ ", time = " + time + ", time = " + time
+ ", content = " + content + ", content = " + content
+ ", room_id = " + room_id
+ "]"; + "]";
} }
...@@ -91,5 +95,13 @@ public class BilibiliMessage { ...@@ -91,5 +95,13 @@ public class BilibiliMessage {
this.content = content; this.content = content;
} }
public String getRoom_id() {
return room_id;
}
public void setRoom_id(String room_id) {
this.room_id = room_id;
}
} }
...@@ -71,6 +71,8 @@ public class BilibiliMessageHandler extends ChannelInboundHandlerAdapter{ ...@@ -71,6 +71,8 @@ public class BilibiliMessageHandler extends ChannelInboundHandlerAdapter{
while(matcher.find()) { while(matcher.find()) {
JSONObject dataJson = JSONObject.parseObject(matcher.group()); JSONObject dataJson = JSONObject.parseObject(matcher.group());
BilibiliMessage bilibiliMessage = new BilibiliMessage(dataJson); BilibiliMessage bilibiliMessage = new BilibiliMessage(dataJson);
bilibiliMessage.setRoom_id(roomId);
dataCallBack.onData(bilibiliMessage); dataCallBack.onData(bilibiliMessage);
// System.out.println(bilibiliMessage); // System.out.println(bilibiliMessage);
} }
......
...@@ -42,7 +42,7 @@ public class DouYuMessage { ...@@ -42,7 +42,7 @@ public class DouYuMessage {
@Override @Override
public String toString() { public String toString() {
return "new BilibiliMessage[" return "new DouYuMessage["
+ "user_id = " + user_id + "user_id = " + user_id
+ ", nickName = " + nickName + ", nickName = " + nickName
+ ", time = " + time + ", time = " + time
......
...@@ -45,17 +45,18 @@ public class BilibiliRoomInfoCrawler { ...@@ -45,17 +45,18 @@ public class BilibiliRoomInfoCrawler {
Integer person_num = data.getIntValue("online"); Integer person_num = data.getIntValue("online");
String roomname = data.getString("title"); String roomname = data.getString("title");
Integer room_id = data.getInteger("room_id"); Integer room_id = data.getInteger("room_id");
Integer fans = data.getInteger("attention");
String roomId = room_id!=null?room_id.toString():null; String roomId = room_id!=null?room_id.toString():null;
String username = null; String username = null;
//通过房间id获取用户信息 //通过房间id获取用户信息
roomUrl = "https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="+room_id; roomUrl = "https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="+roomId;
String roomBody = httpBoot.syncCall(RequestUtils.wrapGet(roomUrl)).body().string(); String roomBody = httpBoot.syncCall(RequestUtils.wrapGet(roomUrl)).body().string();
if(!StringUtils.isBlank(roomBody)) { if(!StringUtils.isBlank(roomBody)) {
JSONObject roomData = JSONObject.parseObject(roomBody).getJSONObject("data"); JSONObject roomData = JSONObject.parseObject(roomBody).getJSONObject("data");
username = roomData.getJSONObject("info").getString("uname"); username = roomData.getJSONObject("info").getString("uname");
} }
return new RoomInfo(PT, roomId, roomname, username, person_num); return new RoomInfo(PT, roomId, roomname, username, person_num, fans);
}else { }else {
logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody); logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
return null; return null;
...@@ -67,7 +68,7 @@ public class BilibiliRoomInfoCrawler { ...@@ -67,7 +68,7 @@ public class BilibiliRoomInfoCrawler {
public static RoomInfo getRoomInfoByRoomUrlProxy(String roomUrl) throws Exception{ public static RoomInfo getRoomInfoProxyByRoomUrl(String roomUrl) throws Exception{
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(roomUrl), ProxyHolder.NAT_PROXY).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(roomUrl), ProxyHolder.NAT_PROXY).body().string();
if(!StringUtils.isBlank(htmlBody)) { if(!StringUtils.isBlank(htmlBody)) {
//判断页面中是否包含房间信息 //判断页面中是否包含房间信息
...@@ -85,7 +86,7 @@ public class BilibiliRoomInfoCrawler { ...@@ -85,7 +86,7 @@ public class BilibiliRoomInfoCrawler {
String roomname = data.getString("title"); String roomname = data.getString("title");
Integer room_id = data.getInteger("room_id"); Integer room_id = data.getInteger("room_id");
String roomId = room_id!=null?room_id.toString():null; String roomId = room_id!=null?room_id.toString():null;
Integer fans = data.getInteger("attention");
String username = null; String username = null;
//通过房间id获取用户信息 //通过房间id获取用户信息
roomUrl = "https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="+room_id; roomUrl = "https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid="+room_id;
...@@ -94,7 +95,7 @@ public class BilibiliRoomInfoCrawler { ...@@ -94,7 +95,7 @@ public class BilibiliRoomInfoCrawler {
JSONObject roomData = JSONObject.parseObject(roomBody).getJSONObject("data"); JSONObject roomData = JSONObject.parseObject(roomBody).getJSONObject("data");
username = roomData.getJSONObject("info").getString("uname"); username = roomData.getJSONObject("info").getString("uname");
} }
return new RoomInfo(PT, roomId, roomname, username, person_num); return new RoomInfo(PT, roomId, roomname, username, person_num, fans);
}else { }else {
logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody); logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
return null; return null;
......
...@@ -32,6 +32,7 @@ public class DouYuRoomInfoCrawler { ...@@ -32,6 +32,7 @@ public class DouYuRoomInfoCrawler {
String roomBody = httpBoot.syncCall(RequestUtils.wrapGet(roomUrl)).body().string(); String roomBody = httpBoot.syncCall(RequestUtils.wrapGet(roomUrl)).body().string();
if(!StringUtils.isBlank(roomBody) && roomBody.contains("ROOM.room_id =")) { if(!StringUtils.isBlank(roomBody) && roomBody.contains("ROOM.room_id =")) {
String roomId = roomBody.split("ROOM\\.room_id = ")[1].split("; ")[0].trim(); String roomId = roomBody.split("ROOM\\.room_id = ")[1].split("; ")[0].trim();
//获取房间信息
String url = "http://open.douyucdn.cn/api/RoomApi/room/" + roomId; String url = "http://open.douyucdn.cn/api/RoomApi/room/" + roomId;
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
if(!StringUtils.isBlank(htmlBody)) { if(!StringUtils.isBlank(htmlBody)) {
...@@ -40,7 +41,15 @@ public class DouYuRoomInfoCrawler { ...@@ -40,7 +41,15 @@ public class DouYuRoomInfoCrawler {
String user_name = data.getString("owner_name"); String user_name = data.getString("owner_name");
Integer hn = data.getInteger("hn"); Integer hn = data.getInteger("hn");
int online = data.getInteger("online"); int online = data.getInteger("online");
return new RoomInfo(PT, roomId, room_name, user_name , hn); Integer fans = 0;
//获取用户信息
String userUrl = "https://www.douyu.com/swf_api/h5room/" + roomId;
String userBody = httpBoot.syncCall(RequestUtils.wrapGet(userUrl)).body().string();
if(!StringUtils.isBlank(userBody)) {
JSONObject userData = JSONObject.parseObject(userBody).getJSONObject("data");
fans = Integer.valueOf(userData.getString("fans"));
}
return new RoomInfo(PT, roomId, room_name, user_name , hn, fans);
}else { }else {
logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody); logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
return null; return null;
...@@ -71,7 +80,15 @@ public class DouYuRoomInfoCrawler { ...@@ -71,7 +80,15 @@ public class DouYuRoomInfoCrawler {
String user_name = data.getString("owner_name"); String user_name = data.getString("owner_name");
Integer hn = data.getInteger("hn"); Integer hn = data.getInteger("hn");
int online = data.getInteger("online"); int online = data.getInteger("online");
return new RoomInfo(PT, roomId, room_name, user_name , hn); Integer fans = 0;
//获取用户信息
String userUrl = "https://www.douyu.com/swf_api/h5room/" + roomId;
String userBody = httpBoot.syncCall(RequestUtils.wrapGet(userUrl)).body().string();
if(!StringUtils.isBlank(userBody)) {
JSONObject userData = JSONObject.parseObject(userBody).getJSONObject("data");
fans = Integer.valueOf(userData.getString("fans"));
}
return new RoomInfo(PT, roomId, room_name, user_name , hn, fans);
}else { }else {
logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody); logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
return null; return null;
......
...@@ -32,8 +32,14 @@ public class HuYaRoomInfoCrawler { ...@@ -32,8 +32,14 @@ public class HuYaRoomInfoCrawler {
Integer liveCount = Integer.valueOf(document.select("em#live-count").text().replaceAll(",", "")); Integer liveCount = Integer.valueOf(document.select("em#live-count").text().replaceAll(",", ""));
String username = document.select("h3.host-name").text(); String username = document.select("h3.host-name").text();
String activityCount = document.select("div#activityCount").text(); String activityCount = document.select("div#activityCount").text();
Integer fans = 0;
try {
fans = Integer.valueOf(activityCount.replaceAll(",", ""));
} catch (Exception e) {
fans = 0;
}
String room_id = document.select("span.host-rid").text(); String room_id = document.select("span.host-rid").text();
return new RoomInfo(PT, room_id, roomName, username, liveCount); return new RoomInfo(PT, room_id, roomName, username, liveCount, fans);
} else { } else {
logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody); logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
return null; return null;
...@@ -58,7 +64,13 @@ public class HuYaRoomInfoCrawler { ...@@ -58,7 +64,13 @@ public class HuYaRoomInfoCrawler {
String username = document.select("h3.host-name").text(); String username = document.select("h3.host-name").text();
String activityCount = document.select("div#activityCount").text(); String activityCount = document.select("div#activityCount").text();
String room_id = document.select("span.host-rid").text(); String room_id = document.select("span.host-rid").text();
return new RoomInfo(PT, room_id, roomName, username, liveCount); Integer fans = 0;
try {
fans = Integer.valueOf(activityCount.replaceAll(",", ""));
} catch (Exception e) {
fans = 0;
}
return new RoomInfo(PT, room_id, roomName, username, liveCount, fans);
} else { } else {
logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody); logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
return null; return null;
......
...@@ -47,8 +47,9 @@ public class PandamTVRoomInfoCrawler { ...@@ -47,8 +47,9 @@ public class PandamTVRoomInfoCrawler {
String roomname = roominfo.getString("name"); String roomname = roominfo.getString("name");
JSONObject hostinfo = json.getJSONObject("hostinfo"); JSONObject hostinfo = json.getJSONObject("hostinfo");
String username = hostinfo.getString("name"); String username = hostinfo.getString("name");
int fans = 0;
return new RoomInfo(PT, roomId, roomname, username, person_num);
return new RoomInfo(PT, roomId, roomname, username, person_num, fans);
} }
//判断页面中是否包含房间信息,此为使用手机端直播 //判断页面中是否包含房间信息,此为使用手机端直播
else if(htmlBody.contains("window.HOSTINFO=")){ else if(htmlBody.contains("window.HOSTINFO=")){
...@@ -65,8 +66,8 @@ public class PandamTVRoomInfoCrawler { ...@@ -65,8 +66,8 @@ public class PandamTVRoomInfoCrawler {
String roomname = roominfo.getString("name"); String roomname = roominfo.getString("name");
JSONObject hostinfo = json.getJSONObject("hostinfo"); JSONObject hostinfo = json.getJSONObject("hostinfo");
String username = hostinfo.getString("nickName"); String username = hostinfo.getString("nickName");
int fans = 0;
return new RoomInfo(PT, roomId, roomname, username, person_num); return new RoomInfo(PT, roomId, roomname, username, person_num, fans);
}else { }else {
logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody); logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
return null; return null;
...@@ -103,7 +104,8 @@ public class PandamTVRoomInfoCrawler { ...@@ -103,7 +104,8 @@ public class PandamTVRoomInfoCrawler {
String roomname = roominfo.getString("name"); String roomname = roominfo.getString("name");
JSONObject hostinfo = json.getJSONObject("hostinfo"); JSONObject hostinfo = json.getJSONObject("hostinfo");
String username = hostinfo.getString("name"); String username = hostinfo.getString("name");
return new RoomInfo(PT, roomId, roomname, username, person_num); int fans = 0;
return new RoomInfo(PT, roomId, roomname, username, person_num, fans);
} }
//判断页面中是否包含房间信息,此为使用手机端直播 //判断页面中是否包含房间信息,此为使用手机端直播
else if(htmlBody.contains("window.HOSTINFO=")){ else if(htmlBody.contains("window.HOSTINFO=")){
...@@ -120,7 +122,8 @@ public class PandamTVRoomInfoCrawler { ...@@ -120,7 +122,8 @@ public class PandamTVRoomInfoCrawler {
String roomname = roominfo.getString("name"); String roomname = roominfo.getString("name");
JSONObject hostinfo = json.getJSONObject("hostinfo"); JSONObject hostinfo = json.getJSONObject("hostinfo");
String username = hostinfo.getString("nickName"); String username = hostinfo.getString("nickName");
return new RoomInfo(PT, roomId, roomname, username, person_num); int fans = 0;
return new RoomInfo(PT, roomId, roomname, username, person_num, fans);
}else { }else {
logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody); logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
return null; return null;
......
package com.zhiwei.live.test.roomInfo; //package com.zhiwei.live.test.roomInfo;
//
import org.junit.jupiter.api.Test; //import org.junit.jupiter.api.Test;
//
import com.zhiwei.common.config.GroupType; //import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory; //import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.live.bean.RoomInfo; //import com.zhiwei.live.bean.RoomInfo;
import com.zhiwei.live.roominfo.BilibiliRoomInfoCrawler; //import com.zhiwei.live.roominfo.BilibiliRoomInfoCrawler;
import com.zhiwei.live.roominfo.DouYuRoomInfoCrawler; //import com.zhiwei.live.roominfo.DouYuRoomInfoCrawler;
import com.zhiwei.live.roominfo.HuYaRoomInfoCrawler; //import com.zhiwei.live.roominfo.HuYaRoomInfoCrawler;
import com.zhiwei.live.roominfo.PandamTVRoomInfoCrawler; //import com.zhiwei.live.roominfo.PandamTVRoomInfoCrawler;
//
public class RoomInfoCrawlerTest { //public class RoomInfoCrawlerTest {
private static final String registry = "zookeeper://192.168.0.36:2181"; // private static final String registry = "zookeeper://192.168.0.36:2181";
private static final String group = "local"; // private static final String group = "local";
//
static { // static {
ProxyFactory.init(registry, group, GroupType.PROVIDER); // ProxyFactory.init(registry, group, GroupType.PROVIDER);
} // }
//
@Test // @Test
public void getBilibiliRoomInfoByRoomUrl() { // public void getBilibiliRoomInfoByRoomUrl() {
String roomUrl = "https://live.bilibili.com/483"; // String roomUrl = "https://live.bilibili.com/483";
try { // try {
RoomInfo roomInfo = BilibiliRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl); // RoomInfo roomInfo = BilibiliRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl);
System.out.println("房间信息:::"+ roomInfo); // System.out.println("房间信息:::"+ roomInfo);
} catch (Exception e) { // } catch (Exception e) {
e.printStackTrace(); // e.printStackTrace();
} // }
} // }
//
//
@Test // @Test
public void getDouyuRoomInfoByRoomUrl() { // public void getDouyuRoomInfoByRoomUrl() {
String roomUrl = "https://www.douyu.com/4372875"; // String roomUrl = "https://www.douyu.com/4372875";
try { // try {
RoomInfo roomInfo = DouYuRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl); // RoomInfo roomInfo = DouYuRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl);
System.out.println("房间信息:::"+ roomInfo); // System.out.println("房间信息:::"+ roomInfo);
} catch (Exception e) { // } catch (Exception e) {
e.printStackTrace(); // e.printStackTrace();
} // }
} // }
//
//
@Test // @Test
public void getHuYaRoomInfoByRoomUrl() { // public void getHuYaRoomInfoByRoomUrl() {
String roomUrl = "https://www.huya.com/baishaling"; // String roomUrl = "https://www.huya.com/baishaling";
try { // try {
RoomInfo roomInfo = HuYaRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl); // RoomInfo roomInfo = HuYaRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl);
System.out.println("房间信息:::"+ roomInfo); // System.out.println("房间信息:::"+ roomInfo);
} catch (Exception e) { // } catch (Exception e) {
e.printStackTrace(); // e.printStackTrace();
} // }
} // }
//
@Test // @Test
public void getPandamTVRoomInfoByRoomUrl() { // public void getPandamTVRoomInfoByRoomUrl() {
String roomUrl = "https://www.panda.tv/337852"; // String roomUrl = "https://www.panda.tv/337852";
try { // try {
RoomInfo roomInfo = PandamTVRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl); // RoomInfo roomInfo = PandamTVRoomInfoCrawler.getRoomInfoByRoomUrl(roomUrl);
System.out.println("房间信息:::"+ roomInfo); // System.out.println("房间信息:::"+ roomInfo);
} catch (Exception e) { // } catch (Exception e) {
e.printStackTrace(); // e.printStackTrace();
} // }
} // }
} //}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment