Commit 5bbec004 by zhiwei

提交根据房间id查询房间信息

parent eb80b6f4
package com.zhiwei.live.bean;
/**
 * Plain data holder describing one live-streaming room.
 *
 * <p>All fields are nullable; a bean created through the no-arg constructor has every
 * field unset until the corresponding setter is called.
 */
public class RoomInfo {
    String pt; // platform type
    String roomId; // room number
    String nickName; // streamer (anchor) nickname
    String roomName; // room title
    Integer hotNum; // room popularity

    /** Creates an empty bean with every field unset. */
    public RoomInfo() {}

    /**
     * Creates a fully populated bean.
     *
     * @param pt       platform type
     * @param roomId   room number
     * @param nickName streamer nickname
     * @param roomName room title
     * @param hotNum   room popularity, may be {@code null}
     */
    public RoomInfo(String pt, String roomId, String nickName, String roomName, Integer hotNum) {
        this.pt = pt;
        this.roomId = roomId;
        this.nickName = nickName;
        this.roomName = roomName;
        this.hotNum = hotNum;
    }

    @Override
    public String toString() {
        return "new RoomInfo["
                + "pt = " + pt
                + ", roomId = " + roomId
                + ", roomName = " + roomName
                + ", nickName = " + nickName
                + ", hotNum = " + hotNum
                + "]";
    }

    public String getPt() {
        return pt;
    }

    public String getRoomId() {
        return roomId;
    }

    public String getNickName() {
        return nickName;
    }

    public String getRoomName() {
        return roomName;
    }

    /**
     * Returns the room popularity.
     *
     * @return the stored popularity, or {@code 0} when none has been set
     */
    public int getHotNum() {
        // The field is a nullable Integer but the return type is primitive:
        // unboxing a null would throw NullPointerException, so fall back to 0.
        return hotNum == null ? 0 : hotNum;
    }

    public void setPt(String pt) {
        this.pt = pt;
    }

    public void setRoomId(String roomId) {
        this.roomId = roomId;
    }

    public void setNickName(String nickName) {
        this.nickName = nickName;
    }

    public void setRoomName(String roomName) {
        this.roomName = roomName;
    }

    public void setHotNum(int hotNum) {
        this.hotNum = hotNum;
    }
}
package com.zhiwei.live.roominfo;
import java.io.IOException;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.live.bean.RoomInfo;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Crawler for bilibili live-room information.
 *
 * @author qq859
 */
public class BilibiliRoomInfoCrawler {
    private static final HttpBoot httpBoot = new HttpBoot();
    private static final Logger logger = LogManager.getLogger(BilibiliRoomInfoCrawler.class);
    private static final String PT = "B站";

    /**
     * Fetches the room page for the given room id and extracts the room information
     * from the state object embedded in the page; the streamer name is then read from
     * the anchor-info API using the {@code room_id} found in that state.
     *
     * @param roomId room id as used in the live.bilibili.com URL
     * @return the room information, or {@code null} when the response body is blank or
     *         the page does not carry the expected room data
     * @throws Exception if a request or the JSON parsing fails
     */
    public static RoomInfo getRoomInfoByRoomId(String roomId) throws Exception {
        String url = "https://live.bilibili.com/" + roomId;
        String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_PROXY).body().string();
        if (StringUtils.isBlank(htmlBody)) {
            return null;
        }
        // Check whether the page embeds the room state at all.
        if (!htmlBody.contains("window.__NEPTUNE_IS_MY_WAIFU__=")) {
            logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
            return null;
        }
        // The guard above checks a shorter marker than the one split on here, so the
        // split may yield fewer than two parts; treat that as "no room info" instead
        // of failing with ArrayIndexOutOfBoundsException.
        String[] afterMarker = htmlBody.split("<script>window.__NEPTUNE_IS_MY_WAIFU__=");
        if (afterMarker.length < 2) {
            logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
            return null;
        }
        // Cut out the embedded state and massage it into parseable JSON.
        String stateJson = afterMarker[1].split("</script>")[0];
        stateJson = ZhiWeiTools.decodeUnicode(stateJson);
        stateJson = ZhiWeiTools.delHTMLTag(stateJson);
        stateJson = stateJson.replaceAll("\\\\", "").replaceAll("'", "\"");

        // Parse the JSON and walk down to the room data, tolerating missing levels.
        JSONObject pageState = JSONObject.parseObject(stateJson);
        JSONObject baseInfo = pageState == null ? null : pageState.getJSONObject("baseInfoRes");
        JSONObject data = baseInfo == null ? null : baseInfo.getJSONObject("data");
        if (data == null) {
            logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", stateJson);
            return null;
        }
        Integer personNum = data.getIntValue("online");
        String roomName = data.getString("title");
        Integer realRoomId = data.getInteger("room_id");
        String roomIdText = realRoomId != null ? realRoomId.toString() : null;

        // Look up the streamer name through the anchor API; skipped when the page gave
        // no room_id, since the request would otherwise be sent with "roomid=null".
        String userName = null;
        if (realRoomId != null) {
            String roomUrl = "https://api.live.bilibili.com/live_user/v1/UserInfo/get_anchor_in_room?roomid=" + realRoomId;
            String roomBody = httpBoot.syncCall(RequestUtils.wrapGet(roomUrl)).body().string();
            if (!StringUtils.isBlank(roomBody)) {
                JSONObject anchorRoot = JSONObject.parseObject(roomBody);
                JSONObject roomData = anchorRoot == null ? null : anchorRoot.getJSONObject("data");
                JSONObject info = roomData == null ? null : roomData.getJSONObject("info");
                if (info != null) {
                    userName = info.getString("uname");
                }
            }
        }
        // RoomInfo's constructor order is (pt, roomId, nickName, roomName, hotNum):
        // the streamer name must come before the room title.
        return new RoomInfo(PT, roomIdText, userName, roomName, personNum);
    }
}
package com.zhiwei.live.roominfo;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.live.bean.RoomInfo;
/**
 * Crawler for Douyu live-room information.
 *
 * @author qq859
 */
public class DouYuRoomInfoCrawler {
    private static final HttpBoot httpBoot = new HttpBoot();
    private static final Logger logger = LogManager.getLogger(DouYuRoomInfoCrawler.class);
    private static final String PT = "斗鱼";

    /**
     * Queries the Douyu room API for the given room id and maps the JSON answer to a
     * {@link RoomInfo}.
     *
     * @param roomId room id appended to the room API URL
     * @return the room information, or {@code null} when the response body is blank or
     *         its {@code data} member is not a JSON object
     * @throws Exception if the request or the JSON parsing fails
     */
    public static RoomInfo getRoomInfoByRoomId(String roomId) throws Exception {
        String url = "http://open.douyucdn.cn/api/RoomApi/room/" + roomId;
        String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_PROXY).body().string();
        if (StringUtils.isBlank(htmlBody)) {
            logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
            return null;
        }
        JSONObject root = JSONObject.parseObject(htmlBody);
        // "data" is only usable when it is a JSON object.
        // NOTE(review): error answers presumably carry a plain string here — confirm against the API.
        Object rawData = root == null ? null : root.get("data");
        if (!(rawData instanceof JSONObject)) {
            logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
            return null;
        }
        JSONObject data = (JSONObject) rawData;
        String roomName = data.getString("room_name");
        String userName = data.getString("owner_name");
        Integer hn = data.getInteger("hn");
        // The former unused local "int online = data.getInteger("online")" was removed:
        // it threw NullPointerException on unboxing whenever the key was absent.
        // RoomInfo's constructor order is (pt, roomId, nickName, roomName, hotNum):
        // the streamer name must come before the room title.
        return new RoomInfo(PT, roomId, userName, roomName, hn);
    }
}
package com.zhiwei.live.roominfo;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.live.bean.RoomInfo;
/**
 * Crawler for Huya live-room information.
 */
public class HuYaRoomInfoCrawler {
    private static final HttpBoot httpBoot = new HttpBoot();
    private static final Logger logger = LogManager.getLogger(HuYaRoomInfoCrawler.class);
    private static final String PT = "虎牙";

    /**
     * Fetches the Huya room page for the given room id and reads the room information
     * from its HTML elements.
     *
     * @param roomId room id as used in the www.huya.com URL
     * @return the room information, or {@code null} when the response body is blank or
     *         the page has no live-count element text
     * @throws Exception if the request fails or the live count is not a number
     */
    public static RoomInfo getRoomInfoByRoomId(String roomId) throws Exception {
        String url = "http://www.huya.com/" + roomId;
        String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_PROXY).body().string();
        if (StringUtils.isBlank(htmlBody)) {
            logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
            return null;
        }
        Document document = Jsoup.parse(htmlBody);
        // select(...).text() yields an empty string when nothing matches, and
        // Integer.valueOf("") would throw NumberFormatException; treat a missing
        // count as a page without room information instead.
        String liveCountText = document.select("em#live-count").text().replaceAll(",", "");
        if (StringUtils.isBlank(liveCountText)) {
            logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
            return null;
        }
        Integer liveCount = Integer.valueOf(liveCountText);
        String roomName = document.select("h1#J_roomTitle").text();
        String userName = document.select("h3.host-name").text();
        String realRoomId = document.select("span.host-rid").text();
        // RoomInfo's constructor order is (pt, roomId, nickName, roomName, hotNum):
        // the streamer name must come before the room title.
        return new RoomInfo(PT, realRoomId, userName, roomName, liveCount);
    }
}
package com.zhiwei.live.roominfo;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.live.bean.RoomInfo;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Crawler for Panda TV live-room information.
 *
 * @author qq859
 */
public class PandamTVRoomInfoCrawler {
    private static final HttpBoot httpBoot = new HttpBoot();
    private static final Logger logger = LogManager.getLogger(PandamTVRoomInfoCrawler.class);
    private static final String PT = "熊猫TV";

    /**
     * Fetches the Panda TV room page for the given room id and extracts the room
     * information from whichever of the two embedded state objects is present:
     * {@code window._config_roominfo} (PC stream) or {@code window.HOSTINFO}
     * (mobile stream).
     *
     * @param roomId room id as used in the www.panda.tv URL
     * @return the room information, or {@code null} when the response body is blank or
     *         the page does not carry the expected room data
     * @throws Exception if the request or the JSON parsing fails
     */
    public static RoomInfo getRoomInfoByRoomId(String roomId) throws Exception {
        String url = "https://www.panda.tv/" + roomId;
        String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_PROXY).body().string();
        if (StringUtils.isBlank(htmlBody)) {
            return null;
        }
        // Room state embedded for a PC stream.
        if (htmlBody.contains("window._config_roominfo = ")) {
            // Cut out the embedded object and massage it into parseable JSON.
            String stateJson = htmlBody.split("window._config_roominfo = ")[1].split("} };")[0] + "} }";
            // "param" is embedded as a quoted string; unquote it so it parses as an object.
            stateJson = cleanEmbeddedJson(stateJson)
                    .replaceAll("\"param\":\"", "\"param\":").replaceAll("}\",", "},");
            JSONObject json = JSONObject.parseObject(stateJson);
            JSONObject callbackParam = json == null ? null : json.getJSONObject("callbackParam");
            JSONObject param = callbackParam == null ? null : callbackParam.getJSONObject("param");
            JSONObject roomInfo = json == null ? null : json.getJSONObject("roominfo");
            JSONObject hostInfo = json == null ? null : json.getJSONObject("hostinfo");
            if (param == null || roomInfo == null || hostInfo == null) {
                logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", stateJson);
                return null;
            }
            Integer personNum = param.getIntValue("person_num");
            String roomName = roomInfo.getString("name");
            String userName = hostInfo.getString("name");
            // RoomInfo's constructor order is (pt, roomId, nickName, roomName, hotNum):
            // the streamer name must come before the room title.
            return new RoomInfo(PT, roomId, userName, roomName, personNum);
        }
        // Room state embedded for a mobile stream.
        if (htmlBody.contains("window.HOSTINFO=")) {
            String stateJson = cleanEmbeddedJson(htmlBody.split("window.HOSTINFO=")[1].split(";</script>")[0]);
            JSONObject json = JSONObject.parseObject(stateJson);
            JSONObject roomInfo = json == null ? null : json.getJSONObject("roominfo");
            JSONObject hostInfo = json == null ? null : json.getJSONObject("hostinfo");
            if (roomInfo == null || hostInfo == null) {
                logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", stateJson);
                return null;
            }
            Integer personNum = roomInfo.getIntValue("personnum");
            String roomName = roomInfo.getString("name");
            String userName = hostInfo.getString("nickName");
            // Same constructor order as above: streamer name first, then room title.
            return new RoomInfo(PT, roomId, userName, roomName, personNum);
        }
        logger.info("此次采集页面中不包含房间信息字段, 此次页面信息为:{}", htmlBody);
        return null;
    }

    /** Decodes unicode escapes, strips HTML tags, drops backslashes and turns single quotes into double quotes. */
    private static String cleanEmbeddedJson(String raw) {
        String cleaned = ZhiWeiTools.decodeUnicode(raw);
        cleaned = ZhiWeiTools.delHTMLTag(cleaned);
        return cleaned.replaceAll("\\\\", "").replaceAll("'", "\"");
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment