Commit 800b4216 by 朽木不可雕也

backup

parent dd512bec
package com.zhiweidata.automatictest.barragecrawlerserver.createtasks; package com.zhiweidata.automatictest.barragecrawlerserver.createtasks;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.zhiweidata.automatictest.barragecrawlerserver.exception.BarrageHomePageException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.StatusLine;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;
import org.jetbrains.annotations.NotNull;
import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.seimicrawler.xpath.JXDocument;
import org.seimicrawler.xpath.JXNode;
import static com.zhiweidata.automatictest.publics.BeanContainer.HTTP_CLIENT;
/** /**
 * Collects live-room URLs from the Douyu home page and sends them to the barrage (danmaku) collection server for testing.
...@@ -10,14 +31,101 @@ import org.quartz.JobExecutionContext; ...@@ -10,14 +31,101 @@ import org.quartz.JobExecutionContext;
* @createTime 2021-08-23 17:43:24 * @createTime 2021-08-23 17:43:24
* @ide IntelliJ IDEA * @ide IntelliJ IDEA
*/ */
@SuppressWarnings({"JavaDoc", "unused", "SpellCheckingInspection"}) @Slf4j
@SuppressWarnings({"JavaDoc", "unused", "SpellCheckingInspection", "GrazieInspection", "DuplicatedCode"})
public class DouyuCreateTaskJob extends CreateTaskJob { public class DouyuCreateTaskJob extends CreateTaskJob {
private static final Pattern PATTERN = Pattern.compile("var \\$mainData = [\\w\\W]+?;");
@Override @Override
public void execute(JobExecutionContext jobExecutionContext) { public void execute(JobExecutionContext jobExecutionContext) {
try {
HttpResponse response = HTTP_CLIENT.execute(new HttpGet("https://www.douyu.com/"));
String html = EntityUtils.toString(response.getEntity());
Set<String> liveUrls = this.getLiveUrl(html);
liveUrls.forEach(super::createCollectionTask);
} catch (Exception e) {
log.error(e.getMessage(), e);
}
}
/**
* 网页源代码获取直播间地址
*/
@NotNull
private Set<String> getLiveUrl(String html) throws JsonProcessingException {
Matcher matcher = PATTERN.matcher(html);
if (matcher.find()) {
String jsonString = matcher.group().replaceFirst("var \\$mainData = ", "");
jsonString = jsonString.substring(0, jsonString.length() - 1);
final List<?> jsonList = new ObjectMapper().readValue(jsonString, List.class);
final Set<String> liveUrls = new HashSet<>();
final String baseUrl = "https://www.douyu.com/";
for (Object jsonObj : jsonList) {
final Map<?, ?> jsonMap = (Map<?, ?>) jsonObj;
final String type = (String) jsonMap.get("type");
// 根据type的不同,解析json
if ("slideBackground".equals(type)) {
// 首页幻灯片
List<?> ds = (List<?>) jsonMap.get("ds");
for (Object dsObj : ds) {
Number rid = (Number) ((Map<?, ?>) dsObj).get("rid");
liveUrls.add(baseUrl + rid);
}
} else if ("roomList4Mix".equals(type) || "roomList8Mix".equals(type)) {
// 各种游戏混合
List<?> modules = (List<?>) jsonMap.get("modules");
for (Object moduleObj : modules) {
List<?> rl = (List<?>) ((Map<?, ?>) moduleObj).get("rl");
if (Objects.isNull(rl)) continue;
for (Object rlObj : rl) {
String urlString = (String) ((Map<?, ?>) rlObj).get("url");
if (urlString.startsWith("/")) urlString = urlString.replaceFirst("/", "");
liveUrls.add(baseUrl + urlString);
}
}
} else if ("recommend".equals(type) || "roomList10".equals(type)) {
// 精彩推荐 和 热门游戏(如 英雄联盟,穿越火线,王者荣耀,CFHD,和平精英,COD手游,云顶模式,天刀手游,竞技游戏,网络游戏,客厅游戏,手机游戏,暴雪专区,科技文化,官方活动 等)
List<?> rl = (List<?>) jsonMap.get("rl");
if (Objects.isNull(rl)) continue;
for (Object rlObj : rl) {
String urlString = (String) ((Map<?, ?>) rlObj).get("url");
if (urlString.startsWith("/")) urlString = urlString.replaceFirst("/", "");
liveUrls.add(baseUrl + urlString);
}
} else if ("footerHotroom".equals(type)) {
List<?> ds = (List<?>) jsonMap.get("ds");
for (Object dsObj : ds) {
// 页脚热门
String urlString = (String) ((Map<?, ?>) dsObj).get("url");
if (urlString.startsWith("/")) urlString = urlString.replaceFirst("/", "");
liveUrls.add(baseUrl + urlString);
}
}
}
return liveUrls;
} else {
throw new BarrageHomePageException("斗鱼直播间弹幕获取失败");
}
} }
@Override @Override
public String getLiveName(String liveUrl) { public String getLiveName(String liveUrl) {
try {
HttpResponse response = HTTP_CLIENT.execute(new HttpGet(liveUrl));
StatusLine statusLine = response.getStatusLine();
if (statusLine.getStatusCode() != HttpStatus.SC_OK) {
throw new HttpException(String.format("code: %d, message: %s", statusLine.getStatusCode(), statusLine.getReasonPhrase()));
}
String html = EntityUtils.toString(response.getEntity());
JXDocument document = JXDocument.create(html);
JXNode node = document.selNOne("//h3[@class='Title-header']");
return node.asElement().text();
} catch (Exception e) {
log.error(e.getMessage(), e);
return null; return null;
} }
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment