Commit 94af376c by 朽木不可雕也

添加虎牙自动创建任务

parent cee6683e
package com.zhiweidata.automatictest.barragecrawlerserver.config;
/**
 * Shared constants for the barrage crawler server tests.
 *
 * @author aszswaz
 * @createTime 2021-08-24 15:47:13
 * @ide IntelliJ IDEA
 */
@SuppressWarnings("JavaDoc")
public class BaseConfig {
    /**
     * Base URL of the server's HTTP interface; every endpoint below is built on top of it.
     */
    public static final String BASE_URL = "http://192.168.0.119:8080/barrage-crawler-server/";

    /**
     * Endpoint for collection tasks, relative to {@link #BASE_URL}.
     */
    public static final String TASK_URL = BASE_URL + "tasks/";

    /**
     * Name of the MongoDB collection that holds live-room URLs.
     */
    public static final String MONGO_LIVE_COLLECTION = "live_url";
}
package com.zhiweidata.automatictest.barragecrawlerserver.createtasks;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.mongodb.client.MongoCollection;
import com.zhiweidata.automatictest.barragecrawlerserver.entity.BarrageCollectionTask;
import com.zhiweidata.automatictest.barragecrawlerserver.exception.BarrageHomePageException;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.StatusLine;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.entity.ContentType;
import org.apache.http.util.EntityUtils;
import org.bson.Document;
import org.jetbrains.annotations.NotNull;
import org.jsoup.nodes.Element;
import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.seimicrawler.xpath.JXDocument;
import org.seimicrawler.xpath.JXNode;
import static com.zhiweidata.automatictest.barragecrawlerserver.config.BaseConfig.MONGO_LIVE_COLLECTION;
import static com.zhiweidata.automatictest.barragecrawlerserver.config.BaseConfig.TASK_URL;
import static com.zhiweidata.automatictest.publics.BeanContainer.HTTP_CLIENT;
import static com.zhiweidata.automatictest.publics.BeanContainer.MONGO_DATABASE;
import static com.zhiweidata.automatictest.publics.BeanContainer.STANDARD_DATE_FORMAT;
/**
* 从虎牙直播的首页获得直播间的地址,并发送到弹幕采集服务器进行测试
......@@ -17,17 +44,102 @@ import static com.zhiweidata.automatictest.publics.BeanContainer.HTTP_CLIENT;
* @ide IntelliJ IDEA
*/
@Slf4j
@SuppressWarnings({"JavaDoc", "unused", "SpellCheckingInspection"})
@SuppressWarnings({"JavaDoc", "unused", "SpellCheckingInspection", "GrazieInspection"})
public class HuyaCreateTaskJob implements Job {
/**
* Matches the inline JavaScript statement {@code var slides= [{...}];} in the page source.
* The matched array describes the slideshow entries, from which live-room URLs are derived.
*/
private static final Pattern LIVE_URL = Pattern.compile("var slides= \\[\\{[\\w\\W]+?}];");
/**
* MongoDB collection of live-room addresses, looked up by the name in MONGO_LIVE_COLLECTION.
*/
private final MongoCollection<Document> liveUrls = MONGO_DATABASE.getCollection(MONGO_LIVE_COLLECTION);
/**
 * Job entry point: downloads the Huya home page and hands its HTML to
 * {@code createTaskByHtml} so that a collection task is requested for every live room found.
 *
 * <p>A non-200 response is logged and the run ends. Any exception is logged here and not
 * rethrown.
 *
 * @param jobExecutionContext execution context supplied by the scheduler; not used
 */
@Override
public void execute(JobExecutionContext jobExecutionContext) {
    try {
        HttpGet get = new HttpGet("https://www.huya.com/");
        HttpResponse response = HTTP_CLIENT.execute(get);
        StatusLine line = response.getStatusLine();
        if (line.getStatusCode() != HttpStatus.SC_OK) {
            log.error("status code: {}, message: {}", line.getStatusCode(), line.getReasonPhrase());
            // Drain the unread body so the underlying connection can be released.
            EntityUtils.consumeQuietly(response.getEntity());
            return;
        }
        // Read the body exactly once: a streamed response entity cannot be consumed a second
        // time. "UTF-8" is only the fallback used when the response declares no charset.
        String html = EntityUtils.toString(response.getEntity(), "UTF-8");
        this.createTaskByHtml(html);
    } catch (Exception e) {
        log.error(e.getMessage(), e);
    }
}
/**
 * Extracts live-room URLs from the home page HTML and requests a collection task for each one.
 *
 * <p>URLs are gathered from two sources: the {@code href} attribute of the anchors selected
 * by three XPath expressions, and the {@code profileRoom} value of every entry in the
 * {@code var slides= [...]} JavaScript array embedded in the page. Duplicates collapse because
 * the URLs are collected in a set.
 *
 * @param html markup of the home page
 * @throws JsonProcessingException  if the slides array cannot be parsed as JSON
 * @throws BarrageHomePageException if no live-room URL at all could be extracted
 */
private void createTaskByHtml(@NonNull String html) throws JsonProcessingException {
    JXDocument document = JXDocument.create(html);
    // Deliberately not called "liveUrls": that name belongs to the MongoDB collection field.
    final Set<String> roomUrls = new HashSet<>();

    List<JXNode> nodes = document.selN("//a[@class='remen-item j_game-classify-remen-item']");
    nodes.addAll(document.selN("//div[@class='box-hd']/ul[@class='more-list']/li/a"));
    nodes.addAll(document.selN("//li[@class='game-live-item']/a[@class='title']"));
    for (JXNode jxNode : nodes) {
        Element element = jxNode.asElement();
        String href = element.attr("href");
        // A missing href comes back as an empty string; it is not a usable room address.
        if (!href.trim().isEmpty()) {
            roomUrls.add(href);
        }
    }

    Matcher matcher = LIVE_URL.matcher(html);
    if (matcher.find()) {
        String javaScript = matcher.group();
        // Strip the "var slides= " prefix and the trailing ';' to leave a bare JSON array.
        String jsonString = javaScript.replaceFirst("var slides= ", "");
        jsonString = jsonString.substring(0, jsonString.length() - 1);
        final List<?> jsonList = new ObjectMapper().readValue(jsonString, List.class);
        for (Object element : jsonList) {
            if (!(element instanceof Map)) {
                continue;
            }
            Object profileRoom = ((Map<?, ?>) element).get("profileRoom");
            // Without this check a slide lacking profileRoom would yield ".../null".
            if (profileRoom != null) {
                roomUrls.add(String.format("https://www.huya.com/%s", profileRoom));
            }
        }
    }

    if (roomUrls.isEmpty()) {
        throw new BarrageHomePageException("虎牙弹幕获取失败");
    }
    int size = roomUrls.size();
    // Fewer than 50 rooms is logged at warn level instead of info.
    if (size >= 50) {
        log.info("虎牙直播间地址数量:{}", size);
    } else {
        log.warn("虎牙直播间地址数量:{}", size);
    }
    roomUrls.forEach(this::createCollectionTask);
}
/**
 * Asks the barrage collection server to create a collection task for one live room, then
 * stores the server's JSON reply in MongoDB together with the room URL.
 *
 * <p>The task is POSTed as JSON to {@code TASK_URL}. The reply is parsed as a JSON object and
 * each of its entries is copied into a document that also carries a {@code liveUrl} field.
 * Any exception is logged here and not rethrown, so one failing room does not stop the others.
 *
 * @param liveUrl address of the live room to collect
 */
private void createCollectionTask(String liveUrl) {
    try {
        BarrageCollectionTask collectionTask = new BarrageCollectionTask(liveUrl, this.getLiveName(liveUrl));
        if (log.isDebugEnabled()) {
            // Task times are epoch seconds; the formatter expects milliseconds.
            String startTime = STANDARD_DATE_FORMAT.format(collectionTask.getStartTime() * 1000L);
            String endTime = STANDARD_DATE_FORMAT.format(collectionTask.getEndTime() * 1000L);
            log.debug("collection start time: {}, collection end time: {}", startTime, endTime);
        }

        HttpPost post = new HttpPost(TASK_URL);
        ObjectMapper objectMapper = new ObjectMapper();
        post.setEntity(new ByteArrayEntity(objectMapper.writeValueAsBytes(collectionTask), ContentType.APPLICATION_JSON));
        HttpResponse response = HTTP_CLIENT.execute(post);
        Map<?, ?> jsonMap = objectMapper.readValue(response.getEntity().getContent(), Map.class);

        // The reply is persisted as-is. Its "code" entry is no longer unboxed into an unused
        // local, which used to abort the insert whenever the field was absent or not an int.
        Document document = new Document("liveUrl", liveUrl);
        jsonMap.forEach((key, value) -> document.put((String) key, value));
        this.liveUrls.insertOne(document);
    } catch (Exception e) {
        log.error(e.getMessage(), e);
    }
}
/**
 * Downloads a live-room page and returns the text of its {@code h1} element with id
 * {@code J_roomTitle}.
 *
 * @param liveUrl address of the live-room page
 * @return text content of the title element
 * @throws IOException              if the request fails or the body cannot be read
 * @throws BarrageHomePageException if the page contains no title element
 */
@NotNull
private String getLiveName(String liveUrl) throws IOException {
    HttpResponse response = HTTP_CLIENT.execute(new HttpGet(liveUrl));
    // "UTF-8" is only the fallback used when the response declares no charset.
    String html = EntityUtils.toString(response.getEntity(), "UTF-8");
    JXDocument document = JXDocument.create(html);
    JXNode node = document.selNOne("//h1[@id='J_roomTitle']");
    if (node == null) {
        // Fail with the offending URL instead of an anonymous NullPointerException.
        throw new BarrageHomePageException("live room title not found: " + liveUrl);
    }
    return node.asElement().text();
}
}
package com.zhiweidata.automatictest.barragecrawlerserver.entity;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.Calendar;
import java.util.Random;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;
/**
 * Request payload describing one barrage collection task: which live room to collect from and
 * during which time window.
 *
 * @author aszswaz
 * @createTime 2021-08-24 15:53:39
 * @ide IntelliJ IDEA
 */
@SuppressWarnings("JavaDoc")
@Getter
@Setter
@EqualsAndHashCode
@ToString
public class BarrageCollectionTask {
    /**
     * Longest collection window, in minutes, that the constructor may pick.
     */
    private static final int MAX_DURATION_MINUTES = 300;

    /**
     * Task name.
     */
    @JsonProperty(value = "name")
    private String name;

    /**
     * Live-room address.
     */
    @JsonProperty(value = "url")
    private String url;

    /**
     * Collection start time, in seconds since the epoch.
     */
    @JsonProperty(value = "startTime")
    private Integer startTime;

    /**
     * Collection end time, in seconds since the epoch.
     */
    @JsonProperty(value = "endTime")
    private Integer endTime;

    /**
     * Creates a task that starts at the top of the next hour (default time zone) and ends a
     * random whole number of minutes later, between 1 and 300 inclusive.
     *
     * @param liveUrl  live-room address, stored as {@code url}
     * @param liveName task name, stored as {@code name}
     */
    public BarrageCollectionTask(String liveUrl, String liveName) {
        this.url = liveUrl;
        this.name = liveName;

        // Truncate "now" to the hour, then move forward one hour.
        Calendar calendar = Calendar.getInstance();
        calendar.set(Calendar.MINUTE, 0);
        calendar.set(Calendar.SECOND, 0);
        calendar.set(Calendar.MILLISECOND, 0);
        calendar.add(Calendar.HOUR_OF_DAY, 1);
        this.startTime = (int) (calendar.getTimeInMillis() / 1000);

        // nextInt(bound) is uniform over [0, bound), so adding 1 yields 1..MAX directly;
        // no retry loop is needed to exclude zero.
        int minute = new Random().nextInt(MAX_DURATION_MINUTES) + 1;
        calendar.add(Calendar.MINUTE, minute);
        this.endTime = (int) (calendar.getTimeInMillis() / 1000);
    }
}
package com.zhiweidata.automatictest.barragecrawlerserver.exception;
/**
 * Signals that a live-streaming home page could not be parsed.
 *
 * @author aszswaz
 * @createTime 2021-08-24 15:23:45
 * @ide IntelliJ IDEA
 */
@SuppressWarnings({"JavaDoc", "unused"})
public class BarrageHomePageException extends RuntimeException {

    /**
     * Creates the exception with a description only.
     *
     * @param message what went wrong
     */
    public BarrageHomePageException(String message) {
        super(message);
    }

    /**
     * Creates the exception with a description and the underlying failure.
     *
     * @param message what went wrong
     * @param cause   the exception that triggered this one
     */
    public BarrageHomePageException(String message, Throwable cause) {
        super(message, cause);
    }
}
......@@ -71,6 +71,13 @@
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.jetbrains</groupId>
<artifactId>annotations</artifactId>
<version>RELEASE</version>
<scope>compile</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
......@@ -18,6 +18,7 @@
<httpclient.version>4.5.13</httpclient.version>
<lo4j.version>2.14.1</lo4j.version>
<quartz.version>2.3.2</quartz.version>
<mongodb.version>4.3.0</mongodb.version>
</properties>
<dependencies>
......@@ -27,18 +28,27 @@
<artifactId>httpclient</artifactId>
<version>${httpclient.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-slf4j-impl -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>${lo4j.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.quartz-scheduler/quartz -->
<dependency>
<groupId>org.quartz-scheduler</groupId>
<artifactId>quartz</artifactId>
<version>${quartz.version}</version>
</dependency>
<!--mongodb client-->
<dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongodb-driver-sync</artifactId>
<version>${mongodb.version}</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package com.zhiweidata.automatictest.publics;
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import com.mongodb.client.MongoDatabase;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.quartz.Scheduler;
import org.quartz.SchedulerException;
......@@ -25,9 +31,20 @@ public class BeanContainer {
* 全局任务调度器
*/
public static final Scheduler SCHEDULER;
/**
* MongoDB client.
*/
public static final MongoClient MONGO_CLIENT;
/**
* Database dedicated to this project.
*/
public static final MongoDatabase MONGO_DATABASE;
/**
* Standard date formatter (pattern yyyy-MM-dd HH:mm:ss.SSS).
*/
public static final FastDateFormat STANDARD_DATE_FORMAT = FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss.SSS");
static {
HTTP_CLIENT = HttpClients.createMinimal();
Scheduler scheduler = null;
try {
......@@ -37,5 +54,17 @@ public class BeanContainer {
log.error(e.getMessage(), e);
}
SCHEDULER = scheduler;
HttpClientBuilder clientBuilder = HttpClients.custom();
RequestConfig.Builder configBuilder = RequestConfig.custom();
int timeout = 60 * 1000;
configBuilder.setConnectionRequestTimeout(timeout);
configBuilder.setConnectTimeout(timeout);
configBuilder.setSocketTimeout(timeout);
clientBuilder.setDefaultRequestConfig(configBuilder.build());
HTTP_CLIENT = clientBuilder.build();
MONGO_CLIENT = MongoClients.create("mongodb://admin:z199809051593@192.168.0.119");
MONGO_DATABASE = MONGO_CLIENT.getDatabase("automatic_test");
}
}
......@@ -12,6 +12,9 @@
<!-- sync/async -->
<Loggers>
<logger name="org.mongodb" level="ERROR"/>
<logger name="com.zhiweidata.automatictest.barragecrawlerserver" level="DEBUG"/>
<Root level="info" includeLocation="true">
<AppenderRef ref="console"/>
</Root>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment