Commit 0e81444e by 朽木不可雕也

修改异常处理方式

parent e5c635df
......@@ -5,6 +5,7 @@ import java.net.URL;
import java.util.List;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
import org.quartz.DisallowConcurrentExecution;
import org.quartz.JobExecutionContext;
import static java.util.Objects.requireNonNull;
......@@ -17,9 +18,9 @@ import static java.util.Objects.requireNonNull;
* @ide IntelliJ IDEA
*/
@Slf4j
@DisallowConcurrentExecution
@SuppressWarnings("JavaDoc")
public class BiliBiliCreateTaskJob extends CreateTaskJob {
private String liveTitle;
@Override
public void execute(JobExecutionContext jobExecutionContext) {
......@@ -28,13 +29,17 @@ public class BiliBiliCreateTaskJob extends CreateTaskJob {
liveJson = (Map<?, ?>) requireNonNull(liveJson.get("data"));
final List<?> moduleList = (List<?>) liveJson.get("room_list");
final String baseUrl = "https://live.bilibili.com";
final long currentTime = System.currentTimeMillis();
moduleList.forEach(liveModule -> {
Map<?, ?> liveModuleMap = (Map<?, ?>) liveModule;
List<?> roomList = (List<?>) liveModuleMap.get("list");
roomList.forEach(room -> {
Map<?, ?> roomMap = (Map<?, ?>) room;
this.liveTitle = (String) roomMap.get("title");
BiliBiliCreateTaskJob.super.createCollectionTask(baseUrl + roomMap.get("link"));
try {
Map<?, ?> roomMap = (Map<?, ?>) room;
BiliBiliCreateTaskJob.super.createCollectionTask(baseUrl + roomMap.get("link"), currentTime);
} catch (Exception e) {
log.error(e.getMessage(), e);
}
});
});
} catch (Exception e) {
......@@ -43,9 +48,4 @@ public class BiliBiliCreateTaskJob extends CreateTaskJob {
super.finish();
}
}
/**
 * Returns the value currently held in the {@code liveTitle} field.
 * The {@code liveUrl} argument is not consulted.
 */
@Override
protected String getLiveName(String liveUrl) {
return this.liveTitle;
}
}
package com.zhiweidata.automatictest.barragecrawlerserver.createtasks;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.mongodb.client.MongoCollection;
import com.zhiweidata.automatictest.barragecrawlerserver.entity.BarrageCollectionTask;
......@@ -7,12 +8,14 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.entity.ContentType;
import org.bson.Document;
import org.jetbrains.annotations.NotNull;
import org.quartz.Job;
import static com.zhiweidata.automatictest.barragecrawlerserver.config.BaseConfig.MONGO_LIVE_COLLECTION;
......@@ -27,42 +30,38 @@ import static com.zhiweidata.automatictest.publics.BeanContainer.STANDARD_DATE_F
* @ide IntelliJ IDEA
*/
@Slf4j
@SuppressWarnings({"JavaDoc", "GrazieInspection"})
@SuppressWarnings({"JavaDoc"})
public abstract class CreateTaskJob implements Job {
private final List<Document> liveUrls = new ArrayList<>();
/**
* 获取直播间名称
*/
protected abstract String getLiveName(String liveUrl) throws IOException;
/**
* 请求弹幕采集服务器创建采集任务
*/
protected final void createCollectionTask(String liveUrl) {
try {
String liveName = this.getLiveName(liveUrl);
BarrageCollectionTask collectionTask = new BarrageCollectionTask(liveUrl, liveName);
if (log.isDebugEnabled()) {
String startTime = STANDARD_DATE_FORMAT.format(collectionTask.getStartTime() * 1000L);
String endTime = STANDARD_DATE_FORMAT.format(collectionTask.getEndTime() * 1000L);
log.debug("collection start time: {}, collection end time: {}", startTime, endTime);
}
HttpPost post = new HttpPost(TASK_URL);
ObjectMapper objectMapper = new ObjectMapper();
post.setEntity(new ByteArrayEntity(objectMapper.writeValueAsBytes(collectionTask), ContentType.APPLICATION_JSON));
HttpResponse response = HTTP_CLIENT.execute(post);
Map<?, ?> jsonMap = objectMapper.readValue(response.getEntity().getContent(), Map.class);
Document document = new Document("liveUrl", liveUrl);
jsonMap.forEach((key, value) -> document.put((String) key, value));
this.liveUrls.add(document);
} catch (Exception e) {
log.error(e.getMessage(), e);
/**
 * Asks the barrage collection server to create a collection task for one live room.
 *
 * <p>The task is named with a freshly generated random UUID, serialized to JSON and
 * POSTed to {@code TASK_URL}. The JSON object in the reply is copied, together with
 * the room address under the {@code liveUrl} key, into a {@link Document} that is
 * appended to this job's {@code liveUrls} list.
 *
 * @param liveUrl     address of the live room to collect from
 * @param currentTime timestamp handed to the {@link BarrageCollectionTask} constructor
 * @return the task object that was sent to the server
 * @throws IOException if serialization, the HTTP call, or reading the reply fails
 */
@NotNull
protected final BarrageCollectionTask createCollectionTask(String liveUrl, long currentTime) throws IOException {
    BarrageCollectionTask task = new BarrageCollectionTask(liveUrl, UUID.randomUUID().toString(), currentTime);
    if (log.isDebugEnabled()) {
        // Task times are multiplied by 1000 before formatting.
        log.debug("collection start time: {}, collection end time: {}",
                STANDARD_DATE_FORMAT.format(task.getStartTime() * 1000L),
                STANDARD_DATE_FORMAT.format(task.getEndTime() * 1000L));
    }

    HttpPost request = new HttpPost(TASK_URL);
    ObjectMapper mapper = new ObjectMapper();
    byte[] payload = mapper.writeValueAsBytes(task);
    request.setEntity(new ByteArrayEntity(payload, ContentType.APPLICATION_JSON));

    HttpResponse reply = HTTP_CLIENT.execute(request);
    Map<?, ?> replyFields = mapper.readValue(reply.getEntity().getContent(), Map.class);

    // Store the server's reply alongside the room address it belongs to.
    Document liveDocument = new Document("liveUrl", liveUrl);
    for (Map.Entry<?, ?> field : replyFields.entrySet()) {
        liveDocument.put((String) field.getKey(), field.getValue());
    }
    this.liveUrls.add(liveDocument);
    return task;
}
/**
 * Writes the documents accumulated in {@code this.liveUrls} into the
 * {@code MONGO_LIVE_COLLECTION} collection of {@code MONGO_DATABASE}.
 *
 * <p>NOTE(review): this listing contains two insert sequences, one unconditional and
 * one guarded by an emptiness check. They look like the before/after variants of the
 * same change shown together, and they cannot compile side by side because the local
 * {@code liveUrls} is declared twice. Confirm which variant is current.
 */
protected final void finish() {
MongoCollection<Document> liveUrls = MONGO_DATABASE.getCollection(MONGO_LIVE_COLLECTION);
liveUrls.insertMany(this.liveUrls);
// Guarded variant: only touch the collection when there is something to insert.
if (!this.liveUrls.isEmpty()) {
MongoCollection<Document> liveUrls = MONGO_DATABASE.getCollection(MONGO_LIVE_COLLECTION);
liveUrls.insertMany(this.liveUrls);
}
}
}
......@@ -20,6 +20,7 @@ import org.apache.http.StatusLine;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;
import org.jetbrains.annotations.NotNull;
import org.quartz.DisallowConcurrentExecution;
import org.quartz.JobExecutionContext;
import org.seimicrawler.xpath.JXDocument;
import org.seimicrawler.xpath.JXNode;
......@@ -34,6 +35,7 @@ import static com.zhiweidata.automatictest.publics.BeanContainer.HTTP_CLIENT;
* @ide IntelliJ IDEA
*/
@Slf4j
@DisallowConcurrentExecution
@SuppressWarnings({"JavaDoc", "unused", "SpellCheckingInspection", "GrazieInspection", "DuplicatedCode"})
public class DouyuCreateTaskJob extends CreateTaskJob {
private static final Pattern PATTERN = Pattern.compile("var \\$mainData = [\\w\\W]+?;");
......@@ -44,7 +46,14 @@ public class DouyuCreateTaskJob extends CreateTaskJob {
HttpResponse response = HTTP_CLIENT.execute(new HttpGet("https://www.douyu.com/"));
String html = EntityUtils.toString(response.getEntity());
Set<String> liveUrls = this.getLiveUrl(html);
liveUrls.forEach(super::createCollectionTask);
final long currentTime = System.currentTimeMillis();
liveUrls.forEach(liveUrl -> {
try {
super.createCollectionTask(liveUrl, currentTime);
} catch (Exception e) {
log.error(e.getMessage(), e);
}
});
} catch (Exception e) {
log.error(e.getMessage(), e);
} finally {
......@@ -114,28 +123,4 @@ public class DouyuCreateTaskJob extends CreateTaskJob {
throw new BarrageHomePageException("斗鱼直播间弹幕获取失败");
}
}
/**
 * Resolves the title of a Douyu live room.
 *
 * <p>The room page is fetched and the text of the {@code h3.Title-header} element is
 * used. When that element is missing or its text is blank, the title is read from the
 * {@code room.room_name} field of the {@code https://www.douyu.com/betard/<roomId>}
 * JSON endpoint instead. Any failure is logged and the placeholder name is returned,
 * so this method never throws.
 *
 * @param liveUrl address of the live room page
 * @return the room title, or {@code "autio-create"} when it cannot be determined
 */
@Override
public String getLiveName(String liveUrl) {
    try {
        HttpResponse response = HTTP_CLIENT.execute(new HttpGet(liveUrl));
        StatusLine statusLine = response.getStatusLine();
        if (statusLine.getStatusCode() != HttpStatus.SC_OK) {
            // Release the connection before bailing out; the body is never read on this path.
            EntityUtils.consumeQuietly(response.getEntity());
            throw new HttpException(String.format("code: %d, message: %s", statusLine.getStatusCode(), statusLine.getReasonPhrase()));
        }
        String html = EntityUtils.toString(response.getEntity());
        JXNode node = JXDocument.create(html).selNOne("//h3[@class='Title-header']");
        // selNOne yields null when nothing matches; treat that like a blank title so the
        // JSON fallback below still runs instead of being skipped by a NullPointerException.
        String name = node == null ? null : node.asElement().text();
        if (StringUtils.isBlank(name)) {
            // Keeps the leading '/', so the URL becomes .../betard/<roomId>.
            String roomId = liveUrl.substring(liveUrl.lastIndexOf('/'));
            Map<?, ?> jsonMap = new ObjectMapper().readValue(new URL("https://www.douyu.com/betard" + roomId), Map.class);
            jsonMap = (Map<?, ?>) Objects.requireNonNull(jsonMap.get("room"));
            name = (String) jsonMap.get("room_name");
        }
        return name;
    } catch (Exception e) {
        // Best effort by design: fall back to the placeholder, but leave a trace of the cause.
        log.warn("failed to resolve live name for {}, using placeholder", liveUrl, e);
        return "autio-create";
    }
}
}
......@@ -4,7 +4,6 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.mongodb.client.MongoCollection;
import com.zhiweidata.automatictest.barragecrawlerserver.exception.BarrageHomePageException;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
......@@ -19,8 +18,8 @@ import org.apache.http.StatusLine;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;
import org.bson.Document;
import org.jetbrains.annotations.NotNull;
import org.jsoup.nodes.Element;
import org.quartz.DisallowConcurrentExecution;
import org.quartz.JobExecutionContext;
import org.seimicrawler.xpath.JXDocument;
import org.seimicrawler.xpath.JXNode;
......@@ -37,6 +36,7 @@ import static com.zhiweidata.automatictest.publics.BeanContainer.MONGO_DATABASE;
* @ide IntelliJ IDEA
*/
@Slf4j
@DisallowConcurrentExecution
@SuppressWarnings({"JavaDoc", "unused", "SpellCheckingInspection", "GrazieInspection"})
public class HuyaCreateTaskJob extends CreateTaskJob {
/**
......@@ -101,15 +101,13 @@ public class HuyaCreateTaskJob extends CreateTaskJob {
} else {
log.warn("虎牙直播间地址数量:{}", size);
}
liveUrls.forEach(super::createCollectionTask);
}
@NotNull
public String getLiveName(String liveUrl) throws IOException {
HttpResponse response = HTTP_CLIENT.execute(new HttpGet(liveUrl));
String html = EntityUtils.toString(response.getEntity());
JXDocument document = JXDocument.create(html);
JXNode node = document.selNOne("//h1[@id='J_roomTitle']");
return node.asElement().text();
final long currentTime = System.currentTimeMillis();
liveUrls.forEach(liveUrl -> {
try {
super.createCollectionTask(liveUrl, currentTime);
} catch (Exception e) {
log.error(e.getMessage(), e);
}
});
}
}
......@@ -40,11 +40,12 @@ public class BarrageCollectionTask {
@JsonProperty(value = "endTime")
private Integer endTime;
public BarrageCollectionTask(String liveUrl, String liveName) {
public BarrageCollectionTask(String liveUrl, String liveName, long currentTime) {
this.url = liveUrl;
this.name = liveName;
Calendar calendar = Calendar.getInstance();
calendar.setTimeInMillis(currentTime);
calendar.set(Calendar.MINUTE, 0);
calendar.set(Calendar.SECOND, 0);
calendar.set(Calendar.MILLISECOND, 0);
......
......@@ -6,7 +6,6 @@
<!--输出日志的格式,使用SpringBoot配色(仅能在SpringBoot项目中使用) -->
<PatternLayout
pattern="%d{yyyy-MM-dd HH:mm:ss.SSS} %highlight{%5p} --- [%-30t] %-90c %-40M %-5L: %m%n"/>
<ThresholdFilter level="info" onMatch="ACCEPT" onMismatch="DENY"/>
</Console>
</Appenders>
......@@ -15,7 +14,7 @@
<logger name="org.mongodb" level="ERROR"/>
<logger name="com.zhiweidata.automatictest.barragecrawlerserver" level="DEBUG"/>
<Root level="info" includeLocation="true">
<Root level="INFO" includeLocation="true">
<AppenderRef ref="console"/>
</Root>
</Loggers>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment