Commit bb50db18 by 朽木不可雕也

添加excel文件下载

parent c84691e2
......@@ -2,6 +2,8 @@ package com.zhiweidata.automatictest.barragecrawlerserver.config;
import com.mongodb.client.MongoCollection;
import com.zhiweidata.automatictest.barragecrawlerserver.entity.BarrageCollectionTask;
import java.io.File;
import lombok.extern.slf4j.Slf4j;
import static com.zhiweidata.automatictest.publics.BeanContainer.MONGO_DATABASE;
......@@ -12,16 +14,17 @@ import static com.zhiweidata.automatictest.publics.BeanContainer.MONGO_DATABASE;
* @createTime 2021-08-24 15:47:13
* @ide IntelliJ IDEA
*/
@Slf4j
@SuppressWarnings("JavaDoc")
public class BaseConfig {
/**
 * Base URL of the barrage-crawler-server HTTP API.
 */
// NOTE(review): BASE_URL is declared twice below (with and without a trailing slash). This looks like the
// before/after pair of a diff hunk; only one declaration can exist in the compiled class - TODO confirm.
public static final String BASE_URL = "http://192.168.0.119:8080/barrage-crawler-server/";
public static final String BASE_URL = "http://192.168.0.119:8080/barrage-crawler-server";
/**
 * URL of the collection-task endpoint, built on top of BASE_URL.
 */
// NOTE(review): TASK_URL is likewise declared twice; the "/tasks" form pairs with the BASE_URL that has no
// trailing slash, the "tasks/" form with the one that has it - TODO confirm which pair is current.
public static final String TASK_URL = BASE_URL + "tasks/";
public static final String TASK_URL = BASE_URL + "/tasks";
/**
* 弹幕采集任务的 map 的 key
*/
......@@ -42,4 +45,16 @@ public class BaseConfig {
* 测试采集任务的信息集合
*/
public static final MongoCollection<BarrageCollectionTask> MONGO_TASK_REQUEST_COLLECTION = MONGO_DATABASE.getCollection(MODULE_NAME + "_task_requests", BarrageCollectionTask.class);
/**
 * Directory used to cache downloaded files. The path "cache" is relative, so it is resolved against the
 * working directory of the JVM.
 */
public static final File CACHE_DIR = new File("cache");

// Make sure the cache directory is present as soon as this class is initialised.
static {
    // isDirectory() instead of exists(): a regular file named "cache" must be reported as a failure too.
    // The trailing isDirectory() check avoids a false error when another thread or process created the
    // directory between the first check and mkdirs() (mkdirs() returns false in that case).
    if (!CACHE_DIR.isDirectory() && !CACHE_DIR.mkdirs() && !CACHE_DIR.isDirectory()) {
        log.error("文件夹在{}创建失败", CACHE_DIR.getName());
    }
}
}
package com.zhiweidata.automatictest.barragecrawlerserver.exporttasks;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.quartz.Job;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.zhiweidata.automatictest.barragecrawlerserver.config.BaseConfig.CACHE_DIR;
import static java.util.Objects.isNull;
import static java.util.Objects.requireNonNull;
/**
* 弹幕导出任务
......@@ -11,4 +25,32 @@ import org.quartz.Job;
*/
@SuppressWarnings("JavaDoc")
public interface BarrageExportTaskJob extends Job {
    /**
     * Logger shared by the default methods of this interface.
     */
    Logger log = LoggerFactory.getLogger(BarrageExportTaskJob.class);

    /**
     * Unpacks a zip stream into the cache sub-directory of the given task ({@code CACHE_DIR/taskId}).
     * <p>
     * Every entry is written below that directory; parent directories of nested entries are created on
     * demand. Directory entries are created but are not part of the returned list. The input stream is
     * closed when this method returns.
     *
     * @param taskId      id of the task; used as the name of the output directory inside the cache directory
     * @param inputStream stream positioned at the start of the zip archive
     * @return the regular files that were extracted, in archive order
     * @throws IOException if the output directory cannot be created, an entry would be written outside
     *                     the output directory, or reading/writing fails
     */
    default List<File> write(String taskId, InputStream inputStream) throws IOException {
        final File outDir = new File(CACHE_DIR, taskId);
        if (!outDir.isDirectory() && !outDir.mkdirs() && !outDir.isDirectory()) {
            log.error("文件夹{}创建失败", outDir.getName());
            throw new IOException("Cannot create output directory: " + outDir.getAbsolutePath());
        }
        // Canonical prefix (with trailing separator) that every extracted path has to start with.
        final String outRoot = outDir.getCanonicalPath() + File.separator;
        final List<File> outFiles = new ArrayList<>();
        try (ZipInputStream zipInputStream = new ZipInputStream(inputStream)) {
            final byte[] buff = new byte[8192];
            ZipEntry zipEntry;
            while ((zipEntry = zipInputStream.getNextEntry()) != null) {
                final String entryName = requireNonNull(zipEntry.getName());
                // Resolve against the task directory, not the JVM working directory.
                final File target = new File(outDir, entryName);
                // Reject entry names such as "../../x" that would escape the task directory ("Zip Slip").
                if (!target.getCanonicalPath().startsWith(outRoot)) {
                    throw new IOException("Zip entry escapes the output directory: " + entryName);
                }
                // A directory entry needs the directory itself, a file entry needs its parent.
                final File requiredDir = zipEntry.isDirectory() ? target : target.getParentFile();
                if (!requiredDir.isDirectory() && !requiredDir.mkdirs() && !requiredDir.isDirectory()) {
                    throw new IOException("Cannot create directory: " + requiredDir.getAbsolutePath());
                }
                if (!zipEntry.isDirectory()) {
                    try (FileOutputStream fileOutputStream = new FileOutputStream(target)) {
                        int len;
                        while ((len = zipInputStream.read(buff)) != -1) {
                            fileOutputStream.write(buff, 0, len);
                        }
                    }
                    outFiles.add(target);
                }
                zipInputStream.closeEntry();
            }
        }
        return outFiles;
    }
}
package com.zhiweidata.automatictest.barragecrawlerserver.exporttasks;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.event.SyncReadListener;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.zhiweidata.automatictest.barragecrawlerserver.entity.BarrageCollectionTask;
import com.zhiweidata.automatictest.barragecrawlerserver.entity.BarrageTaskResponse;
import com.zhiweidata.automatictest.barragecrawlerserver.util.TimeUtil;
import java.util.Date;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.entity.ContentType;
import org.apache.http.util.EntityUtils;
import org.jetbrains.annotations.NotNull;
import org.quartz.JobDataMap;
import org.quartz.JobExecutionContext;
import org.quartz.Trigger;
import org.quartz.TriggerBuilder;
import org.quartz.SchedulerException;
import static com.zhiweidata.automatictest.barragecrawlerserver.config.BaseConfig.BARRAGE_COLLECTION_TASK_JOB_KEY;
import static com.zhiweidata.automatictest.barragecrawlerserver.config.BaseConfig.BASE_URL;
import static com.zhiweidata.automatictest.barragecrawlerserver.config.BaseConfig.MONGO_TASK_REQUEST_COLLECTION;
import static com.zhiweidata.automatictest.barragecrawlerserver.config.BaseConfig.TASK_URL;
import static com.zhiweidata.automatictest.publics.BeanContainer.HTTP_CLIENT;
import static com.zhiweidata.automatictest.publics.BeanContainer.SCHEDULER;
import static java.util.Objects.requireNonNull;
/**
......@@ -46,44 +48,60 @@ public class BiliBiliExportTaskJob implements BarrageExportTaskJob {
try {
JobDataMap jobDataMap = requireNonNull(context.getMergedJobDataMap());
BarrageCollectionTask collectionTask = (BarrageCollectionTask) requireNonNull(jobDataMap.get(BARRAGE_COLLECTION_TASK_JOB_KEY));
String url = BASE_URL + "tasks?limit=20&page=1&name=" + collectionTask.getName();
HttpResponse response = HTTP_CLIENT.execute(new HttpGet(url));
ObjectMapper jsonMapper = new ObjectMapper();
Map<?, ?> jsonMap = jsonMapper.readValue(EntityUtils.toString(response.getEntity()), Map.class);
int code = (Integer) jsonMap.get("code");
if (code != 200) {
log.error((String) jsonMap.get("message"));
return;
}
Map<?, ?> dataMap = (Map<?, ?>) requireNonNull(jsonMap.get("data"));
List<?> dataList = (List<?>) requireNonNull(dataMap.get("list"));
BarrageTaskResponse taskResponse = jsonMapper.convertValue(dataList.get(0), BarrageTaskResponse.class);
switch (taskResponse.getStatus()) {
case ENDED:
this.checkFile(taskResponse);
break;
case FAIL:
log.error("任务:{} 采集失败", taskResponse.getId());
break;
case WAIT:
case COLLECTING:
Date nextTime = TimeUtil.randmoDate(2);
TriggerBuilder<Trigger> triggerBuilder = TriggerBuilder.newTrigger();
triggerBuilder.startAt(nextTime);
SCHEDULER.scheduleJob(context.getJobDetail(), triggerBuilder.build());
default:
throw new RuntimeException("未知枚举:" + taskResponse.getStatus());
}
this.export(collectionTask);
} catch (Exception e) {
log.error(e.getMessage(), e);
}
}
/**
 * Tries to export the barrage of a collection task.
 * <p>
 * Queries the task list endpoint by the task's name and, when the server answers with code 200 and at
 * least one task, passes the first returned task on to {@code checkFile}. Any other answer is logged and
 * the method returns without exporting.
 *
 * @param collectionTask the collection task whose name is used for the lookup
 * @throws IOException        if the HTTP request or the JSON parsing fails
 * @throws SchedulerException declared for callers; not thrown by the code visible in this method
 */
public void export(@NotNull BarrageCollectionTask collectionTask) throws IOException, SchedulerException {
    // Encode the name so that characters like ' ', '&' or '#' cannot break or alter the query string.
    // String.valueOf keeps the previous "name=null" behaviour for a missing name.
    String encodedName = java.net.URLEncoder.encode(String.valueOf(collectionTask.getName()), "UTF-8");
    String url = TASK_URL + "?limit=20&page=1&name=" + encodedName;
    HttpResponse response = HTTP_CLIENT.execute(new HttpGet(url));
    ObjectMapper jsonMapper = new ObjectMapper();
    Map<?, ?> jsonMap = jsonMapper.readValue(EntityUtils.toString(response.getEntity()), Map.class);
    // Compare as objects: a missing or non-integer "code" is treated as a failure instead of throwing.
    Object code = jsonMap.get("code");
    if (!Integer.valueOf(200).equals(code)) {
        log.error((String) jsonMap.get("message"));
        return;
    }
    Map<?, ?> dataMap = (Map<?, ?>) requireNonNull(jsonMap.get("data"));
    List<?> dataList = (List<?>) requireNonNull(dataMap.get("list"));
    // The name filter may match nothing; get(0) on an empty list would throw IndexOutOfBoundsException.
    if (dataList.isEmpty()) {
        log.error("No task was returned for name {} by {}", collectionTask.getName(), url);
        return;
    }
    BarrageTaskResponse taskResponse = jsonMapper.convertValue(dataList.get(0), BarrageTaskResponse.class);
    this.checkFile(collectionTask, taskResponse);
}
/**
* 下载excel文件,并检查excel文件
*/
private void checkFile(@NotNull BarrageTaskResponse taskResponse) {
/**
 * Downloads the barrage export of a task and checks the contained excel files.
 * <p>
 * When the server answers with a JSON body, its code and message are copied onto {@code collectionTask}
 * and the task is inserted into {@code MONGO_TASK_REQUEST_COLLECTION}. Any other body is handed to
 * {@code write} as a zip archive, and the row count of every extracted file is logged.
 *
 * @param collectionTask the collection task that receives code and message of a JSON answer
 * @param taskResponse   the server-side task whose id selects the export to download
 * @throws IOException if the HTTP request, the JSON parsing or the extraction fails
 */
private void checkFile(@NotNull BarrageCollectionTask collectionTask, @NotNull BarrageTaskResponse taskResponse) throws IOException {
    String url = TASK_URL + "/" + taskResponse.getId() + "/barrage";
    HttpResponse response = HTTP_CLIENT.execute(new HttpGet(url));
    if (response.getEntity() == null) {
        log.error("Task {}: the response of {} has no body", taskResponse.getId(), url);
        return;
    }
    // Compare only the MIME type: the header usually carries parameters ("application/json;charset=UTF-8"),
    // and it may be absent altogether.
    org.apache.http.Header contentTypeHeader = response.getFirstHeader("Content-Type");
    String mimeType = "";
    if (contentTypeHeader != null && contentTypeHeader.getValue() != null) {
        mimeType = contentTypeHeader.getValue().split(";", 2)[0].trim();
    }
    if (ContentType.APPLICATION_JSON.getMimeType().equalsIgnoreCase(mimeType)) {
        ObjectMapper jsonMapper = new ObjectMapper();
        Map<?, ?> jsonMap = jsonMapper.readValue(response.getEntity().getContent(), Map.class);
        String message = (String) jsonMap.get("message");
        log.info(message);
        collectionTask.setCode((Integer) jsonMap.get("code"));
        collectionTask.setMessage(message);
        MONGO_TASK_REQUEST_COLLECTION.insertOne(collectionTask);
    } else {
        List<File> excelFiles = this.write(taskResponse.getId(), response.getEntity().getContent());
        // Check the amount of data in every extracted file.
        for (File excelFile : excelFiles) {
            List<Map<Integer, String>> dataList = EasyExcel.read(excelFile, new SyncReadListener()).doReadAllSync();
            if (dataList.isEmpty()) {
                log.error("任务ID:{},文件:{}为空", taskResponse.getId(), excelFile.getName());
                continue;
            }
            log.info("任务ID:{},文件:{}弹幕数量:{}", taskResponse.getId(), excelFile.getName(), dataList.size());
        }
    }
}
}
package com.zhiweidata.automatictest.barragecrawlerserver.exporttasks;
import com.zhiweidata.automatictest.barragecrawlerserver.entity.BarrageCollectionTask;
import org.quartz.JobBuilder;
import org.quartz.JobDataMap;
import java.io.IOException;
import org.quartz.SchedulerException;
import org.quartz.Trigger;
import org.quartz.TriggerBuilder;
import static com.zhiweidata.automatictest.barragecrawlerserver.config.BaseConfig.BARRAGE_COLLECTION_TASK_JOB_KEY;
import static com.zhiweidata.automatictest.publics.BeanContainer.SCHEDULER;
/**
* @author aszswaz
......@@ -18,20 +12,12 @@ import static com.zhiweidata.automatictest.publics.BeanContainer.SCHEDULER;
@SuppressWarnings("JavaDoc")
class BiliBiliExportTaskJobTest {
// Manual entry point for trying out BiliBiliExportTaskJob with a hand-written BarrageCollectionTask.
// NOTE(review): two `main` signatures and two constructor-argument lines appear below. This looks like
// both sides of a diff hunk interleaved; only one of each can exist in the real file - TODO confirm.
public static void main(String[] args) throws SchedulerException, InterruptedException {
JobBuilder builder = JobBuilder.newJob(BiliBiliExportTaskJob.class);
public static void main(String[] args) throws SchedulerException, IOException {
BarrageCollectionTask collectionTask = new BarrageCollectionTask(
"https://live.bilibili.com/4354601?hotRank=0", "1857a60e-a70a-4977-977d-97d8e6e839a8", 1630894154000L
"https://live.bilibili.com/21544906?hotRank=0", "6783d422-840f-4230-9875-81ee91c06d09", System.currentTimeMillis()
);
// Scheduler-based variant: put the task into the job data map, trigger the job immediately, wait
// briefly and then shut the scheduler down, waiting for running jobs to complete.
JobDataMap dataMap = new JobDataMap();
dataMap.put(BARRAGE_COLLECTION_TASK_JOB_KEY, collectionTask);
builder.usingJobData(dataMap);
TriggerBuilder<Trigger> triggerBuilder = TriggerBuilder.newTrigger();
triggerBuilder.startNow();
SCHEDULER.scheduleJob(builder.build(), triggerBuilder.build());
Thread.sleep(100);
SCHEDULER.shutdown(true);
// Direct variant: instantiate the job and call export() without going through the scheduler.
BiliBiliExportTaskJob exportTaskJob = new BiliBiliExportTaskJob();
exportTaskJob.export(collectionTask);
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment