Commit 720a2127 by liuyu

2023年3月2日 第一次提交

parents
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven descriptor of the auto-configuration module. It packages the Spring Boot
auto-configuration that wires the automatic-center client beans.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.zhiwei</groupId>
<artifactId>middleware-automatic-center</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>middleware-automatic-center-autoconfigure</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
<!-- Version of the sibling client module referenced below. -->
<automatic.version>1.0-SNAPSHOT</automatic.version>
</properties>
<dependencies>
<!-- spring-boot.version is not defined in this file; presumably inherited from the parent POM (verify). -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-autoconfigure</artifactId>
<version>${spring-boot.version}</version>
</dependency>
<!-- Client module that provides the wrappers and the Dubbo reference factory; fastjson is excluded from its transitive dependencies. -->
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>middleware-automatic-center-client</artifactId>
<version>${automatic.version}</version>
<exclusions>
<exclusion>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.zookeeper/zookeeper -->
<!-- No version is declared here; presumably managed by the parent POM (verify). -->
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</dependency>
<!-- Provided scope: the consuming application must supply Dubbo itself. -->
<dependency>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package com.zhiwei.middleware.automatic.configuration;
import com.zhiwei.middleware.automatic.server.core.*;
import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService;
import com.zhiwei.middleware.automatic.server.dubbo.service.CommonService;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataCollectionService;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataUploadService;
import org.springframework.boot.autoconfigure.AutoConfigureAfter;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that registers the automatic-center client beans.
 *
 * <p>The configuration is active unless the property
 * {@code auto.matic.center.client.enable} is present with a non-matching value
 * ({@code matchIfMissing = true}). Each bean is only created when the application
 * context does not already contain a bean of the same client type.
 */
@Configuration
@ConditionalOnProperty(prefix = "auto.matic.center.client", name = "enable", matchIfMissing = true)
@EnableConfigurationProperties(AutoMaticClientConfigurationProperties.class)
@AutoConfigureAfter(AutoMaticClientConfigurationProperties.class)
public class AutoMaticClientConfiguration {

    /**
     * Creates the auto-marking client backed by an {@link AutoMaticService} reference.
     *
     * @param properties bound client configuration
     * @return the auto-marking client
     */
    @Bean
    @ConditionalOnMissingBean(AutoMaticClient.class)
    public AutoMaticClient authClient(AutoMaticClientConfigurationProperties properties) {
        AutoMaticService service = reference(AutoMaticService.class, properties);
        return new AutoMaticClient(service);
    }

    /**
     * Creates the common aggregation client backed by a {@link CommonService} reference.
     *
     * @param properties bound client configuration
     * @return the common client
     */
    @Bean
    @ConditionalOnMissingBean(CommonClient.class)
    public CommonClient commonClient(AutoMaticClientConfigurationProperties properties) {
        CommonService service = reference(CommonService.class, properties);
        return new CommonClient(service);
    }

    /**
     * Creates the data-collection client backed by a {@link DataCollectionService} reference.
     *
     * @param properties bound client configuration
     * @return the data-collection client
     */
    @Bean
    @ConditionalOnMissingBean(DataCollectionClient.class)
    public DataCollectionClient dataCollectionClient(AutoMaticClientConfigurationProperties properties) {
        DataCollectionService service = reference(DataCollectionService.class, properties);
        return new DataCollectionClient(service);
    }

    /**
     * Creates the data-upload client backed by a {@link DataUploadService} reference.
     *
     * @param properties bound client configuration
     * @return the data-upload client
     */
    @Bean
    @ConditionalOnMissingBean(DataUploadClient.class)
    public DataUploadClient dataUploadClient(AutoMaticClientConfigurationProperties properties) {
        DataUploadService service = reference(DataUploadService.class, properties);
        return new DataUploadClient(service);
    }

    /**
     * Asks {@link AutoMaticClientFactory} for an instance of the given service interface,
     * passing the application, registry and consumer settings from the properties.
     */
    private static <T> T reference(Class<T> type, AutoMaticClientConfigurationProperties properties) {
        return AutoMaticClientFactory.createInstance(
                type,
                properties.getApplication(),
                properties.getRegistry(),
                properties.getConsumer());
    }
}
package com.zhiwei.middleware.automatic.configuration;
import org.apache.dubbo.config.ApplicationConfig;
import org.apache.dubbo.config.ConsumerConfig;
import org.apache.dubbo.config.RegistryConfig;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Configuration properties bound from the {@code auto.matic.center.client} prefix.
 *
 * <p>Holds the three Dubbo configuration objects that are handed to the client factory.
 */
@ConfigurationProperties("auto.matic.center.client")
public class AutoMaticClientConfigurationProperties {

    /** Dubbo application settings. */
    private ApplicationConfig application;

    /** Dubbo registry settings. */
    private RegistryConfig registry;

    /** Dubbo consumer settings. */
    private ConsumerConfig consumer;

    // ---- getters ----

    /** @return the Dubbo application settings, or {@code null} when not configured */
    public ApplicationConfig getApplication() {
        return this.application;
    }

    /** @return the Dubbo registry settings, or {@code null} when not configured */
    public RegistryConfig getRegistry() {
        return this.registry;
    }

    /** @return the Dubbo consumer settings, or {@code null} when not configured */
    public ConsumerConfig getConsumer() {
        return this.consumer;
    }

    // ---- setters ----

    /** @param application the Dubbo application settings */
    public void setApplication(ApplicationConfig application) {
        this.application = application;
    }

    /** @param registry the Dubbo registry settings */
    public void setRegistry(RegistryConfig registry) {
        this.registry = registry;
    }

    /** @param consumer the Dubbo consumer settings */
    public void setConsumer(ConsumerConfig consumer) {
        this.consumer = consumer;
    }
}
org.springframework.boot.autoconfigure.EnableAutoConfiguration=com.zhiwei.middleware.automatic.configuration.AutoMaticClientConfiguration
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven descriptor of the client module. It contains the service interfaces,
the client wrappers and the Dubbo reference factory.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.zhiwei</groupId>
<artifactId>middleware-automatic-center</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>middleware-automatic-center-client</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
<curator.version>2.12.0</curator.version>
<base.version>2.0.0-SNAPSHOT</base.version>
<easyexcel.version>2.1.2</easyexcel.version>
<json.version>1.2.58</json.version>
</properties>
<dependencies>
<!-- Provided scope: consumers of this module must bring their own fastjson. -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${json.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/easyexcel -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>${easyexcel.version}</version>
<scope>provided</scope>
</dependency>
<!-- Logging dependencies -->
<!-- https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-1.2-api -->
<!-- No version is declared for the logging artifacts; presumably managed by the parent POM (verify). -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
<!-- <scope>provided</scope>-->
<scope>compile</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<!-- <scope>provided</scope>-->
<scope>compile</scope>
</dependency>
<!-- Curator recipes without their transitive zookeeper and slf4j-api; zookeeper is declared explicitly at the end of this list. -->
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>${curator.version}</version>
<!-- <scope>provided</scope>-->
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- dubbo.version is not defined in this file; presumably inherited from the parent POM (verify). -->
<!-- The starter's own dubbo artifact is excluded; dubbo is declared separately right below. -->
<dependency>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo-spring-boot-starter</artifactId>
<version>${dubbo.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
<!-- <scope>provided</scope>-->
<scope>compile</scope>
</dependency>
<!-- Shared base objects used by the service interfaces (e.g. MarkInfo, ClassB). -->
<dependency>
<groupId>com.zhiwei.base</groupId>
<artifactId>base-objects-application</artifactId>
<version>${base.version}</version>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package com.zhiwei.middleware.automatic.server.core;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import java.util.List;
import java.util.Map;
/**
 * Thin client facade over {@link AutoMaticService}; every method forwards its
 * arguments unchanged to the wrapped service and returns the service's result.
 */
public class AutoMaticClient {

    /** Service every call is forwarded to. */
    private final AutoMaticService delegate;

    /**
     * @param autoMaticService service that receives all forwarded calls
     */
    public AutoMaticClient(AutoMaticService autoMaticService) {
        this.delegate = autoMaticService;
    }

    /** Forwards to {@link AutoMaticService#autoMark(List)}. */
    public void autoMark(List<MarkInfo> infos) {
        this.delegate.autoMark(infos);
    }

    /** Forwards to {@link AutoMaticService#autoMarkMulti(List)}. */
    public void autoMarkMulti(List<MarkInfoMulti> infos) {
        this.delegate.autoMarkMulti(infos);
    }

    /** Forwards to {@link AutoMaticService#modifyTemplateTitle(String, String, String)}. */
    public boolean modifyTemplateTitle(String group, String templateTitle, String fixTag) {
        boolean modified = this.delegate.modifyTemplateTitle(group, templateTitle, fixTag);
        return modified;
    }

    /** Forwards to {@link AutoMaticService#getMupdateByTemplateTitle(String, String)}. */
    public List<String> getMupdateByTemplateTitle(String group, String templateTitle) {
        List<String> mupdates = this.delegate.getMupdateByTemplateTitle(group, templateTitle);
        return mupdates;
    }

    /** Forwards to {@link AutoMaticService#tryGetTemplateTitleByMupdate(String, String, String)}. */
    public String tryGetTemplateTitleByMupdate(String group, String title, String mupdate) {
        String templateTitle = this.delegate.tryGetTemplateTitleByMupdate(group, title, mupdate);
        return templateTitle;
    }

    /** Forwards to {@link AutoMaticService#compareWithTemplateTileOL(String, String)}. */
    public Map<String, Object> compareWithTemplateTileOL(String project, String title) {
        Map<String, Object> comparison = this.delegate.compareWithTemplateTileOL(project, title);
        return comparison;
    }

    /** Forwards to {@link AutoMaticService#resetTemplate(String, String)}. */
    public boolean resetTemplate(String group, String templateTitle) {
        boolean reset = this.delegate.resetTemplate(group, templateTitle);
        return reset;
    }
}
package com.zhiwei.middleware.automatic.server.core;
import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService;
import org.apache.dubbo.common.utils.StringUtils;
import org.apache.dubbo.config.ApplicationConfig;
import org.apache.dubbo.config.ConsumerConfig;
import org.apache.dubbo.config.ReferenceConfig;
import org.apache.dubbo.config.RegistryConfig;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.locks.ReentrantLock;
import static java.util.Objects.*;
import static java.util.Objects.isNull;
/**
 * Static factory that builds Dubbo consumer proxies for the automatic-center
 * service interfaces and caches the underlying {@link ReferenceConfig} per
 * interface / registry / consumer combination.
 *
 * <p>All creation happens under a single lock, so concurrent callers never build
 * two references for the same cache key.
 */
public class AutoMaticClientFactory {
    private static final Logger logger = LogManager.getLogger(AutoMaticClientFactory.class);
    private static final ReentrantLock lock = new ReentrantLock();
    /** Successfully initialised references, keyed by interface name, registry and consumer settings. */
    private static final Map<String, ReferenceConfig<?>> REFERENCES = new HashMap<>();

    private AutoMaticClientFactory() {
    }

    /**
     * Returns a proxy for {@code clazz}, reusing a cached reference when one exists
     * for the same interface, registry group/address and consumer group/version.
     *
     * <p>Side effects on the passed configuration objects: QoS is disabled when not
     * configured, the consumer check flag is set to {@code false} when not configured,
     * and the registry and consumer timeouts are set to 600000.
     *
     * <p>A reference is only cached after {@code reference.get()} returned a non-null
     * proxy. Caching it earlier would keep a reference whose initialisation failed, and
     * such a reference is not guaranteed to be retryable on a later {@code get()}.
     *
     * @param clazz       service interface to create a proxy for
     * @param application Dubbo application settings
     * @param registry    Dubbo registry settings
     * @param consumer    Dubbo consumer settings
     * @param <T>         service interface type
     * @return the proxy, or {@code null} when a required argument is missing or the
     *         reference could not be created (the failure is logged)
     */
    public static <T> T createInstance(Class<T> clazz, ApplicationConfig application, RegistryConfig registry, ConsumerConfig consumer) {
        lock.lock();
        try {
            if (isNull(application)) {
                throw new NullPointerException("获取dubbo配置文件失败");
            }
            // Validate the parameters (zookeeper address, server-side name).
            requireNonNull(registry);
            requireNonNull(consumer);
            // Build the unique cache key.
            String[] cacheAdd = { clazz.getName(),registry.getGroup(),registry.getAddress(),consumer.getGroup(),consumer.getVersion()};
            String cacheKey = StringUtils.join(cacheAdd,"|");
            ReferenceConfig<?> cached = REFERENCES.get(cacheKey);
            if (nonNull(cached)) {
                logger.info("{}实例已存在,返回复用实例", clazz.getSimpleName());
                // Safe: the cache key contains clazz.getName(), so the cached reference was built for T.
                @SuppressWarnings("unchecked")
                T cachedProxy = (T) cached.get();
                return cachedProxy;
            }
            if (isNull(application.getQosEnable())) {
                // QoS is switched off by default when it is not configured.
                application.setQosEnable(false);
            }
            ReferenceConfig<T> reference = new ReferenceConfig<>();
            reference.setApplication(application);
            // Register with the registry centre.
            registry.setTimeout(600000);
            reference.setRegistry(registry);
            if (isNull(consumer.isCheck())) {
                // No provider check by default when the consumer does not configure one.
                consumer.setCheck(false);
                // The reference falls back to the consumer setting when it has none of its own.
                reference.setCheck(false);
            }
            // Apply the consumer configuration.
            consumer.setTimeout(600000);
            reference.setConsumer(consumer);
            reference.setInterface(clazz);
            // Obtain the target interface proxy; cache the reference only once this succeeded.
            T proxy = reference.get();
            if (nonNull(proxy)) {
                REFERENCES.put(cacheKey, reference);
            }
            return proxy;
        } catch (Exception e) {
            logger.error("创建{}实例出错", clazz.getName(), e);
        } finally {
            lock.unlock();
        }
        return null;
    }

    /**
     * Convenience overload that builds the Dubbo configuration objects from plain strings.
     *
     * @param clazz    service interface to create a proxy for
     * @param registry registry address passed to {@link RegistryConfig}
     * @param group    consumer group
     * @param appName  Dubbo application name
     * @param <T>      service interface type
     * @return the proxy, or {@code null} when creation failed
     */
    public static <T> T createInstance(Class<T> clazz, String registry, String group, String appName) {
        ApplicationConfig application = new ApplicationConfig();
        RegistryConfig reg = new RegistryConfig(registry);
        application.setName(appName);
        ConsumerConfig consumer = new ConsumerConfig();
        consumer.setGroup(group);
        return createInstance(clazz, application, reg, consumer);
    }

    /**
     * Wraps a service proxy in an auto-marking client.
     *
     * @param autoMaticService proxy of the service interface
     * @return the auto-marking client
     */
    public static AutoMaticClient getAutoMaticClient(AutoMaticService autoMaticService) {
        return new AutoMaticClient(autoMaticService);
    }
}
package com.zhiwei.middleware.automatic.server.core;
import com.zhiwei.middleware.automatic.server.dubbo.service.CommonService;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import java.util.List;
/**
 * Thin client facade over {@link CommonService}; every method forwards its
 * arguments unchanged and returns whatever the service returns.
 */
public class CommonClient {

    /** Service every call is forwarded to. */
    private final CommonService delegate;

    /**
     * @param commonService service that receives all forwarded calls
     */
    public CommonClient(CommonService commonService) {
        this.delegate = commonService;
    }

    /** Forwards to {@link CommonService#generateAggreeOrder()}. */
    public String generateAggreeOrder() {
        String orderId = this.delegate.generateAggreeOrder();
        return orderId;
    }

    /** Forwards to {@link CommonService#appendAggreeOrder(String, List)}. */
    public boolean appendAggreeOrder(String id, List<AggreeDTO> list) {
        boolean appended = this.delegate.appendAggreeOrder(id, list);
        return appended;
    }

    /** Forwards to {@link CommonService#startAggree(String)}. */
    public boolean startAggree(String id) {
        boolean started = this.delegate.startAggree(id);
        return started;
    }

    /** Forwards to {@link CommonService#startAggree(String, double)}. */
    public boolean startAggree(String id, double limit) {
        boolean started = this.delegate.startAggree(id, limit);
        return started;
    }

    /** Forwards to {@link CommonService#getAggreeResult(String)}. */
    public CommonAggreeResult getAggreeResult(String id) {
        CommonAggreeResult result = this.delegate.getAggreeResult(id);
        return result;
    }

    /** Forwards to {@link CommonService#getAggreeResult(String, int, int)}. */
    public CommonAggreeResult getAggreeResult(String id, int page, int pageLimit) {
        CommonAggreeResult result = this.delegate.getAggreeResult(id, page, pageLimit);
        return result;
    }
}
package com.zhiwei.middleware.automatic.server.core;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataCollectionService;
import java.util.List;
import java.util.Map;
/**
 * Thin client facade over {@link DataCollectionService}; every method forwards its
 * arguments unchanged and returns whatever the service returns.
 */
public class DataCollectionClient {

    /** Service every call is forwarded to. */
    private final DataCollectionService delegate;

    /**
     * @param dataCollectionService service that receives all forwarded calls
     */
    public DataCollectionClient(DataCollectionService dataCollectionService) {
        this.delegate = dataCollectionService;
    }

    // ---- cache and data set management ----

    /** Forwards to {@code DataCollectionService.cleanCache}. */
    public void cleanCache(String group, String id) {
        this.delegate.cleanCache(group, id);
    }

    /** Forwards to {@code DataCollectionService.cleanCacheExceptNoise}. */
    public void cleanCacheExceptNoise(String group, String id) {
        this.delegate.cleanCacheExceptNoise(group, id);
    }

    /** Forwards to {@code DataCollectionService.addDataCollection}. */
    public void addDataCollection(String group, String id, List<String> compressedlist) {
        this.delegate.addDataCollection(group, id, compressedlist);
    }

    /** Forwards to {@code DataCollectionService.startAggree}. */
    public void startAggree(String group, String id, String highWords) {
        this.delegate.startAggree(group, id, highWords);
    }

    // ---- tag modification ----

    /** Forwards to {@code DataCollectionService.batchModifyFatherTag}. */
    public boolean batchModifyFatherTag(String group, String id, List<String> fatherIds, String mtag, String mperson,
            ClassB.TypeB typeB) {
        boolean ok = this.delegate.batchModifyFatherTag(group, id, fatherIds, mtag, mperson, typeB);
        return ok;
    }

    /** Forwards to {@code DataCollectionService.modifyFatherTag}. */
    public boolean modifyFatherTag(String group, String id, String fatherId, String mtag, String mperson, ClassB.TypeB typeB) {
        boolean ok = this.delegate.modifyFatherTag(group, id, fatherId, mtag, mperson, typeB);
        return ok;
    }

    /** Forwards to {@code DataCollectionService.modifySonTag}. */
    public boolean modifySonTag(String group, String id, String fatherId, String sonId, String mtag, String mperson,
            ClassB.TypeB typeB) {
        boolean ok = this.delegate.modifySonTag(group, id, fatherId, sonId, mtag, mperson, typeB);
        return ok;
    }

    // ---- noise set ----

    /** Forwards to {@code DataCollectionService.throwIntoNoise}. */
    public boolean throwIntoNoise(String group, String id, String fatherId, ClassB.TypeB typeB) {
        boolean ok = this.delegate.throwIntoNoise(group, id, fatherId, typeB);
        return ok;
    }

    /** Forwards to {@code DataCollectionService.batchThrowIntoNoise}. */
    public boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, ClassB.TypeB typeB) {
        boolean ok = this.delegate.batchThrowIntoNoise(group, id, fatherIds, typeB);
        return ok;
    }

    /** Forwards to {@code DataCollectionService.restoreFromNoise}. */
    public boolean restoreFromNoise(String group, String id, String fatherId, ClassB.TypeB typeB) {
        boolean ok = this.delegate.restoreFromNoise(group, id, fatherId, typeB);
        return ok;
    }

    // ---- paged queries ----

    /** Forwards to {@code DataCollectionService.getFatherTitles}. */
    public Map<String, Object> getFatherTitles(String group, String id, int page, int size, boolean isAsc,
            String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag) {
        Map<String, Object> titles =
                this.delegate.getFatherTitles(group, id, page, size, isAsc, keyword, typeB, isTitle, markFlag);
        return titles;
    }

    /** Forwards to {@code DataCollectionService.getSonTitles}. */
    public Map<String, Object> getSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc,
            String keyword, ClassB.TypeB typeB) {
        Map<String, Object> titles =
                this.delegate.getSonTitles(group, id, fatherId, page, size, isAsc, keyword, typeB);
        return titles;
    }

    /** Forwards to {@code DataCollectionService.getNoiseFatherTitles}. */
    public Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size, boolean isAsc,
            String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag) {
        Map<String, Object> titles =
                this.delegate.getNoiseFatherTitles(group, id, page, size, isAsc, keyword, typeB, isTitle, markFlag);
        return titles;
    }

    /** Forwards to {@code DataCollectionService.getNoiseSonTitles}. */
    public Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId, int page, int size,
            boolean isAsc, String keyword, ClassB.TypeB typeB) {
        Map<String, Object> titles =
                this.delegate.getNoiseSonTitles(group, id, fatherId, page, size, isAsc, keyword, typeB);
        return titles;
    }

    // ---- persistence and progress ----

    /** Forwards to {@code DataCollectionService.checkedThenInsert}. */
    public void checkedThenInsert(String group, String id) {
        this.delegate.checkedThenInsert(group, id);
    }

    /** Forwards to {@code DataCollectionService.getAggreResultNow}. */
    public int getAggreResultNow(String group, String id) {
        int status = this.delegate.getAggreResultNow(group, id);
        return status;
    }

    /** Forwards to {@code DataCollectionService.getInsertResultNow}. */
    public int getInsertResultNow(String group, String id) {
        int status = this.delegate.getInsertResultNow(group, id);
        return status;
    }
}
package com.zhiwei.middleware.automatic.server.core;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataUploadService;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import java.util.Map;
/**
 * Thin client facade over {@link DataUploadService}; every method forwards its
 * arguments unchanged and returns whatever the service returns.
 */
public class DataUploadClient {

    /** Service every call is forwarded to. */
    private final DataUploadService delegate;

    /**
     * @param dataUploadService service that receives all forwarded calls
     */
    public DataUploadClient(DataUploadService dataUploadService) {
        this.delegate = dataUploadService;
    }

    /** Forwards to {@code DataUploadService.addUploadList}. */
    public void addUploadList(String group, String id, String sourceStr) {
        this.delegate.addUploadList(group, id, sourceStr);
    }

    /** Forwards to {@code DataUploadService.startUpload}. */
    public void startUpload(String group, String id, String mperson,
            UploadInfo.MtagType mtagType, UploadInfo.FilterType filterType, String projectId, InsertType insertType) {
        this.delegate.startUpload(group, id, mperson, mtagType, filterType, projectId, insertType);
    }

    /** Forwards to {@code DataUploadService.getUploadStatus}. */
    public Map<String, Object> getUploadStatus(String group, String id) {
        Map<String, Object> status = this.delegate.getUploadStatus(group, id);
        return status;
    }

    /** Forwards to {@code DataUploadService.getUploadInfoList}. */
    public Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc,
            String searchField, String keyword, UploadInfo.UploadType uploadType) {
        Map<String, Object> infoList =
                this.delegate.getUploadInfoList(group, id, page, size, isAsc, searchField, keyword, uploadType);
        return infoList;
    }

    /** Forwards to {@code DataUploadService.getDataType}. */
    public UploadInfo.DataType getDataType(JSONObject json, ClassB.TypeB typeB) {
        UploadInfo.DataType dataType = this.delegate.getDataType(json, typeB);
        return dataType;
    }

    /** Forwards to {@code DataUploadService.cleanUploadResult}. */
    public void cleanUploadResult(String group, String id) {
        this.delegate.cleanUploadResult(group, id);
    }
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import java.util.List;
import java.util.Map;
/**
 * Remote service contract for auto-marking and template-title maintenance.
 */
public interface AutoMaticService {

    /**
     * Submits a batch of mark infos for auto-marking.
     *
     * @param markInfos items to mark
     */
    void autoMark(List<MarkInfo> markInfos);

    /**
     * Submits a batch of multi mark infos for auto-marking.
     *
     * @param markInfoMultis items to mark
     */
    void autoMarkMulti(List<MarkInfoMulti> markInfoMultis);

    /**
     * Corrects the markTag of a template title; the entry is added when it does not exist yet.
     *
     * @param group         project group
     * @param templateTitle template title
     * @param fixTag        the correct tag
     * @return NOTE(review): presumably whether the change succeeded; the original doc does not say
     */
    boolean modifyTemplateTitle(String group, String templateTitle, String fixTag);

    /**
     * Fetches data by template title (only the latest 100 entries).
     *
     * @param group         project
     * @param templateTitle template title
     * @return feature values
     */
    List<String> getMupdateByTemplateTitle(String group, String templateTitle);

    /**
     * Tries to find the template title from a title and a feature value.
     *
     * @param group   project
     * @param title   title
     * @param mupdate feature value
     * @return template title
     */
    String tryGetTemplateTitleByMupdate(String group, String title, String mupdate);

    /**
     * Matches a title online against the existing aggregated titles of a project group.
     *
     * @param project project
     * @param title   title
     * @return the match result
     */
    Map<String, Object> compareWithTemplateTileOL(String project, String title);

    /**
     * Resets the auto-marking template.
     *
     * @param group         project
     * @param templateTitle template title
     * @return whether the reset succeeded
     */
    boolean resetTemplate(String group, String templateTitle);
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import java.util.List;
/**
 * Remote service contract for the generic aggregation workflow: obtain a task id,
 * append data to it, start the aggregation and read the result.
 */
public interface CommonService {
/**
 * Obtains a task id (new).
 *
 * @return the task id
 */
String generateAggreeOrder();
/**
 * Adds data to the task with the given id (new).
 *
 * @param id task id
 * @param list data items to add
 * @return NOTE(review): presumably whether the data was accepted; not stated in the original doc
 */
boolean appendAggreeOrder(String id, List<AggreeDTO> list);
/**
 * Aggregates the data using bisecting k-means.
 *
 * @param id task id
 * @return NOTE(review): presumably whether the aggregation was started; not stated in the original doc
 */
boolean startAggree(String id);
/**
 * Aggregates the data using bisecting k-means.
 *
 * @param id task id
 * @param limit NOTE(review): meaning of the limit is not documented in the original; confirm with the implementation
 * @return NOTE(review): presumably whether the aggregation was started; not stated in the original doc
 */
boolean startAggree(String id, double limit);
/**
 * Gets the aggregation result (returns the first page by default).
 *
 * @param id task id
 * @return the aggregation result
 */
CommonAggreeResult getAggreeResult(String id);
/**
 * Gets the aggregation result (paged).
 *
 * @param id task id
 * @param page page to return
 * @param pageLimit page size limit
 * @return the aggregation result
 */
CommonAggreeResult getAggreeResult(String id, int page, int pageLimit);
}
\ No newline at end of file
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.category.ClassB.TypeB;
import java.util.List;
import java.util.Map;
/**
 * Data collection module service.
 *
 * @author SJJ
 * @date 2020-04-07 15:02:05
 */
public interface DataCollectionService {

    /**
     * Clears all caches.
     *
     * @param group group the data belongs to
     * @param id    data set id
     */
    void cleanCache(String group, String id);

    /**
     * Clears all caches but keeps the noise set.
     *
     * @param group group the data belongs to
     * @param id    data set id
     */
    void cleanCacheExceptNoise(String group, String id);

    /**
     * Adds the base data set.
     *
     * @param group          group the data belongs to
     * @param id             data set id
     * @param compressedlist data entries to add (NOTE(review): name suggests compressed payloads; confirm format)
     */
    void addDataCollection(String group, String id, List<String> compressedlist);

    /**
     * Starts the aggregation.
     *
     * @param group     group the data belongs to
     * @param id        data set id
     * @param highWords NOTE(review): undocumented in the original; confirm with the implementation
     */
    void startAggree(String group, String id, String highWords);

    /**
     * Batch-modifies parent template tags (the child tags that belong to them are modified as well).
     *
     * @param group     group the data belongs to
     * @param id        data set id
     * @param fatherIds parent ids
     * @param mtag      tag to apply
     * @param mperson   person making the change (NOTE(review): inferred from the name)
     * @param typeB     data type
     * @return NOTE(review): presumably whether the change succeeded
     */
    boolean batchModifyFatherTag(String group, String id, List<String> fatherIds, String mtag, String mperson,
            TypeB typeB);

    /**
     * Modifies a parent template tag (the child tags that belong to it are modified as well).
     *
     * @param group    group the data belongs to
     * @param id       data set id
     * @param fatherId parent id
     * @param mtag     tag to apply
     * @param mperson  person making the change (NOTE(review): inferred from the name)
     * @param typeB    data type
     * @return NOTE(review): presumably whether the change succeeded
     */
    boolean modifyFatherTag(String group, String id, String fatherId, String mtag, String mperson, TypeB typeB);

    /**
     * Modifies a child tag.
     *
     * @param group    group the data belongs to
     * @param id       data set id
     * @param fatherId parent id
     * @param sonId    child id
     * @param mtag     tag to apply
     * @param mperson  person making the change (NOTE(review): inferred from the name)
     * @param typeB    data type
     * @return NOTE(review): presumably whether the change succeeded
     */
    boolean modifySonTag(String group, String id, String fatherId, String sonId, String mtag, String mperson,
            TypeB typeB);

    /**
     * Moves an entry into the noise set.
     *
     * @param group    group the data belongs to
     * @param id       data set id
     * @param fatherId parent id
     * @param typeB    data type
     * @return NOTE(review): presumably whether the operation succeeded
     */
    boolean throwIntoNoise(String group, String id, String fatherId, TypeB typeB);

    /**
     * Moves several entries into the noise set.
     *
     * @param group     group the data belongs to
     * @param id        data set id
     * @param fatherIds parent ids
     * @param typeB     data type
     * @return NOTE(review): presumably whether the operation succeeded
     */
    boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, TypeB typeB);

    /**
     * Restores an entry from the noise set.
     *
     * @param group    group the data belongs to
     * @param id       data set id
     * @param fatherId parent id
     * @param typeB    data type
     * @return NOTE(review): presumably whether the operation succeeded
     */
    boolean restoreFromNoise(String group, String id, String fatherId, TypeB typeB);

    /**
     * Gets a page of parent title information.
     *
     * @param group    group the data belongs to
     * @param id       data set id
     * @param page     page to return
     * @param size     page size
     * @param isAsc    whether to sort ascending
     * @param keyword  search keyword
     * @param typeB    data type
     * @param isTitle  NOTE(review): undocumented in the original; confirm with the implementation
     * @param markFlag NOTE(review): undocumented in the original; confirm with the implementation
     * @return the requested page
     */
    Map<String, Object> getFatherTitles(String group, String id, int page, int size, boolean isAsc,
            String keyword, TypeB typeB, boolean isTitle, int markFlag);

    /**
     * Gets a page of child information by parent id and child id.
     *
     * @param group    group the data belongs to
     * @param id       data set id
     * @param fatherId parent id
     * @param page     page to return
     * @param size     page size
     * @param isAsc    whether to sort ascending
     * @param keyword  search keyword
     * @param typeB    data type
     * @return the requested page
     */
    Map<String, Object> getSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc,
            String keyword, TypeB typeB);

    /**
     * Gets a page of parent title information from the noise set.
     *
     * @param group    group the data belongs to
     * @param id       data set id
     * @param page     page to return
     * @param size     page size
     * @param isAsc    whether to sort ascending
     * @param keyword  search keyword
     * @param typeB    data type
     * @param isTitle  NOTE(review): undocumented in the original; confirm with the implementation
     * @param markFlag NOTE(review): undocumented in the original; confirm with the implementation
     * @return the requested page
     */
    Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size, boolean isAsc,
            String keyword, TypeB typeB, boolean isTitle, int markFlag);

    /**
     * Gets a page of child information from the noise set by parent id.
     *
     * @param group    group the data belongs to
     * @param id       data set id
     * @param fatherId parent id
     * @param page     page to return
     * @param size     page size
     * @param isAsc    whether to sort ascending
     * @param keyword  search keyword
     * @param typeB    data type
     * @return the requested page
     */
    Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId, int page, int size,
            boolean isAsc, String keyword, TypeB typeB);

    /**
     * Stores the data once checking is finished.
     *
     * @param group group the data belongs to
     * @param id    data set id
     */
    void checkedThenInsert(String group, String id);

    /**
     * Immediately gets the interim aggregation result.
     *
     * @param group group the data belongs to
     * @param id    data set id
     * @return -2: error while fetching the result; -1: not aggregated; 0: aggregating; 1: aggregated
     */
    int getAggreResultNow(String group, String id);

    /**
     * Immediately gets the interim storage result.
     *
     * @param group group the data belongs to
     * @param id    data set id
     * @return -2: error while fetching the result; -1: not stored; 0: storing; 1: stored
     */
    int getInsertResultNow(String group, String id);
}
\ No newline at end of file
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB.TypeB;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import java.util.Map;
/**
 * Data upload service.
 *
 * @author SJJ
 * @date 2020-02-25 18:02:26
 */
public interface DataUploadService {

    /**
     * Adds the source data set.
     *
     * @param group     group the data belongs to
     * @param id        upload id
     * @param sourceStr source data (NOTE(review): expected format is not documented; confirm with the implementation)
     */
    void addUploadList(String group, String id, String sourceStr);

    /**
     * Starts the upload.
     *
     * @param group      group the data belongs to
     * @param id         upload id
     * @param mperson    person performing the upload (NOTE(review): inferred from the name)
     * @param mtagType   tag type
     * @param filterType filter type
     * @param projectId  project id
     * @param insertType insert type
     */
    void startUpload(String group, String id, String mperson,
            UploadInfo.MtagType mtagType, UploadInfo.FilterType filterType, String projectId, InsertType insertType);

    /**
     * Gets the upload status (progress).
     *
     * @param group group the data belongs to
     * @param id    upload id
     * @return the upload status
     */
    Map<String, Object> getUploadStatus(String group, String id);

    /**
     * Gets the data set for an UploadType.
     *
     * @param group       group the data belongs to
     * @param id          upload id
     * @param page        page to return
     * @param size        page size
     * @param isAsc       whether to sort ascending
     * @param searchField field to search in
     * @param keyword     search keyword
     * @param uploadType  upload type to list
     * @return the requested page
     */
    Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc,
            String searchField, String keyword, UploadInfo.UploadType uploadType);

    /**
     * Gets the DataType.
     *
     * @param json  data to inspect
     * @param typeB data type
     * @return the DataType
     */
    UploadInfo.DataType getDataType(JSONObject json, TypeB typeB);

    /**
     * Clears the data set.
     *
     * @param group group the data belongs to
     * @param id    upload id
     */
    void cleanUploadResult(String group, String id);
}
package com.zhiwei.middleware.automatic.server.graphs;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
/**
 * Keyword matching graph: keywords are stored character by character in nested
 * maps, and {@link #find(String)} reports every stored keyword that occurs in a text.
 *
 * <p>Keywords and texts are lower-cased before use, so matching is case-insensitive
 * and reported keys are lower-case.
 *
 * <p>NOTE(review): {@link #addGraph(String)} is synchronized but {@link #find(String)}
 * is not, so a lookup running concurrently with an insertion reads the maps unguarded.
 *
 * @author SJJ
 * @date 2020-08-11 18:31:57
 */
public class Graphs {
    /** First level of the graph: first character of a keyword mapped to its node. */
    protected Map<Character, Node<Character>> innerPoint = new HashMap<>();

    /**
     * Adds every keyword of the list to the graph.
     *
     * @param keywords keywords to add; {@code null} is ignored
     */
    public void addGraph(List<String> keywords) {
        if (null == keywords) {
            return;
        }
        keywords.forEach(this::addGraph);
    }

    /**
     * Adds a single keyword to the graph.
     *
     * @param keyword keyword to add; {@code null} and empty strings are ignored
     */
    public synchronized void addGraph(String keyword) {
        if (StringUtils.isEmpty(keyword)) {
            return;
        }
        keyword = StringUtils.lowerCase(keyword);
        // Cursor over the graph, starting at the first level.
        Map<Character, Node<Character>> point = innerPoint;
        int last = keyword.length() - 1;
        for (int i = 0; i <= last; i++) {
            char c = keyword.charAt(i);
            // Reuse the node for this character at the current level, or create it.
            Node<Character> node = point.computeIfAbsent(c, k -> {
                Node<Character> newNode = new Node<>();
                newNode.setKey(c);
                return newNode;
            });
            if (i == last) {
                // The last character marks the end of a complete keyword.
                node.setEnd(true);
            } else {
                if (node.getNext() == null) {
                    node.setNext(new HashMap<>());
                }
                point = node.getNext();
            }
        }
    }

    /**
     * Finds all occurrences of stored keywords in the text.
     *
     * @param text text to scan; {@code null} or empty yields an empty list
     * @return one {@link Keyword} per occurrence, with inclusive start/end offsets
     *         into the lower-cased text
     */
    public List<Keyword> find(String text) {
        List<Keyword> keywords = new ArrayList<>();
        if (StringUtils.isEmpty(text)) {
            return keywords;
        }
        text = StringUtils.lowerCase(text);
        // Open partial matches: start offset -> level of the graph the next character must hit.
        Map<Integer, Map<Character, Node<Character>>> points = new TreeMap<>();
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            // 2.1 Advance every open partial match by the current character.
            Iterator<Map.Entry<Integer, Map<Character, Node<Character>>>> iterator = points.entrySet().iterator();
            while (iterator.hasNext()) {
                Map.Entry<Integer, Map<Character, Node<Character>>> entry = iterator.next();
                Node<Character> node = entry.getValue().get(c);
                // 2.2 The run of consecutive hits is broken: this match failed.
                if (null == node) {
                    iterator.remove();
                    continue;
                }
                // 2.3 End of a path: a complete keyword was hit by consecutive characters.
                if (node.isEnd()) {
                    keywords.add(newKeyword(text, entry.getKey(), i));
                }
                // 2.4 The path is exhausted (a keyword may fully contain another: AB, ABC).
                if (node.getNext() == null) {
                    iterator.remove();
                } else {
                    // 2.5 Keep descending until the characters stop hitting child nodes.
                    entry.setValue(node.getNext());
                }
            }
            // 1.1 Entry point: the character hits the first level, so a new match may start here.
            Node<Character> node = innerPoint.get(c);
            if (null == node) {
                continue;
            }
            // Single-character keywords match immediately.
            if (node.isEnd()) {
                keywords.add(newKeyword(text, i, i));
            }
            // 1.2 Register the new partial match so the following characters can extend it.
            if (null != node.getNext()) {
                points.put(i, node.getNext());
            }
        }
        return keywords;
    }

    /**
     * Converts matches into a keyword -> occurrence count map.
     *
     * @param keywords matches to count; {@code null} yields an empty map and
     *                 {@code null} elements are skipped
     * @return occurrence count per lower-cased keyword
     */
    public Map<String, Integer> change2Statistics(List<Keyword> keywords) {
        Map<String, Integer> res = new HashMap<>();
        if (null == keywords) {
            return res;
        }
        for (Keyword keyword : keywords) {
            if (null == keyword) {
                continue;
            }
            res.merge(StringUtils.lowerCase(keyword.getKey()), 1, Integer::sum);
        }
        return res;
    }

    /** Builds the match record for the inclusive range {@code [start, end]} of the text. */
    private static Keyword newKeyword(String text, int start, int end) {
        Keyword keyword = new Keyword();
        keyword.setStart(start);
        keyword.setEnd(end);
        keyword.setKey(text.substring(start, end + 1));
        return keyword;
    }
}
package com.zhiwei.middleware.automatic.server.graphs;
import java.util.List;
import java.util.Map;
/**
 * Contract of a matching graph that is loaded from rule objects and queried with text.
 *
 * @param <T> type of the rule objects the graph is built from
 * @param <O> type of the match results
 */
public interface GraphsServer<T, O> {
/**
 * Loads the given rule objects into the graph.
 *
 * @param t rule objects to load
 */
void addGraph(List<T> t);
/**
 * Matches the text against the loaded rules.
 *
 * @param text text to scan
 * @return the match results
 */
List<O> find(String text);
}
package com.zhiwei.middleware.automatic.server.graphs;
import java.io.Serializable;
public class Keyword implements Serializable {
private static final long serialVersionUID = 6917681073354631602L;
private String key;
private int start;
private int end;
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
Keyword keyword = (Keyword) o;
if (start != keyword.start)
return false;
return end == keyword.end;
}
@Override
public int hashCode() {
int result = start;
result = 31 * result + end;
return result;
}
@Override
public String toString() {
return "Keyword{key=" + key + ", start=" + start + ", end=" +
end + '}';
}
public String getKey() {
return key;
}
public void setKey(String key) {
this.key = key;
}
public int getStart() {
return start;
}
public void setStart(int start) {
this.start = start;
}
public int getEnd() {
return end;
}
public void setEnd(int end) {
this.end = end;
}
}
\ No newline at end of file
package com.zhiwei.middleware.automatic.server.graphs;
import com.zhiwei.middleware.automatic.server.pojo.GroupTerm;
import com.zhiwei.middleware.automatic.server.pojo.MonitorKeyword;
import com.zhiwei.middleware.automatic.server.pojo.QbjcRuleMatchedInfo;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.stream.Collectors;
/**
 * {@link GraphsServer} implementation for monitor keyword rules.
 *
 * <p>A rule keyword string is split on {@code |} into alternatives, and each
 * alternative is split on spaces into words that must all occur in the text.
 * Every word goes into the matching graph and is linked to the word groups it
 * belongs to.
 */
public class MonitorGraphsImpl implements GraphsServer<MonitorKeyword, QbjcRuleMatchedInfo> {
    /**
     * Matching graph.
     */
    private Graphs graphs;
    /**
     * Binding information for the matching graph: word -> groups that contain it.
     */
    private Map<String, List<GroupTerm>> terms;

    public MonitorGraphsImpl() {
        this.graphs = new Graphs();
        this.terms = new HashMap<>();
    }

    /**
     * Rebuilds the graph and the word bindings from the given rules and replaces the
     * current ones. Rules without a monitor level are skipped.
     *
     * @param monitorKeyword rules to load
     */
    @Override
    public void addGraph(List<MonitorKeyword> monitorKeyword) {
        Graphs tempGraphs = new Graphs();
        Map<String, List<GroupTerm>> tempTerms = new HashMap<>();
        monitorKeyword.forEach(keyword -> {
            if (null != keyword.getMonitorLevel()) {
                preGraphs(keyword.getKeywords(), tempGraphs);
                preTerms(keyword, tempTerms);
            }
        });
        // Build first, then swap, so a partly built graph is never queried.
        graphs = tempGraphs;
        terms = tempTerms;
    }

    /**
     * Matches the text against the loaded rules.
     *
     * <p>A word group is a hit when every one of its words occurs in the text; the
     * hit count is the smallest occurrence count among its words.
     *
     * @param text text to scan
     * @return one result per matched word group
     * @throws IllegalStateException when the graph reports a word that has no binding
     */
    @Override
    public List<QbjcRuleMatchedInfo> find(String text) {
        text = StringUtils.lowerCase(text);
        // Return value.
        List<QbjcRuleMatchedInfo> res = new ArrayList<>();
        // Match.
        List<Keyword> results = graphs.find(text);
        // Group the occurrences by matched word.
        Map<String, List<Keyword>> kResults = results.stream().collect(Collectors.groupingBy(Keyword::getKey));
        // Words already processed, so that "A B" is not counted once for A and again for B.
        Set<String> hasMatched = new HashSet<>();
        // Occurrence count per word.
        Map<String, Integer> statis = graphs.change2Statistics(results);
        statis.forEach((keyword, rate) -> {
            List<GroupTerm> list = terms.get(keyword);
            if (null == list) {
                throw new IllegalStateException("keyword不存在:" + keyword);
            }
            // Candidate groups that contain this word.
            list.forEach(groupTerm -> {
                // Collected per group: each result must only carry the occurrences of its
                // own words and must not share a mutable set with other results.
                Set<Keyword> hitKeywords = new HashSet<>();
                int count = -1;
                for (String checkWord : groupTerm.getAndKeywords()) {
                    // The group was already evaluated through one of its other words.
                    if (hasMatched.contains(checkWord)) {
                        count = -1;
                        break;
                    }
                    int current = statis.getOrDefault(checkWord, 0);
                    if (current > 0) {
                        hitKeywords.addAll(kResults.get(checkWord));
                    }
                    // Keep the minimum; -1 means "not initialised yet".
                    count = (count == -1 || current < count) ? current : count;
                }
                // A count above zero means every word of the group occurred.
                if (count > 0) {
                    res.add(toMatchedInfo(groupTerm, hitKeywords, count));
                }
            });
            hasMatched.add(keyword);
        });
        return res;
    }

    /** Builds the result object for a word group that was hit {@code count} times. */
    private static QbjcRuleMatchedInfo toMatchedInfo(GroupTerm groupTerm, Set<Keyword> hitKeywords, int count) {
        MonitorKeyword monitorKeyword = groupTerm.getMonitorKeyword();
        QbjcRuleMatchedInfo ruleMatchedInfo = new QbjcRuleMatchedInfo();
        // Basic rule information.
        ruleMatchedInfo.setId(monitorKeyword.getId());
        ruleMatchedInfo.setProject(monitorKeyword.getProject());
        ruleMatchedInfo.setRuleType(QbjcRuleMatchedInfo.RuleType.getByName(monitorKeyword.getType()));
        ruleMatchedInfo.setChannels(monitorKeyword.getChannels());
        ruleMatchedInfo.setPlatforms(monitorKeyword.getPlatforms());
        ruleMatchedInfo.setMonitorLevel(monitorKeyword.getMonitorLevel());
        // Information about the matched words.
        List<QbjcRuleMatchedInfo.HitInfo> infos = new ArrayList<>();
        infos.add(new QbjcRuleMatchedInfo.HitInfo(hitKeywords, groupTerm.getFullName(), count));
        ruleMatchedInfo.setHitInfos(infos);
        return ruleMatchedInfo;
    }

    /**
     * Adds the word bindings of a rule: each word of each alternative is linked to a
     * {@link GroupTerm} that holds all words of that alternative.
     */
    private void preTerms(MonitorKeyword monitorKeyword, Map<String, List<GroupTerm>> terms) {
        List<String> usedKeywords = monitorKeyword.getKeywords();
        usedKeywords.forEach(usedKeyword -> {
            usedKeyword = StringUtils.lowerCase(usedKeyword);
            // Split the OR logic; every alternative can act as a key.
            String[] andStrs = usedKeyword.split("\\|");
            for (String andStr : andStrs) {
                String[] ands = andStr.trim().split(" +");
                for (String str : ands) {
                    // Create the list on first use, then add the group for this word.
                    terms.computeIfAbsent(str, k -> new ArrayList<>())
                            .add(new GroupTerm(Arrays.asList(ands), usedKeyword, monitorKeyword));
                }
            }
        });
    }

    /**
     * Adds every word of the rule keywords to the matching graph.
     *
     * @param usedKeywords rule keyword strings
     * @param graphs       graph to add the words to
     */
    private void preGraphs(List<String> usedKeywords, Graphs graphs) {
        // Split each keyword string into its individual words.
        usedKeywords.forEach(usedKeyword -> {
            usedKeyword = StringUtils.lowerCase(usedKeyword);
            String[] andStrs = usedKeyword.split("\\|");
            for (String andStr : andStrs) {
                for (String str : andStr.trim().split(" +")) {
                    graphs.addGraph(str);
                }
            }
        });
    }
}
package com.zhiwei.middleware.automatic.server.graphs;
import java.util.Map;
/**
 * Graph node holding a key, an end marker and the map of follow-up nodes.
 *
 * @param <K> key type
 */
public class Node<K> {
    /** Primary key. */
    private K key;
    /** Whether this node satisfies the end condition of any path. */
    private boolean end;
    /** Next nodes; {@code null} means the path is completely finished. */
    private Map<K, Node<K>> next;

    /** @return the primary key of this node */
    public K getKey() {
        return this.key;
    }

    /** @param key the primary key to store */
    public void setKey(K key) {
        this.key = key;
    }

    /** @return the end marker */
    public boolean isEnd() {
        return this.end;
    }

    /** @param end the end marker to store */
    public void setEnd(boolean end) {
        this.end = end;
    }

    /** @return the follow-up nodes, possibly {@code null} */
    public Map<K, Node<K>> getNext() {
        return this.next;
    }

    /** @param next the follow-up nodes to store */
    public void setNext(Map<K, Node<K>> next) {
        this.next = next;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
/**
 * Serializable result carrying a status, a total page count and a map of integer lists.
 */
public class AggreeResult implements Serializable {
    private static final long serialVersionUID = 8971968054515154622L;

    private Status status;
    private int totalPage;
    private Map<String, List<Integer>> data;

    /**
     * Creates a result that only carries a status.
     *
     * @param status status to store
     */
    public AggreeResult(Status status) {
        this.status = status;
    }

    /**
     * Creates a fully populated result.
     *
     * @param status status to store
     * @param totalPage total page count
     * @param data result data
     */
    public AggreeResult(Status status, int totalPage, Map<String, List<Integer>> data) {
        this.status = status;
        this.totalPage = totalPage;
        this.data = data;
    }

    public Status getStatus() {
        return status;
    }

    public void setStatus(Status status) {
        this.status = status;
    }

    public int getTotalPage() {
        return totalPage;
    }

    public void setTotalPage(int totalPage) {
        this.totalPage = totalPage;
    }

    public Map<String, List<Integer>> getData() {
        return data;
    }

    public void setData(Map<String, List<Integer>> data) {
        this.data = data;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable result carrying a status and one page of {@link ResultInfo} entries.
 */
public class CommonAggreeResult implements Serializable {
    private static final long serialVersionUID = 8971968054515154622L;

    private Status status;
    private PageData<CommonAggreeResult.ResultInfo> results;

    /**
     * Creates a result that only carries a status.
     *
     * @param status status to store
     */
    public CommonAggreeResult(Status status) {
        this.status = status;
    }

    /**
     * Creates a fully populated result.
     *
     * @param status status to store
     * @param results page of result entries
     */
    public CommonAggreeResult(Status status, PageData<CommonAggreeResult.ResultInfo> results) {
        this.status = status;
        this.results = results;
    }

    public Status getStatus() {
        return this.status;
    }

    public PageData<CommonAggreeResult.ResultInfo> getResults() {
        return this.results;
    }

    public void setStatus(Status status) {
        this.status = status;
    }

    public void setResults(PageData<CommonAggreeResult.ResultInfo> results) {
        this.results = results;
    }

    /**
     * One result entry: a cluster name, a size, a list of index entries and a template entry.
     */
    public static class ResultInfo implements Serializable {
        private static final long serialVersionUID = -3656509880731033198L;

        private String clusterName;
        private Integer size;
        private List<AggreeDTO> indexes;
        private AggreeDTO templateData;

        public ResultInfo(String clusterName, Integer size, List<AggreeDTO> indexes, AggreeDTO templateData) {
            this.clusterName = clusterName;
            this.size = size;
            this.indexes = indexes;
            this.templateData = templateData;
        }

        public String getClusterName() {
            return this.clusterName;
        }

        public Integer getSize() {
            return this.size;
        }

        public List<AggreeDTO> getIndexes() {
            return this.indexes;
        }

        public AggreeDTO getTemplateData() {
            return this.templateData;
        }

        public void setClusterName(String clusterName) {
            this.clusterName = clusterName;
        }

        public void setSize(Integer size) {
            this.size = size;
        }

        public void setIndexes(List<AggreeDTO> indexes) {
            this.indexes = indexes;
        }

        public void setTemplateData(AggreeDTO templateData) {
            this.templateData = templateData;
        }

        /**
         * Two entries are equal when all four properties are equal (null-safe) and the other
         * object accepts this one through {@link #canEqual(Object)}.
         */
        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof CommonAggreeResult.ResultInfo)) {
                return false;
            }
            CommonAggreeResult.ResultInfo other = (CommonAggreeResult.ResultInfo) o;
            return other.canEqual(this)
                    && java.util.Objects.equals(this.getClusterName(), other.getClusterName())
                    && java.util.Objects.equals(this.getSize(), other.getSize())
                    && java.util.Objects.equals(this.getIndexes(), other.getIndexes())
                    && java.util.Objects.equals(this.getTemplateData(), other.getTemplateData());
        }

        /**
         * Hash over the same four properties that {@link #equals(Object)} compares, so that equal
         * entries land in the same bucket of hash-based collections.
         */
        @Override
        public int hashCode() {
            return java.util.Objects.hash(
                    this.getClusterName(), this.getSize(), this.getIndexes(), this.getTemplateData());
        }

        /** Hook that lets subclasses refuse equality with instances of other types. */
        protected boolean canEqual(Object other) {
            return other instanceof CommonAggreeResult.ResultInfo;
        }

        @Override
        public String toString() {
            return "CommonAggreeResult.ResultInfo(clusterName=" + this.getClusterName() + ", size=" + this.getSize() + ", indexes=" + this.getIndexes() + ", templateData=" + this.getTemplateData() + ")";
        }
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import java.util.List;
/**
 * One keyword group together with the full keyword string and the rule it belongs to.
 */
public class GroupTerm {
    /** Combined keywords of the group. */
    private List<String> andKeywords;
    /** Full name of the keyword. */
    private String fullName;
    /** Monitor keyword this group belongs to. */
    private MonitorKeyword monitorKeyword;

    /**
     * @param asList combined keywords of the group
     * @param usedKeyword full keyword name
     * @param monitorKeyword owning monitor keyword
     */
    public GroupTerm(List<String> asList, String usedKeyword, MonitorKeyword monitorKeyword) {
        andKeywords = asList;
        fullName = usedKeyword;
        this.monitorKeyword = monitorKeyword;
    }

    public List<String> getAndKeywords() {
        return this.andKeywords;
    }

    public void setAndKeywords(List<String> andKeywords) {
        this.andKeywords = andKeywords;
    }

    public String getFullName() {
        return this.fullName;
    }

    public void setFullName(String fullName) {
        this.fullName = fullName;
    }

    public MonitorKeyword getMonitorKeyword() {
        return this.monitorKeyword;
    }

    public void setMonitorKeyword(MonitorKeyword monitorKeyword) {
        this.monitorKeyword = monitorKeyword;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable pair of a {@link MarkInfo} and a list of project names.
 */
public class MarkInfoMulti implements Serializable {
    private static final long serialVersionUID = 124627162986379948L;

    private MarkInfo markInfo;
    private List<String> projects;

    /**
     * @param markInfo mark information to store
     * @param projects projects to store
     */
    public MarkInfoMulti(MarkInfo markInfo, List<String> projects) {
        this.markInfo = markInfo;
        this.projects = projects;
    }

    public MarkInfo getMarkInfo() {
        return this.markInfo;
    }

    public void setMarkInfo(MarkInfo markInfo) {
        this.markInfo = markInfo;
    }

    public List<String> getProjects() {
        return this.projects;
    }

    public void setProjects(List<String> projects) {
        this.projects = projects;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.alibaba.excel.annotation.ExcelProperty;
import com.alibaba.excel.annotation.write.style.ColumnWidth;
/**
 * One row of an uploaded mark spreadsheet; each annotated field is bound to a spreadsheet
 * column by header text and column index.
 *
 * <p>The class is serializable because it is held in a non-transient field of the
 * serializable {@code UploadInfo}; without it, Java serialization of a populated
 * {@code UploadInfo} fails with {@code NotSerializableException}.
 *
 * @author LiuMingHuan
 * @classname MarkUploadInfo
 * @date 2019/10/29
 */
public class MarkUploadInfo implements java.io.Serializable {
    private static final long serialVersionUID = 1L;

    @ExcelProperty(value = "序号", index = 0)
    @ColumnWidth(5)
    private String id;
    @ExcelProperty(value = "时间", index = 1)
    @ColumnWidth(18)
    private String time;
    @ExcelProperty(value = "标题", index = 2)
    @ColumnWidth(35)
    private String title;
    @ExcelProperty(value = "文本", index = 3)
    @ColumnWidth(35)
    private String content;
    @ExcelProperty(value = "地址", index = 4)
    @ColumnWidth(15)
    private String url;
    /**
     * Fixed platform.
     */
    @ExcelProperty(value = "平台", index = 5)
    @ColumnWidth(10)
    private String platform;
    /**
     * Information source: APP\PC\MOBI.
     */
    @ExcelProperty(value = "信源", index = 6)
    @ColumnWidth(10)
    private String originInfo;
    /**
     * c4 name realSource.
     */
    @ExcelProperty(value = "来源", index = 7)
    @ColumnWidth(10)
    private String clientFrom;
    /**
     * Origin: source.
     */
    @ExcelProperty(value = "渠道", index = 8)
    @ColumnWidth(10)
    private String source;
    @ExcelProperty(value = "级别", index = 9)
    @ColumnWidth(5)
    private String level;
    @ExcelProperty(value = "采编权", index = 10)
    @ColumnWidth(5)
    private String acquisitionRights;
    @ExcelProperty(value = "领域", index = 11)
    @ColumnWidth(5)
    private String field;
    @ExcelProperty(value = "认证信息", index = 12)
    @ColumnWidth(10)
    private String authenticationInformation;
    @ExcelProperty(value = "粉丝数", index = 13)
    @ColumnWidth(10)
    private String fans;
    @ExcelProperty(value = "认证类型", index = 14)
    @ColumnWidth(10)
    private String authenticationType;
    @ExcelProperty(value = "是否原发", index = 15)
    @ColumnWidth(10)
    private String primary;
    @ExcelProperty(value = "原创作者", index = 16)
    @ColumnWidth(10)
    private String rootSource;
    @ExcelProperty(value = "原创文本", index = 17)
    @ColumnWidth(10)
    private String rootContent;
    @ExcelProperty(value = "命中词", index = 18)
    @ColumnWidth(10)
    private String keywords;
    @ExcelProperty(value = "UID", index = 19)
    @ColumnWidth(7)
    private String uid;
    @ExcelProperty(value = "MID", index = 20)
    @ColumnWidth(7)
    private String mid;
    @ExcelProperty(value = "标注时间", index = 21)
    @ColumnWidth(10)
    private String mtime;
    @ExcelProperty(value = "标注人", index = 22)
    @ColumnWidth(7)
    private String mperson;
    @ExcelProperty(value = "标签一", index = 23)
    @ColumnWidth(10)
    private String mtagOne;
    @ExcelProperty(value = "标签二", index = 24)
    @ColumnWidth(10)
    private String mtagTwo;
    @ExcelProperty(value = "标签三", index = 25)
    @ColumnWidth(10)
    private String mtagThree;
    @ExcelProperty(value = "标签四", index = 26)
    @ColumnWidth(10)
    private String mtagFour;
    @ExcelProperty(value = "标签五", index = 27)
    @ColumnWidth(10)
    private String mtagFive;
    @ExcelProperty(value = "ctime", index = 28)
    @ColumnWidth(7)
    private Long ctime;
    @ExcelProperty(value = "cname", index = 29)
    @ColumnWidth(7)
    private String cname;
    @ExcelProperty(value = "c1", index = 30)
    @ColumnWidth(7)
    private Integer c1;
    @ExcelProperty(value = "c2", index = 31)
    @ColumnWidth(7)
    private Integer c2;
    @ExcelProperty(value = "c3", index = 32)
    @ColumnWidth(7)
    private Integer c3;
    @ExcelProperty(value = "c4", index = 33)
    @ColumnWidth(7)
    private Integer c4;
    @ExcelProperty(value = "c5", index = 34)
    @ColumnWidth(7)
    private Integer c5;
    @ExcelProperty(value = "origin", index = 35)
    @ColumnWidth(7)
    private String origin;
    @ExcelProperty(value = "foreign", index = 36)
    @ColumnWidth(7)
    private Integer foreign;
    @ExcelProperty(value = "cid", index = 37)
    @ColumnWidth(7)
    private Long cid;
    @ExcelProperty(value = "realSource", index = 38)
    @ColumnWidth(7)
    private String realSource;
    @ExcelProperty(value = "mtag", index = 39)
    @ColumnWidth(7)
    private String mtag;
    /** Carries no spreadsheet column annotation. */
    private String mgroup;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getPlatform() {
        return platform;
    }

    public void setPlatform(String platform) {
        this.platform = platform;
    }

    public String getOriginInfo() {
        return originInfo;
    }

    public void setOriginInfo(String originInfo) {
        this.originInfo = originInfo;
    }

    public String getClientFrom() {
        return clientFrom;
    }

    public void setClientFrom(String clientFrom) {
        this.clientFrom = clientFrom;
    }

    public String getSource() {
        return source;
    }

    public void setSource(String source) {
        this.source = source;
    }

    public String getLevel() {
        return level;
    }

    public void setLevel(String level) {
        this.level = level;
    }

    public String getAcquisitionRights() {
        return acquisitionRights;
    }

    public void setAcquisitionRights(String acquisitionRights) {
        this.acquisitionRights = acquisitionRights;
    }

    public String getField() {
        return field;
    }

    public void setField(String field) {
        this.field = field;
    }

    public String getAuthenticationInformation() {
        return authenticationInformation;
    }

    public void setAuthenticationInformation(String authenticationInformation) {
        this.authenticationInformation = authenticationInformation;
    }

    public String getFans() {
        return fans;
    }

    public void setFans(String fans) {
        this.fans = fans;
    }

    public String getAuthenticationType() {
        return authenticationType;
    }

    public void setAuthenticationType(String authenticationType) {
        this.authenticationType = authenticationType;
    }

    public String getPrimary() {
        return primary;
    }

    public void setPrimary(String primary) {
        this.primary = primary;
    }

    public String getRootSource() {
        return rootSource;
    }

    public void setRootSource(String rootSource) {
        this.rootSource = rootSource;
    }

    public String getRootContent() {
        return rootContent;
    }

    public void setRootContent(String rootContent) {
        this.rootContent = rootContent;
    }

    public String getKeywords() {
        return keywords;
    }

    public void setKeywords(String keywords) {
        this.keywords = keywords;
    }

    public String getUid() {
        return uid;
    }

    public void setUid(String uid) {
        this.uid = uid;
    }

    public String getMid() {
        return mid;
    }

    public void setMid(String mid) {
        this.mid = mid;
    }

    public String getMtime() {
        return mtime;
    }

    public void setMtime(String mtime) {
        this.mtime = mtime;
    }

    public String getMperson() {
        return mperson;
    }

    public void setMperson(String mperson) {
        this.mperson = mperson;
    }

    public String getMtagOne() {
        return mtagOne;
    }

    public void setMtagOne(String mtagOne) {
        this.mtagOne = mtagOne;
    }

    public String getMtagTwo() {
        return mtagTwo;
    }

    public void setMtagTwo(String mtagTwo) {
        this.mtagTwo = mtagTwo;
    }

    public String getMtagThree() {
        return mtagThree;
    }

    public void setMtagThree(String mtagThree) {
        this.mtagThree = mtagThree;
    }

    public String getMtagFour() {
        return mtagFour;
    }

    public void setMtagFour(String mtagFour) {
        this.mtagFour = mtagFour;
    }

    public String getMtagFive() {
        return mtagFive;
    }

    public void setMtagFive(String mtagFive) {
        this.mtagFive = mtagFive;
    }

    public Long getCtime() {
        return ctime;
    }

    public void setCtime(Long ctime) {
        this.ctime = ctime;
    }

    public String getCname() {
        return cname;
    }

    public void setCname(String cname) {
        this.cname = cname;
    }

    public Integer getC1() {
        return c1;
    }

    public void setC1(Integer c1) {
        this.c1 = c1;
    }

    public Integer getC2() {
        return c2;
    }

    public void setC2(Integer c2) {
        this.c2 = c2;
    }

    public Integer getC3() {
        return c3;
    }

    public void setC3(Integer c3) {
        this.c3 = c3;
    }

    public Integer getC4() {
        return c4;
    }

    public void setC4(Integer c4) {
        this.c4 = c4;
    }

    public Integer getC5() {
        return c5;
    }

    public void setC5(Integer c5) {
        this.c5 = c5;
    }

    public String getOrigin() {
        return origin;
    }

    public void setOrigin(String origin) {
        this.origin = origin;
    }

    public Integer getForeign() {
        return foreign;
    }

    public void setForeign(Integer foreign) {
        this.foreign = foreign;
    }

    public Long getCid() {
        return cid;
    }

    public void setCid(Long cid) {
        this.cid = cid;
    }

    public String getRealSource() {
        return realSource;
    }

    public void setRealSource(String realSource) {
        this.realSource = realSource;
    }

    public String getMtag() {
        return mtag;
    }

    public void setMtag(String mtag) {
        this.mtag = mtag;
    }

    public String getMgroup() {
        return mgroup;
    }

    public void setMgroup(String mgroup) {
        this.mgroup = mgroup;
    }

    /**
     * Creates a row with every field set; parameters follow the field declaration order.
     */
    public MarkUploadInfo(String id, String time, String title, String content, String url, String platform, String originInfo, String clientFrom, String source, String level, String acquisitionRights, String field, String authenticationInformation, String fans, String authenticationType, String primary, String rootSource, String rootContent, String keywords, String uid, String mid, String mtime, String mperson, String mtagOne, String mtagTwo, String mtagThree, String mtagFour, String mtagFive, Long ctime, String cname, Integer c1, Integer c2, Integer c3, Integer c4, Integer c5, String origin, Integer foreign, Long cid, String realSource, String mtag, String mgroup) {
        this.id = id;
        this.time = time;
        this.title = title;
        this.content = content;
        this.url = url;
        this.platform = platform;
        this.originInfo = originInfo;
        this.clientFrom = clientFrom;
        this.source = source;
        this.level = level;
        this.acquisitionRights = acquisitionRights;
        this.field = field;
        this.authenticationInformation = authenticationInformation;
        this.fans = fans;
        this.authenticationType = authenticationType;
        this.primary = primary;
        this.rootSource = rootSource;
        this.rootContent = rootContent;
        this.keywords = keywords;
        this.uid = uid;
        this.mid = mid;
        this.mtime = mtime;
        this.mperson = mperson;
        this.mtagOne = mtagOne;
        this.mtagTwo = mtagTwo;
        this.mtagThree = mtagThree;
        this.mtagFour = mtagFour;
        this.mtagFive = mtagFive;
        this.ctime = ctime;
        this.cname = cname;
        this.c1 = c1;
        this.c2 = c2;
        this.c3 = c3;
        this.c4 = c4;
        this.c5 = c5;
        this.origin = origin;
        this.foreign = foreign;
        this.cid = cid;
        this.realSource = realSource;
        this.mtag = mtag;
        this.mgroup = mgroup;
    }

    /** Creates an empty row; all fields start as {@code null}. */
    public MarkUploadInfo() {}
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
/**
 * Settings of one mark upload: identifiers, the person marking, and the tag, filter and
 * insert modes.
 */
public class MarkUploadRule {
    private String id;
    private String group;
    private String mperson;
    private UploadInfo.MtagType mtagType;
    private UploadInfo.FilterType filterType;
    private String projectId;
    private InsertType insertType;

    /**
     * Creates a rule with every field set.
     */
    public MarkUploadRule(String id, String group, String mperson,
            UploadInfo.MtagType mtagType, UploadInfo.FilterType filterType,
            String projectId, InsertType insertType) {
        setId(id);
        setGroup(group);
        setMperson(mperson);
        setMtagType(mtagType);
        setFilterType(filterType);
        setProjectId(projectId);
        setInsertType(insertType);
    }

    public String getId() {
        return this.id;
    }

    public final void setId(String id) {
        this.id = id;
    }

    public String getGroup() {
        return this.group;
    }

    public final void setGroup(String group) {
        this.group = group;
    }

    public String getMperson() {
        return this.mperson;
    }

    public final void setMperson(String mperson) {
        this.mperson = mperson;
    }

    public UploadInfo.MtagType getMtagType() {
        return this.mtagType;
    }

    public final void setMtagType(UploadInfo.MtagType mtagType) {
        this.mtagType = mtagType;
    }

    public UploadInfo.FilterType getFilterType() {
        return this.filterType;
    }

    public final void setFilterType(UploadInfo.FilterType filterType) {
        this.filterType = filterType;
    }

    public String getProjectId() {
        return this.projectId;
    }

    public final void setProjectId(String projectId) {
        this.projectId = projectId;
    }

    public InsertType getInsertType() {
        return this.insertType;
    }

    public final void setInsertType(InsertType insertType) {
        this.insertType = insertType;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import java.util.List;
/**
 * A monitoring rule: identity, type, keyword strings, channel and platform lists, project and
 * monitor level.
 */
public class MonitorKeyword {
    private String id;
    private String name;
    private String type;
    private List<String> keywords;
    private List<String> channels;
    private List<String> platforms;
    private String project;
    private MonitorLevel monitorLevel;

    /** @return the rule id */
    public String getId() {
        return this.id;
    }

    /** @param id the rule id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the rule name */
    public String getName() {
        return this.name;
    }

    /** @param name the rule name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the rule type */
    public String getType() {
        return this.type;
    }

    /** @param type the rule type to store */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the keyword strings */
    public List<String> getKeywords() {
        return this.keywords;
    }

    /** @param keywords the keyword strings to store */
    public void setKeywords(List<String> keywords) {
        this.keywords = keywords;
    }

    /** @return the channel list */
    public List<String> getChannels() {
        return this.channels;
    }

    /** @param channels the channel list to store */
    public void setChannels(List<String> channels) {
        this.channels = channels;
    }

    /** @return the platform list */
    public List<String> getPlatforms() {
        return this.platforms;
    }

    /** @param platforms the platform list to store */
    public void setPlatforms(List<String> platforms) {
        this.platforms = platforms;
    }

    /** @return the project */
    public String getProject() {
        return this.project;
    }

    /** @param project the project to store */
    public void setProject(String project) {
        this.project = project;
    }

    /** @return the monitor level */
    public MonitorLevel getMonitorLevel() {
        return this.monitorLevel;
    }

    /** @param monitorLevel the monitor level to store */
    public void setMonitorLevel(MonitorLevel monitorLevel) {
        this.monitorLevel = monitorLevel;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
/**
 * Monitoring tier level, combining a first-tier colour with a second-tier grade.
 *
 * @author shentao
 * @date 2021/9/16 18:17
 */
public class MonitorLevel {
    /**
     * Id, e.g. 1111000000 1112000000 1113000000 1114000000 1115000000 1211000000
     * 1212000000 1213000000 1214000000 1215000000 1311000000 1312000000 1313000000
     * 1314000000 1315000000.
     */
    private Integer id;
    /**
     * Combined tier label; the constructor sets it to the two enum constant names joined.
     */
    private String level;
    /**
     * First-tier name; the constructor sets it to the {@link LevelOne} constant name.
     */
    private String levelOne;
    /**
     * First-tier weight, e.g. 1100000000 1200000000 1300000000.
     */
    private Integer levelOneWeights;
    /**
     * Second-tier name; the constructor sets it to the {@link LevelTwo} constant name.
     */
    private String levelTwo;
    /**
     * Second-tier weight, e.g. 11000000 12000000 13000000 14000000 15000000.
     */
    private Integer levelTwoWeights;

    /**
     * Builds a level from its two tiers: the id is the sum of both weights and the label is the
     * concatenation of both constant names.
     *
     * @param levelOne first tier
     * @param levelTwo second tier
     */
    public MonitorLevel(LevelOne levelOne, LevelTwo levelTwo) {
        String firstName = levelOne.name();
        String secondName = levelTwo.name();
        Integer firstWeight = levelOne.getWeights();
        Integer secondWeight = levelTwo.getWeights();

        this.id = firstWeight + secondWeight;
        this.level = firstName + secondName;
        this.levelOne = firstName;
        this.levelOneWeights = firstWeight;
        this.levelTwo = secondName;
        this.levelTwoWeights = secondWeight;
    }

    public Integer getId() {
        return this.id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getLevel() {
        return this.level;
    }

    public void setLevel(String level) {
        this.level = level;
    }

    public String getLevelOne() {
        return this.levelOne;
    }

    public void setLevelOne(String levelOne) {
        this.levelOne = levelOne;
    }

    public Integer getLevelOneWeights() {
        return this.levelOneWeights;
    }

    public void setLevelOneWeights(Integer levelOneWeights) {
        this.levelOneWeights = levelOneWeights;
    }

    public String getLevelTwo() {
        return this.levelTwo;
    }

    public void setLevelTwo(String levelTwo) {
        this.levelTwo = levelTwo;
    }

    public Integer getLevelTwoWeights() {
        return this.levelTwoWeights;
    }

    public void setLevelTwoWeights(Integer levelTwoWeights) {
        this.levelTwoWeights = levelTwoWeights;
    }

    /**
     * Second tier.
     */
    public enum LevelTwo {
        I(11000000),
        II(12000000),
        III(13000000),
        IV(14000000),
        V(15000000);

        private Integer weights;

        LevelTwo(Integer weights) {
            this.weights = weights;
        }

        /** @return the weight of this tier */
        public Integer getWeights() {
            return this.weights;
        }
    }

    /**
     * First tier.
     */
    public enum LevelOne {
        红色(1100000000),
        黄色(1200000000),
        蓝色(1300000000);

        private Integer weights;

        LevelOne(Integer weights) {
            this.weights = weights;
        }

        /** @return the weight of this tier */
        public Integer getWeights() {
            return this.weights;
        }
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable holder for one page of items together with its paging numbers.
 *
 * @param <T> serializable item type
 */
public class PageData<T extends Serializable> implements Serializable {
    private static final long serialVersionUID = -9171451550170066449L;

    private int page;
    private int total;
    private int totalPage;
    private int pageLimit;
    private List<T> list;

    /** Creates an empty page; numeric fields start at 0 and the list at {@code null}. */
    public PageData() {
    }

    /**
     * Creates a fully populated page.
     *
     * @param page page number
     * @param total total item count
     * @param totalPage total page count
     * @param pageLimit page size limit
     * @param list items of this page
     */
    public PageData(int page, int total, int totalPage, int pageLimit, List<T> list) {
        this.page = page;
        this.total = total;
        this.totalPage = totalPage;
        this.pageLimit = pageLimit;
        this.list = list;
    }

    public int getPage() {
        return page;
    }

    public void setPage(int page) {
        this.page = page;
    }

    public int getTotal() {
        return total;
    }

    public void setTotal(int total) {
        this.total = total;
    }

    public int getTotalPage() {
        return totalPage;
    }

    public void setTotalPage(int totalPage) {
        this.totalPage = totalPage;
    }

    public int getPageLimit() {
        return pageLimit;
    }

    public void setPageLimit(int pageLimit) {
        this.pageLimit = pageLimit;
    }

    public List<T> getList() {
        return list;
    }

    public void setList(List<T> list) {
        this.list = list;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.middleware.automatic.server.graphs.Keyword;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Describes one matched rule: the rule's basic data plus the ids and keyword hits that
 * matched.
 */
public class QbjcRuleMatchedInfo {
    /** Unique id (the rule id is used). */
    private String id;
    /** Project name. */
    private String project;
    /** Rule type. */
    private RuleType ruleType;
    /** Channel list. */
    private List<String> channels;
    /** Platform list. */
    private List<String> platforms;
    /** Monitor level. */
    private MonitorLevel monitorLevel;
    /** Matched id list. */
    private Set<String> hitIds;
    /** Matched information list. */
    private List<HitInfo> hitInfos;

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getProject() {
        return this.project;
    }

    public void setProject(String project) {
        this.project = project;
    }

    public RuleType getRuleType() {
        return this.ruleType;
    }

    public void setRuleType(RuleType ruleType) {
        this.ruleType = ruleType;
    }

    public List<String> getChannels() {
        return this.channels;
    }

    public void setChannels(List<String> channels) {
        this.channels = channels;
    }

    public List<String> getPlatforms() {
        return this.platforms;
    }

    public void setPlatforms(List<String> platforms) {
        this.platforms = platforms;
    }

    public MonitorLevel getMonitorLevel() {
        return this.monitorLevel;
    }

    public void setMonitorLevel(MonitorLevel monitorLevel) {
        this.monitorLevel = monitorLevel;
    }

    public Set<String> getHitIds() {
        return this.hitIds;
    }

    public void setHitIds(Set<String> hitIds) {
        this.hitIds = hitIds;
    }

    public List<HitInfo> getHitInfos() {
        return this.hitInfos;
    }

    public void setHitInfos(List<HitInfo> hitInfos) {
        this.hitInfos = hitInfos;
    }

    /**
     * Adds this object's own id to the matched id set.
     *
     * @return this instance
     */
    public QbjcRuleMatchedInfo appendhitIds() {
        return appendhitIds(this.id);
    }

    /**
     * Adds the given id to the matched id set while holding this object's monitor. The set is
     * created on first use and seeded with this object's own id.
     *
     * @param id id to add
     * @return this instance
     */
    public QbjcRuleMatchedInfo appendhitIds(String id) {
        synchronized (this) {
            if (hitIds == null) {
                Set<String> created = new HashSet<>();
                created.add(this.id);
                hitIds = created;
            }
            hitIds.add(id);
        }
        return this;
    }

    /**
     * Hit details for one keyword group.
     */
    public static class HitInfo {
        /** Information about the Keywords that were hit. */
        private Set<Keyword> hitKeywords;
        /** Full keyword name. */
        private String fullName;
        /** Number of hits (the lowest value is taken). */
        private int rate;

        public HitInfo() {}

        /**
         * @param hitKeywords keywords that were hit
         * @param fullName full keyword name
         * @param count number of hits, stored as the rate
         */
        public HitInfo(Set<Keyword> hitKeywords, String fullName, int count) {
            this.hitKeywords = hitKeywords;
            this.fullName = fullName;
            this.rate = count;
        }

        public Set<Keyword> getHitKeywords() {
            return this.hitKeywords;
        }

        public void setHitKeywords(Set<Keyword> hitKeywords) {
            this.hitKeywords = hitKeywords;
        }

        public String getFullName() {
            return this.fullName;
        }

        public void setFullName(String fullName) {
            this.fullName = fullName;
        }

        public int getRate() {
            return this.rate;
        }

        public void setRate(int rate) {
            this.rate = rate;
        }
    }

    /**
     * Rule type, each with a display name.
     */
    public enum RuleType {
        KEYWORD("关键词"),
        CHANNEL("渠道");

        /** Cached constant array, so lookups do not call {@code values()} again. */
        private static RuleType[] values = RuleType.values();

        private String name;

        private RuleType(String name) {
            this.name = name;
        }

        public String getName() {
            return this.name;
        }

        /**
         * Looks a type up by its display name.
         *
         * @param name display name to look for
         * @return the matching type, or {@code null} when no type has that name
         */
        public static RuleType getByName(String name) {
            for (int i = 0; i < values.length; i++) {
                RuleType candidate = values[i];
                if (candidate.getName().equals(name)) {
                    return candidate;
                }
            }
            return null;
        }
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
/**
 * Status values, declared in the order START, RUN, END, ERROR.
 */
public enum Status {
    START, RUN, END, ERROR
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.base.category.ClassB.TypeB;
import com.zhiwei.base.entity.CommonDO;
import java.io.Serializable;
/**
 * Data upload entity.
 *
 * @author SJJ
 * @date 2020-02-17 10:19:08
 */
public class UploadInfo implements Serializable {
    private static final long serialVersionUID = -1339177542820210256L;

    /** Original data. */
    MarkUploadInfo originData;
    /** Base data. */
    CompoundCommonDO compound;
    /** Message type. */
    TypeB typeB;
    /** Data type. */
    DataType dataType;
    /** Upload result for the main store. */
    Boolean dwResult;
    /** Upload result for the mark store. */
    Boolean markResult;
    /** Whether the operation is abnormal. */
    boolean isError;
    /** Error description. */
    String errorMsg;

    public UploadInfo() {
    }

    /**
     * @param originData original data
     */
    public UploadInfo(MarkUploadInfo originData) {
        this.originData = originData;
    }

    /**
     * @param originData original data
     * @param compound base data
     * @param typeB message type
     */
    public UploadInfo(MarkUploadInfo originData, CompoundCommonDO compound, TypeB typeB) {
        this.originData = originData;
        this.compound = compound;
        this.typeB = typeB;
    }

    /**
     * Stores the error description and raises the error flag.
     *
     * @param errorMsg error description
     */
    public void setError(String errorMsg) {
        isError = true;
        this.errorMsg = errorMsg;
    }

    /** Clears the error description and lowers the error flag. */
    public void removeError() {
        isError = false;
        errorMsg = null;
    }

    public boolean isError() {
        return this.isError;
    }

    public void setError(boolean error) {
        this.isError = error;
    }

    public String getErrorMsg() {
        return this.errorMsg;
    }

    public void setErrorMsg(String errorMsg) {
        this.errorMsg = errorMsg;
    }

    public MarkUploadInfo getOriginData() {
        return this.originData;
    }

    public void setOriginData(MarkUploadInfo originData) {
        this.originData = originData;
    }

    public CompoundCommonDO getCompound() {
        return this.compound;
    }

    public void setCompound(CompoundCommonDO compound) {
        this.compound = compound;
    }

    public TypeB getTypeB() {
        return this.typeB;
    }

    public void setTypeB(TypeB typeB) {
        this.typeB = typeB;
    }

    public DataType getDataType() {
        return this.dataType;
    }

    public void setDataType(DataType dataType) {
        this.dataType = dataType;
    }

    public Boolean getDwResult() {
        return this.dwResult;
    }

    public void setDwResult(Boolean dwResult) {
        this.dwResult = dwResult;
    }

    public Boolean getMarkResult() {
        return this.markResult;
    }

    public void setMarkResult(Boolean markResult) {
        this.markResult = markResult;
    }

    public enum FilterType {
        /** Filter. */
        FILTER,
        /** Filter and complete. */
        FILTER_COMPLETE
    }

    public enum MtagType {
        /** Overwrite historical tags. */
        INDEX,
        /** Update historical tags. */
        UPDATE
    }

    public enum DataType {
        /** New data not yet matched in the stores. */
        EXTERNAL,
        /** Exists in the public-opinion store. */
        DW,
        /** Exists in the mark store. */
        MARK
    }

    public enum UploadType {
        /** Format validation error. */
        FORMAR_ERROR,
        /** Field format error. */
        FIELD_ERROR,
        /** Uploaded successfully. */
        SUCCESS,
        /** Upload failed. */
        FAILED
    }

    /**
     * Pair of {@link CommonDO} records, one named dw and one named mark.
     */
    public static class CompoundCommonDO implements Serializable {
        private static final long serialVersionUID = -657894841924114949L;

        CommonDO dw;
        CommonDO mark;

        public CompoundCommonDO() {}

        public CompoundCommonDO(CommonDO dw, CommonDO mark) {
            this.dw = dw;
            this.mark = mark;
        }

        public CommonDO getDw() {
            return this.dw;
        }

        public void setDw(CommonDO dw) {
            this.dw = dw;
        }

        public CommonDO getMark() {
            return this.mark;
        }

        public void setMark(CommonDO mark) {
            this.mark = mark;
        }
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
/**
 * Counters describing the outcome of one upload, plus a numeric status.
 */
public class UploadStatus {
    int status = 0;
    /** Total number of uploaded rows. */
    int totalCount;
    /** Number of format errors. */
    int formatErrorCount;
    /** Number of field errors. */
    int fieldErrorCount;
    /** Number of successes. */
    int successCount;
    /** Number of failures. */
    int failedCount;

    public UploadStatus() {
    }

    /**
     * Renders the five counters as one labelled line; the status field is not included.
     */
    @Override
    public String toString() {
        return "共上传数据条数:" + totalCount
                + ",格式错误数:" + formatErrorCount
                + ",字段错误数:" + fieldErrorCount
                + ",成功数:" + successCount
                + ",失败数:" + failedCount;
    }

    public int getStatus() {
        return this.status;
    }

    public void setStatus(int status) {
        this.status = status;
    }

    public int getTotalCount() {
        return this.totalCount;
    }

    public void setTotalCount(int totalCount) {
        this.totalCount = totalCount;
    }

    public int getFormatErrorCount() {
        return this.formatErrorCount;
    }

    public void setFormatErrorCount(int formatErrorCount) {
        this.formatErrorCount = formatErrorCount;
    }

    public int getFieldErrorCount() {
        return this.fieldErrorCount;
    }

    public void setFieldErrorCount(int fieldErrorCount) {
        this.fieldErrorCount = fieldErrorCount;
    }

    public int getSuccessCount() {
        return this.successCount;
    }

    public void setSuccessCount(int successCount) {
        this.successCount = successCount;
    }

    public int getFailedCount() {
        return this.failedCount;
    }

    public void setFailedCount(int failedCount) {
        this.failedCount = failedCount;
    }
}
package com.zhiwei.middleware.automatic.server.pojo.dto;
/**
 * Transfer object pairing a record id with its text.
 *
 * <p>The class declared a {@code serialVersionUID} without implementing
 * {@link java.io.Serializable}, which left the constant without effect and made the DTO
 * unusable with Java serialization. It now implements the interface (referenced by its
 * fully-qualified name so no new import is required).
 */
public class AggreeDTO implements java.io.Serializable {
    private static final long serialVersionUID = -2649288545116289667L;

    /** Identifier of the record. */
    private String id;
    /** Text content of the record. */
    private String text;

    /**
     * Creates a DTO with both fields set.
     *
     * @param id   identifier of the record
     * @param text text content of the record
     */
    public AggreeDTO(String id, String text) {
        this.id = id;
        this.text = text;
    }

    /** @return the record identifier */
    public String getId() {
        return this.id;
    }

    /** @return the text content */
    public String getText() {
        return this.text;
    }

    /** @param id the record identifier to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @param text the text content to store */
    public void setText(String text) {
        this.text = text;
    }
}
package com.zhiwei.middleware.automatic.server.pojo.enums;
/**
 * Selects the insert target.
 * NOTE(review): presumably MARK means "mark store only" and ALL means "every store";
 * the usage is not visible here — confirm against callers.
 */
public enum InsertType {
MARK,
ALL;
}
package com.zhiwei.middleware.automatic.server.pojo.enums;
public enum TemplateStatus {
运行中,
重置中,
已重置,
重置失败,
待删除;
}
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>middleware-automatic-center-server</artifactId>
<version>1.0-SNAPSHOT</version>
<parent>
<groupId>com.zhiwei</groupId>
<artifactId>middleware-automatic-center</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<properties>
<redisson.version>3.17.3</redisson.version>
<json.version>1.2.47</json.version>
<push-log.version>2.17.0-SNAPSHOT</push-log.version>
<curator.version>2.12.0</curator.version>
<es.version>7.9.2</es.version>
<es-client.version>0.0.4-SNAPSHOT</es-client.version>
<filter.version>1.1.6-SNAPSHOT</filter.version>
<qbjc-bean.version>1.1.4.1-SNAPSHOT</qbjc-bean.version>
<nlp-aggree.version>0.0.5-SNAPSHOT</nlp-aggree.version>
<dubbo-server.version>2.7.4.1</dubbo-server.version>
<automatic.version>1.0-SNAPSHOT</automatic.version>
<base.version>2.0.0-SNAPSHOT</base.version>
<marker.version>1.2.3-SNAPSHOT</marker.version>
<kafka.version>2.4.1.RELEASE</kafka.version>
</properties>
<dependencies>
<!-- kafka -->
<dependency>
<groupId>org.springframework.kafka</groupId>
<artifactId>spring-kafka</artifactId>
<version>${kafka.version}</version>
<exclusions>
<exclusion>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>wechat</artifactId>
<version>1.3.7-SNAPSHOT</version>
</dependency>
<!-- 标注客户端 -->
<dependency>
<groupId>com.zhiwei.middleware</groupId>
<artifactId>marker-client</artifactId>
<version>${marker.version}</version>
</dependency>
<dependency>
<groupId>com.zhiwei.base</groupId>
<artifactId>base-objects-application</artifactId>
<version>${base.version}</version>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>middleware-automatic-center-client</artifactId>
<version>${automatic.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo-spring-boot-starter</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.zhiwei.nlp</groupId>
<artifactId>nlp-aggree</artifactId>
<version>${nlp-aggree.version}</version>
</dependency>
<!-- 日志依赖使用crawler-filter -->
<dependency>
<groupId>com.zhiwei.middleware</groupId>
<artifactId>cleaner-unified-filter</artifactId>
<version>${filter.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>qbjc-bean</artifactId>
<version>${qbjc-bean.version}</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>${es.version}</version>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>es-client</artifactId>
<version>${es-client.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-mongodb</artifactId>
<version>${spring-boot.version}</version>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</exclusion>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>${curator.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<!--日志整合-->
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>push-log</artifactId>
<version>${push-log.version}</version>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
<version>${spring-boot.version}</version>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.redisson</groupId>
<artifactId>redisson-spring-boot-starter</artifactId>
<version>${redisson.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-web -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<version>${spring-boot.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${json.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.dubbo/dubbo-spring-boot-starter -->
<dependency>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo-spring-boot-starter</artifactId>
<version>${dubbo-server.version}</version>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
<version>${dubbo-server.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>fastjson</artifactId>
<groupId>com.alibaba</groupId>
</exclusion>
<exclusion>
<artifactId>snakeyaml</artifactId>
<groupId>org.yaml</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.zookeeper/zookeeper -->
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package com.zhiwei.middleware.automatic.server;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Spring Boot entry point of the automatic-marking middleware server.
 */
@SpringBootApplication
public class Server {
    private static final Logger log = LogManager.getLogger(Server.class);

    /**
     * Boots the Spring application context.
     *
     * <p>A startup failure is logged with its full stack trace and the method returns
     * without reporting success. Previously only {@code e.getMessage()} was printed to
     * stdout and the "started successfully" line was logged even after a failure.
     *
     * @param args command-line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        try {
            SpringApplication.run(Server.class, args);
        } catch (Exception e) {
            // Keep the original best-effort behaviour (no rethrow), but make the failure visible.
            log.error("自动标注中间件启动失败", e);
            return;
        }
        log.info("时间:,自动标注中间件启动成功");
    }
}
package com.zhiwei.middleware.automatic.server.base;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.search.SearchHit;
/**
 * Per-data-type operations used while processing uploaded mark data: converting upload
 * rows, building ES queries for them and mapping ES hits back to entities.
 */
public interface BaseDataUploadService {
/**
 * Returns the data type ({@link ClassB.TypeB}) this implementation handles.
 *
 * @return the handled type
 */
ClassB.TypeB getTypeB();
/**
 * Searches the main ("dw") data store for the record matching the upload by its text content.
 *
 * @param info the upload row being processed
 * @return the matching entity; implementations in this file return {@code null} when nothing matches
 */
CommonDO searchDwByContentNew(MarkUploadResult info);
/**
 * Converts an uploaded spreadsheet row into the data-upload entity.
 *
 * @param info the uploaded row
 * @return the converted upload entity
 * @throws Exception if the row cannot be converted (e.g. a field fails validation)
 */
UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info)
throws Exception;
/**
 * Builds the ES query that looks a row up by URL.
 *
 * @param result the mark upload row
 * @return the URL query
 */
BoolQueryBuilder urlSearchQuery(MarkUploadResult result);
/**
 * Builds the ES query used for the text search of a row.
 *
 * @param result the mark upload row
 * @return the text-search query
 */
BoolQueryBuilder textSearchQuery(MarkUploadResult result);
/**
 * Converts an ES hit into a base entity.
 *
 * @param hit the ES search hit
 * @return the base entity
 */
CommonDO getCommonDOBySearchHit(SearchHit hit);
/**
 * Converts a mark upload row into a {@link MarkInfo}.
 *
 * @param result the mark upload row
 * @param mperson the person who marked the data
 * @param group the project (group) the mark belongs to
 * @param originMtag optional existing tag(s)
 * @return the resulting {@link MarkInfo}
 */
MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag);
}
package com.zhiwei.middleware.automatic.server.base;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.functional.*;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import java.util.*;
/**
 * Carries a batch of rows through a two-phase bulk ES lookup: {@link #bulkQuery} folds one
 * query clause per row into a single bool query (collecting indices and types on the way),
 * and {@link #searchCallback} hands the grouped hits back to each row.
 *
 * @param <T> row type
 */
public class BulkTemplate<T> {
    private static final Logger log = LogManager.getLogger(BulkTemplate.class);

    /** Rows of the current batch. */
    private List<T> source;
    /** Label of the current stage, used as a prefix in per-row error reports. */
    private String state;
    /** Whether {@link #searchCallback} may run, i.e. {@link #bulkQuery} produced at least one index. */
    private boolean isNext;
    /** Accumulated bool query; every row contributes one {@code should} clause. */
    private BoolQueryBuilder queryBuilder;
    /** Indices collected from the rows. */
    private Set<String> indexSet;
    /** Data types collected from the rows. */
    private Set<ClassB.TypeB> typeSet;

    /**
     * Creates a template for the given batch with empty query, index set and type set.
     *
     * @param source rows of the batch
     * @param state  stage label used in error reports
     */
    public BulkTemplate(List<T> source, String state) {
        this.queryBuilder = QueryBuilders.boolQuery();
        this.typeSet = new HashSet<>();
        this.indexSet = new HashSet<>();
        this.isNext = false;
        this.state = state;
        this.source = source;
    }

    /**
     * Builds the combined query for the whole batch. A row whose processing throws is
     * reported through {@code exception} and skipped. If no index was collected, the
     * template is flagged so that {@link #searchCallback} does nothing.
     *
     * @param esRowQuery produces the query clause of a row
     * @param esIndex    produces the index of a row; may be {@code null} to skip index collection
     * @param classType  produces the data type of a row; may be {@code null} to skip type collection
     * @param exception  receives per-row failures
     */
    public void bulkQuery(EsRowQuery<T> esRowQuery, EsIndex<T> esIndex, DataClassType<T> classType, UploadRowException<T> exception) {
        for (T row : source) {
            try {
                queryBuilder.should(esRowQuery.rowQuery(row));
                if (esIndex != null) {
                    indexSet.add(esIndex.getIndex(row));
                }
                if (classType != null) {
                    typeSet.add(classType.getClassType(row));
                }
            } catch (Exception e) {
                exception.rowException(row, state + "-构建查询条件", e.getMessage());
            }
        }
        isNext = !indexSet.isEmpty();
        if (!isNext) {
            log.error("批量操作-构建查询条件阶段 es索引为空");
        }
    }

    /**
     * Merges the search hits back into each row. Does nothing unless the preceding
     * {@link #bulkQuery} collected at least one index.
     *
     * @param hitMap    hits grouped by row key
     * @param rule      upload rule forwarded to the merge callback
     * @param rowKey    computes the lookup key of a row
     * @param dataMerge merges the hits found for a row (possibly {@code null}) into the row
     * @param exception receives per-row failures
     */
    public void searchCallback(Map<String, List<SearchHit>> hitMap, MarkUploadRule rule, RowKey<T> rowKey, DataMerge<T> dataMerge, UploadRowException<T> exception) {
        if (isNext) {
            for (T row : source) {
                try {
                    dataMerge.dataMerge(hitMap.get(rowKey.getRowKey(row)), row, rule);
                } catch (Exception e) {
                    exception.rowException(row, state + "-es数据合并", e.getMessage());
                }
            }
        }
    }

    /** @return the indices collected by the last {@link #bulkQuery} */
    public Set<String> getIndexSet() {
        return this.indexSet;
    }

    /** @return the data types collected by the last {@link #bulkQuery} */
    public Set<ClassB.TypeB> getTypeSet() {
        return this.typeSet;
    }

    /** @return the accumulated bool query */
    public BoolQueryBuilder getQueryBuilder() {
        return this.queryBuilder;
    }

    /** @return the rows of the current batch */
    public List<T> getSource() {
        return this.source;
    }

    /**
     * Re-initialises the template for a new batch, discarding the previous query, index
     * set and type set.
     *
     * @param source rows of the new batch
     * @param state  stage label of the new batch
     */
    public void clean(List<T> source, String state) {
        this.queryBuilder = QueryBuilders.boolQuery();
        this.typeSet = new HashSet<>();
        this.indexSet = new HashSet<>();
        this.isNext = false;
        this.state = state;
        this.source = source;
    }
}
package com.zhiwei.middleware.automatic.server.base;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.serializer.ValueFilter;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.*;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import java.util.ArrayList;
import java.util.List;
/**
 * Shared base for the per-type upload services: holds the handled data type, the entity
 * classes of the "dw" and "mark" forms and the {@link DubboHandler}, and offers helpers
 * for duplicate detection, entity conversion and URL query building.
 */
public class DataUploadCommon {
    private final ClassB.TypeB typeB;
    private final Class<? extends CommonDO> dwClazz;
    private final Class<? extends CommonDO> markClazz;
    private final DubboHandler dubboHandler;

    /**
     * @param typeB        data type handled by the concrete service
     * @param dwClazz      entity class of the "dw" form
     * @param markClazz    entity class of the "mark" form
     * @param dubboHandler handler used for the existence checks
     */
    public DataUploadCommon(ClassB.TypeB typeB, Class<? extends CommonDO> dwClazz, Class<? extends CommonDO> markClazz,
            DubboHandler dubboHandler) {
        this.dubboHandler = dubboHandler;
        this.markClazz = markClazz;
        this.dwClazz = dwClazz;
        this.typeB = typeB;
    }

    /** @return the data type handled by this service */
    public ClassB.TypeB getTypeB() {
        return this.typeB;
    }

    /** @return the entity class of the "dw" form */
    public Class<? extends CommonDO> getDwClazz() {
        return this.dwClazz;
    }

    /** @return the entity class of the "mark" form */
    public Class<? extends CommonDO> getMarkClazz() {
        return this.markClazz;
    }

    /** @return the handler used for the existence checks */
    public DubboHandler getDubboHandler() {
        return this.dubboHandler;
    }

    /**
     * Classifies an upload by where it already exists and stores the result on {@code info}.
     * The mark form is checked first, then the dw form; if neither is known to the handler
     * the data is {@code EXTERNAL}.
     *
     * @param info the upload to classify; its compound mark and dw entities are dereferenced
     * @return the resolved data type (also written to {@code info})
     * @throws Exception declared for callers; raised by the underlying checks
     */
    public final UploadInfo.DataType getDataType(UploadInfo info) throws Exception {
        final UploadInfo.DataType resolved;
        if (dubboHandler.contains(info.getCompound().getMark().filterInfo())) {
            // already present in the mark store
            resolved = UploadInfo.DataType.MARK;
        } else if (dubboHandler.contains(info.getCompound().getDw().filterInfo())) {
            // already present in the public-opinion (dw) store
            resolved = UploadInfo.DataType.DW;
        } else {
            resolved = UploadInfo.DataType.EXTERNAL;
        }
        info.setDataType(resolved);
        return resolved;
    }

    /**
     * Converts a dw entity into the mark entity class by round-tripping through JSON and
     * setting the group field.
     *
     * @param dw     the dw entity
     * @param mgroup group value written to the mark entity
     * @return the entity parsed as the mark class
     */
    public final CommonDO convert2Mark(CommonDO dw, String mgroup) {
        JSONObject payload = dw.toJSON();
        payload.put(GenericAttribute.ES_M_GROUP, mgroup);
        String serialized = payload.toJSONString();
        return JSONObject.parseObject(serialized, markClazz);
    }

    /**
     * Builds a bool query whose {@code should} clauses match the URL under its known
     * variants: the URL itself, the same URL with the http/https scheme swapped, and, for
     * toutiao.com links, the canonical {@code https://www.toutiao.com/a<digits>} form built
     * from the first digit run found in the URL.
     *
     * @param url     the URL to look up; must not be {@code null}
     * @param urlName name of the ES field holding the URL
     * @return the bool query
     */
    public static BoolQueryBuilder urlQuery(String url, String urlName) {
        List<String> candidates = new ArrayList<>(2);
        candidates.add(url);
        if (url.contains("https:")) {
            candidates.add(url.replaceFirst("https", "http"));
        } else if (url.contains("http")) {
            candidates.add(url.replaceFirst("http", "https"));
        }
        if (url.contains("toutiao.com")) {
            List<String> digitRuns = Tools.patternMatchFind(url, "[\\d]+");
            if (!digitRuns.isEmpty()) {
                candidates.add("https://www.toutiao.com/a" + digitRuns.get(0));
            }
        }
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        for (String candidate : candidates) {
            query.should(QueryBuilders.termQuery(urlName, candidate));
        }
        return query;
    }

    /**
     * Fills in required fields that may be missing and re-parses the entity as {@code clazz}.
     * ctime, cid and cname are defaulted only when absent (cid also when it is -1); group,
     * person and tag are always overwritten. Empty-string values are dropped during
     * serialization.
     *
     * @param commonDO  the entity to complete
     * @param mperson   person value to set
     * @param mgroup    group value to set
     * @param originTag existing tag, merged with {@code mtag} via {@code Tools.partialUpdateTag}
     * @param mtag      new tag
     * @param clazz     target entity class
     * @return the completed entity
     */
    public CommonDO addDefault(CommonDO commonDO, String mperson, String mgroup, String originTag,
            String mtag, Class<? extends CommonDO> clazz) {
        JSONObject payload = commonDO.toJSON();
        if (payload.get(GenericAttribute.ES_C_TIME) == null) {
            payload.put(GenericAttribute.ES_C_TIME, System.currentTimeMillis());
        }
        Long cid = payload.getLong(GenericAttribute.ES_CID);
        if (cid == null || cid == -1) {
            payload.put(GenericAttribute.ES_CID, GenericAttribute.ES_CID_DEFAULT);
        }
        if (!payload.containsKey(GenericAttribute.ES_C_NAME)) {
            payload.put(GenericAttribute.ES_C_NAME, GenericAttribute.AUTO_CNAME);
        }
        payload.put(GenericAttribute.ES_M_GROUP, mgroup);
        payload.put(GenericAttribute.ES_M_PERSON, mperson);
        payload.put(GenericAttribute.ES_M_TAG, Tools.partialUpdateTag(originTag, mtag));
        // Serialize empty strings as null so they are omitted from the re-parsed entity.
        ValueFilter dropEmptyStrings = (owner, fieldName, value) -> "".equals(value) ? null : value;
        return JSONObject.parseObject(JSON.toJSONString(payload, dropEmptyStrings), clazz);
    }
}
package com.zhiwei.middleware.automatic.server.base;
/**
 * Checked exception signalling that a field of an uploaded row is invalid.
 */
public class FieldErrorException extends Exception {
    private static final long serialVersionUID = 6671756541874479047L;

    /**
     * Creates the exception with a description of the invalid field.
     *
     * @param msg the detail message
     */
    public FieldErrorException(String msg) {
        super(msg);
    }

    /**
     * Creates the exception with a description and the underlying cause, so that wrapping
     * a lower-level failure does not lose its stack trace.
     *
     * @param msg   the detail message
     * @param cause the failure that triggered this exception; may be {@code null}
     */
    public FieldErrorException(String msg, Throwable cause) {
        super(msg, cause);
    }
}
package com.zhiwei.middleware.automatic.server.base;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.category.ClassCodec;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.functional.FunctionalImpl;
import com.zhiwei.middleware.automatic.server.listener.BaseServiceContext;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.text.ParseException;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Upload template for mark data: converts uploaded rows into upload results, classifies
 * them by where they already exist, and enriches them through batched ES lookups.
 */
@Service
public class MarkCommonTemplate extends FunctionalImpl {
    private static final Logger log = LogManager.getLogger(MarkCommonTemplate.class);
    /** URL-bearing fields of a mark-store hit; every non-null one yields a lookup key. */
    private static final String[] MARK_URL_FIELDS = {"url", "question_url", "answer_url"};

    protected final DubboHandler dubboHandler;
    private final EsDao esDao;

    /**
     * @param dubboHandler handler used for the existence (duplicate) checks
     * @param esDao        ES access object used for the batched lookups
     */
    public MarkCommonTemplate(DubboHandler dubboHandler, EsDao esDao) {
        this.dubboHandler = dubboHandler;
        this.esDao = esDao;
    }

    /**
     * Converts uploaded rows into upload results. A row whose data type cannot be
     * determined is returned with a format-error marker; a row whose conversion fails is
     * returned with the error recorded by {@code setTransformInfo}.
     *
     * @param infos the uploaded rows
     * @return one result per input row, in input order
     */
    public List<MarkUploadResult> dataTransform(List<MarkUploadInfo> infos) {
        List<MarkUploadResult> result = new ArrayList<>();
        for (MarkUploadInfo info : infos) {
            ClassB.TypeB typeB = getInfoTypeB(info);
            MarkUploadResult uploadResult = new MarkUploadResult(info);
            if (Objects.isNull(typeB)) {
                uploadResult.setInfo(GenericAttribute.FORMAT_ERROR_SUFFIX, "解析用户上传c2出错");
                result.add(uploadResult);
                continue;
            }
            BaseDataUploadService dataUploadService = BaseServiceContext.getInstance().getDataUploadService(typeB);
            uploadResult.setTypeB(typeB);
            UploadInfo uploadInfo = setTransformInfo(dataUploadService, uploadResult, info);
            if (Objects.nonNull(uploadInfo)) {
                uploadResult.setCommonDO(uploadInfo.getCompound().getDw(), uploadInfo.getCompound().getMark());
            }
            result.add(uploadResult);
        }
        return result;
    }

    /**
     * Batched lookup of the template's rows in the dw indices and merge of the hits.
     * NOTE(review): despite the name, the per-row query is the URL query
     * ({@code urlSearchQuery}); confirm whether a text query was intended.
     *
     * @param template batch template holding the rows
     * @param rule     upload rule forwarded to the merge step
     */
    public void textSearch(BulkTemplate<MarkUploadResult> template, MarkUploadRule rule) {
        if (template.getSource().isEmpty()) {
            return;
        }
        try {
            template.bulkQuery(this::urlSearchQuery, this::getDwIndex, this::getTypeB, this::rowException);
            if (template.getIndexSet().isEmpty()) {
                // No index resolved: searchCallback would ignore any hits, so skip the ES round trip.
                return;
            }
            SearchHits searchHits = esDao.searchHitsByQuery(TimeUtil.getEsIndex(template.getIndexSet(), template.getTypeSet()), template.getQueryBuilder());
            template.searchCallback(groupHitsByUrl(searchHits), rule, this::getTextSearchRowKey, this::searchHitMerge, this::rowException);
        } catch (IOException e) {
            log.error("标注上传-批量url查询出错:", e);
        }
    }

    /**
     * Sets the data type (MARK / DW / EXTERNAL) on every successful result. Rows that are
     * not successful are skipped; a row whose check throws is marked with a field error.
     *
     * @param results the upload results
     */
    public void uploadType(List<MarkUploadResult> results) {
        for (MarkUploadResult result : results) {
            if (!result.isSuccess()) {
                continue;
            }
            // duplicate detection
            try {
                result.setDataType(getDataType(result.getMark(), result.getDw()));
            } catch (Exception e) {
                log.error("数据url:{},判重失败:", result.getOriginData().getUrl(), e);
                result.setInfo(GenericAttribute.FIELD_ERROR_SUFFIX, "判重失败:" + e.getMessage());
            }
        }
    }

    /**
     * Second-pass lookup: groups the results by data type and runs the matching handler
     * for each group.
     *
     * <p>Results without a data type (rows skipped or failed in {@link #uploadType}) are
     * left out of the grouping. {@code Collectors.groupingBy} rejects {@code null} keys, so
     * a single such row used to abort the second pass for the whole batch.
     *
     * @param results      the upload results
     * @param bulkTemplate batch template reused for each group
     * @param rule         upload rule forwarded to the merge step
     */
    public void secondarySearch(List<MarkUploadResult> results, BulkTemplate<MarkUploadResult> bulkTemplate, MarkUploadRule rule) {
        try {
            Map<UploadInfo.DataType, List<MarkUploadResult>> resGroup = results.stream()
                    .filter(result -> Objects.nonNull(result.getDataType()))
                    .collect(Collectors.groupingBy(MarkUploadResult::getDataType));
            for (Map.Entry<UploadInfo.DataType, List<MarkUploadResult>> entry : resGroup.entrySet()) {
                switch (entry.getKey()) {
                    case MARK:
                        markHandle(entry.getValue(), bulkTemplate, rule);
                        break;
                    case DW:
                        dwHandle(entry.getValue(), bulkTemplate, rule);
                        break;
                    case EXTERNAL:
                        externalHandle(entry.getValue());
                        break;
                    default:
                        break;
                }
            }
        } catch (Exception e) {
            log.error("标注上传-批量二次搜索查询出错:", e);
        }
    }

    /**
     * Converts one uploaded row and records the outcome on {@code result}.
     * NOTE(review): a {@link FieldErrorException} is recorded under FORMAT_ERROR_SUFFIX and
     * any other exception under FIELD_ERROR_SUFFIX, which reads inverted relative to the
     * names; the mapping is kept as-is — confirm the intent.
     *
     * @return the converted upload info, or {@code null} if conversion failed
     */
    private UploadInfo setTransformInfo(BaseDataUploadService dataUploadService, MarkUploadResult result, MarkUploadInfo info) {
        try {
            UploadInfo uploadInfo = dataUploadService.parseMarkUploadInfo2UploadInfo(info);
            result.setSuccess(true);
            return uploadInfo;
        } catch (FieldErrorException e) {
            // recorded as a format-parsing error
            result.setInfo(GenericAttribute.FORMAT_ERROR_SUFFIX, e.getMessage());
        } catch (Exception e) {
            // recorded as a field error
            result.setInfo(GenericAttribute.FIELD_ERROR_SUFFIX, e.getMessage());
        }
        log.error("上传数据转标注数据失败,数据url:{}, 错误信息:{}, {}", result.getOriginData().getUrl(), result.getInfoType(), result.getMessage());
        return null;
    }

    /** Handles rows already present in the dw store: batched URL lookup in the dw indices, then merge. */
    private void dwHandle(List<MarkUploadResult> res, BulkTemplate<MarkUploadResult> template, MarkUploadRule rule) {
        template.clean(res, "大库url查询");
        if (template.getSource().isEmpty()) {
            return;
        }
        try {
            template.bulkQuery(this::urlSearchQuery, this::getDwIndex, this::getTypeB, this::rowException);
            if (template.getIndexSet().isEmpty()) {
                // No index resolved: searchCallback would ignore any hits, so skip the ES round trip.
                return;
            }
            SearchHits searchHits = esDao.searchHitsByQuery(TimeUtil.getEsIndex(template.getIndexSet(), template.getTypeSet()), template.getQueryBuilder());
            template.searchCallback(groupHitsByUrl(searchHits), rule, this::getTextSearchRowKey, this::dwSearchHitMerge, this::rowException);
        } catch (IOException e) {
            log.error("标注上传-批量url查询出错:", e);
        }
    }

    /**
     * Handles rows already present in the mark store: batched lookup in the mark indices,
     * hits keyed by {@code mgroup + normalized URL} for each URL field the hit carries.
     */
    private void markHandle(List<MarkUploadResult> res, BulkTemplate<MarkUploadResult> template, MarkUploadRule rule) {
        template.clean(res, "标注库url查询");
        if (template.getSource().isEmpty()) {
            return;
        }
        template.bulkQuery(this::textSearchQuery, this::getMarkIndex, null, this::rowException);
        if (template.getIndexSet().isEmpty()) {
            // No index resolved: searchCallback would ignore any hits, so skip the ES round trip.
            return;
        }
        try {
            SearchHits searchHits = esDao.searchHitsByQuery(TimeUtil.getEsIndex(template.getIndexSet(), null), template.getQueryBuilder());
            Map<String, List<SearchHit>> hitGroup = new HashMap<>();
            for (SearchHit hit : searchHits) {
                Map<String, Object> sourceAsMap = hit.getSourceAsMap();
                String group = String.valueOf(sourceAsMap.get("mgroup"));
                for (String field : MARK_URL_FIELDS) {
                    Object url = sourceAsMap.get(field);
                    if (Objects.nonNull(url)) {
                        String key = group + Tools.urlReplace(String.valueOf(url));
                        hitGroup.computeIfAbsent(key, k -> new ArrayList<>()).add(hit);
                    }
                }
            }
            template.searchCallback(hitGroup, rule, this::markHandleRowKey, this::markSearchHitMerge, this::rowException);
        } catch (IOException e) {
            log.error("大库文本搜索失败:", e);
        }
    }

    /** Handles rows not found in any store: completes the C1–C5 type fields of the mark entity. */
    private void externalHandle(List<MarkUploadResult> res) {
        for (MarkUploadResult result : res) {
            try {
                CommonDO wholeMark = DataUploadUtil.defaultCTypeAll(result.getMark(), result.getOriginData());
                result.setMark(wholeMark);
            } catch (Exception e) {
                log.error("externalHandle-", e);
                result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, "externalHandle处理异常");
            }
        }
    }

    /**
     * Groups ES hits by their normalized URL. The key is taken from {@code answer_url},
     * falling back to {@code question_url} and finally {@code url}.
     */
    private static Map<String, List<SearchHit>> groupHitsByUrl(SearchHits searchHits) {
        return Arrays.stream(searchHits.getHits()).collect(Collectors.groupingBy(hit -> {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            Object url = sourceAsMap.get("answer_url");
            if (Objects.isNull(url)) {
                url = sourceAsMap.get("question_url");
            }
            if (Objects.isNull(url)) {
                url = sourceAsMap.get("url");
            }
            return Tools.urlReplace(String.valueOf(url));
        }));
    }

    /**
     * Determines the data type of an uploaded row: decoded from its c2 code when that is
     * present and non-zero, otherwise derived via {@link #selfAdaptionTypeB}.
     *
     * @param info the uploaded row
     * @return the data type, or {@code null} if it cannot be determined
     */
    private ClassB.TypeB getInfoTypeB(MarkUploadInfo info) {
        Integer c2 = info.getC2();
        try {
            return null == c2 || 0 == c2 ? selfAdaptionTypeB(info.getPlatform(), info.getClientFrom(), info.getTime(), info.getUrl())
                    : ClassB.TypeB.fromEncode(c2);
        } catch (Exception e) {
            // Log with context and the throwable so the stack trace is not lost.
            log.error("获取数据类型失败,数据url:{}", info.getUrl(), e);
            return null;
        }
    }

    /**
     * Derives the data type when no c2 code was uploaded. With a blank or "未知" platform
     * the URL is looked up in the month's indices and the hit's c2 is used; otherwise the
     * type is decoded from platform and source.
     *
     * @param platform platform name
     * @param source   source value (callers pass the row's clientFrom)
     * @param timeStr  row time, parsed with {@code TimeUtil.TIME_FORMAT}
     * @param url      row URL
     * @return the data type, or {@code null} if the URL lookup finds nothing
     * @throws ParseException if {@code timeStr} cannot be parsed
     * @throws IOException    if the ES lookup fails
     */
    private ClassB.TypeB selfAdaptionTypeB(String platform, String source, String timeStr, String url) throws ParseException, IOException {
        if (StringUtils.isBlank(platform) || "未知".equals(platform)) {
            SearchHits search = esDao.search(TimeUtil.getWholeIndexInMonth(TimeUtil.TIME_FORMAT.parse(timeStr).getTime()),
                    QueryBuilders.termQuery(GenericAttribute.ES_URL, url), null, null, 0, 1, null);
            if (0 != search.getTotalHits().value) {
                return ClassB.TypeB.fromEncode(Integer.parseInt(search.getAt(0).getSourceAsMap().get("c2") + ""));
            }
            return null;
        }
        return ClassCodec.decodeClassB(DataUploadUtil.getEndoceByPlatformAndSource(platform, source)).typeB();
    }

    /**
     * Classifies a row by where it already exists: MARK if the mark entity is known to the
     * handler, else DW if the dw entity is, else EXTERNAL. {@code null} entities are
     * treated as not present.
     *
     * @param markDO the mark entity, may be {@code null}
     * @param dwDo   the dw entity, may be {@code null}
     * @return the data type
     */
    private UploadInfo.DataType getDataType(CommonDO markDO, CommonDO dwDo) {
        UploadInfo.DataType dataType = UploadInfo.DataType.EXTERNAL;
        if (Objects.nonNull(markDO) && dubboHandler.contains(markDO.filterInfo())) {
            dataType = UploadInfo.DataType.MARK;
        } else if (Objects.nonNull(dwDo) && dubboHandler.contains(dwDo.filterInfo())) {
            dataType = UploadInfo.DataType.DW;
        }
        return dataType;
    }
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.CompleteText;
import com.zhiwei.base.entity.subclass.mark.CompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@Service
public class CompleteTextServiceImpl extends DataUploadCommon implements BaseDataUploadService {
private static final Logger log = LogManager.getLogger(CompleteTextServiceImpl.class);
private final EsDao esDao;
public CompleteTextServiceImpl(DubboHandler dubboHandler, EsDao esDao) {
super(ClassB.TypeB.COMPLETE, CompleteText.class, CompleteTextMark.class, dubboHandler);
this.esDao = esDao;
}
@Override
public CommonDO searchDwByContentNew(MarkUploadResult info) {
CommonDO res = null;
// 还原数据
CompleteText dw = (CompleteText) info.getDw();
// 文本去重需要的精确到分的时间以及host
String ruleTime = TimeUtil.CONTENT_DF.format(dw.getTime());
String host = Tools.getHost(dw.getUrl());
BoolQueryBuilder bool = QueryBuilders.boolQuery();
bool.must(QueryBuilders.termQuery(GenericAttribute.ES_SOURCE, dw.getSource()));
List<Map<String, Object>> allResults = null;
try {
allResults = Arrays.stream(esDao.search(TimeUtil.getAccurateIndex(dw.getTime(), getTypeB(), false), bool, null, null, 0, 1000, null).getHits())
.map(SearchHit::getSourceAsMap).collect(Collectors.toList());
} catch (IOException e) {
log.error("es文本搜索失败:", e);
return res;
}
boolean matched = false;
for (Map<String, Object> map : allResults) {
try {
CompleteText text = CompleteText.restoreFromEs(map);
// 任一条件不匹配
if (ruleTime.equals(TimeUtil.CONTENT_DF.format(text.getTime())) && host.equals(Tools.getHost(text.getUrl()))) {
matched = true;
res = text;
break;
}
} catch (Exception e) {
log.info("debug-esMap:{}", JSONObject.toJSONString(map));
}
}
if (!matched) {
// 文本匹配任未找到
log.info("文本匹配任未找到!title:{},source:{},time:{},host:{}", dw.getTitle(), dw.getSource(), ruleTime, host);
}
return res;
}
@Override
public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
CompleteTextMark mark = JSONObject.parseObject(JSONObject.toJSONString(info), CompleteTextMark.class);
if (null == mark.getC5()) {
DataUploadUtil.defaultCTypeAll(mark, info);
}
if (!Tools.isLegalTime(mark.getTime())) {
throw new FieldErrorException("time字段不符合规则");
}
// 重置userId
mark.setUserId(info.getUid());
try {
String[] mupdates = getDubboHandler().getMupdates(mark.filterInfo());
// 设置标注特征字段
mark.setMupdate(mupdates[0]);
if (mupdates.length == 2) {
mark.setMupdateTwo(mupdates[1]);
}
} catch (Exception e) {
log.error("parseMarkUploadInfo2UploadInfo-getMupdates",e);
throw new FieldErrorException(e.getMessage());
}
CompleteText dw = JSONObject.parseObject(mark.toJSON().toJSONString(), CompleteText.class);
return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
}
@Override
public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
return urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_URL);
}
@Override
public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
CompleteTextMark mark = (CompleteTextMark) result.getMark();
BoolQueryBuilder bool = QueryBuilders.boolQuery();
bool.must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));
bool.must(urlQuery(mark.getUrl(), GenericAttribute.ES_URL));
result.setKey(mark.getMgroup() + Tools.urlReplace(mark.getUrl()));
return bool;
}
@Override
public CommonDO getCommonDOBySearchHit(SearchHit hit) {
return CommonDO.restoreFromEs(hit.getSourceAsMap(), getDwClazz());
}
@Override
public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
String originTag = originMtag.length > 0 ? originMtag[0] : null;
return new MarkInfo((CompleteTextMark) addDefault(result.getMark(), mperson, group,
originTag, result.getOriginData().getMtag(), CompleteTextMark.class));
}
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.IncompleteText;
import com.zhiwei.base.entity.subclass.mark.IncompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import com.zhiwei.middleware.automatic.server.util.WeiboMidUrlDealUtil;
import io.micrometer.core.instrument.util.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
import java.util.Objects;
/**
 * Upload service registered for {@code ClassB.TypeB.INCOMPLETE}, working with
 * {@link IncompleteText} / {@link IncompleteTextMark}.
 *
 * <p>Converts an incoming {@link MarkUploadInfo} into an {@link UploadInfo} and builds the
 * Elasticsearch queries used to look up existing records by URL / mid / mark group.
 */
@Service
public class IncompleteTextServiceImpl extends DataUploadCommon implements BaseDataUploadService {

    private static final Logger log = LogManager.getLogger(IncompleteTextServiceImpl.class);

    /** Authentication-type labels understood by {@link #restoreVtype(String)}. */
    private static final String[] VTYPE_LABELS = {
        "未知", "普通用户", "名人", "政府", "企业", "媒体", "校园", "网站", "应用", "团体", "微博女郎"
    };

    /** Numeric codes, index-aligned with {@link #VTYPE_LABELS}. */
    private static final int[] VTYPE_CODES = {-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 10};

    /** Code returned for any label that is not listed (same value as the "未知" entry). */
    private static final int VTYPE_UNKNOWN = -2;

    private final DubboHandler dubboHandler;

    public IncompleteTextServiceImpl(DubboHandler dubboHandler) {
        super(ClassB.TypeB.INCOMPLETE, IncompleteText.class, IncompleteTextMark.class, dubboHandler);
        this.dubboHandler = dubboHandler;
    }

    /** Content-based lookup is not implemented for this type; always returns {@code null}. */
    @Override
    public CommonDO searchDwByContentNew(MarkUploadResult info) {
        return null;
    }

    /**
     * Converts the raw upload payload into an {@link UploadInfo}.
     *
     * <p>The payload is round-tripped through JSON into an {@link IncompleteTextMark}, then
     * completed field by field (c-types, content, followers, vtype, forward flag, screen names,
     * Weibo-specific fields, mupdate features) before the dw entity is derived from it.
     *
     * @param info raw upload payload
     * @return upload info pairing the derived {@link IncompleteText} with the completed mark
     * @throws FieldErrorException when the time is illegal, the Weibo mid cannot be derived,
     *                             or fetching the mupdate features fails
     * @throws Exception           anything else raised while converting (e.g. number parsing)
     */
    @Override
    public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
        if (info.getMgroup() == null) {
            // TODO diagnostic output for payloads that arrive without an mgroup
            log.info("出现mgroup为空数据,data:{}", JSONObject.toJSONString(info));
        }
        final IncompleteTextMark mark =
                JSONObject.parseObject(JSONObject.toJSONString(info), IncompleteTextMark.class);

        // Fill in c1-c5 when the payload did not carry them.
        if (mark.getC5() == null) {
            DataUploadUtil.defaultCTypeAll(mark, info);
        }
        if (!Tools.isLegalTime(mark.getTime())) {
            throw new FieldErrorException("time字段不符合规则");
        }
        // Empty content falls back to the title.
        if (StringUtils.isEmpty(mark.getContent())) {
            mark.setContent(info.getTitle());
        }
        // Followers count.
        if (info.getFans() != null) {
            mark.setFollowersNum(Integer.valueOf(info.getFans()));
        }
        // Restore the numeric authentication type when a label is present.
        final String vtype = info.getAuthenticationType();
        if (vtype != null) {
            mark.setVtype(restoreVtype(vtype));
        }
        // Forward flag: a missing value or "原创" means not forwarded.
        final String primary = info.getPrimary();
        mark.setIsForward(!StringUtils.isEmpty(primary) && !"原创".equals(primary));
        // source doubles as screenName, rootSource as rootScreenName.
        mark.setScreenName(info.getSource());
        mark.setRootScreenName(info.getRootSource());

        if ("微博".equals(info.getPlatform())) {
            completeWeiboFields(mark);
        }
        applyMupdates(mark);

        final IncompleteText dw = JSONObject.parseObject(mark.toJSON().toJSONString(), IncompleteText.class);
        return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
    }

    /**
     * Builds a query matching the origin data's mid (when present) or its URL.
     *
     * @param result upload result supplying the origin data
     * @return a bool query made of {@code should} clauses only
     */
    @Override
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        final BoolQueryBuilder query = QueryBuilders.boolQuery();
        if (result.getOriginData().getMid() != null) {
            query.should(QueryBuilders.termQuery(GenericAttribute.ES_MID, result.getOriginData().getMid()));
        }
        query.should(urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_URL));
        return query;
    }

    /**
     * Builds a query requiring the mark's mgroup, its mid (when present) and its URL.
     *
     * <p>Side effect: stores {@code mgroup + Tools.urlReplace(url)} on {@code result} as its key.
     *
     * @param result upload result; its mark is cast to {@link IncompleteTextMark}
     * @return the assembled bool query
     */
    @Override
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        final IncompleteTextMark mark = (IncompleteTextMark) result.getMark();
        final BoolQueryBuilder query = QueryBuilders.boolQuery()
                .must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));
        if (mark.getMid() != null) {
            query.must(QueryBuilders.termQuery(GenericAttribute.ES_MID, mark.getMid()));
        }
        query.must(urlQuery(mark.getUrl(), GenericAttribute.ES_URL));
        result.setKey(mark.getMgroup() + Tools.urlReplace(mark.getUrl()));
        return query;
    }

    /** Restores an entity from the hit's source map using this service's dw class. */
    @Override
    public CommonDO getCommonDOBySearchHit(SearchHit hit) {
        return CommonDO.restoreFromEs(hit.getSourceAsMap(), this.getDwClazz());
    }

    /**
     * Wraps the mark of {@code result}, after {@code addDefault}, in a {@link MarkInfo}.
     *
     * @param originMtag optional; only the first element is used, {@code null} when none is given
     */
    @Override
    public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
        String originTag = null;
        if (originMtag.length > 0) {
            originTag = originMtag[0];
        }
        final IncompleteTextMark completed = (IncompleteTextMark) addDefault(
                result.getMark(), mperson, group, originTag,
                result.getOriginData().getMtag(), IncompleteTextMark.class);
        return new MarkInfo(completed);
    }

    /**
     * Weibo-only completion: forces c4 to 1020 (the original note says de-duplication needs c4)
     * and derives the mid from the URL when the mark has none.
     *
     * @throws FieldErrorException when no mid can be derived from the URL
     */
    private void completeWeiboFields(IncompleteTextMark mark) throws FieldErrorException {
        mark.setC4(1020);
        if (mark.getMid() != null) {
            return;
        }
        final String mid = WeiboMidUrlDealUtil.urlToMid(mark.getUrl());
        if (mid == null) {
            throw new FieldErrorException("转换mid出错");
        }
        mark.setMid(mid);
    }

    /**
     * Fetches the mupdate feature values through the Dubbo handler and stores them on the mark.
     * The second value is stored only when exactly two values come back.
     *
     * @throws FieldErrorException wrapping the message of any failure
     */
    private void applyMupdates(IncompleteTextMark mark) throws FieldErrorException {
        try {
            final String[] mupdates = dubboHandler.getMupdates(mark.filterInfo());
            mark.setMupdate(mupdates[0]);
            if (mupdates.length == 2) {
                mark.setMupdateTwo(mupdates[1]);
            }
        } catch (Exception e) {
            log.error("parseMarkUploadInfo2UploadInfo-getMupdates",e);
            throw new FieldErrorException(e.getMessage());
        }
    }

    /**
     * Maps a Weibo authentication-type label back to its numeric code.
     *
     * @param vtypeStr label, must not be {@code null}
     * @return the matching code, or -2 for any unlisted label (the original note says "达人"
     *         corresponds to 200 and 220 and therefore also ends up on the default)
     * @throws IllegalArgumentException when {@code vtypeStr} is {@code null}
     */
    private int restoreVtype(String vtypeStr) {
        if (vtypeStr == null) {
            throw new IllegalArgumentException("微博必须要有vtype!!!");
        }
        for (int i = 0; i < VTYPE_LABELS.length; i++) {
            if (VTYPE_LABELS[i].equals(vtypeStr)) {
                return VTYPE_CODES[i];
            }
        }
        return VTYPE_UNKNOWN;
    }
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.QAText;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.base.entity.subclass.mark.QATextMark;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
/**
 * Upload service registered for {@code ClassB.TypeB.QA}, working with
 * {@link QAText} / {@link QATextMark}.
 */
@Service
public class QATextServiceImpl extends DataUploadCommon implements BaseDataUploadService {

    private static final Logger log = LogManager.getLogger(QATextServiceImpl.class);

    private final DubboHandler dubboHandler;

    public QATextServiceImpl(DubboHandler dubboHandler) {
        super(ClassB.TypeB.QA, QAText.class, QATextMark.class, dubboHandler);
        this.dubboHandler = dubboHandler;
    }

    /** Content-based lookup is not implemented for this type; always returns {@code null}. */
    @Override
    public CommonDO searchDwByContentNew(MarkUploadResult info) {
        return null;
    }

    /**
     * Converts the raw upload payload into an {@link UploadInfo} for Q&amp;A data.
     *
     * <p>The payload's title and URL are always copied to {@code questionTitle} /
     * {@code questionUrl}. A URL on host {@code www.zhihu.com} that does not contain
     * {@code "answer"} is treated as a question; everything else is treated as an answer.
     *
     * @param info raw upload payload
     * @return upload info pairing the derived {@link QAText} with the completed mark
     * @throws FieldErrorException when the time is illegal or fetching mupdate features fails
     * @throws Exception           anything else raised while converting (e.g. time parsing)
     */
    @Override
    public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
        final JSONObject payload = JSONObject.parseObject(JSONObject.toJSONString(info));
        final String pageUrl = info.getUrl();
        final String pageTitle = info.getTitle();
        final String body = info.getContent();
        final String author = info.getSource();
        final Long timestamp = TimeUtil.TIME_FORMAT.parse(info.getTime()).getTime();

        // Forum-style data: questionUrl / questionTitle always come from the payload itself.
        payload.put("questionTitle", pageTitle);
        payload.put("questionUrl", pageUrl);

        // Simple question-vs-answer heuristic.
        final boolean isQuestion = "www.zhihu.com".equals(Tools.getHost(pageUrl)) && !pageUrl.contains("answer");
        if (isQuestion) {
            payload.put("questionTime", timestamp);
            payload.put("questionUsername", author);
            payload.put("questionContent", body);
        } else {
            payload.put("answerTime", timestamp);
            payload.put("answerUrl", pageUrl);
            payload.put("answerUsername", author);
            payload.put("answerContent", body);
        }

        final QATextMark mark = JSONObject.parseObject(payload.toJSONString(), QATextMark.class);
        // Fill in c1-c5 when the payload did not carry them.
        if (mark.getC5() == null) {
            DataUploadUtil.defaultCTypeAll(mark, info);
        }
        if (!Tools.isLegalTime(mark.getTime())) {
            throw new FieldErrorException("time字段不符合规则");
        }
        applyMupdates(mark);

        final QAText dw = JSONObject.parseObject(mark.toJSON().toJSONString(), QAText.class);
        return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
    }

    /**
     * Builds a query requiring the origin data's source and a URL match on either the
     * question-URL field or the answer-URL field.
     */
    @Override
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        final BoolQueryBuilder eitherUrl = QueryBuilders.boolQuery()
                .should(urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_QA_QUESTION_URL))
                .should(urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_QA_ANSWER_URL));
        final BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.termQuery(GenericAttribute.ES_SOURCE, result.getOriginData().getSource()));
        query.must(eitherUrl);
        return query;
    }

    /**
     * Builds a query requiring the mark's mgroup plus one of:
     * (question URL matches and an answer URL exists) or (answer URL matches the mark's question URL).
     *
     * <p>Side effect: stores {@code mgroup + Tools.urlReplace(originUrl)} on {@code result} as its key.
     */
    @Override
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        final QATextMark mark = (QATextMark) result.getMark();
        final BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));

        final BoolQueryBuilder urlAlternatives = QueryBuilders.boolQuery();
        final BoolQueryBuilder questionWithAnswer = QueryBuilders.boolQuery()
                .must(urlQuery(mark.getQuestionUrl(), GenericAttribute.ES_QA_QUESTION_URL))
                .must(QueryBuilders.existsQuery(GenericAttribute.ES_QA_ANSWER_URL));
        urlAlternatives.should(questionWithAnswer);
        urlAlternatives.should(urlQuery(mark.getQuestionUrl(), GenericAttribute.ES_QA_ANSWER_URL));
        query.must(urlAlternatives);

        result.setKey(mark.getMgroup() + Tools.urlReplace(result.getOriginData().getUrl()));
        return query;
    }

    /** Restores an entity from the hit's source map using this service's dw class. */
    @Override
    public CommonDO getCommonDOBySearchHit(SearchHit hit) {
        return CommonDO.restoreFromEs(hit.getSourceAsMap(), this.getDwClazz());
    }

    /**
     * Wraps the mark of {@code result}, after {@code addDefault}, in a {@link MarkInfo}.
     *
     * @param originMtag optional; only the first element is used, {@code null} when none is given
     */
    @Override
    public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
        String originTag = null;
        if (originMtag.length > 0) {
            originTag = originMtag[0];
        }
        final QATextMark completed = (QATextMark) addDefault(
                result.getMark(), mperson, group, originTag,
                result.getOriginData().getMtag(), QATextMark.class);
        return new MarkInfo(completed);
    }

    /**
     * Fetches the mupdate feature values through the Dubbo handler and stores them on the mark.
     * The second value is stored only when exactly two values come back.
     *
     * @throws FieldErrorException wrapping the message of any failure
     */
    private void applyMupdates(QATextMark mark) throws FieldErrorException {
        try {
            final String[] mupdates = dubboHandler.getMupdates(mark.filterInfo());
            mark.setMupdate(mupdates[0]);
            if (mupdates.length == 2) {
                mark.setMupdateTwo(mupdates[1]);
            }
        } catch (Exception e) {
            log.error("parseMarkUploadInfo2UploadInfo-getMupdates",e);
            throw new FieldErrorException(e.getMessage());
        }
    }
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.Video;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.base.entity.subclass.mark.VideoMark;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Upload service registered for {@code ClassB.TypeB.VIDEO}, working with
 * {@link Video} / {@link VideoMark}.
 */
@Service
public class VideoServiceImpl extends DataUploadCommon implements BaseDataUploadService {

    private static final Logger log = LogManager.getLogger(VideoServiceImpl.class);

    private final DubboHandler dubboHandler;
    private final EsDao esDao;

    public VideoServiceImpl(DubboHandler dubboHandler, EsDao esDao) {
        super(ClassB.TypeB.VIDEO, Video.class, VideoMark.class, dubboHandler);
        this.dubboHandler = dubboHandler;
        this.esDao = esDao;
    }

    /**
     * Looks for an already-stored video from the same source whose formatted time
     * ({@code TimeUtil.CONTENT_DF}) and URL host both equal those of the uploaded record.
     *
     * @param info upload result; its dw is cast to {@link Video}
     * @return the first stored video that matches, or {@code null} when none matches or the
     *         Elasticsearch search fails with an {@link IOException}
     */
    @Override
    public CommonDO searchDwByContentNew(MarkUploadResult info) {
        final Video dw = (Video) info.getDw();

        // Restrict candidates to the same source; the query is handed over as the post filter.
        final BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_SOURCE, dw.getSource()));

        // Values the candidates are compared against: formatted time and URL host.
        final String ruleTime = TimeUtil.CONTENT_DF.format(dw.getTime());
        final String host = Tools.getHost(dw.getUrl());

        final List<Map<String, Object>> candidates;
        try {
            candidates = Arrays.stream(esDao.search(TimeUtil.getAccurateIndex(dw.getTime(), getTypeB(), false), bool, null, null, 0, 1000, null).getHits())
                    .map(SearchHit::getSourceAsMap).collect(Collectors.toList());
        } catch (IOException e) {
            log.error("es文本搜索失败:", e);
            return null;
        }

        for (Map<String, Object> source : candidates) {
            final Video candidate = Video.restoreFromEs(source);
            // Both the formatted time and the host must be equal.
            if (ruleTime.equals(TimeUtil.CONTENT_DF.format(candidate.getTime()))
                    && host.equals(Tools.getHost(candidate.getUrl()))) {
                return candidate;
            }
        }
        // No candidate matched.
        log.info("文本匹配任未找到!title:{},source:{},time:{},host:{}", dw.getTitle(), dw.getSource(), ruleTime, host);
        return null;
    }

    /**
     * Converts the raw upload payload into an {@link UploadInfo} for video data.
     *
     * @param info raw upload payload
     * @return upload info pairing the derived {@link Video} with the completed {@link VideoMark}
     * @throws FieldErrorException when the time is illegal or fetching mupdate features fails
     * @throws Exception           anything else raised while converting
     */
    @Override
    public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
        final VideoMark mark = JSONObject.parseObject(JSONObject.toJSONString(info), VideoMark.class);
        // Fill in c1-c5 when the payload did not carry them.
        if (null == mark.getC5()) {
            DataUploadUtil.defaultCTypeAll(mark, info);
        }
        if (!Tools.isLegalTime(mark.getTime())) {
            throw new FieldErrorException("time字段不符合规则");
        }
        try {
            final String[] mupdates = dubboHandler.getMupdates(mark.filterInfo());
            // Store the mark feature fields; the second one only when exactly two come back.
            mark.setMupdate(mupdates[0]);
            if (mupdates.length == 2) {
                mark.setMupdateTwo(mupdates[1]);
            }
        } catch (Exception e) {
            log.error("parseMarkUploadInfo2UploadInfo-getMupdates",e);
            throw new FieldErrorException(e.getMessage());
        }
        // The dw half is the plain Video entity: this service registers Video.class next to
        // VideoMark.class and casts info.getDw() to Video when searching by content.
        final Video dw = JSONObject.parseObject(mark.toJSON().toJSONString(), Video.class);
        return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
    }

    /** Builds a query requiring a URL match on the {@code ES_URL} field. */
    @Override
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        return QueryBuilders.boolQuery().must(urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_URL));
    }

    /**
     * Builds a query requiring the mark's mgroup and its URL.
     *
     * <p>Side effect: stores {@code mgroup + Tools.urlReplace(url)} on {@code result} as its key.
     */
    @Override
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        final VideoMark mark = (VideoMark) result.getMark();
        final BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));
        bool.must(urlQuery(mark.getUrl(), GenericAttribute.ES_URL));
        result.setKey(mark.getMgroup() + Tools.urlReplace(mark.getUrl()));
        return bool;
    }

    /** Restores an entity from the hit's source map using this service's dw class. */
    @Override
    public CommonDO getCommonDOBySearchHit(SearchHit hit) {
        return CommonDO.restoreFromEs(hit.getSourceAsMap(), this.getDwClazz());
    }

    /**
     * Wraps the mark of {@code result}, after {@code addDefault}, in a {@link MarkInfo}.
     *
     * @param originMtag optional; only the first element is used, {@code null} when none is given
     */
    @Override
    public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
        final String originTag = originMtag.length > 0 ? originMtag[0] : null;
        return new MarkInfo((VideoMark) addDefault(result.getMark(), mperson, group,
                originTag, result.getOriginData().getMtag(), VideoMark.class));
    }
}
package com.zhiwei.middleware.automatic.server.config;
import com.zhiwei.es.pojo.Address;
import com.zhiwei.es.util.IndexUtil;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Spring configuration that builds the Elasticsearch-related beans from {@link EsProperties}:
 * the {@code esIndexes} helper and the {@code restHighLevelClient}.
 *
 * <p>Both factory methods log and return {@code null} when initialisation fails.
 */
@Configuration
public class EsClientConfig {

    private static final Logger log = LogManager.getLogger(EsClientConfig.class);
    private static final String COLON = ":";
    private static final String COMMA = ",";

    private final EsProperties esProperties;

    public EsClientConfig(EsProperties esProperties) {
        this.esProperties = esProperties;
    }

    /**
     * Creates the index helper from the comma-separated {@code esClientAddresses} property.
     * Each node must be {@code host:port} or {@code host:port:username:password}.
     *
     * @return the created helper, or {@code null} when the property is invalid or creation fails
     */
    @Bean(name = "esIndexes")
    public IndexUtil.ESIndexes esIndexes() {
        try {
            final String configured = esProperties.getEsClientAddresses();
            Assert.hasText(configured, "Http Cluster nodes source must not be null or empty!");
            final List<Address> addresses = new ArrayList<>();
            for (String node : StringUtils.delimitedListToStringArray(configured, COMMA)) {
                addresses.add(parseAddress(node, configured));
            }
            return IndexUtil.create(addresses);
        } catch (Exception e) {
            log.error("esIndexes初始化异常", e);
            return null;
        }
    }

    /**
     * Creates the high-level REST client from the {@code clusterNodes}, {@code username}
     * and {@code password} properties.
     */
    @Bean("restHighLevelClient")
    public RestHighLevelClient restHighLevelClient() {
        final String nodes = esProperties.getClusterNodes();
        final String user = esProperties.getUsername();
        final String secret = esProperties.getPassword();
        return buildRestHighLevelClient(nodes, user, secret);
    }

    /**
     * Parses one {@code host:port[:username:password]} node into an {@link Address}.
     *
     * @param node     the single node definition
     * @param allNodes the full configured list, used only in the error message
     */
    private static Address parseAddress(String node, String allNodes) {
        final String[] segments = StringUtils.delimitedListToStringArray(node, COLON);
        Assert.isTrue(segments.length == 2 || segments.length == 4,
                () -> String.format("Invalid cluster node %s in %s! Must be in the format host:port or " +
                        "host:port:username:password!", node, allNodes));
        final String host = segments[0].trim();
        final String port = segments[1].trim();
        Assert.hasText(host, () -> String.format("No host name given cluster node %s!", node));
        Assert.hasText(port, () -> String.format("No port given in cluster node %s!", node));
        if (segments.length == 2) {
            return new Address(host, Integer.parseInt(port));
        }
        final String username = segments[2].trim();
        final String password = segments[3].trim();
        Assert.hasText(username, () -> String.format("No username given cluster node %s!", node));
        Assert.hasText(password, () -> String.format("No password given in cluster node %s!", node));
        return new Address(host, Integer.parseInt(port), username, password);
    }

    /**
     * Parses one {@code host:port} node into an {@link HttpHost}.
     *
     * @param node     the single node definition
     * @param allNodes the full configured list, used only in the error message
     */
    private static HttpHost parseHttpHost(String node, String allNodes) {
        final String[] segments = StringUtils.delimitedListToStringArray(node, COLON);
        Assert.isTrue(segments.length == 2,
                () -> String.format("Invalid cluster node %s in %s! Must be in the format host:port!", node,
                        allNodes));
        final String host = segments[0].trim();
        final String port = segments[1].trim();
        Assert.hasText(host, () -> String.format("No host name given cluster node %s!", node));
        Assert.hasText(port, () -> String.format("No port given in cluster node %s!", node));
        return new HttpHost(host, Integer.parseInt(port));
    }

    /**
     * Builds the REST client for the given comma-separated {@code host:port} nodes.
     * Without a username the client connects anonymously; otherwise basic credentials
     * are installed and auth caching is disabled.
     *
     * @return the client, or {@code null} when anything fails (the failure is logged)
     */
    private RestHighLevelClient buildRestHighLevelClient(String clusterNodes, String esUsername, String esPassword) {
        try {
            Assert.hasText(clusterNodes, "Cluster nodes source must not be null or empty!");
            final List<HttpHost> hosts = new ArrayList<>();
            for (String node : StringUtils.delimitedListToStringArray(clusterNodes, COMMA)) {
                hosts.add(parseHttpHost(node, clusterNodes));
            }
            final HttpHost[] httpHosts = hosts.toArray(new HttpHost[0]);

            // No username configured: connect without credentials.
            if (StringUtils.isEmpty(esUsername)) {
                return new RestHighLevelClient(RestClient.builder(httpHosts));
            }

            // Username configured: connect with the Elasticsearch account and password.
            final CredentialsProvider credentials = new BasicCredentialsProvider();
            credentials.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(esUsername, esPassword));
            return new RestHighLevelClient(
                    RestClient.builder(httpHosts)
                            .setHttpClientConfigCallback(asyncClientBuilder -> {
                                // Further client options (cookie store, proxy, ...) could be set here.
                                asyncClientBuilder.disableAuthCaching();
                                return asyncClientBuilder.setDefaultCredentialsProvider(credentials);
                            }));
        } catch (Exception e) {
            log.error("es client初始化异常", e);
            return null;
        }
    }
}
package com.zhiwei.middleware.automatic.server.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Configuration properties bound from the {@code es} prefix.
 *
 * @author shentao
 * @since 2020/4/27
 */
@Component
@ConfigurationProperties(prefix = "es")
public class EsProperties {

    /**
     * Comma-separated client addresses; {@code EsClientConfig} parses each entry as
     * {@code host:port} or {@code host:port:username:password}.
     */
    private String esClientAddresses;

    public String getEsClientAddresses() {
        return this.esClientAddresses;
    }

    public void setEsClientAddresses(String esClientAddresses) {
        this.esClientAddresses = esClientAddresses;
    }

    /** HTTP cluster nodes. */
    private String httpClusterNodes;

    public String getHttpClusterNodes() {
        return this.httpClusterNodes;
    }

    public void setHttpClusterNodes(String httpClusterNodes) {
        this.httpClusterNodes = httpClusterNodes;
    }

    /** Cluster name. */
    private String clusterName;

    public String getClusterName() {
        return this.clusterName;
    }

    public void setClusterName(String clusterName) {
        this.clusterName = clusterName;
    }

    /** Comma-separated cluster nodes; {@code EsClientConfig} parses each entry as {@code host:port}. */
    private String clusterNodes;

    public String getClusterNodes() {
        return this.clusterNodes;
    }

    public void setClusterNodes(String clusterNodes) {
        this.clusterNodes = clusterNodes;
    }

    /** User name for the REST client; when empty, {@code EsClientConfig} connects without credentials. */
    private String username;

    public String getUsername() {
        return this.username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    /** Password paired with {@link #username}. */
    private String password;

    public String getPassword() {
        return this.password;
    }

    public void setPassword(String password) {
        this.password = password;
    }
}
package com.zhiwei.middleware.automatic.server.config;
/**
 * Shared constants: name prefixes/suffixes, Redis-style keys, similarity thresholds,
 * defaults for automatically created marks, and Elasticsearch field names.
 */
public class GenericAttribute {

    public static final boolean IS_TEST = false;

    // ---- name prefix and suffixes ----
    public static final String UNIFIED_PREFIX = "dataUpload";
    public static final String SOURCE_DATA = "sourceData";
    public static final String FORMAT_ERROR_SUFFIX = "formatError";
    public static final String FIELD_ERROR_SUFFIX = "fieldError";
    /**
     * Suffix for system errors. It carries its own value so that system errors do not end up
     * under the same name as {@link #FIELD_ERROR_SUFFIX}.
     */
    public static final String SYSTEM_ERROR_SUFFIX = "systemError";
    public static final String SUCCESS_SUFFIX = "successed";
    public static final String FAILED_SUFFIX = "failed";
    public static final String STATUS_SUFFIX = "status";

    // ---- key fragments ----
    public static final String REDIS_PREFIX = "DATA-COLLECTION";
    public static final String SOURCE = "SOURCE";
    public static final String STATUS = "STATUS";
    public static final String NOISE = "NOISE";
    public static final String MAP_SET = "|MAP_SET";
    public static final String KEY_SET = "|KEY_SET";
    public static final String NOISE_SET = "|NOISE_SET";
    public static final String HIT_WORD_RATE = "hitWordAndRate";
    public static final double SIMILAR_STANDARD_NOISE = 0.8;
    public static final String KEY_INCREMENT = "increment";
    public static final String REDIS_QUEUE_ONE_KEY = "autoDataOneQueue";
    public static final String REDIS_QUEUE_MULTI_KEY = "autoDataMultiQueue";
    public static final String REDIS_MAP_KEY = "autoDataMap";
    public static final int REDIS_QUEUE_LIMIT = 1000;
    public static final double SIMILAR_STANDARD = 0.7;
    public static final String SON_ID = "sonId";

    /**
     * Maximum number of records processed when modifying template tags.
     */
    public static final int POINT_SIZE = 100;

    // ---- defaults for automatically created marks ----
    public static final String AUTO_PERSON = "自动化机器人";
    public static final long AUTO_CID = 100040002;

    // ---- template lock keys ----
    public static final String LOCK_TEMPLATE_HOUR = "lock:template:hour";
    public static final String LOCK_TEMPLATE_DAY = "lock:template:day";
    public static final String LOCK_TEMPLATE_NUMBER = "lock:template:number";

    // ---- Elasticsearch field names and related defaults ----
    public static final String ES_C_TIME = "ctime";
    public static final String ES_M_TIME = "mtime";
    public static final String ES_CID = "cid";
    public static final long ES_CID_DEFAULT = 100040002L;
    public static final String ES_C_NAME = "cname";
    public static final String AUTO_CNAME = "上传标注补充采集";
    public static final String ES_M_GROUP = "mgroup";
    public static final String ES_M_PERSON = "mperson";
    public static final String ES_M_TAG = "mtag";
    public static final String ES_URL = "url";
    public static final String ES_MID = "mid";
    public static final String ES_QA_QUESTION_URL = "question_url";
    public static final String ES_QA_ANSWER_URL = "answer_url";
    public static final String ES_SOURCE = "source";
    public static final String ES_TITLE = "title";
    public static final String ES_CONTENT = "content";
}
package com.zhiwei.middleware.automatic.server.config;
import java.util.List;
import java.util.Set;
/**
 * Holder for two process-wide, mutable static collections.
 * Neither field is initialised here; both stay {@code null} until assigned elsewhere.
 */
public class GlobalPojo {
// Brand words — presumably loaded at startup; the code that fills it is not in this file.
public static Set<String> BRAND_WORDS;
// All group names — presumably the known mark groups; TODO confirm against the code that fills it.
public static List<String> ALL_GROUP;
}
package com.zhiwei.middleware.automatic.server.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Configuration properties bound from the {@code middleware} prefix.
 */
@Component
@ConfigurationProperties(prefix = "middleware")
public class MiddlewareProperties {

    /** Bound from {@code middleware.zookeeper-address}. */
    private String zookeeperAddress;

    public String getZookeeperAddress() {
        return this.zookeeperAddress;
    }

    public void setZookeeperAddress(String zookeeperAddress) {
        this.zookeeperAddress = zookeeperAddress;
    }

    /** Bound from {@code middleware.app-name}. */
    private String appName;

    public String getAppName() {
        return this.appName;
    }

    public void setAppName(String appName) {
        this.appName = appName;
    }

    /** Bound from {@code middleware.mark-group}. */
    private String markGroup;

    public String getMarkGroup() {
        return this.markGroup;
    }

    public void setMarkGroup(String markGroup) {
        this.markGroup = markGroup;
    }

    /** Bound from {@code middleware.filter-group}. */
    private String filterGroup;

    public String getFilterGroup() {
        return this.filterGroup;
    }

    public void setFilterGroup(String filterGroup) {
        this.filterGroup = filterGroup;
    }
}
package com.zhiwei.middleware.automatic.server.config;
import com.mongodb.ConnectionString;
import com.mongodb.MongoClientSettings;
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import org.springframework.data.mongodb.MongoDatabaseFactory;
import org.springframework.data.mongodb.SpringDataMongoDB;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.SimpleMongoClientDatabaseFactory;
import org.springframework.data.mongodb.core.convert.DbRefResolver;
import org.springframework.data.mongodb.core.convert.DefaultDbRefResolver;
import org.springframework.data.mongodb.core.convert.DefaultMongoTypeMapper;
import org.springframework.data.mongodb.core.convert.MappingMongoConverter;
import org.springframework.data.mongodb.core.mapping.MongoMappingContext;
import java.util.concurrent.TimeUnit;
/**
 * Spring configuration exposing two {@link MongoTemplate} beans: {@code markerMongoTemplate}
 * (primary) and {@code hangzhouMongoTemplate}. Each has its own connection string and database
 * name and shares the same timeout properties.
 *
 * @author liu-yu
 * @since 2022/12/21
 */
@Configuration
public class MongoConfig {

    /** Socket connect timeout; applied in milliseconds (assumes the property is expressed in ms). */
    @Value("${mongo.connectTimeout}")
    private int connectTimeout;

    /** Maximum wait for a pooled connection; applied in milliseconds (assumes the property is in ms). */
    @Value("${mongo.maxWaitTime}")
    private int maxWaitTime;

    @Value("${mongo.dataBaseMarker}")
    private String dataBaseMarker;

    @Value("${primary.uri.marker}")
    private String uriMarker;

    @Value("${primary.uri.hangzhou}")
    private String uriHangZhou;

    @Value("${mongo.hangzhouMarker}")
    private String dataBaseHangZhou;

    /**
     * Template for the "marker" database.
     *
     * @return a template whose converter does not write the {@code _class} field
     */
    @Primary
    @Bean(name = "markerMongoTemplate")
    public MongoTemplate getMongoTemplateMarker() {
        return buildTemplate(buildFactory(uriMarker, dataBaseMarker));
    }

    /**
     * Template for the "hangzhou" database.
     *
     * @return a template whose converter does not write the {@code _class} field
     */
    @Bean(name = "hangzhouMongoTemplate")
    public MongoTemplate getMongoTemplateHangZhou() {
        return buildTemplate(buildFactory(uriHangZhou, dataBaseHangZhou));
    }

    /**
     * Creates a client for {@code uri} with the configured timeouts and wraps it in a
     * database factory bound to {@code database}.
     *
     * @param uri      MongoDB connection string
     * @param database database name the factory resolves to
     */
    private MongoDatabaseFactory buildFactory(String uri, String database) {
        // The explicit pool/socket settings are applied after the connection string,
        // so they take effect on top of whatever the URI specifies.
        final MongoClientSettings settings = MongoClientSettings.builder()
                .applyConnectionString(new ConnectionString(uri))
                .applyToConnectionPoolSettings(pool -> pool.maxWaitTime(maxWaitTime, TimeUnit.MILLISECONDS))
                .applyToSocketSettings(socket -> socket.connectTimeout(connectTimeout, TimeUnit.MILLISECONDS))
                .build();
        final MongoClient client = MongoClients.create(settings, SpringDataMongoDB.driverInformation());
        return new SimpleMongoClientDatabaseFactory(client, database);
    }

    /**
     * Builds a template on top of {@code factory}. The same factory instance backs both the
     * DBRef resolver and the template, so only one client is created per template.
     */
    private static MongoTemplate buildTemplate(MongoDatabaseFactory factory) {
        final DbRefResolver dbRefResolver = new DefaultDbRefResolver(factory);
        final MappingMongoConverter converter = new MappingMongoConverter(dbRefResolver, new MongoMappingContext());
        // A null type key means no _class field is written into stored documents.
        converter.setTypeMapper(new DefaultMongoTypeMapper(null));
        return new MongoTemplate(factory, converter);
    }
}
package com.zhiwei.middleware.automatic.server.config;
import org.springframework.context.annotation.Bean;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Component;
import java.util.concurrent.ThreadPoolExecutor;
/**
 * Declares the application's named {@link ThreadPoolTaskExecutor} beans.
 *
 * <p>All pools share the same construction recipe and differ only in core size, max size,
 * queue capacity and thread-name prefix.
 */
@Component
public class TaskPoolConfig {

    /** Pool {@code autMarkExecutor}: core 5, max 10, queue 20. */
    @Bean("autMarkExecutor")
    public ThreadPoolTaskExecutor autMarkExecutor() {
        return buildExecutor(5, 10, 20, "autoMark-executor-");
    }

    /** Pool {@code asyncExecutor}: core 4, max 5, queue 20. */
    @Bean("asyncExecutor")
    public ThreadPoolTaskExecutor asyncExecutor() {
        return buildExecutor(4, 5, 20, "async-executor-");
    }

    /** Pool {@code aggreeNoiseExecutor}: core 32, max 64, queue 50. */
    @Bean("aggreeNoiseExecutor")
    public ThreadPoolTaskExecutor aggreeNoiseExecutor() {
        return buildExecutor(32, 64, 50, "aggree-noise-executor-");
    }

    /** Pool {@code aggreeExecutor}: core 5, max 10, queue 50. */
    @Bean("aggreeExecutor")
    public ThreadPoolTaskExecutor aggreeExecutor() {
        return buildExecutor(5, 10, 50, "aggree-executor-");
    }

    /** Pool {@code eventAggreeEasyExecutor}: core 6, max 8, queue 20. */
    @Bean("eventAggreeEasyExecutor")
    public ThreadPoolTaskExecutor eventAggreeEasyExecutor() {
        return buildExecutor(6, 8, 20, "event-easy-aggree-executor-");
    }

    /** Pool {@code eventAggreeExecutor}: core 60, max 100, queue 50. */
    @Bean("eventAggreeExecutor")
    public ThreadPoolTaskExecutor eventAggreeExecutor() {
        return buildExecutor(60, 100, 50, "event-aggree-executor-");
    }

    /**
     * Creates and initialises an executor with the given sizing.
     *
     * @param corePoolSize     number of core threads
     * @param maxPoolSize      maximum number of threads
     * @param queueCapacity    capacity of the task queue
     * @param threadNamePrefix prefix for the names of the pool's threads
     * @return an initialised executor using the caller-runs rejection policy
     */
    private static ThreadPoolTaskExecutor buildExecutor(int corePoolSize, int maxPoolSize,
                                                        int queueCapacity, String threadNamePrefix) {
        final ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
        executor.setCorePoolSize(corePoolSize);
        executor.setMaxPoolSize(maxPoolSize);
        executor.setThreadNamePrefix(threadNamePrefix);
        executor.setQueueCapacity(queueCapacity);
        // Rejection policy once the pool is saturated: CALLER_RUNS executes the task
        // in the submitting thread instead of a pool thread.
        executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
        executor.initialize();
        return executor;
    }
}
package com.zhiwei.middleware.automatic.server.dao;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.FieldSortBuilder;
import java.io.IOException;
import java.util.List;
/**
* Data-access contract for querying Elasticsearch indices.
*/
public interface EsDao {
/**
* Runs an Elasticsearch search.
*
* @param indexes indices to search
* @param postFilter post-filter condition
* @param query query condition
* @param sort sort order
* @param from start offset of the first hit
* @param size number of hits to return
* @param highlighter highlight configuration
* @return the Elasticsearch hits
* @throws IOException on I/O failure
*/
SearchHits search(String[] indexes, QueryBuilder postFilter, QueryBuilder query, FieldSortBuilder sort, int from, int size, HighlightBuilder highlighter) throws IOException;
/**
* Deep-pagination search.
*
* @param indexes indices to search
* @param searchSourceBuilder search definition to execute (not documented in the original comment)
* @param size number of hits to return. NOTE(review): EsDaoImpl applies this value as the
* per-request page size and keeps fetching until a request returns no hits — confirm whether
* it is meant as a page size or as a cap on the total
* @return the Elasticsearch hits
* @throws IOException on I/O failure
*/
List<SearchHit> afterSearch(String [] indexes, SearchSourceBuilder searchSourceBuilder, int size) throws IOException;
/**
* Searches the given indices with a boolean query.
* NOTE(review): EsDaoImpl requests at most 5000 hits for this call.
*
* @param indexes indices to search
* @param bool boolean query to run
* @return the Elasticsearch hits
* @throws IOException on I/O failure
*/
SearchHits searchHitsByQuery(String[] indexes, BoolQueryBuilder bool) throws IOException;
}
package com.zhiwei.middleware.automatic.server.dao;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import org.springframework.data.mongodb.core.query.Query;
import java.util.List;
/**
* Data-access contract for {@link TemplateRecord} documents, driven by Spring Data MongoDB {@link Query} objects.
*/
public interface TemplateRecordDao {
/**
* Finds the template records matching a query.
* @param query selection criteria
* @return the matching records
*/
List<TemplateRecord> findTemplateRecord (Query query);
/**
* Inserts a new template record.
* @param templateRecord record to insert
*/
void insertTemplateRecord (TemplateRecord templateRecord);
/**
* Counts the template records matching a query.
* @param query selection criteria
* @return number of matching records
*/
long count(Query query);
/**
* Removes the template records selected by a query.
* @param query selection criteria
*/
void removeTemplateRecord (Query query);
}
package com.zhiwei.middleware.automatic.server.dao.impl;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link EsDao} implementation backed by the Elasticsearch {@link RestHighLevelClient}.
 */
@Component
public class EsDaoImpl implements EsDao {
    private final RestHighLevelClient esClient;

    public EsDaoImpl(RestHighLevelClient esClient) {
        this.esClient = esClient;
    }

    /**
     * Builds a search request from the given parts and executes it once.
     *
     * <p>{@code sort} and {@code highlighter} are applied only when non-null;
     * {@code from} and {@code size} are applied only when non-negative.
     *
     * @return the hits of the search response
     * @throws IOException if the client call fails
     */
    @Override
    public SearchHits search(String[] indexes, QueryBuilder postFilter, QueryBuilder query, FieldSortBuilder sort, int from, int size, HighlightBuilder highlighter) throws IOException {
        SearchSourceBuilder source = new SearchSourceBuilder();
        // Post-filter and main query are always set, even when null.
        source.postFilter(postFilter);
        source.query(query);
        // Optional parts: skipped when absent or negative.
        if (sort != null) {
            source.sort(sort);
        }
        if (from >= 0) {
            source.from(from);
        }
        if (size >= 0) {
            source.size(size);
        }
        if (highlighter != null) {
            source.highlighter(highlighter);
        }

        SearchRequest request = new SearchRequest();
        request.indices(indexes);
        request.source(source);
        return esClient.search(request, RequestOptions.DEFAULT).getHits();
    }

    /**
     * Collects every hit of a search by paging with {@code search_after}.
     *
     * <p>The supplied builder is modified: an ascending {@code _id} sort is added,
     * {@code size} is set as the page size when non-negative, and the
     * {@code search_after} cursor is updated before each follow-up request.
     * Paging stops at the first response that contains no hits.
     *
     * @return all hits, in the order the pages were returned
     * @throws IOException if a client call fails
     */
    @Override
    public List<SearchHit> afterSearch(String [] indexes, SearchSourceBuilder builder, int size) throws IOException {
        List<SearchHit> collected = new ArrayList<>();
        SearchRequest request = new SearchRequest();
        request.indices(indexes);
        if (size >= 0) {
            builder.size(size);
        }
        builder.sort("_id", SortOrder.ASC);

        // Sort values of the last hit of the previous page; empty before the first request.
        Object[] cursor = new Object[]{};
        while (true) {
            if (cursor.length > 0) {
                builder.searchAfter(cursor);
            }
            request.source(builder);
            SearchHit[] page = esClient.search(request, RequestOptions.DEFAULT).getHits().getHits();
            if (page.length < 1) {
                // An empty page means the result set is exhausted.
                break;
            }
            for (SearchHit hit : page) {
                collected.add(hit);
            }
            cursor = page[page.length - 1].getSortValues();
        }
        return collected;
    }

    /**
     * Runs a boolean query against the given indices, requesting up to 5000 hits.
     *
     * @return the hits of the search response
     * @throws IOException if the client call fails
     */
    @Override
    public SearchHits searchHitsByQuery(String[] indexes, BoolQueryBuilder bool) throws IOException {
        SearchSourceBuilder source = new SearchSourceBuilder();
        source.size(5000);
        source.query(bool);

        SearchRequest request = new SearchRequest();
        request.indices(indexes);
        request.source(source);

        SearchResponse response = esClient.search(request, RequestOptions.DEFAULT);
        return response.getHits();
    }
}
package com.zhiwei.middleware.automatic.server.dao.impl;
import com.zhiwei.middleware.automatic.server.dao.TemplateRecordDao;
import com.zhiwei.middleware.automatic.server.pojo.TemplateNum;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.stereotype.Component;
import java.util.List;
/**
* {@link TemplateRecordDao} implementation that forwards every operation to a {@link MongoTemplate}.
*/
@Component
public class TemplateRecordDaoImpl implements TemplateRecordDao {
private final MongoTemplate mongoTemplate;
/**
* @param mongoTemplate the MongoTemplate bean qualified as "markerMongoTemplate"
*/
public TemplateRecordDaoImpl(@Qualifier("markerMongoTemplate") MongoTemplate mongoTemplate) {
this.mongoTemplate = mongoTemplate;
}
/** Finds the TemplateRecord documents matching the query. */
@Override
public List<TemplateRecord> findTemplateRecord(Query query) {
return mongoTemplate.find(query, TemplateRecord.class);
}
/** Inserts the given record through the template. */
@Override
public void insertTemplateRecord(TemplateRecord templateRecord) {
mongoTemplate.insert(templateRecord);
}
/** Counts the TemplateRecord documents matching the query. */
@Override
public long count(Query query) {
return mongoTemplate.count(query, TemplateRecord.class);
}
/** Removes the TemplateRecord documents matching the query. */
@Override
public void removeTemplateRecord(Query query) {
mongoTemplate.remove(query, TemplateRecord.class);
}
}
package com.zhiwei.middleware.automatic.server.dubbo.handle;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.base.filter.FilterInfo;
import com.zhiwei.middleware.automatic.server.config.MiddlewareProperties;
import com.zhiwei.middleware.automatic.server.util.DataCollectionUtil;
import com.zhiwei.middleware.automatic.server.util.EventCollectionUtil;
import com.zhiwei.middleware.automatic.server.util.MarkInfoUtil;
import com.zhiwei.middleware.cleaner.filter.UnifiedFilterClient;
import com.zhiwei.middleware.mark.service.MarkerClient;
import com.zhiwei.middleware.mark.vo.QueryResult;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Map;
/**
 * Facade over the two remote clients this module talks to: the unified filter
 * client and the marker client. Both are created once, in the constructor, from
 * {@link MiddlewareProperties}.
 */
@Component
public class DubboHandler {
    private static final Logger log = LogManager.getLogger(DubboHandler.class);
    private final UnifiedFilterClient unifiedFilterClient;
    private final MarkerClient markerClient;

    /**
     * Creates both clients from the configured application name, ZooKeeper
     * address and the respective group names.
     *
     * @param properties middleware configuration
     */
    public DubboHandler(MiddlewareProperties properties) {
        this.unifiedFilterClient = UnifiedFilterClient.getClient(
                properties.getAppName(),
                properties.getZookeeperAddress(),
                properties.getFilterGroup());
        this.markerClient = MarkerClient.getService(
                properties.getZookeeperAddress(),
                properties.getMarkGroup(),
                properties.getAppName());
    }

    /** Forwards to {@code UnifiedFilterClient.contains(filterInfo, true)}. */
    public boolean contains(FilterInfo filterInfo) {
        return this.unifiedFilterClient.contains(filterInfo, true);
    }

    /** Forwards to {@code UnifiedFilterClient.getUpdateInfo(filterInfo)}. */
    public String[] getMupdates(FilterInfo filterInfo) {
        return this.unifiedFilterClient.getUpdateInfo(filterInfo);
    }

    /** Forwards the given mark infos to {@code MarkerClient.upsert}. */
    public void markUpsert(List<MarkInfo> collect) {
        this.markerClient.upsert(collect);
    }

    /**
     * Queries mark (label) results in batch.
     *
     * @param list filter infos to look up
     * @return the map returned by {@code MarkerClient.matchQueryResult}
     */
    public Map<String, QueryResult> matchQueryResult(List<FilterInfo> list) {
        return this.markerClient.matchQueryResult(list);
    }

    /**
     * Data-collection marking entry point: supplements the required fields on the
     * raw records, converts them to {@link MarkInfo} and upserts them through the
     * marker client.
     *
     * @param list    raw records; passed to {@code DataCollectionUtil.supplementForInsert}
     * @param mgroup  group passed to the supplement step
     * @param mtag    tag passed to the supplement step
     * @param mperson person passed to the supplement step
     * @return the number of converted {@link MarkInfo} objects
     */
    public int dataCollectionUpsert(List<JSONObject> list, String mgroup, String mtag, String mperson) {
        // Fill in the fields required for insertion.
        DataCollectionUtil.supplementForInsert(list, mgroup, mtag, mperson);
        final List<MarkInfo> converted = MarkInfoUtil.transformToMarkInfo(list);
        this.markerClient.eventCollectionUpsert(converted);
        log.info("数据采集-调用标注中间件插入数据{}条", list.size());
        return converted.size();
    }

    /**
     * Event-collection marking entry point: supplements the required fields on the
     * raw records, converts them to {@link MarkInfo} and upserts them through the
     * marker client.
     *
     * @param list    raw records; passed to {@code EventCollectionUtil.supplementForInsert}
     * @param mgroup  group passed to the supplement step
     * @param mperson person passed to the supplement step
     * @return the converted {@link MarkInfo} list that was sent to the marker client
     */
    public List<MarkInfo> eventCollectionUpsertWithSupplement(List<JSONObject> list, String mgroup, String mperson) {
        // Fill in the fields required for insertion.
        EventCollectionUtil.supplementForInsert(list, mgroup, mperson);
        final List<MarkInfo> converted = MarkInfoUtil.transformToMarkInfo(list);
        this.markerClient.eventCollectionUpsert(converted);
        log.info("调用标注中间件插入数据{}条", list.size());
        return converted;
    }
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import java.util.List;
import java.util.Map;
/**
* Service contract for automatic marking and for maintaining template titles.
*/
public interface AutoMaticService {
/**
* Submits mark infos for automatic marking.
* NOTE(review): AutoMaticServiceImpl JSON-serializes each element and puts the list on the queue
* keyed by GenericAttribute.REDIS_QUEUE_ONE_KEY; processing happens elsewhere.
*
* @param markInfos items to submit
*/
void autoMark(List<MarkInfo> markInfos);
/**
* Submits multi mark infos for automatic marking.
* NOTE(review): AutoMaticServiceImpl JSON-serializes each element and puts the list on the queue
* keyed by GenericAttribute.REDIS_QUEUE_MULTI_KEY.
*
* @param markInfoMultis items to submit
*/
void autoMarkMulti(List<MarkInfoMulti> markInfoMultis);
/**
* Corrects the markTag of a template title; the tag is added when it does not exist yet.
*
* @param group project group
* @param templateTitle template title
* @param fixTag the correct tag
* @return not documented in the original; presumably whether the change succeeded — TODO confirm
*/
boolean modifyTemplateTitle(String group, String templateTitle, String fixTag);
/**
* Gets the data for a template title (only the latest 100 entries).
*
* @param group project
* @param templateTitle template title
* @return feature values
*/
List<String> getMupdateByTemplateTitle(String group, String templateTitle);
/**
* Tries to find a template title from a title and a feature value.
*
* @param group project
* @param title title
* @param mupdate feature value
* @return template title
*/
String tryGetTemplateTitleByMupdate(String group, String title, String mupdate);
/**
* Matches a title online against the existing aggregated titles of a project group.
*
* @param project project
* @param title title
* @return the match result (structure not documented here)
*/
public Map<String, Object> compareWithTemplateTileOL(String project, String title);
/**
* Resets an automatic-marking template.
* @param group project
* @param templateTitle template title
* @return whether the reset succeeded
*/
boolean resetTemplate (String group, String templateTitle);
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import java.util.List;
/**
* Service contract for generic aggregation tasks: create a task, append data to it,
* start the aggregation and read the result.
*/
public interface CommonService {
/**
* Obtains a task id (new).
*
* @return the task id
*/
String generateAggreeOrder();
/**
* Appends data to the task with the given id.
*
* @param id task id
* @param list data to append
* @return not documented in the original; presumably whether the append succeeded — TODO confirm
*/
boolean appendAggreeOrder(String id, List<AggreeDTO> list);
/**
* Aggregates the task data with bisecting k-means.
*
* @param id task id
* @return not documented in the original; presumably whether the aggregation was started — TODO confirm
*/
boolean startAggree(String id);
/**
* Aggregates the task data with bisecting k-means.
*
* @param id task id
* @param limit not documented in the original; presumably a threshold for the aggregation — TODO confirm
* @return not documented in the original; presumably whether the aggregation was started — TODO confirm
*/
boolean startAggree(String id, double limit);
/**
* Gets the aggregation result (returns the first page by default).
*
* @param id task id
* @return the aggregation result
*/
CommonAggreeResult getAggreeResult(String id);
/**
* Gets the aggregation result (paged).
*
* @param id task id
* @param page page number
* @param pageLimit page size
* @return the aggregation result
*/
CommonAggreeResult getAggreeResult(String id, int page, int pageLimit);
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.category.ClassB;
import java.util.List;
import java.util.Map;
/**
* Service contract for the data-collection workflow: load data, aggregate it, review and
* re-tag the aggregated parent/child titles, manage the noise set and insert the result.
*
* Parameters shared by most methods, as far as the original comments describe them:
* group is the project, id is the id, mtag is the tag, mperson is the annotator.
* The effect of typeB (ClassB.TypeB), isTitle and markFlag is not documented here.
*/
public interface DataCollectionService {
/**
* Clears all caches.
*
* @param group project
* @param id id
*/
void cleanCache(String group, String id);
/**
* Clears all caches (the noise set is kept).
*
* @param group project
* @param id id
*/
void cleanCacheExceptNoise(String group, String id);
/**
* Adds the base data set.
*
* @param group project
* @param id id
* @param compressedList data set
*/
void addDataCollection(String group, String id, List<String> compressedList);
/**
* Starts the aggregation.
*
* @param group project
* @param id id
* @param highWords not documented in the original — TODO confirm meaning
*/
void startAggree(String group, String id, String highWords);
/**
* Modifies the tag of several parent templates in batch (the child tags belonging to them are modified in batch).
*
* @param group project
* @param id id
* @param fatherIds parent ids
* @param mtag tag
* @param mperson annotator
* @param typeB typeB
* @return whether the operation succeeded
*/
boolean batchModifyFatherTag(String group, String id, List<String> fatherIds, String mtag, String mperson,
ClassB.TypeB typeB);
/**
* Modifies the tag of a parent template (the child tags belonging to it are modified in batch).
*
* @param group project
* @param id id
* @param fatherId parent id
* @param mtag tag
* @return not documented in the original; presumably whether the operation succeeded — TODO confirm
*/
boolean modifyFatherTag(String group, String id, String fatherId, String mtag, String mperson, ClassB.TypeB typeB);
/**
* Modifies a child tag.
*
* @param group project
* @param id id
* @param fatherId parent id
* @param sonId child id
* @param mtag tag
* @return not documented in the original; presumably whether the operation succeeded — TODO confirm
*/
boolean modifySonTag(String group, String id, String fatherId, String sonId, String mtag, String mperson,
ClassB.TypeB typeB);
/**
* Puts a parent into the noise set.
*
* @param group project
* @param id id
* @param fatherId parent id
* @return not documented in the original; presumably whether the operation succeeded — TODO confirm
*/
boolean throwIntoNoise(String group, String id, String fatherId, ClassB.TypeB typeB);
/**
* Puts several parents into the noise set in batch.
*
* @param group project
* @param id id
* @param fatherIds parent ids
* @return not documented in the original; presumably whether the operation succeeded — TODO confirm
*/
boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, ClassB.TypeB typeB);
/**
* Restores a parent from the noise set.
*
* @param group project
* @param id id
* @param fatherId parent id
* @return not documented in the original; presumably whether the operation succeeded — TODO confirm
*/
boolean restoreFromNoise(String group, String id, String fatherId, ClassB.TypeB typeB);
/**
* Gets one page of parent-title information.
*
* @param group project
* @param id id
* @param page page number
* @param size page size
* @param isAsc whether to sort ascending
* @param keyword keyword
* @return the page data (map structure not documented here)
*/
Map<String, Object> getFatherTitles(String group, String id, int page, int size, boolean isAsc,
String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag);
/**
* Gets one page of child information for a parent id.
*
* @param group project
* @param id id
* @param fatherId parent id
* @param page page number
* @param size page size
* @param isAsc whether to sort ascending
* @param keyword keyword
* @return the page data (map structure not documented here)
*/
Map<String, Object> getSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc,
String keyword, ClassB.TypeB typeB);
/**
* Gets one page of parent-title information from the noise set.
*
* @param group project
* @param id id
* @param page page number
* @param size page size
* @param isAsc whether to sort ascending
* @param keyword keyword
* @return the page data (map structure not documented here)
*/
Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size, boolean isAsc,
String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag);
/**
* Gets one page of child information from the noise set for a parent id.
*
* @param group project
* @param id id
* @param fatherId parent id
* @param page page number
* @param size page size
* @param isAsc whether to sort ascending
* @param keyword keyword
* @return the page data (map structure not documented here)
*/
Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId, int page, int size,
boolean isAsc, String keyword, ClassB.TypeB typeB);
/**
* Inserts the data into storage once checking is finished.
*
* @param group project
* @param id id
*/
void checkedThenInsert(String group, String id);
/**
* Gets the current (interim) aggregation status immediately.
*
* @param group project
* @param id id
* @return -2: failed to get the result; -1: not aggregated; 0: aggregating; 1: aggregated
*/
int getAggreResultNow(String group, String id);
/**
* Gets the current (interim) insertion status immediately.
*
* @param group project
* @param id id
* @return -2: failed to get the result; -1: not inserted; 0: inserting; 1: inserted
*/
int getInsertResultNow(String group, String id);
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import java.util.Map;
/**
* Service contract for the mark-upload workflow: add source data, start the upload,
* poll its status, page through the results and clean them up.
*/
public interface DataUploadService {
/**
* Adds a source data set.
*
* @param group project
* @param id task id
* @param sourceStr source data as a string (format not documented here)
*/
void addUploadList(String group, String id, String sourceStr);
/**
* Starts the upload.
*
* @param group project
* @param id task id
* @param mperson submitter
* @param mtagType not documented in the original — TODO confirm meaning
* @param filterType not documented in the original — TODO confirm meaning
* @param projectId not documented in the original — TODO confirm meaning
* @param insertType not documented in the original — TODO confirm meaning
*/
void startUpload(String group, String id, String mperson,
UploadInfo.MtagType mtagType, UploadInfo.FilterType filterType, String projectId, InsertType insertType);
/**
* Gets the upload status (progress).
*
* @param group project
* @param id task id
* @return the status map (structure not documented here)
*/
Map<String, Object> getUploadStatus(String group, String id);
/**
* Gets the data set for an UploadType.
*
* @param group project
* @param id task id
* @param page page number
* @param size page size
* @param isAsc sort direction
* @param searchField field to search in
* @param keyword keyword
* @param uploadType upload type
* @return the page data (map structure not documented here)
*/
Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc,
String searchField, String keyword, UploadInfo.UploadType uploadType);
/**
* Gets the DataType of a record.
*
* @param json record to classify
* @param typeB typeB
* @return the DataType
*/
UploadInfo.DataType getDataType(JSONObject json, ClassB.TypeB typeB);
/**
* Cleans up the data set of a task.
*
* @param group project
* @param id task id
*/
void cleanUploadResult(String group, String id);
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import java.util.Map;
/**
* Service contract for marking event-collection data: load source data, aggregate it,
* review and tag the aggregated template titles, inspect the noise set and insert the marked data.
*
* The original comments do not describe group and id beyond their names.
*/
public interface EventCollectionMarkService {
/**
* Adds a source data set for event-collection aggregation.
*
* @param group group
* @param id id
* @param sourceStr source data as a string (format not documented here)
*/
void addEventCollectionAggreSourceList(String group, String id, String sourceStr);
/**
* Clears the event-collection aggregation result set.
*/
void cleanEventCollectionAggreData(String group, String id);
/**
* Gets the event-collection aggregation result.
*
* @return the page data (map structure not documented here)
*/
Map<String, Object> getEventCollectionAggreTemplate(String group, String id, int page, int size, boolean isAsc,
int markFlag, String keyword);
/**
* Modifies the tag of an aggregated template title.
*
* @param group group
* @param id id
* @param templateTitle template title
* @param modifyTag new tag
* @return not documented in the original; presumably whether the change succeeded — TODO confirm
*/
boolean modifyEventCollectionAggreTitleMarkTag(String group, String id, String templateTitle, String modifyTag);
/**
* Gets the markTag of the parent title for a template title.
*
* @param group group
* @param id id
* @param templateTitle template title
* @return the markTag
*/
String getEventCollectionMarkTagByTemplate(String group, String id, String templateTitle);
/**
* Gets the child titles of a template title.
*
* @param group group
* @param id id
* @param templateTitle template title
* @return the child titles (map structure not documented here)
*/
Map<String, Object> getEventCollectionAggreSubTitle(String group, String id, String templateTitle);
/**
* Starts the aggregation.
*
* @param group group
* @param id id
*/
public void startAggre(String group, String id);
/**
* Inserts the marked event-collection data into storage.
*
* @param group group
* @param id id
* @param markSum not documented in the original — TODO confirm meaning
* @return not documented in the original; presumably whether the insert succeeded — TODO confirm
*/
public boolean eventCollectionMarkedInsert(String group, String id, int markSum);
/**
* Inserts the marked event-collection data into storage.
*
* @param group group
* @param id id
* @param markSum not documented in the original — TODO confirm meaning
* @param mperson not documented in the original — TODO confirm meaning
* @return not documented in the original; presumably whether the insert succeeded — TODO confirm
*/
public boolean eventCollectionMarkedInsert(String group, String id, int markSum, String mperson);
/**
* Clears all results (aggregation set + noise set).
*/
void cleanEventCollectionAllData(String group, String id);
/**
* Gets the noise parent titles of the event collection.
*
* @param group group
* @param id id
* @param page page number
* @param size page size
* @param isAsc whether to sort ascending
* @param keyword keyword
* @return the page data (map structure not documented here)
*/
Map<String, Object> getEventCollectionNoiseTitles(String group, String id, int page, int size, boolean isAsc,
String keyword);
/**
* Gets the noise child set of the event collection.
*
* @param group group
* @param id id
* @param templateTitle template title
* @return the child set (map structure not documented here)
*/
Map<String, Object> getEventCollectionNoiseSubTitle(String group, String id, String templateTitle);
/**
* Tells whether the marked part has already been inserted into storage.
*
* @param group group
* @param id id
* @return true when the marked part has been inserted
*/
boolean markedHasInserted(String group, String id);
}
package com.zhiwei.middleware.automatic.server.dubbo.service.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.TemplateTitleService;
import org.apache.dubbo.config.annotation.Service;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
* Dubbo-exposed {@link AutoMaticService} implementation.
* The two autoMark methods enqueue their input through {@link RedissonUtil};
* every other method forwards unchanged to {@link TemplateTitleService}.
*/
@Service
public class AutoMaticServiceImpl implements AutoMaticService {
private final RedissonUtil redissonUtil;
private final TemplateTitleService templateTitleService;
public AutoMaticServiceImpl(RedissonUtil redissonUtil, TemplateTitleService templateTitleService) {
this.redissonUtil = redissonUtil;
this.templateTitleService = templateTitleService;
}
/** Serializes each mark info to a JSON string and puts the list on the queue keyed by REDIS_QUEUE_ONE_KEY. */
@Override
public void autoMark(List<MarkInfo> markInfos) {
redissonUtil.putQueue(GenericAttribute.REDIS_QUEUE_ONE_KEY, markInfos.stream().map(JSONObject::toJSONString).collect(Collectors.toList()));
}
/** Serializes each multi mark info to a JSON string and puts the list on the queue keyed by REDIS_QUEUE_MULTI_KEY. */
@Override
public void autoMarkMulti(List<MarkInfoMulti> markInfoMultis) {
redissonUtil.putQueue(GenericAttribute.REDIS_QUEUE_MULTI_KEY, markInfoMultis.stream().map(JSONObject::toJSONString).collect(Collectors.toList()));
}
// The remaining methods delegate one-to-one to TemplateTitleService.
@Override
public boolean modifyTemplateTitle(String group, String templateTitle, String fixTag) {
return templateTitleService.modifyTemplateTitle(group, templateTitle, fixTag);
}
@Override
public List<String> getMupdateByTemplateTitle(String group, String templateTitle) {
return templateTitleService.getMupdateByTemplateTitle(group, templateTitle);
}
@Override
public String tryGetTemplateTitleByMupdate(String group, String title, String mupdate) {
return templateTitleService.tryGetTemplateTitleByMupdate(group, title, mupdate);
}
@Override
public Map<String, Object> compareWithTemplateTileOL(String project, String title) {
return templateTitleService.compareWithTemplateTileOL(project, title);
}
@Override
public boolean resetTemplate(String group, String templateTitle) {
return templateTitleService.resetTemplate(group, templateTitle);
}
}
package com.zhiwei.middleware.automatic.server.dubbo.service.impl;
import com.zhiwei.middleware.automatic.server.dubbo.service.CommonService;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeCache;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult;
import com.zhiwei.middleware.automatic.server.pojo.PageData;
import com.zhiwei.middleware.automatic.server.pojo.Status;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import com.zhiwei.middleware.automatic.server.service.handler.TextHandlerService;
import org.springframework.stereotype.Service;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult.ResultInfo;
import java.util.List;
/**
 * {@link CommonService} implementation that delegates task handling to
 * {@link TextHandlerService} and pages the cached aggregation results.
 *
 * <p>NOTE(review): this class is annotated with Spring's {@code @Service}
 * (see the file's imports) while the sibling implementations in this package
 * use Dubbo's {@code @Service} — confirm that this is intended.
 */
@Service
public class CommonServiceImpl implements CommonService {
    /** Number of results per page when the caller does not specify one. */
    private static final int PAGE_SIZE = 5000;
    /** Pages are 1-based: page 1 starts at offset 0. */
    private static final int FIRST_PAGE = 1;
    private final TextHandlerService textHandler;

    public CommonServiceImpl(TextHandlerService textHandler) {
        this.textHandler = textHandler;
    }

    /** {@inheritDoc} */
    @Override
    public String generateAggreeOrder() {
        return textHandler.generateAggreeOrder();
    }

    /** {@inheritDoc} */
    @Override
    public boolean appendAggreeOrder(String id, List<AggreeDTO> list) {
        return textHandler.appendAggreeOrderNew(id, list);
    }

    /** {@inheritDoc} */
    @Override
    public boolean startAggree(String id) {
        return textHandler.startAggree(id);
    }

    /** {@inheritDoc} */
    @Override
    public boolean startAggree(String id, double limit) {
        return textHandler.startAggree(id, limit);
    }

    /**
     * Returns the first page of the aggregation result, using the default page size.
     *
     * @param id task id
     * @return the first page, or a status-only result as described on the paged overload
     */
    @Override
    public CommonAggreeResult getAggreeResult(String id) {
        // Page numbers start at 1; page 0 is rejected by the paged overload.
        return getAggreeResult(id, FIRST_PAGE, PAGE_SIZE);
    }

    /**
     * Returns one page of the aggregation result.
     *
     * @param id        task id
     * @param page      1-based page number
     * @param pageLimit page size, must be positive
     * @return {@code Status.ERROR} when {@code page} or {@code pageLimit} is not positive,
     *         when no task is found for {@code id}, or when the page lies beyond the last
     *         result; {@code Status.RUN} while the task has no results yet; otherwise
     *         {@code Status.END} with the requested page
     */
    @Override
    public CommonAggreeResult getAggreeResult(String id, int page, int pageLimit) {
        CommonAggreeCache cache = textHandler.getAggreeResult(id);
        // Error state: invalid paging arguments or no task found for the id.
        if (page <= 0 || pageLimit <= 0 || null == cache) {
            return new CommonAggreeResult(Status.ERROR);
        }
        // Still aggregating: nothing to page through yet.
        List<ResultInfo> result = cache.getResults();
        if (result.isEmpty()) {
            return new CommonAggreeResult(Status.RUN);
        }
        int total = result.size();
        // Computed in long so that large page/pageLimit values cannot overflow int.
        long offset = (long) pageLimit * (page - 1);
        if (offset >= total) {
            // The requested page starts at or beyond the end of the results.
            return new CommonAggreeResult(Status.ERROR);
        }
        int start = (int) offset;
        int end = (int) Math.min(offset + pageLimit, (long) total);
        int totalPage = (int) (((long) total + pageLimit - 1) / pageLimit);
        PageData<ResultInfo> results = new PageData<>(page, total, totalPage, pageLimit,
                result.subList(start, end));
        return new CommonAggreeResult(Status.END, results);
    }
}
package com.zhiwei.middleware.automatic.server.dubbo.service.impl;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataCollectionService;
import com.zhiwei.middleware.automatic.server.service.impl.DataCollection;
import org.apache.dubbo.config.annotation.Service;
import java.util.List;
import java.util.Map;
/**
* Dubbo-exposed {@link DataCollectionService} implementation.
* Every method forwards its arguments unchanged to the same-named method of {@link DataCollection}
* and returns that method's result.
*/
@Service
public class DataCollectionServiceImpl implements DataCollectionService {
// Delegate that holds the actual data-collection logic.
private final DataCollection dataCollection;
public DataCollectionServiceImpl(DataCollection dataCollection) {
this.dataCollection = dataCollection;
}
@Override
public void cleanCache(String group, String id) {
dataCollection.cleanCache(group, id);
}
@Override
public void cleanCacheExceptNoise(String group, String id) {
dataCollection.cleanCacheExceptNoise(group, id);
}
@Override
public void addDataCollection(String group, String id, List<String> compressedList) {
dataCollection.addDataCollection(group, id, compressedList);
}
@Override
public void startAggree(String group, String id, String highWords) {
dataCollection.startAggree(group, id, highWords);
}
@Override
public boolean batchModifyFatherTag(String group, String id, List<String> fatherIds, String mtag, String mperson, ClassB.TypeB typeB) {
return dataCollection.batchModifyFatherTag(group, id, fatherIds, mtag, mperson, typeB);
}
@Override
public boolean modifyFatherTag(String group, String id, String fatherId, String mtag, String mperson, ClassB.TypeB typeB) {
return dataCollection.modifyFatherTag(group, id, fatherId, mtag, mperson, typeB);
}
@Override
public boolean modifySonTag(String group, String id, String fatherId, String sonId, String mtag, String mperson, ClassB.TypeB typeB) {
return dataCollection.modifySonTag(group, id, fatherId, sonId, mtag, mperson, typeB);
}
@Override
public boolean throwIntoNoise(String group, String id, String fatherId, ClassB.TypeB typeB) {
return dataCollection.throwIntoNoise(group, id, fatherId, typeB);
}
@Override
public boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, ClassB.TypeB typeB) {
return dataCollection.batchThrowIntoNoise(group, id, fatherIds, typeB);
}
@Override
public boolean restoreFromNoise(String group, String id, String fatherId, ClassB.TypeB typeB) {
return dataCollection.restoreFromNoise(group, id, fatherId, typeB);
}
@Override
public Map<String, Object> getFatherTitles(String group, String id, int page, int size, boolean isAsc, String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag) {
return dataCollection.getFatherTitles(group, id, page, size, isAsc, keyword, typeB, isTitle, markFlag);
}
@Override
public Map<String, Object> getSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc, String keyword, ClassB.TypeB typeB) {
return dataCollection.getSonTitles(group, id, fatherId, page, size, isAsc, keyword, typeB);
}
@Override
public Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size, boolean isAsc, String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag) {
return dataCollection.getNoiseFatherTitles(group, id, page, size, isAsc, keyword, typeB, isTitle, markFlag);
}
@Override
public Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc, String keyword, ClassB.TypeB typeB) {
return dataCollection.getNoiseSonTitles(group, id, fatherId, page, size, isAsc, keyword, typeB);
}
@Override
public void checkedThenInsert(String group, String id) {
dataCollection.checkedThenInsert(group, id);
}
@Override
public int getAggreResultNow(String group, String id) {
return dataCollection.getAggreResultNow(group, id);
}
@Override
public int getInsertResultNow(String group, String id) {
return dataCollection.getInsertResultNow(group, id);
}
}
package com.zhiwei.middleware.automatic.server.dubbo.service.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataUploadService;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import com.zhiwei.middleware.automatic.server.service.UploadService;
import org.apache.dubbo.config.annotation.Service;
import java.util.Map;
/**
* Dubbo-exposed {@link DataUploadService} implementation that forwards every call to {@link UploadService}.
*/
@Service
public class DataUploadServiceImpl implements DataUploadService {
// Delegate that holds the actual upload logic.
private final UploadService uploadService;
public DataUploadServiceImpl (UploadService uploadService) {
this.uploadService = uploadService;
}
@Override
public void addUploadList(String group, String id, String sourceStr) {
uploadService.addUploadList(group, id, sourceStr);
}
/**
* Bundles the arguments into a {@link MarkUploadRule} and hands it to the upload service.
* Note the constructor takes id before group, the reverse of this method's parameter order.
*/
@Override
public void startUpload(String group, String id, String mperson, UploadInfo.MtagType mtagType, UploadInfo.FilterType filterType, String projectId, InsertType insertType) {
uploadService.startUpload(new MarkUploadRule(id, group, mperson, mtagType, filterType, projectId, insertType));
}
@Override
public Map<String, Object> getUploadStatus(String group, String id) {
return uploadService.getUploadStatus(group, id);
}
@Override
public Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc, String searchField, String keyword, UploadInfo.UploadType uploadType) {
return uploadService.getUploadInfoList(group, id, page, size, isAsc, searchField, keyword, uploadType);
}
@Override
public UploadInfo.DataType getDataType(JSONObject json, ClassB.TypeB typeB) {
return uploadService.getDataType(json, typeB);
}
@Override
public void cleanUploadResult(String group, String id) {
uploadService.cleanUploadResult(group, id);
}
}
package com.zhiwei.middleware.automatic.server.dubbo.service.impl;
import com.zhiwei.middleware.automatic.server.dubbo.service.EventCollectionMarkService;
import com.zhiwei.middleware.automatic.server.service.impl.EventCollectionMark;
import org.apache.dubbo.config.annotation.Service;
import java.util.Map;
/**
* Dubbo-exposed {@link EventCollectionMarkService} implementation that forwards every call to
* {@link EventCollectionMark}.
* The interface spells several method names with "Aggre" while the delegate spells them "Aggree";
* the mapping between the two is done here.
*/
@Service
public class EventCollectionMarkServiceImpl implements EventCollectionMarkService {
// Delegate that holds the actual event-collection marking logic.
private final EventCollectionMark eventCollectionMark;
public EventCollectionMarkServiceImpl(EventCollectionMark eventCollectionMark) {
this.eventCollectionMark = eventCollectionMark;
}
@Override
public void addEventCollectionAggreSourceList(String group, String id, String sourceStr) {
eventCollectionMark.addEventCollectionAggreeSourceList(group, id, sourceStr);
}
@Override
public void cleanEventCollectionAggreData(String group, String id) {
eventCollectionMark.cleanEventCollectionAggreeData(group, id);
}
@Override
public Map<String, Object> getEventCollectionAggreTemplate(String group, String id, int page, int size, boolean isAsc, int markFlag, String keyword) {
return eventCollectionMark.getEventCollectionAggreeTemplate(group, id, page, size, isAsc, markFlag, keyword);
}
@Override
public boolean modifyEventCollectionAggreTitleMarkTag(String group, String id, String templateTitle, String modifyTag) {
return eventCollectionMark.modifyEventCollectionAggreeTitleMarkTag(group, id, templateTitle, modifyTag);
}
@Override
public String getEventCollectionMarkTagByTemplate(String group, String id, String templateTitle) {
return eventCollectionMark.getEventCollectionMarkTagByTemplate(group, id, templateTitle);
}
@Override
public Map<String, Object> getEventCollectionAggreSubTitle(String group, String id, String templateTitle) {
return eventCollectionMark.getEventCollectionAggreeSubTitle(group, id, templateTitle);
}
@Override
public void startAggre(String group, String id) {
eventCollectionMark.startAggree(group, id);
}
@Override
public boolean eventCollectionMarkedInsert(String group, String id, int markSum) {
return eventCollectionMark.eventCollectionMarkedInsert(group, id, markSum);
}
@Override
public boolean eventCollectionMarkedInsert(String group, String id, int markSum, String mperson) {
return eventCollectionMark.eventCollectionMarkedInsert(group, id, markSum, mperson);
}
@Override
public void cleanEventCollectionAllData(String group, String id) {
eventCollectionMark.cleanEventCollectionAllData(group, id);
}
@Override
public Map<String, Object> getEventCollectionNoiseTitles(String group, String id, int page, int size, boolean isAsc, String keyword) {
return eventCollectionMark.getEventCollectionNoiseTitles(group, id, page, size, isAsc, keyword);
}
@Override
public Map<String, Object> getEventCollectionNoiseSubTitle(String group, String id, String templateTitle) {
return eventCollectionMark.getEventCollectionNoiseSubTitle(group, id, templateTitle);
}
@Override
public boolean markedHasInserted(String group, String id) {
return eventCollectionMark.markedHasInserted(group, id);
}
}
package com.zhiwei.middleware.automatic.server.functional;
import com.zhiwei.base.category.ClassB;
/**
* Strategy that derives a {@link ClassB.TypeB} from a value.
*
* @param <T> type of the value being classified
*/
@FunctionalInterface
public interface DataClassType<T> {
/**
* @param t value to classify
* @return the class type for {@code t}
*/
ClassB.TypeB getClassType(T t);
}
package com.zhiwei.middleware.automatic.server.functional;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import org.elasticsearch.search.SearchHit;
import java.util.List;
/**
 * Single-method callback that receives Elasticsearch hits together with a value of type
 * {@code T} and a {@link MarkUploadRule}. It returns nothing, so any outcome is presumably
 * recorded on {@code t} (confirm against the implementations).
 *
 * @param <T> type of the value passed alongside the hits
 */
@FunctionalInterface
public interface DataMerge<T> {
/**
 * @param hit  search hits handed to the callback
 * @param t    value passed alongside the hits
 * @param rule upload rule passed alongside the hits
 */
void dataMerge(List<SearchHit> hit, T t, MarkUploadRule rule);
}
package com.zhiwei.middleware.automatic.server.functional;
/**
 * Single-method callback that derives an index name (a {@code String}) from a value of type {@code T}.
 *
 * @param <T> type of the value the index name is derived from
 */
@FunctionalInterface
public interface EsIndex<T> {
/**
 * @param t value to derive the index from
 * @return the index name for {@code t}
 */
String getIndex(T t);
}
package com.zhiwei.middleware.automatic.server.functional;
import org.elasticsearch.index.query.BoolQueryBuilder;
/**
 * Single-method callback that builds an Elasticsearch {@link BoolQueryBuilder} from a value of type {@code T}.
 *
 * @param <T> type of the value the query is built from
 */
@FunctionalInterface
public interface EsRowQuery<T> {
/**
 * @param t value to build the query for
 * @return the query for {@code t}
 */
BoolQueryBuilder rowQuery(T t);
}
package com.zhiwei.middleware.automatic.server.functional;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.listener.BaseServiceContext;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.search.SearchHit;
import java.util.List;
import java.util.Objects;
/**
 * Row-level helpers for the mark-upload flow. Every public method takes a
 * {@link MarkUploadResult} (the per-row upload DTO) and either derives a value from it
 * (query, index name, row key) or folds Elasticsearch hits back into it.
 */
public class FunctionalImpl {
    private static final Logger log = LogManager.getLogger(FunctionalImpl.class);

    /**
     * Records a system error of the form {@code stage:message} on the row, which also marks
     * it as unsuccessful. Does nothing when {@code result} is {@code null}.
     *
     * @param result  upload DTO, may be {@code null}
     * @param stage   name of the stage that failed
     * @param message failure description
     */
    public void rowException(MarkUploadResult result, String stage, String message) {
        if (Objects.nonNull(result)) {
            result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, stage + ":" + message);
        }
    }

    /**
     * Mark upload: builds the URL search query via the upload service registered for the row's TypeB.
     *
     * @param result upload DTO
     * @return BoolQueryBuilder
     */
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        return BaseServiceContext.getInstance().getDataUploadService(result.getTypeB()).urlSearchQuery(result);
    }

    /**
     * Mark upload: builds the text search query via the upload service registered for the row's TypeB.
     *
     * @param result upload DTO
     * @return BoolQueryBuilder
     */
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        return BaseServiceContext.getInstance().getDataUploadService(result.getTypeB()).textSearchQuery(result);
    }

    /**
     * Resolves the DW (main store) ES index from the DW record's time when one is attached,
     * otherwise from the time of the originally uploaded data.
     *
     * @param result upload DTO
     * @return es index
     */
    public String getDwIndex(MarkUploadResult result) {
        return Objects.nonNull(result.getDw())
                ? TimeUtil.getDwIndex(result.getDw().getTime())
                : TimeUtil.getDwIndex(result.getOriginData().getTime());
    }

    /**
     * Resolves the mark-store ES index from the DW record's time when one is attached,
     * otherwise from the time of the originally uploaded data.
     *
     * @param result upload DTO
     * @return es index
     */
    public String getMarkIndex(MarkUploadResult result) {
        return Objects.nonNull(result.getDw())
                ? TimeUtil.getMarkIndex(result.getDw().getTime())
                : TimeUtil.getMarkIndex(result.getOriginData().getTime());
    }

    /**
     * Returns the row's TypeB.
     *
     * @param result upload DTO
     * @return TypeB
     */
    public ClassB.TypeB getTypeB(MarkUploadResult result) {
        return result.getTypeB();
    }

    /**
     * Returns the text-search key: the origin data's URL passed through {@code Tools.urlReplace}.
     *
     * @param result upload DTO
     * @return key
     */
    public String getTextSearchRowKey(MarkUploadResult result) {
        return Tools.urlReplace(result.getOriginData().getUrl());
    }

    /**
     * Returns the key used for the mark data source, i.e. the row's own key.
     *
     * @param result upload DTO
     * @return key
     */
    public String markHandleRowKey(MarkUploadResult result) {
        return result.getKey();
    }

    /**
     * URL search merge: converts the first hit into a CommonDO, attaches it (plus its mark
     * conversion) to the row and flags the row as found by search. When there is no hit the
     * row gets a system error instead.
     *
     * @param hits   es data; {@code null} or empty means "nothing found"
     * @param result upload DTO
     * @param rule   upload rule (supplies the group)
     */
    public void searchHitMerge(List<SearchHit> hits, MarkUploadResult result, MarkUploadRule rule) {
        if (hasHits(hits)) {
            BaseDataUploadService dataUploadService = BaseServiceContext.getInstance().getDataUploadService(result.getTypeB());
            CommonDO commonDO = dataUploadService.getCommonDOBySearchHit(hits.get(0));
            DataUploadCommon dataUploadCommon = (DataUploadCommon) dataUploadService;
            result.setCommonDO(commonDO, dataUploadCommon.convert2Mark(commonDO, rule.getGroup()));
            result.setSearch(true);
        } else {
            result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, "格式转换失败且大库中不存在该数据");
        }
    }

    /**
     * DW data-source merge. Picks the record to work with: the already attached mark record
     * when the row was found by search, else the first hit, else a second content-based
     * search. The row is failed when none of these yields a record; otherwise mark info is built.
     *
     * @param hits   es data; {@code null} or empty triggers the content-based fallback search
     * @param result upload DTO
     * @param rule   upload rule (supplies mperson and group)
     */
    public void dwSearchHitMerge(List<SearchHit> hits, MarkUploadResult result, MarkUploadRule rule) {
        BaseDataUploadService dataUploadService = BaseServiceContext.getInstance().getDataUploadService(result.getTypeB());
        CommonDO commonDO;
        if (result.isSearch()) {
            commonDO = result.getMark();
        } else if (hasHits(hits)) {
            commonDO = dataUploadService.getCommonDOBySearchHit(hits.get(0));
        } else {
            commonDO = dataUploadService.searchDwByContentNew(result);
        }
        if (null == commonDO) {
            result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, "数据类型:【dw-content数据】;上传结果:【失败】,二次文本搜索任未搜索到数据");
            return;
        }
        result.setMarkInfo(dataUploadService.toMarkInfoNew(result, rule.getMperson(), rule.getGroup()));
    }

    /**
     * Mark data-source merge. Builds mark info according to the rule's mtag type: INDEX builds
     * it from the row alone, UPDATE additionally passes the {@code mtag} of the first hit.
     * Any exception is logged and turned into a system error on the row.
     *
     * @param hits   es data; {@code null} or empty means the mark store has no matching record
     * @param result upload DTO
     * @param rule   upload rule (supplies mtag type, mperson and group)
     */
    public void markSearchHitMerge(List<SearchHit> hits, MarkUploadResult result, MarkUploadRule rule) {
        if (!hasHits(hits)) {
            result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, "数据类型:【标注数据】;上传结果:【失败】,原因:标注库未找到对应数据");
            return;
        }
        try {
            BaseDataUploadService dataUploadService = BaseServiceContext.getInstance().getDataUploadService(result.getTypeB());
            CommonDO commonDO = dataUploadService.getCommonDOBySearchHit(hits.get(0));
            switch (rule.getMtagType()) {
                case INDEX:
                    result.setMarkInfo(dataUploadService.toMarkInfoNew(result, rule.getMperson(), rule.getGroup()));
                    // Without this break INDEX fell through into UPDATE and its result was overwritten.
                    break;
                case UPDATE:
                    result.setMarkInfo(dataUploadService.toMarkInfoNew(result, rule.getMperson(), rule.getGroup(), commonDO.toJSON().get("mtag") + ""));
                    break;
                default:
                    break;
            }
        } catch (Exception e) {
            log.error("UploadShell-标注库数据源处理失败:", e);
            result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, "markHandle处理异常");
        }
    }

    /** Returns {@code true} when the hit list contains at least one element. */
    private static boolean hasHits(List<SearchHit> hits) {
        return hits != null && !hits.isEmpty();
    }
}
package com.zhiwei.middleware.automatic.server.functional;
/**
 * Single-method callback that derives a row key (a {@code String}) from a value of type {@code T}.
 *
 * @param <T> type of the value the key is derived from
 */
@FunctionalInterface
public interface RowKey<T> {
/**
 * @param t value to derive the key from
 * @return the row key for {@code t}
 */
String getRowKey(T t);
}
package com.zhiwei.middleware.automatic.server.functional;
/**
 * Single-method callback invoked with a value of type {@code T}, a state label and a message.
 * Judging by the name it reports a per-row upload failure; the exact handling is up to the
 * implementation.
 *
 * @param <T> type of the value the failure relates to
 */
@FunctionalInterface
public interface UploadRowException<T> {
/**
 * @param t       value the failure relates to
 * @param state   label of the state/stage being reported
 * @param message failure description
 */
void rowException(T t, String state, String message);
}
package com.zhiwei.middleware.automatic.server.listener;
import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
/**
 * Stores the Spring {@link ApplicationContext} handed to {@link #setApplicationContext} in a
 * static field so that code without injection can reach it through {@link #getInstance()}.
 */
public class ApplicationContextHolder implements ApplicationContextAware {
    private static ApplicationContext context;

    /**
     * @return the context captured by the last {@code setApplicationContext} call, or
     *         {@code null} if that callback has not run yet
     */
    public static ApplicationContext getInstance() {
        return ApplicationContextHolder.context;
    }

    /** Remembers the supplied context in the static holder field. */
    @Override
    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        ApplicationContextHolder.context = applicationContext;
    }
}
package com.zhiwei.middleware.automatic.server.listener;
import com.zhiwei.base.category.ClassB.TypeB;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import org.springframework.context.ApplicationContext;
import java.util.HashMap;
import java.util.Map;
/**
 * Registry of {@link BaseDataUploadService} beans keyed by their {@code TypeB}.
 * A shared instance is available through {@link #getInstance()}; it is created the first
 * time the nested holder class is initialised.
 */
public class BaseServiceContext {
    private final Map<TypeB, BaseDataUploadService> handlerMap = new HashMap<>();

    /**
     * Looks up every {@code BaseDataUploadService} bean in the context exposed by
     * {@link ApplicationContextHolder} and indexes it by {@code getTypeB()}.
     */
    public BaseServiceContext() {
        ApplicationContext springContext = ApplicationContextHolder.getInstance();
        Map<String, BaseDataUploadService> uploadServices = springContext.getBeansOfType(BaseDataUploadService.class);
        for (BaseDataUploadService service : uploadServices.values()) {
            handlerMap.put(service.getTypeB(), service);
        }
    }

    /**
     * @param typeB type to look up
     * @return the service registered for {@code typeB}, or {@code null} when none is registered
     */
    public BaseDataUploadService getDataUploadService(TypeB typeB) {
        return handlerMap.get(typeB);
    }

    /** @return the shared registry instance */
    public static BaseServiceContext getInstance() {
        return BaseServiceContextHolder.BASE_SERVICE;
    }

    /** Holds the shared instance; initialised on first access from {@link #getInstance()}. */
    private static class BaseServiceContextHolder {
        private static final BaseServiceContext BASE_SERVICE = new BaseServiceContext();
    }
}
package com.zhiwei.middleware.automatic.server.mission;
import com.zhiwei.middleware.automatic.server.service.AutoService;
import com.zhiwei.qbjc.bean.pojo.common.Project;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.stream.Collectors;
@Component
public class AsyncTask {
private final MongoTemplate hangZhouMongo;
private final AutoService autoService;
public AsyncTask(@Qualifier("hangzhouMongoTemplate") MongoTemplate hangZhouMongo,
AutoService autoService) {
this.hangZhouMongo = hangZhouMongo;
this.autoService = autoService;
}
public List<String> findAllGroup() {
return hangZhouMongo.findAll(Project.class).stream().map(Project::getProjectName).collect(Collectors.toList());
}
public void queueDataPull() {
autoService.asyncAutoMark();
}
}
package com.zhiwei.middleware.automatic.server.mission;
import com.zhiwei.middleware.automatic.server.config.GlobalPojo;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.stereotype.Component;
/**
 * Initialisation task: stores the result of {@code AsyncTask.findAllGroup()} in
 * {@code GlobalPojo.ALL_GROUP}.
 *
 * @author liu-yu
 * @since 2022/4/2
 */
@Component
public class InitTask {
    private static final Logger log = LogManager.getLogger(InitTask.class);
    private final AsyncTask asyncTask;

    public InitTask(AsyncTask asyncTask) {
        this.asyncTask = asyncTask;
    }

    /** Refreshes the global group list from {@code asyncTask}. */
    public void init() {
        GlobalPojo.ALL_GROUP = this.asyncTask.findAllGroup();
    }
}
package com.zhiwei.middleware.automatic.server.mission;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.TemplateTitleService;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.util.Calendar;
import java.util.Date;
import java.util.concurrent.TimeUnit;
/**
 * Periodic jobs of the automatic-mark server. The {@code @Scheduled}/{@code @Async}
 * annotations on the methods below are commented out, so nothing in this class triggers
 * them on its own.
 */
@Component
@EnableScheduling
public class ScheduledMission {
    private final Logger log = LogManager.getLogger(ScheduledMission.class);
    private final AsyncTask asyncTask;
    private final RedissonUtil redissonUtil;
    private final TemplateTitleService templateTitleService;

    public ScheduledMission(AsyncTask asyncTask, RedissonUtil redissonUtil,
                            TemplateTitleService templateTitleService) {
        this.asyncTask = asyncTask;
        this.redissonUtil = redissonUtil;
        this.templateTitleService = templateTitleService;
    }

    /**
     * Pulls the automatic-mark queue through {@code AsyncTask.queueDataPull()}; failures are
     * logged and not propagated.
     */
    // @Scheduled(cron = "10/10 * * * * ? ")
    // @Async("asyncExecutor")
    public void queueDataPull() {
        try {
            asyncTask.queueDataPull();
        } catch (Exception e) {
            log.error("定时拉取自动标注队列出错:", e);
        }
    }

    /**
     * Aggregates templates for all groups over the window [now - 7h, now - 5min] while
     * holding {@code LOCK_TEMPLATE_HOUR}. Returns without doing anything when the lock
     * cannot be acquired immediately.
     */
    // @Scheduled(cron = "0 0/5 * * * ?")
    // @Async("asyncExecutor")
    public void templateHourSync() {
        try {
            if (!redissonUtil.tryLock(GenericAttribute.LOCK_TEMPLATE_HOUR, 0, 1, TimeUnit.MINUTES)) {
                return;
            }
            try {
                Calendar calendar = Calendar.getInstance();
                calendar.add(Calendar.HOUR_OF_DAY, -7);
                long startTime = calendar.getTime().getTime();
                Calendar calendarEndTime = Calendar.getInstance();
                calendarEndTime.add(Calendar.MINUTE, -5);
                long endTime = calendarEndTime.getTime().getTime();
                templateTitleService.schedulerHourAggregation(asyncTask.findAllGroup(), startTime, endTime);
            } finally {
                // Release the lock even when the aggregation throws.
                redissonUtil.unlock(GenericAttribute.LOCK_TEMPLATE_HOUR);
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for whoever owns this thread.
            Thread.currentThread().interrupt();
            log.error("十分钟定时同步模板失败:", e);
        } catch (Exception e) {
            log.error("十分钟定时同步模板失败:", e);
        }
    }

    /**
     * Aggregates templates for all groups over the last day (start = now - 1 day, end = now)
     * while holding {@code LOCK_TEMPLATE_DAY}. Returns without doing anything when the lock
     * cannot be acquired immediately.
     */
    // @Scheduled(cron = "0 10 4 * * ?")
    // @Async("autMarkExecutor")
    public void templateDaySync() {
        try {
            if (!redissonUtil.tryLock(GenericAttribute.LOCK_TEMPLATE_DAY, 0, 1, TimeUnit.MINUTES)) {
                return;
            }
            try {
                Calendar calendar = Calendar.getInstance();
                // Aggregate one day: article time and mark time both fall within the last day.
                calendar.add(Calendar.DAY_OF_MONTH, -1);
                long startTime = calendar.getTime().getTime();
                templateTitleService.schedulerHourAggregation(asyncTask.findAllGroup(), startTime, System.currentTimeMillis());
            } finally {
                // Release the lock even when the aggregation throws.
                redissonUtil.unlock(GenericAttribute.LOCK_TEMPLATE_DAY);
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for whoever owns this thread.
            Thread.currentThread().interrupt();
            log.error("每天定时同步模板失败:", e);
        } catch (Exception e) {
            log.error("每天定时同步模板失败:", e);
        }
    }
}
package com.zhiwei.middleware.automatic.server.mission;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
/**
 * Runs {@code InitTask.init()} once this bean has been constructed (via {@code @PostConstruct}).
 *
 * @author liu-yu
 * @since 2022/4/2
 */
@Component
public class StartTask {
    private final InitTask initTask;

    public StartTask(InitTask initTask) {
        this.initTask = initTask;
    }

    /** Post-construct hook that triggers the initialisation task. */
    @PostConstruct
    public void start() {
        this.initTask.init();
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import java.io.Serializable;
/**
 * Serializable summary of one aggregation run: completion/insert flags plus four counters,
 * and a human-readable rendering of them.
 */
public class AggreInfo implements Serializable {
    private static final long serialVersionUID = 4901060154053874112L;

    /** Whether the aggregation has finished. */
    Boolean aggreFinshed;
    /** Total number of aggregated records. */
    int totalCount;
    /** Number of suspected-noise records. */
    int noiseCount;
    /** Number of template titles. */
    int titleFatherCount;
    /** Number of automatically marked records. */
    int automaticmarkCount;
    /** Whether the already-marked part has been inserted into storage. */
    Boolean inserted;

    /** No-arg constructor; must be kept for JSON parsing. */
    public AggreInfo() {
    }

    public AggreInfo(Boolean aggreFinshed, Boolean isInserted) {
        this.inserted = isInserted;
        this.aggreFinshed = aggreFinshed;
    }

    public Boolean isAggreFinshed() {
        return this.aggreFinshed;
    }

    public void setAggreFinshed(Boolean aggreFinshed) {
        this.aggreFinshed = aggreFinshed;
    }

    public void setTotalCount(int totalCount) {
        this.totalCount = totalCount;
    }

    public void setNoiseCount(int noiseCount) {
        this.noiseCount = noiseCount;
    }

    public void setTitleFatherCount(int titleFatherCount) {
        this.titleFatherCount = titleFatherCount;
    }

    public void setAutomaticmarkCount(int automaticmarkCount) {
        this.automaticmarkCount = automaticmarkCount;
    }

    /** Sets the finished flag and all four counters; {@code inserted} is left untouched. */
    public void setAll(boolean aggreFinshed, int totalCount, int noiseCount, int titleFatherCount,
                       int automaticmarkCount) {
        this.aggreFinshed = aggreFinshed;
        this.totalCount = totalCount;
        this.noiseCount = noiseCount;
        this.titleFatherCount = titleFatherCount;
        this.automaticmarkCount = automaticmarkCount;
    }

    /** Sets both flags and all four counters. */
    public void setAll(boolean aggreFinshed, boolean isInserted, int totalCount, int noiseCount, int titleFatherCount,
                       int automaticmarkCount) {
        setAll(aggreFinshed, totalCount, noiseCount, titleFatherCount, automaticmarkCount);
        this.inserted = isInserted;
    }

    public Boolean isInserted() {
        return this.inserted;
    }

    public void setInserted(Boolean inserted) {
        this.inserted = inserted;
    }

    public int getTotalCount() {
        return this.totalCount;
    }

    public int getNoiseCount() {
        return this.noiseCount;
    }

    public int getTitleFatherCount() {
        return this.titleFatherCount;
    }

    public int getAutomaticmarkCount() {
        return this.automaticmarkCount;
    }

    /**
     * Renders the counters as one sentence. The valid/noise breakdown in parentheses is only
     * included when {@code noiseCount} is non-zero.
     *
     * @return the summary text
     */
    public String getPrintString() {
        StringBuilder text = new StringBuilder();
        text.append("本次数据采集共计").append(totalCount).append("条");
        if (noiseCount != 0) {
            text.append("(有效数据").append(totalCount - noiseCount)
                .append("条,疑似噪音").append(noiseCount).append("条)");
        }
        text.append(",聚合模板共计").append(titleFatherCount)
            .append("条,自动标注").append(automaticmarkCount).append("条");
        return text.toString();
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult.ResultInfo;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Cache entry for one aggregation order: the order id, the time the entry was created or
 * updated, the aggregated data keyed by string, and the cached result list.
 */
public class CommonAggreeCache {
    /** Order id. */
    String id;
    /** Update time, epoch milliseconds. */
    Long updateTime;
    Map<String, AggreeDTO> data = new HashMap<>();
    /** Cached results. */
    List<ResultInfo> results = new ArrayList<>();

    /**
     * Creates an empty entry for {@code id}, stamped with the current time.
     *
     * @param id order id
     */
    public CommonAggreeCache(String id) {
        this.updateTime = System.currentTimeMillis();
        this.id = id;
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Long getUpdateTime() {
        return this.updateTime;
    }

    public void setUpdateTime(Long updateTime) {
        this.updateTime = updateTime;
    }

    public Map<String, AggreeDTO> getData() {
        return this.data;
    }

    public void setData(Map<String, AggreeDTO> data) {
        this.data = data;
    }

    public List<ResultInfo> getResults() {
        return this.results;
    }

    public void setResults(List<ResultInfo> results) {
        this.results = results;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import lombok.Data;
import java.util.Objects;
/**
 * Per-row state of a mark upload: the original uploaded data, the records found for it,
 * the resulting mark info, and a success flag with an info type and message.
 */
@Data
public class MarkUploadResult {
    /** Whether the conversion succeeded. */
    private boolean success;
    private boolean search;
    private String key;
    /** Category of the info message. */
    private String infoType;
    /** Upload message. */
    private String message;
    /** Mark data. */
    private MarkInfo markInfo;
    /** Originally uploaded data. */
    private MarkUploadInfo originData;
    /** DW (main store) record. */
    private CommonDO dw;
    /** Mark record. */
    private CommonDO mark;
    private ClassB.TypeB typeB;
    private UploadInfo.DataType dataType;

    /**
     * Starts a row in the "not yet successful" state with the success info type.
     *
     * @param markUploadInfo originally uploaded data
     */
    public MarkUploadResult(MarkUploadInfo markUploadInfo) {
        this.success = false;
        this.infoType = GenericAttribute.SUCCESS_SUFFIX;
        this.originData = markUploadInfo;
    }

    /** Stores the mark info; the row counts as successful exactly when it is non-null. */
    public void setMarkInfo(MarkInfo markInfo) {
        this.markInfo = markInfo;
        this.success = Objects.nonNull(markInfo);
    }

    /** Records an info type and message and marks the row as unsuccessful. */
    public void setInfo(String infoType, String message) {
        this.success = false;
        this.infoType = infoType;
        this.message = message;
    }

    /** Resets the info type to success and drops any message. */
    private void clearInfo() {
        this.message = null;
        this.infoType = GenericAttribute.SUCCESS_SUFFIX;
    }

    /**
     * Attaches the DW and mark records and marks the row successful, clearing earlier info.
     * Ignored entirely when {@code dw} is {@code null}.
     */
    public void setCommonDO(CommonDO dw, CommonDO mark) {
        if (Objects.isNull(dw)) {
            return;
        }
        this.dw = dw;
        this.mark = mark;
        this.success = true;
        clearInfo();
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import lombok.Data;
/**
 * One template-count event: a title, the group it belongs to, and a number.
 */
@Data
public class TemplateNum {
    private String title;
    private String group;
    private Integer number;

    public TemplateNum() {
    }

    /** Creates an event for {@code title}/{@code group} with {@code number} set to 1. */
    public TemplateNum(String title, String group) {
        this.number = 1;
        this.group = group;
        this.title = title;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import org.springframework.data.mongodb.core.mapping.Document;
/**
 * Template mark record, mapped to the {@code automaticmark_template_record} collection.
 *
 * @author liu-yu
 * @since 2022/5/6
 */
@Document("automaticmark_template_record")
public class TemplateRecord {
    /** Record id. */
    private String id;
    /** Template id. */
    private String templateId;
    /** Feature value. */
    private String mupdate;
    /** Creation time, epoch milliseconds. */
    private Long createAt;

    public TemplateRecord() {
    }

    /** Creates a record stamped with the current time. */
    public TemplateRecord(String templateId, String mupdate) {
        this.createAt = System.currentTimeMillis();
        this.mupdate = mupdate;
        this.templateId = templateId;
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getTemplateId() {
        return this.templateId;
    }

    public void setTemplateId(String templateId) {
        this.templateId = templateId;
    }

    public String getMupdate() {
        return this.mupdate;
    }

    public void setMupdate(String mupdate) {
        this.mupdate = mupdate;
    }

    public Long getCreateAt() {
        return this.createAt;
    }

    public void setCreateAt(Long createAt) {
        this.createAt = createAt;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import lombok.Data;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Working state of a title aggregation: the template-to-members map, a counter for template
 * ids starting at 1, and the id/group/type/keyword the aggregation was started with.
 */
@Data
public class TitleAggreeResult {
    private Map<TemplateFatherVo, List<JSONObject>> templateFatherVoListMap;
    private AtomicInteger fatherId;
    private String keyword;
    private ClassB.TypeB typeB;
    private String group;
    private String id;

    /** Creates an empty result for the given aggregation parameters. */
    public TitleAggreeResult(String id, String group, ClassB.TypeB typeB, String keyword) {
        this.id = id;
        this.group = group;
        this.typeB = typeB;
        this.keyword = keyword;
        this.fatherId = new AtomicInteger(1);
        this.templateFatherVoListMap = new HashMap<>();
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import lombok.Data;
/**
 * Pairs a title with a {@code cosFreq} value.
 */
@Data
public class TitleCosFreq {
    private String title;
    private double cosFreq;

    public TitleCosFreq() {
    }

    public TitleCosFreq(String title, double cosFreq) {
        this.cosFreq = cosFreq;
        this.title = title;
    }
}
package com.zhiwei.middleware.automatic.server.pojo.enums;
/**
 * Kinds of aggregation task, each carrying a display label and a key prefix.
 */
public enum AggreeTaskType {
    DATA("普通任务", "DATA-COLLECTION:"),
    EVENT("事件任务", "event:"),
    COMMON("普通任务", "common:");

    /** Display label of the task kind. */
    final String type;
    /** Prefix used when building keys for this task kind. */
    final String keyPrefix;

    AggreeTaskType(String type, String keyPrefix) {
        this.keyPrefix = keyPrefix;
        this.type = type;
    }

    /** @return the display label */
    public String getType() {
        return this.type;
    }

    /** @return the key prefix */
    public String getKeyPrefix() {
        return this.keyPrefix;
    }
}
package com.zhiwei.middleware.automatic.server.pojo.enums;
import com.zhiwei.base.category.ClassB;
/**
 * Field-name sets per data type. Every constant uses the default names except {@code QA},
 * which overrides the title and content field names.
 */
public enum Fields {
    QA("question_title", "question_content"), VIDEO(), COMPLETE(), INCOMPLETE();

    // Title/content names are assigned by the constructors; the rest use these defaults.
    public String title;
    public String content;
    public String mtag = "mtag";
    public String mtime = "mtime";
    public String mperson = "mperson";
    public String mgroup = "mgroup";

    /** Uses the default title/content field names. */
    Fields() {
        this("title", "content");
    }

    Fields(String title, String content) {
        this.title = title;
        this.content = content;
    }

    /**
     * Resolves the constant whose name equals {@code typeB.name()}.
     *
     * @throws IllegalArgumentException if no constant has that name (from {@code valueOf})
     */
    public static Fields getFields(ClassB.TypeB typeB) {
        String constantName = typeB.name();
        return Fields.valueOf(constantName);
    }
}
package com.zhiwei.middleware.automatic.server.pojo.vo;
import com.alibaba.fastjson.JSONObject;
import lombok.Data;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
/**
 * Aggregation template ("father") entry. Two entries are considered equal when neither is a
 * forward and their content (and title, when this entry has one) match.
 */
@Data
public class TemplateFatherVo implements Serializable {
    private static final long serialVersionUID = 4142532604627291041L;
    /** Field used for comparison. */
    private String title = "";
    /** Field used for retrieval (title or text). */
    private String content = "";
    /** Template id. */
    private String fatherId = "1";
    /** First record, used as the template's example. */
    private JSONObject example;
    /** Total number of children. */
    private Integer totalSon = 0;
    private boolean isForward;
    /** Highlighted hit keywords and their frequencies. */
    private List<Map<String, Integer>> hitWordAndRate;

    public TemplateFatherVo() {
    }

    /** Uses {@code title} for both title and content; {@code null} becomes the empty string. */
    public TemplateFatherVo(String title) {
        if (null == title) {
            title = "";
        }
        this.title = title;
        this.content = title;
    }

    /** {@code null} title or content becomes the empty string. */
    public TemplateFatherVo(String title, String content) {
        if (null == title) {
            title = "";
        }
        if (null == content) {
            content = "";
        }
        this.title = title;
        this.content = content;
    }

    /** Increments the child counter by one. */
    public void reFreshTotalSon() {
        totalSon++;
    }

    /**
     * Content/title based equality. An entry flagged as forward never equals anything, not
     * even itself. A {@code null} content on either side is treated as the empty string;
     * when this entry's title is {@code null} only the content is compared.
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof TemplateFatherVo)) {
            return false;
        }
        TemplateFatherVo vo = (TemplateFatherVo) o;
        if (this.isForward || vo.isForward) {
            return false;
        }
        // Kept from the original: a null content on this side is normalised in place.
        if (null == content) {
            content = "";
        }
        // The other side may carry nulls (set through setters); compare without dereferencing them.
        String otherContent = (null == vo.content) ? "" : vo.content;
        if (!otherContent.equals(this.content)) {
            return false;
        }
        if (null == title) {
            return true;
        }
        return title.equals(vo.title);
    }

    /**
     * Hash of content plus title, with a {@code null} content normalised to the empty string
     * in place and a {@code null} title contributing nothing.
     */
    @Override
    public int hashCode() {
        if (null == content) {
            content = "";
        }
        if (null == title) {
            return content.hashCode();
        }
        return content.hashCode() + title.hashCode();
    }
}
package com.zhiwei.middleware.automatic.server.pojo.vo;
import com.zhiwei.middleware.automatic.server.pojo.enums.TemplateStatus;
import java.io.Serializable;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Date;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Template title entry: the title, its tag and URL, a mark counter, a status, and
 * creation/update timestamps. The id is an MD5-derived hex string (see {@link #setId}).
 */
public class TemplateTitleVo implements Serializable {
    private static final long serialVersionUID = -5076752567160203430L;
    private String id;
    private String templateTitle;
    private Date updateTime;
    private Date createTime;
    private AtomicLong markSum;
    private String mtag;
    private String url;
    private TemplateStatus status;

    /**
     * Creates an entry with both timestamps set to now, a zero mark counter and status
     * {@code TemplateStatus.运行中}. The id is not set here; call {@link #setId}.
     */
    public TemplateTitleVo(String templateTitle, String mtag, String url) {
        this.updateTime = new Date();
        this.createTime = new Date();
        this.templateTitle = templateTitle;
        this.markSum = new AtomicLong();
        this.mtag = mtag;
        this.url = url;
        this.status = TemplateStatus.运行中;
    }

    public String getId() {
        return id;
    }

    /**
     * Derives the id as the hexadecimal MD5 of {@code templateTitle + group}. Note that the
     * argument is the group, not the id itself, and that leading zeros of the digest are not
     * preserved by the hex conversion.
     *
     * @param group group name appended to the template title before hashing
     * @throws IllegalStateException if the MD5 algorithm is unavailable (previously this
     *         case printed a stack trace and then failed with a NullPointerException)
     */
    public void setId(String group) {
        final MessageDigest md5Digest;
        try {
            md5Digest = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 MessageDigest is not available", e);
        }
        String info = this.templateTitle + group;
        // NOTE(review): getBytes() uses the platform charset, so ids of non-ASCII titles depend on
        // the JVM default encoding; left unchanged so that existing ids keep their value.
        byte[] hash = md5Digest.digest(info.getBytes());
        BigInteger numValue = new BigInteger(1, hash);
        this.id = numValue.toString(16);
    }

    public String getTemplateTitle() {
        return templateTitle;
    }

    public void setTemplateTitle(String templateTitle) {
        this.templateTitle = templateTitle;
    }

    public Date getUpdateTime() {
        return updateTime;
    }

    public void setUpdateTime(Date updateTime) {
        this.updateTime = updateTime;
    }

    public Date getCreateTime() {
        return createTime;
    }

    public void setCreateTime(Date createTime) {
        this.createTime = createTime;
    }

    public AtomicLong getMarkSum() {
        return markSum;
    }

    public void setMarkSum(AtomicLong markSum) {
        this.markSum = markSum;
    }

    public String getMtag() {
        return mtag;
    }

    public void setMtag(String mtag) {
        this.mtag = mtag;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public TemplateStatus getStatus() {
        return status;
    }

    public void setStatus(TemplateStatus status) {
        this.status = status;
    }

    /** Replaces the mark counter with a fresh one starting at zero. */
    public void emptyNum() {
        this.markSum = new AtomicLong(0);
    }

    /** Increments the mark counter and sets the update time to now. */
    public void refreshMark() {
        this.getMarkSum().getAndIncrement();
        this.updateTime = new Date();
    }
}
package com.zhiwei.middleware.automatic.server.queue;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.TemplateNum;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.TemplateTitleService;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
 * In-memory buffer of {@link TemplateNum} events. The instance submits itself to the
 * "asyncExecutor" pool on construction and then loops: whenever the buffer is non-empty and
 * the {@code LOCK_TEMPLATE_NUMBER} lock can be taken, it drains up to {@code PULL_LIMIT}
 * events, counts them per group and title, and reports each count through
 * {@code TemplateTitleService.modifyTemplateNum}.
 */
@Component
public class TemplateNumQueue implements Runnable {
    private static final Logger log = LogManager.getLogger(TemplateNumQueue.class);
    private final BlockingQueue<TemplateNum> queue;
    private static final int PULL_LIMIT = 1000;
    private final RedissonUtil redissonUtil;
    private final TemplateTitleService templateTitleService;

    public TemplateNumQueue(RedissonUtil redissonUtil, TemplateTitleService templateTitleService,
                            @Qualifier("asyncExecutor") ThreadPoolTaskExecutor executor) {
        this.queue = new LinkedBlockingQueue<>();
        this.redissonUtil = redissonUtil;
        this.templateTitleService = templateTitleService;
        executor.execute(this);
    }

    /** Enqueues every element of {@code templateNum}. */
    public void put(List<TemplateNum> templateNum) {
        queue.addAll(templateNum);
    }

    /** Enqueues a single event. */
    public void put(TemplateNum templateNum) {
        queue.add(templateNum);
    }

    /**
     * Worker loop; runs until the executing thread is interrupted. Failures of a single
     * round are logged and the loop continues.
     */
    @Override
    public void run() {
        while (!Thread.interrupted()) {
            try {
                // Only contend for the lock when there is something to flush.
                if (!queue.isEmpty() && redissonUtil.tryLock(GenericAttribute.LOCK_TEMPLATE_NUMBER, 0, 1, TimeUnit.MINUTES)) {
                    try {
                        flushBatch();
                    } finally {
                        // Release the lock even when the flush fails.
                        redissonUtil.unlock(GenericAttribute.LOCK_TEMPLATE_NUMBER);
                    }
                }
                Tools.sleep(300L);
            } catch (InterruptedException e) {
                // Restore the flag so the loop condition sees the interrupt and the worker stops.
                Thread.currentThread().interrupt();
            } catch (Exception e) {
                log.error("模板数值更新失败:", e);
            }
        }
    }

    /** Drains up to {@code PULL_LIMIT} buffered events and reports their counts per group and title. */
    private void flushBatch() {
        List<TemplateNum> batch = new ArrayList<>(Math.min(queue.size(), PULL_LIMIT));
        // drainTo never blocks, unlike repeated take() calls.
        queue.drainTo(batch, PULL_LIMIT);
        Map<String, List<TemplateNum>> templateNumGroup =
                batch.stream().collect(Collectors.groupingBy(TemplateNum::getGroup, Collectors.toList()));
        for (Map.Entry<String, List<TemplateNum>> entry : templateNumGroup.entrySet()) {
            entry.getValue().stream()
                    .collect(Collectors.groupingBy(TemplateNum::getTitle, Collectors.counting()))
                    .forEach((title, num) -> {
                        templateTitleService.modifyTemplateNum(entry.getKey(), title, num);
                    });
        }
    }
}
package com.zhiwei.middleware.automatic.server.redis;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.redisson.api.*;
import org.redisson.client.protocol.ScoredEntry;
import org.springframework.stereotype.Component;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@Component
public class RedissonUtil {
private static final Logger log = LogManager.getLogger(RedissonUtil.class);
/**
 * Baseline timestamp in epoch seconds: 2022-01-01 00:00:00
 */
private static final long BEGIN_TIMESTAMP = 1640995200L;
/**
 * Number of bits reserved for the sequence number
 */
private static final int COUNT_BITS = 32;
/** Prefix passed to {@code Tools.assembleKey} when building the counter key in {@code nextId}. */
private static final String MARK_KEY = "auto:mark:";
private final RedissonClient redissonClient;
/** Seven-digit, zero-padded number format used by {@code generateScore}. */
public static final DecimalFormat FORMAT = new DecimalFormat("0000000");
/** Stores the Redisson client that every method of this class goes through. */
private RedissonUtil(RedissonClient redissonClient) {
this.redissonClient = redissonClient;
}
/**
 * Generates an id string from a Redis counter and a timestamp: the seconds elapsed since
 * {@code BEGIN_TIMESTAMP} are shifted left by {@code COUNT_BITS} and OR-ed with the
 * incremented counter value.
 *
 * @param keyPrefix combined with {@code MARK_KEY} to name the counter
 * @return the combined value as a decimal string
 */
public String nextId(String keyPrefix) {
    // 1. Increment the counter first, as the original does.
    String counterKey = Tools.assembleKey(MARK_KEY, keyPrefix);
    RAtomicLong counter = redissonClient.getAtomicLong(counterKey);
    long sequence = counter.incrementAndGet();
    // 2. Local wall-clock time converted to epoch seconds using the UTC offset.
    long elapsedSeconds = LocalDateTime.now().toEpochSecond(ZoneOffset.UTC) - BEGIN_TIMESTAMP;
    // 3. Timestamp in the high bits, sequence in the low bits.
    long combined = (elapsedSeconds << COUNT_BITS) | sequence;
    return String.valueOf(combined);
}
/**
 * Appends values to a Redis queue.
 *
 * @param key   queue key (passed through {@code redisKey})
 * @param value values to append
 */
public void putQueue(String key, List<String> value) {
    String queueName = redisKey(key);
    RQueue<String> target = redissonClient.getQueue(queueName);
    target.addAll(value);
}
/**
 * Polls values from a Redis queue.
 *
 * @param key   queue key (passed through {@code redisKey})
 * @param limit number of entries requested from {@code poll}
 * @return the polled values
 */
public List<String> pullQueue(String key, int limit) {
    String queueName = redisKey(key);
    RQueue<String> source = redissonClient.getQueue(queueName);
    List<String> polled = source.poll(limit);
    return polled;
}
/**
 * Returns the Redisson map stored under {@code redisKey(key)}.
 *
 * @param key map key before {@code redisKey} is applied
 * @return the Redisson map object itself, not a copy
 */
public Map<String, String> getMapValue(String key) {
    RMap<String, String> redisMap = redissonClient.getMap(redisKey(key));
    return redisMap;
}
/**
 * Reads one entry of the Redis map stored under {@code redisKey(key)}.
 *
 * @param key   map key before {@code redisKey} is applied
 * @param group entry to read
 * @return the value stored for {@code group}
 */
public String getMapKeyValue(String key, String group) {
    String mapName = redisKey(key);
    RMap<String, String> redisMap = redissonClient.getMap(mapName);
    return redisMap.get(group);
}
/**
 * Writes one entry into the Redis map stored under {@code redisKey(key)}.
 *
 * @param key   map key before {@code redisKey} is applied
 * @param group entry to write
 * @param value value to store
 */
public void setMapValue(String key, String group, String value) {
    String mapName = redisKey(key);
    RMap<String, String> redisMap = redissonClient.getMap(mapName);
    redisMap.put(group, value);
}
/**
 * Writes all entries of {@code data} into the Redis map stored under {@code redisKey(key)}.
 *
 * @param key  map key before {@code redisKey} is applied
 * @param data entries to store
 */
public void setMapValue(String key, Map<String, String> data) {
    String mapName = redisKey(key);
    RMap<String, String> redisMap = redissonClient.getMap(mapName);
    redisMap.putAll(data);
}
/**
 * Tries to acquire the distributed lock named {@code redisKey(lockKey)}.
 *
 * @param lockKey   lock key before {@code redisKey} is applied
 * @param waitTime  maximum time to wait for the lock
 * @param leaseTime time after which the lock is released automatically
 * @param unit      unit of both time arguments
 * @return whether the lock was acquired
 * @throws InterruptedException if the thread is interrupted while waiting
 */
public boolean tryLock(String lockKey, long waitTime, long leaseTime, TimeUnit unit) throws InterruptedException {
    String lockName = redisKey(lockKey);
    RLock distributedLock = redissonClient.getLock(lockName);
    boolean acquired = distributedLock.tryLock(waitTime, leaseTime, unit);
    return acquired;
}
/**
 * Releases the distributed lock named {@code redisKey(lockKey)} using {@code forceUnlock}.
 * NOTE(review): forceUnlock is understood to release the lock regardless of which holder
 * owns it — confirm against the Redisson documentation.
 *
 * @param lockKey lock key before {@code redisKey} is applied
 */
public void unlock(String lockKey) {
    String lockName = redisKey(lockKey);
    RLock distributedLock = redissonClient.getLock(lockName);
    distributedLock.forceUnlock();
}
/**
 * Appends all given strings to the Redis list stored under {@code redisKey(assembleKey)}.
 *
 * @param assembleKey            list key before {@code redisKey} is applied
 * @param gzipWithUploadInfoList strings to append
 */
public void setList(String assembleKey, List<String> gzipWithUploadInfoList) {
    String listName = redisKey(assembleKey);
    RList<Object> target = redissonClient.getList(listName);
    target.addAll(gzipWithUploadInfoList);
}
/**
 * Returns {@code subList(start, end)} of the Redis list stored under the key derived from
 * {@code redisKey}.
 *
 * @param redisKey list key before the {@code redisKey(...)} helper is applied
 * @param start    first index passed to {@code subList}
 * @param end      second index passed to {@code subList}
 * @return the requested slice
 */
public List<String> getList(String redisKey, int start, int end) {
    String listName = redisKey(redisKey);
    RList<String> source = redissonClient.getList(listName);
    List<String> slice = source.subList(start, end);
    return slice;
}
/**
 * Returns the Redisson list stored under the key derived from {@code redisKey}.
 *
 * @param redisKey list key before the {@code redisKey(...)} helper is applied
 * @return the Redisson list object itself, not a copy
 */
public List<String> getList(String redisKey) {
    RList<String> redisList = redissonClient.getList(redisKey(redisKey));
    return redisList;
}
/**
 * Deletes the Redis list named exactly {@code key}.
 * NOTE(review): unlike setList/getList/getListSize, the key is used as given and is not
 * passed through {@code redisKey(...)} — confirm that callers supply the final key.
 *
 * @param key name of the list to delete
 */
public void deleteList(String key) {
    redissonClient.getList(key).delete();
}
/**
 * Returns the size of the Redis list stored under the key derived from {@code redisKey}.
 *
 * @param redisKey list key before the {@code redisKey(...)} helper is applied
 * @return the list size, widened to {@code long}
 */
public long getListSize(String redisKey) {
    String listName = redisKey(redisKey);
    RList<String> redisList = redissonClient.getList(listName);
    int size = redisList.size();
    return size;
}
/**
 * Appends the lists in {@code listMap} to the Redis list-multimap named
 * {@code key + GenericAttribute.MAP_SET}. Each element is serialised to JSON and passed
 * through {@code Tools.gzip} before being stored. Entries with a {@code null} map key are
 * converted but not stored. Does nothing for a {@code null} or empty map.
 *
 * @param key     base name of the multimap
 * @param listMap values to append, grouped by multimap key
 * @param <T>     element type
 */
public <T> void addListMapWithGzip(String key, Map<String, List<T>> listMap) {
    if (null == listMap || listMap.isEmpty()) {
        return;
    }
    RListMultimap<String, String> multimap = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    expireDefault(multimap);
    for (Map.Entry<String, List<T>> entry : listMap.entrySet()) {
        List<T> elements = entry.getValue();
        List<String> compressed = new ArrayList<>(elements.size());
        for (T element : elements) {
            compressed.add(Tools.gzip(JSONObject.toJSONString(element)));
        }
        String multimapKey = entry.getKey();
        if (null != multimapKey) {
            multimap.get(multimapKey).addAll(compressed);
        }
    }
}
/**
 * Reads the first member of {@code key + GenericAttribute.KEY_SET} whose score lies in
 * {@code [score, getPointNext(score))} and parses it as {@code clazz}.
 *
 * @param key   base key of the scored sorted set
 * @param score lower bound of the score range, as a decimal string
 * @param clazz target type for JSON parsing
 * @param <T>   target type
 * @return the parsed member, or {@code null} when the range is empty
 */
public <T> T getListMapKeySetByScore(String key, String score, Class<T> clazz) {
    RScoredSortedSet<String> keySet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    Double lowerBound = Double.valueOf(score);
    Double upperBound = getPointNext(lowerBound);
    List<String> matches = new ArrayList<>(keySet.valueRange(lowerBound, true, upperBound, false));
    return checkIsNull(matches) ? null : JSONObject.parseObject(matches.get(0), clazz);
}
/**
 * Builds the score {@code "<size>.<FORMAT(fatherId)>"} for a template and writes it back.
 * <p>
 * Side effects on {@code fatherVo}: its father id is replaced by the generated score
 * string and its total-son count is set to {@code size - 1}.
 *
 * @param fatherVo template whose father id is numeric text
 * @param size     number of elements belonging to the template
 * @return the generated score as a Double
 */
public Double generateScore(TemplateFatherVo fatherVo, int size) {
    String formattedId = FORMAT.format(Double.valueOf(fatherVo.getFatherId()));
    String scoreText = String.valueOf(size) + "." + formattedId;
    fatherVo.setFatherId(scoreText);
    fatherVo.setTotalSon(size - 1);
    return Double.valueOf(scoreText);
}
/**
 * Moves the KEY_SET members with the given score from {@code oldKey} to {@code newKey}.
 *
 * @param oldKey base key of the source set
 * @param newKey base key of the destination set
 * @param score  score of the members to move, as a decimal string
 */
public void transferListMapKeySetFromOld2New(String oldKey, String newKey, String score) {
    String sourceSetKey = oldKey + GenericAttribute.KEY_SET;
    String targetSetKey = newKey + GenericAttribute.KEY_SET;
    transferScoredSortedListFromOld2New(sourceSetKey, targetSetKey, score);
}
/**
 * Copies every member with a score in {@code [score, getPointNext(score))} from the old
 * scored sorted set to the new one (all stored with {@code score}), then removes that
 * score range from the old set.
 *
 * @param oldKey full key of the source set
 * @param newKey full key of the destination set
 * @param score  lower bound of the score range, as a decimal string
 */
public void transferScoredSortedListFromOld2New(String oldKey, String newKey, String score) {
    RScoredSortedSet<String> source = redissonClient.getScoredSortedSet(oldKey);
    RScoredSortedSet<String> target = redissonClient.getScoredSortedSet(newKey);
    Double lowerBound = Double.valueOf(score);
    Double upperBound = getPointNext(lowerBound);
    for (String member : source.valueRange(lowerBound, true, upperBound, false)) {
        target.add(lowerBound, member);
    }
    source.removeRangeByScore(lowerBound, true, upperBound, false);
}
/**
 * Replaces the KEY_SET members in the score range {@code [score, getPointNext(score))}
 * with the JSON form of {@code t}, stored under {@code score}.
 * <p>
 * The default TTL is applied after the add: removing the range can empty (and thereby
 * delete) the key, and a key recreated by the add would otherwise carry no TTL.
 *
 * @param key   base key of the scored sorted set
 * @param score score of the entry to replace, as a decimal string
 * @param t     replacement value, serialized with fastjson
 * @param <T>   value type
 */
public <T> void replaceListMapKeySetByScore(String key, String score, T t) {
    Double scoreValue = Double.valueOf(score);
    RScoredSortedSet<String> scoredSortedSet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    scoredSortedSet.removeRangeByScore(scoreValue, true, getPointNext(scoreValue), false);
    scoredSortedSet.add(scoreValue, JSONObject.toJSONString(t));
    // Set the TTL last so it applies to the key as it exists after the write.
    expireDefault(scoredSortedSet);
}
/**
 * Reads the string stored in the bucket under the prefixed key.
 *
 * @param assembleKey logical key; it is passed through redisKey before use
 * @return the bucket content cast to String (whatever {@code RBucket.get} returns)
 */
public String getBucket(String assembleKey) {
    Object stored = redissonClient.getBucket(redisKey(assembleKey)).get();
    return (String) stored;
}
/**
 * Stores a string in the bucket under the prefixed key.
 *
 * @param assembleKey logical key; it is passed through redisKey before use
 * @param str         value to store
 */
public void setBucket(String assembleKey, String str) {
    String prefixedKey = redisKey(assembleKey);
    RBucket<Object> target = redissonClient.getBucket(prefixedKey);
    target.set(str);
}
/**
 * Deletes the bucket stored under the prefixed key.
 *
 * @param assembleKey logical key; it is passed through redisKey before use
 */
public void deleteBucket(String assembleKey) {
    redissonClient.getBucket(redisKey(assembleKey)).delete();
}
/**
 * Deletes the list stored under the prefixed key (counterpart of setList/getList).
 *
 * @param assembleKey logical key; it is passed through redisKey before use
 */
public void deleteListByKey(String assembleKey) {
    String prefixedKey = redisKey(assembleKey);
    redissonClient.getList(prefixedKey).delete();
}
/**
 * Deletes the three structures derived from {@code key}: the KEY_SET scored sorted set,
 * the MAP_SET list multimap and the NOISE_SET entry (the latter via a list handle).
 *
 * @param key base key; used as-is, without redisKey prefixing
 */
public void deleteListMap(String key) {
    String keySetName = key + GenericAttribute.KEY_SET;
    String mapSetName = key + GenericAttribute.MAP_SET;
    String noiseSetName = key + GenericAttribute.NOISE_SET;
    redissonClient.getScoredSortedSet(keySetName).delete();
    redissonClient.getListMultimap(mapSetName).delete();
    redissonClient.getList(noiseSetName).delete();
}
/**
 * Deletes the MAP_SET multimap of {@code key}; beforehand, when the KEY_SET holds fewer
 * than 2000 members, each member is queued (asynchronously) into the NOISE_SET sorted set.
 * <p>
 * The NOISE_SET handle is obtained once rather than once per member, since it does not
 * change during the loop.
 *
 * @param key base key; used as-is, without redisKey prefixing
 */
public void deleteListMapRetainNoiseRule(String key) {
    RScoredSortedSet<String> scoredSortedSet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    if (scoredSortedSet.size() < 2000) {
        RSortedSet<String> noiseSet = redissonClient.getSortedSet(key + GenericAttribute.NOISE_SET);
        for (String str : scoredSortedSet) {
            noiseSet.addAsync(str);
        }
    }
    redissonClient.getListMultimap(key + GenericAttribute.MAP_SET).delete();
}
/**
 * Asynchronously deletes the bucket stored under exactly the given key.
 *
 * @param key full Redis key; used as-is, without redisKey prefixing
 * @return future of the delete operation
 */
public RFuture<Boolean> deleteByData(String key) {
    RFuture<Boolean> pending = redissonClient.getBucket(key).deleteAsync();
    return pending;
}
/**
 * Asynchronously deletes the list stored under exactly the given key.
 *
 * @param key full Redis key; used as-is, without redisKey prefixing
 * @return future of the delete operation
 */
public RFuture<Boolean> deleteListByData(String key) {
    RFuture<Boolean> pending = redissonClient.getList(key).deleteAsync();
    return pending;
}
/**
 * Starts asynchronous deletion of the KEY_SET, MAP_SET and NOISE_SET structures of {@code key}.
 *
 * @param key base key; used as-is, without redisKey prefixing
 * @return the three delete futures, in KEY_SET, MAP_SET, NOISE_SET order
 */
public List<RFuture<Boolean>> deleteListMapByType(String key) {
    RFuture<Boolean> keySetDeletion = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET).deleteAsync();
    RFuture<Boolean> mapSetDeletion = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET).deleteAsync();
    RFuture<Boolean> noiseSetDeletion = redissonClient.getSortedSet(key + GenericAttribute.NOISE_SET).deleteAsync();
    List<RFuture<Boolean>> pending = new ArrayList<>();
    pending.add(keySetDeletion);
    pending.add(mapSetDeletion);
    pending.add(noiseSetDeletion);
    return pending;
}
/**
 * Asynchronous variant of deleteListMapRetainNoiseRule: when the KEY_SET holds fewer than
 * 2000 members each member is queued into the NOISE_SET sorted set, then the MAP_SET
 * multimap is deleted asynchronously.
 * <p>
 * The NOISE_SET handle is obtained once rather than once per member, since it does not
 * change during the loop.
 *
 * @param key base key; used as-is, without redisKey prefixing
 * @return future of the MAP_SET delete operation
 */
public RFuture<Boolean> deleteListMapRetainNoiseRuleByData(String key) {
    RScoredSortedSet<String> scoredSortedSet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    if (scoredSortedSet.size() < 2000) {
        RSortedSet<String> noiseSet = redissonClient.getSortedSet(key + GenericAttribute.NOISE_SET);
        for (String str : scoredSortedSet) {
            noiseSet.addAsync(str);
        }
    }
    return redissonClient.getListMultimap(key + GenericAttribute.MAP_SET).deleteAsync();
}
/**
 * Appends the given strings to the list stored under exactly {@code key} and applies the
 * default TTL.
 * <p>
 * The TTL is set after the append: an expire issued before the first write finds no key
 * and has no effect, which would leave a newly created list without expiry.
 *
 * @param key  full Redis key; used as-is, without redisKey prefixing
 * @param list values to append; nothing happens when null or empty
 */
public void addListByData(String key, List<String> list) {
    if (checkIsNull(list)) {
        return;
    }
    RList<String> rList = redissonClient.getList(key);
    rList.addAll(list);
    expireDefault(rList);
}
/**
 * Reads the JSON string stored in the bucket under exactly {@code key} and parses it.
 *
 * @param key   full Redis key; used as-is, without redisKey prefixing
 * @param clazz target type for JSON parsing
 * @param <T>   target type
 * @return the parsed object, or {@code null} when the bucket content is empty
 */
public <T> T getStrByData(String key, Class<T> clazz) {
    RBucket<String> holder = redissonClient.getBucket(key);
    String json = holder.get();
    return StringUtils.isEmpty(json) ? null : JSONObject.parseObject(json, clazz);
}
/**
 * Stores the JSON form of {@code t} in the bucket under exactly {@code key} and applies
 * the default TTL.
 * <p>
 * The TTL is set after the value is written: a plain set replaces the key and discards
 * any TTL, and an expire issued before the first write finds no key at all.
 *
 * @param key full Redis key; used as-is, without redisKey prefixing
 * @param t   value to serialize with fastjson
 * @param <T> value type
 */
public <T> void setStrByData(String key, T t) {
    RBucket<String> rBucket = redissonClient.getBucket(key);
    rBucket.set(JSONObject.toJSONString(t));
    expireDefault(rBucket);
}
/**
 * Reads every element of the list stored under exactly {@code key}, gunzips each one and
 * parses it as {@code clazz}.
 *
 * @param key   full Redis key; used as-is, without redisKey prefixing
 * @param clazz target type for JSON parsing
 * @param <T>   target type
 * @return the parsed elements; an empty list when the stored list is empty
 */
public <T> List<T> getListAllWithGunZipByData(String key, Class<T> clazz) {
    RList<String> stored = redissonClient.getList(key);
    if (!checkIsNull(stored)) {
        return gunzipFormat(stored, clazz);
    }
    return new ArrayList<>();
}
/**
 * Gzip-serializes every value list and hands the result to setListMapAllCustom.
 *
 * @param key     base key of the list-map structure
 * @param listMap template to values; nothing happens when null or empty
 * @param <T>     value type, serialized with fastjson
 */
public <T> void setListMapAllWithGzipCustom(String key, Map<TemplateFatherVo, List<T>> listMap) {
    if (checkIsNull(listMap)) {
        return;
    }
    Map<TemplateFatherVo, List<String>> compressedByFather = new HashMap<>(listMap.size());
    for (Map.Entry<TemplateFatherVo, List<T>> entry : listMap.entrySet()) {
        compressedByFather.put(entry.getKey(), gzipFormat(entry.getValue()));
    }
    setListMapAllCustom(key, compressedByFather);
}
/**
 * Adds all member/score pairs to the scored sorted set stored under exactly {@code key}
 * and applies the default TTL.
 * <p>
 * The TTL is set after the add: an expire issued before the first write finds no key and
 * has no effect, which would leave a newly created set without expiry.
 *
 * @param key full Redis key; used as-is, without redisKey prefixing
 * @param map member to score; nothing happens when null or empty
 */
public void addScoredSortedList(String key, Map<String, Double> map) {
    if (checkIsNull(map)) {
        return;
    }
    RScoredSortedSet<String> scoredSortedSet = redissonClient.getScoredSortedSet(key);
    scoredSortedSet.addAll(map);
    expireDefault(scoredSortedSet);
}
/**
 * Stores a template-to-values map as two structures: the template JSON goes into the
 * KEY_SET scored sorted set (score from generateScore), and the values go into the
 * MAP_SET list multimap under the template's father id.
 * <p>
 * generateScore rewrites each template's father id, so the id used for the multimap is
 * the generated score string. Value lists longer than 1000 are written in slices of at
 * most 1000 elements.
 *
 * @param key     base key of the list-map structure
 * @param listMap template to already-serialized values; nothing happens when null or empty
 */
public void setListMapAllCustom(String key, Map<TemplateFatherVo, List<String>> listMap) {
    if (checkIsNull(listMap)) {
        return;
    }
    // K collection: template JSON scored by the generated score.
    Map<String, Double> scoreByTemplateJson = new HashMap<>(listMap.size());
    for (Map.Entry<TemplateFatherVo, List<String>> entry : listMap.entrySet()) {
        TemplateFatherVo fatherVo = entry.getKey();
        Double score = generateScore(fatherVo, entry.getValue().size());
        scoreByTemplateJson.put(JSONObject.toJSONString(fatherVo), score);
    }
    addScoredSortedList(key + GenericAttribute.KEY_SET, scoreByTemplateJson);

    // V collection: values grouped under the (rewritten) father id.
    RListMultimap<String, String> valueStore = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    final int batchSize = 1000;
    for (Map.Entry<TemplateFatherVo, List<String>> entry : listMap.entrySet()) {
        TemplateFatherVo fatherVo = entry.getKey();
        List<String> values = entry.getValue();
        int total = values.size();
        if (total <= batchSize) {
            valueStore.putAll(fatherVo.getFatherId(), values);
            continue;
        }
        // Large lists are pushed slice by slice.
        for (int from = 0; from < total; from += batchSize) {
            int to = Math.min(from + batchSize, total);
            valueStore.putAll(fatherVo.getFatherId(), values.subList(from, to));
        }
    }
}
/**
 * Reads all values stored under {@code fieldKey} in the MAP_SET multimap of {@code key},
 * gunzips each one and parses it as {@code clazz}.
 *
 * @param key      base key of the multimap
 * @param fieldKey multimap field to read
 * @param clazz    target type for JSON parsing
 * @param <T>      target type
 * @return the parsed values; an empty list when nothing is stored
 */
public <T> List<T> getListMapValueByFieldWithGunZipByData(String key, String fieldKey, Class<T> clazz) {
    RListMultimap<String, String> valueStore = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    return gunzipFormat(valueStore.getAll(fieldKey), clazz);
}
/**
 * Adds the JSON form of {@code obj} to the KEY_SET scored sorted set of {@code key} under
 * the given score, then applies the default TTL to that set.
 *
 * @param key   base key of the scored sorted set
 * @param score score as a decimal string
 * @param obj   value to serialize with fastjson
 */
public void addListMapKeyByData(String key, String score, Object obj) {
    String keySetName = key + GenericAttribute.KEY_SET;
    redissonClient.getScoredSortedSet(keySetName).add(Double.valueOf(score), JSONObject.toJSONString(obj));
    RExpirable expirable = redissonClient.getScoredSortedSet(keySetName);
    expireDefault(expirable);
}
/**
 * Appends one gzip-compressed JSON value to the MAP_SET multimap of {@code key} under
 * {@code fieldKey} and applies the default TTL.
 * <p>
 * The TTL is set after the put so it also covers a key or sub-list created by this call.
 *
 * @param key      base key of the multimap
 * @param fieldKey multimap field to append to
 * @param json     value to serialize and compress
 */
public void addListMapValueOneByData(String key, String fieldKey, JSONObject json) {
    RListMultimap<String, String> rMultimap = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    rMultimap.put(fieldKey, Tools.gzip(JSONObject.toJSONString(json)));
    expireDefault(rMultimap);
}
/**
 * Decrements the total-son counter of the template stored in the KEY_SET of {@code key}
 * under the given score: the entry is removed, updated and re-added with the same score.
 * <p>
 * When no entry exists for the score the method logs a warning and returns instead of
 * failing with a NullPointerException. The default TTL is applied after the re-add,
 * because removing the entry may have emptied (and thereby deleted) the key.
 *
 * @param key   base key of the scored sorted set
 * @param score score of the template entry, as a decimal string
 */
public void reduceListMapKeyByScoreCustomByData(String key, String score) {
    TemplateFatherVo fatherVo = removeListMapKeySetByScore(key, score, TemplateFatherVo.class);
    if (null == fatherVo) {
        log.warn("reduceListMapKeyByScoreCustomByData-no entry found, key:{}, score:{}", key, score);
        return;
    }
    fatherVo.setTotalSon(fatherVo.getTotalSon() - 1);
    RScoredSortedSet<String> rScoredSortedSet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    rScoredSortedSet.add(Double.valueOf(score), JSONObject.toJSONString(fatherVo));
    expireDefault(rScoredSortedSet);
}
/**
 * Reads a rank range of the KEY_SET of {@code key} via getScoredSortedList.
 *
 * @param key       base key of the scored sorted set
 * @param fromIndex first rank passed to getScoredSortedList
 * @param toIndex   last rank passed to getScoredSortedList
 * @param clazz     target type for JSON parsing
 * @param <T>       target type
 * @return the parsed members
 */
public <T> List<T> getListMapKeySet(String key, int fromIndex, int toIndex, Class<T> clazz) {
    String keySetName = key + GenericAttribute.KEY_SET;
    return getScoredSortedList(keySetName, fromIndex, toIndex, clazz);
}
/**
 * Reads every member of the NOISE_SET sorted set of {@code key} and parses it as {@code clazz}.
 *
 * @param key   base key of the sorted set
 * @param clazz target type for JSON parsing
 * @param <T>   target type
 * @return the parsed members, or {@code null} if the client returns no set handle
 */
public <T> List<T> getListMapNoiseRule(String key, Class<T> clazz) {
    RSortedSet<String> noiseSet = redissonClient.getSortedSet(key + GenericAttribute.NOISE_SET);
    if (noiseSet == null) {
        return null;
    }
    List<T> parsed = new ArrayList<>(noiseSet.size());
    for (String json : noiseSet) {
        T item = JSONObject.parseObject(json, clazz);
        parsed.add(item);
    }
    return parsed;
}
/**
 * Returns the number of members in the scored sorted set stored under exactly {@code key}.
 *
 * @param key full Redis key; used as-is, without redisKey prefixing
 * @return member count
 */
public int getScoredSortedListSize(String key) {
    int memberCount = redissonClient.getScoredSortedSet(key).size();
    return memberCount;
}
/**
 * Returns how many values are stored under {@code fieldKey} in the MAP_SET multimap of {@code key}.
 *
 * @param key      base key of the multimap
 * @param fieldKey multimap field to count
 * @return number of values under the field
 */
public int getListMapValueByFieldSize(String key, String fieldKey) {
    String mapSetName = key + GenericAttribute.MAP_SET;
    return redissonClient.getListMultimap(mapSetName).get(fieldKey).size();
}
/**
 * Reads the index range {@code fromIndex..toIndex} (as interpreted by {@code RList.range})
 * of the values stored under {@code fieldKey} in the MAP_SET multimap, gunzips each value
 * and parses it as {@code clazz}.
 *
 * @param key       base key of the multimap
 * @param fieldKey  multimap field to read
 * @param fromIndex start index passed to {@code range}
 * @param toIndex   end index passed to {@code range}
 * @param clazz     target type for JSON parsing
 * @param <T>       target type
 * @return the parsed values; an empty list when the range is empty
 */
public <T> List<T> getRangeListMapValueByFieldWithGunZip(String key, String fieldKey, int fromIndex, int toIndex,
        Class<T> clazz) {
    RListMultimap<String, String> valueStore = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    List<String> compressed = valueStore.get(fieldKey).range(fromIndex, toIndex);
    return gunzipFormat(compressed, clazz);
}
/**
 * Returns the number of members in the KEY_SET scored sorted set of {@code key}.
 *
 * @param key base key of the scored sorted set
 * @return member count
 */
public int getListMapKeySetSize(String key) {
    String keySetName = key + GenericAttribute.KEY_SET;
    return getScoredSortedListSize(keySetName);
}
/**
 * Replaces the values stored under {@code fieldKey} in the MAP_SET multimap of {@code key}
 * with the gzip-compressed JSON of {@code list} and applies the default TTL.
 * <p>
 * The TTL is set after the replacement so it also covers a key or sub-list created by
 * this call.
 *
 * @param key      base key of the multimap
 * @param fieldKey multimap field whose values are replaced
 * @param list     new values, serialized with fastjson and compressed
 * @param <T>      value type
 */
public <T> void setListMapValueByFieldWithGZipByData(String key, String fieldKey, List<T> list) {
    RListMultimap<String, String> rListMultimap = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    rListMultimap.replaceValues(fieldKey, gzipFormat(list));
    expireDefault(rListMultimap);
}
/**
 * Sets a TTL, in minutes, on the list stored under the prefixed key.
 *
 * @param key  logical key; it is passed through redisKey before use
 * @param time TTL in minutes
 */
public void listExpirable(String key, long time) {
    String prefixedKey = redisKey(key);
    redissonClient.getList(prefixedKey).expire(time, TimeUnit.MINUTES);
}
/**
 * Serializes each element to JSON and compresses it with Tools.gzip.
 *
 * @param list elements to convert
 * @param <T>  element type
 * @return the compressed strings in input order; an empty list for null or empty input
 */
private <T> List<String> gzipFormat(List<T> list) {
    if (checkIsNull(list)) {
        return new ArrayList<>();
    }
    List<String> compressed = new ArrayList<>(list.size());
    for (T element : list) {
        compressed.add(Tools.gzip(JSONObject.toJSONString(element)));
    }
    return compressed;
}
/**
 * Reads every member of the scored sorted set stored under exactly {@code key} and parses
 * it as {@code clazz}. Members that fail to parse are logged and skipped.
 *
 * @param key   full Redis key; used as-is, without redisKey prefixing
 * @param clazz target type for JSON parsing
 * @param <T>   target type
 * @return the successfully parsed members
 */
public <T> List<T> getScoredSortedList(String key, Class<T> clazz) {
    RScoredSortedSet<String> source = redissonClient.getScoredSortedSet(key);
    List<T> parsed = new ArrayList<>(source.size());
    for (String json : source) {
        T item;
        try {
            item = JSONObject.parseObject(json, clazz);
        } catch (Exception e) {
            log.error("parseObject-", e);
            continue;
        }
        parsed.add(item);
    }
    return parsed;
}
/**
 * Reads the rank range {@code fromIndex..toIndex} of the scored sorted set stored under
 * exactly {@code key}, in reversed order ({@code entryRangeReversed}), and parses each
 * member as {@code clazz}. Members that fail to parse are logged and skipped.
 *
 * @param key       full Redis key; used as-is, without redisKey prefixing
 * @param fromIndex start rank passed to {@code entryRangeReversed}
 * @param toIndex   end rank passed to {@code entryRangeReversed}
 * @param clazz     target type for JSON parsing
 * @param <T>       target type
 * @return the successfully parsed members
 */
public <T> List<T> getScoredSortedList(String key, int fromIndex, int toIndex, Class<T> clazz) {
    RScoredSortedSet<String> source = redissonClient.getScoredSortedSet(key);
    List<T> parsed = new ArrayList<>(source.size());
    for (ScoredEntry<String> scored : source.entryRangeReversed(fromIndex, toIndex)) {
        try {
            T item = JSONObject.parseObject(scored.getValue(), clazz);
            parsed.add(item);
        } catch (Exception e) {
            log.error("getScoredSortedList-parseJSONObject出错,value:{}", scored.getValue(),e);
        }
    }
    return parsed;
}
/**
 * Removes the KEY_SET members of {@code key} whose score lies in
 * {@code [score, getPointNext(score))} and returns the first of them parsed as {@code clazz}.
 * <p>
 * The set is opened under {@code key + GenericAttribute.KEY_SET}, the same key that
 * getListMapKeySetByScore and replaceListMapKeySetByScore use. The MAP_SET key holds a
 * list multimap and cannot be read as a scored sorted set.
 *
 * @param key   base key of the scored sorted set
 * @param score lower bound of the score range, as a decimal string
 * @param clazz target type for JSON parsing
 * @param <T>   target type
 * @return the first removed member, or {@code null} when the range was empty
 */
private <T> T removeListMapKeySetByScore(String key, String score, Class<T> clazz) {
    RScoredSortedSet<String> scoredSortedSet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    Double scoreValue = Double.valueOf(score);
    Double upperBound = getPointNext(scoreValue);
    // Snapshot the members before removing them.
    List<String> list = new ArrayList<>(scoredSortedSet.valueRange(scoreValue, true, upperBound, false));
    scoredSortedSet.removeRangeByScore(scoreValue, true, upperBound, false);
    if (checkIsNull(list)) {
        return null;
    }
    return JSONObject.parseObject(list.get(0), clazz);
}
/**
 * Decompresses each string with Tools.gunzip and parses the result as {@code clazz}.
 *
 * @param list  compressed JSON strings
 * @param clazz target type for JSON parsing
 * @param <T>   target type
 * @return the parsed objects in input order; an empty list for null or empty input
 */
private <T> List<T> gunzipFormat(List<String> list, Class<T> clazz) {
    if (checkIsNull(list)) {
        return new ArrayList<>();
    }
    List<T> parsed = new ArrayList<>(list.size());
    for (String compressed : list) {
        parsed.add(JSONObject.parseObject(Tools.gunzip(compressed), clazz));
    }
    return parsed;
}
/**
* Computes the exclusive upper bound used for score-range queries: the score whose
* fractional part, read as an integer, is one greater than that of {@code scoreValue}.
* <p>
* The value is split at the decimal point of {@code Double.toString}; the fractional digits
* are right-padded with zeros to 7 characters, parsed with FORMAT, incremented by one,
* formatted with FORMAT again and re-joined with the unchanged integer part.
* <p>
* NOTE(review): FORMAT is declared outside this block; its pattern is presumably 7 digits
* wide to match the padding - confirm. {@code Double.toString} switches to scientific
* notation for magnitudes of 10^7 and above (or below 10^-3), in which case the text after
* the dot contains an exponent - verify that scores stay within the plain-decimal range.
*
* @param scoreValue lower bound of the range
* @return the next score boundary, or 0.0 when the fractional part cannot be parsed
*/
private Double getPointNext(Double scoreValue) {
try {
// values[0] = integer part, values[1] = fractional digits
String[] values = scoreValue.toString().split("\\.");
String a = values[1];
// Right-pad the fractional digits to 7 characters.
if (a.length() < 7) {
StringBuilder sb = new StringBuilder(a);
while (sb.length() < 7) {
sb.append("0");
}
a = sb.toString();
}
// Treat the fractional digits as an integer and step to the next one.
Integer b = FORMAT.parse(a).intValue() + 1;
String c = new StringBuilder(values[0]).append(".").append(FORMAT.format(b)).toString();
return Double.valueOf(c);
} catch (ParseException e) {
log.error(e);
}
// Fallback when parsing failed.
return 0.0;
}
/**
 * Builds the effective Redis key by prepending MARK_KEY to the logical key.
 *
 * @param key logical key
 * @return {@code MARK_KEY + key}
 */
private String redisKey(String key) {
    String prefixed = MARK_KEY + key;
    return prefixed;
}
/**
 * Applies the default time-to-live of 8 days to the given Redisson object.
 *
 * @param rExpirable object whose TTL is set
 */
private void expireDefault(RExpirable rExpirable) {
    final long defaultTtlDays = 8;
    rExpirable.expire(defaultTtlDays, TimeUnit.DAYS);
}
/**
 * Tells whether a list is null or has no elements.
 *
 * @param list list to inspect
 * @param <T>  element type
 * @return {@code true} for a null or empty list
 */
private <T> boolean checkIsNull(List<T> list) {
    if (list == null) {
        return true;
    }
    return list.isEmpty();
}
/**
 * Tells whether a map is null or has no entries.
 *
 * @param map map to inspect
 * @param <K> key type
 * @param <T> value type
 * @return {@code true} for a null or empty map
 */
private <K, T> boolean checkIsNull(Map<K, T> map) {
    if (map == null) {
        return true;
    }
    return map.isEmpty();
}
}
package com.zhiwei.middleware.automatic.server.service;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import java.util.List;
import java.util.Set;
// Automatic-marking operations: queue-driven marking, noise-template marking and event-data marking.
public interface AutoService {
/**
* Asynchronous automatic marking for single-project data.
*/
void asyncAutoMark();
/**
* Asynchronous automatic marking for data that belongs to multiple projects.
*/
void autoMarkMulti();
/**
* Automatic marking of aggregated noise templates.
* @param templateFatherVos noise templates
* @param group project
* @param field name of the field to compare
* @return number of templates that were marked
*/
int noiseAutoMark(Set<TemplateFatherVo> templateFatherVos, String group, String field);
/**
* Automatic marking of event data.
* @param group project
* @param data data set to mark
*/
void autMarkByEvent(String group, List<MarkInfo> data);
}
package com.zhiwei.middleware.automatic.server.service;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateTitleVo;
import java.util.List;
import java.util.Map;
// Operations on per-project title templates used for automatic marking.
public interface TemplateTitleService {
/**
* Automatically aggregates templates.
* @param groups projects to process
* @param startTime start of the time window (unit not visible here - confirm against the implementation)
* @param endTime end of the time window (unit not visible here - confirm against the implementation)
*/
void schedulerHourAggregation(List<String> groups, Long startTime, Long endTime);
/**
* Gets the text templates of a project.
* @param project project
* @return templates of the project
*/
Map<String, TemplateTitleVo> getTemplateTitleByProject(String project);
/**
* Stores the text templates of a project.
* @param project project
* @param vos templates to store
*/
void setTemplateTitleByProject(String project, Map<String, TemplateTitleVo> vos);
/**
* Corrects the markTag of a template title; the template is added when it does not exist.
*
* @param group project group
* @param templateTitle template title
* @param fixTag the correct tag
* @return outcome flag (exact meaning not visible here - presumably true on success)
*/
boolean modifyTemplateTitle(String group, String templateTitle, String fixTag);
/**
* Modifies the counter of a template.
* @param group project
* @param title template title
* @param num counter value (whether absolute or a delta is not visible here - confirm)
*/
void modifyTemplateNum(String group, String title, Long num);
/**
* Gets data by template title (only the latest 100 records).
*
* @param group project
* @param templateTitle template title
* @return feature values
*/
List<String> getMupdateByTemplateTitle(String group, String templateTitle);
/**
* Adds a template record.
* @param templateRecord template record
*/
void insertTemplateRecord (TemplateRecord templateRecord);
/**
* Tries to find the template title by title and feature value.
*
* @param group project
* @param title title
* @param mupdate feature value
* @return template title
*/
String tryGetTemplateTitleByMupdate(String group, String title, String mupdate);
/**
* Matches a title online against the existing aggregated titles of a project group.
*
* @param project project
* @param title title
* @return match result
*/
Map<String, Object> compareWithTemplateTileOL(String project, String title);
/**
* Resets an automatic-marking template.
* @param group project
* @param templateTitle template title
* @return whether the reset succeeded
*/
boolean resetTemplate (String group, String templateTitle);
}
package com.zhiwei.middleware.automatic.server.service;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import java.util.Map;
// Operations of the data-upload workflow: collecting source data, starting an upload and querying its results.
public interface UploadService {
/**
* Adds a source data set.
*
* @param group project
* @param id task id
* @param sourceStr source data as a string (format not visible here)
*/
void addUploadList(String group, String id, String sourceStr);
/**
* Starts an upload.
*
* @param markUploadRule rule describing the upload
*/
void startUpload(MarkUploadRule markUploadRule);
/**
* Gets the upload status (progress).
*
* @param group project
* @param id task id
*
* @return status information
*/
Map<String, Object> getUploadStatus(String group, String id);
/**
* Gets the data set for an upload type.
*
* @param group project
* @param id task id
* @param page page
* @param size size
* @param isAsc sort order
* @param searchField field to search
* @param keyword keyword
* @param uploadType upload type
*
* @return result data
*/
Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc,
String searchField, String keyword, UploadInfo.UploadType uploadType);
/**
* Gets the DataType.
*
* @param json source record
* @param typeB category of the record
*
* @return DataType
*/
UploadInfo.DataType getDataType(JSONObject json, ClassB.TypeB typeB);
/**
* Cleans up the data set of a task.
*
* @param group project
* @param id task id
*/
void cleanUploadResult(String group, String id);
}
package com.zhiwei.middleware.automatic.server.service.handler;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.middleware.automatic.server.pojo.AggreInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.AggreeTaskType;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import java.util.List;
/**
 * Base class for aggregation task handlers: stores the task description (a bucket) and
 * the task's source data (a list) in Redis, under keys assembled from the task type's
 * key prefix, a fixed segment, the group and the task id.
 */
public class BaseTaskHandler {
    private static final String SOURCE_KEY = "source";
    private static final String TASK_KEY = "task";

    private final RedissonUtil redissonUtil;
    private final AggreeTaskType aggreeTaskType;

    public BaseTaskHandler(RedissonUtil redissonUtil, AggreeTaskType aggreeTaskType) {
        this.redissonUtil = redissonUtil;
        this.aggreeTaskType = aggreeTaskType;
    }

    /**
     * @return the key prefix of this handler's task type
     */
    public String getKeyPrefix() {
        return aggreeTaskType.getKeyPrefix();
    }

    /**
     * Gets an aggregation task.
     *
     * @param group project
     * @param id    task id
     * @return the stored task, or {@code null} when nothing is stored for the key
     */
    public AggreInfo getAggreeTask(String group, String id) {
        JSONObject task = JSONObject.parseObject(redissonUtil.getBucket(getTaskKey(group, id)));
        // parseObject yields null for a missing value; avoid dereferencing it.
        return null == task ? null : task.toJavaObject(AggreInfo.class);
    }

    /**
     * Stores an aggregation task.
     *
     * @param group     project
     * @param id        task id
     * @param aggreInfo task to store
     */
    public void addAggreeTask(String group, String id, AggreInfo aggreInfo) {
        redissonUtil.setBucket(getTaskKey(group, id), JSONObject.toJSONString(aggreInfo));
    }

    /**
     * Appends records to the task's source data set.
     *
     * @param group      project
     * @param id         task id
     * @param dataSource records to append
     */
    public void addDataSource(String group, String id, List<String> dataSource) {
        redissonUtil.setList(getSourceKey(group, id), dataSource);
    }

    /**
     * Gets the task's source data set.
     *
     * @param group project
     * @param id    task id
     * @return the source data set
     */
    public List<String> getDataSource(String group, String id) {
        return redissonUtil.getList(getSourceKey(group, id));
    }

    /**
     * @param group project
     * @param id    task id
     * @return number of records in the task's source data set
     */
    public long getDataSourceSize(String group, String id) {
        return redissonUtil.getListSize(getSourceKey(group, id));
    }

    /**
     * Lets the task's source data set expire after 30 minutes.
     *
     * @param group project
     * @param id    task id
     */
    public void dataSourceExpirable(String group, String id) {
        redissonUtil.listExpirable(getSourceKey(group, id), 30);
    }

    /**
     * Removes everything stored for the task: the source data set and the task description.
     *
     * @param group project
     * @param id    task id
     */
    public void removerInfo(String group, String id) {
        // The data set is written through setList, which prefixes the key; deleteListByKey
        // applies the same prefix, whereas deleteList would target the unprefixed key and
        // leave the data in place.
        redissonUtil.deleteListByKey(getSourceKey(group, id));
        redissonUtil.deleteBucket(getTaskKey(group, id));
    }

    private String getSourceKey(String group, String id) {
        return Tools.assembleKey(aggreeTaskType.getKeyPrefix(), SOURCE_KEY, group, id);
    }

    private String getTaskKey(String group, String id) {
        return Tools.assembleKey(aggreeTaskType.getKeyPrefix(), TASK_KEY, group, id);
    }
}
package com.zhiwei.middleware.automatic.server.service.handler;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Sends records to Kafka.
 *
 * @ClassName: KafkaSendHandler
 * @Description: Kafka message sender
 * @author shenjunjie
 * @date 2019-08-29 23:09:30
 */
@Component
@EnableKafka
public class KafkaSendHandler {
    private static final Logger logger = LogManager.getLogger(KafkaSendHandler.class);
    /** Maximum number of send attempts per message before giving up. */
    private static final int MAX_SEND_ATTEMPTS = 3;
    /** Seconds to wait for the outcome of a single send. */
    private static final long SEND_TIMEOUT_SECONDS = 10L;

    @Autowired
    private KafkaTemplate<String, Object> kafkaTemplate;
    @Value("${crawler.topic}")
    private String topic;

    /**
     * Sends the source object of every mark info and logs how many were handed over.
     *
     * @param list mark infos whose source objects are sent
     */
    public void insertDataByMarkInfo(List<MarkInfo> list) {
        list.forEach(markInfo -> insertData(markInfo.getSourceObj()));
        logger.info("Kafka发送消息{}条", list.size());
    }

    /**
     * Sends one record, keyed by its {@code cname} field, retrying immediately on failure
     * up to {@link #MAX_SEND_ATTEMPTS} attempts. A record that still fails is logged and dropped.
     *
     * @param json record to send; a null record is logged and skipped
     */
    public void insertData(JSONObject json) {
        if (null == json) {
            logger.warn("Kafka send skipped: record is null");
            return;
        }
        String cname = json.getString("cname");
        for (int attempt = 1; attempt <= MAX_SEND_ATTEMPTS; attempt++) {
            if (syncSendKafkaMsg(topic, cname, json)) {
                return;
            }
            if (Thread.currentThread().isInterrupted()) {
                // The thread was asked to stop; do not keep retrying.
                break;
            }
            logger.error("Kafka消息发送{}失败,立即重试...", cname);
        }
        logger.error("Kafka send of {} gave up after at most {} attempts", cname, MAX_SEND_ATTEMPTS);
    }

    /**
     * Sends one Kafka message and waits for the outcome.
     *
     * @param topic   Kafka topic
     * @param msgType message key (message type)
     * @param json    payload; sent as its JSON string
     * @return {@code true} when the send completed successfully, {@code false} when it
     *         failed, timed out or the waiting thread was interrupted
     */
    private boolean syncSendKafkaMsg(String topic, String msgType, JSONObject json) {
        try {
            // Block on the send future so the result reflects the actual outcome.
            kafkaTemplate.send(topic, msgType, json.toJSONString())
                    .get(SEND_TIMEOUT_SECONDS, java.util.concurrent.TimeUnit.SECONDS);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("KafkaSendFailure", e);
            return false;
        } catch (Exception e) {
            logger.error("KafkaSendFailure", e);
            return false;
        }
    }
}
package com.zhiwei.middleware.automatic.server.service.handler;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeCache;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import com.zhiwei.middleware.automatic.server.pojo.enums.AggreeTaskType;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import com.zhiwei.nlp.AggreeBootStarter;
import com.zhiwei.nlp.utils.BasicUtil;
import com.zhiwei.nlp.vo.KResult;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult.ResultInfo;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Text aggregation tasks: collects source records per task id, runs the clustering on a
 * dedicated executor and caches the result list in Redis.
 */
@Service
public class TextHandlerService extends BaseTaskHandler {
    private static final Logger log = LogManager.getLogger(TextHandlerService.class);
    /** Default similarity limit: 0.1. */
    private static final double DEFAULT_LIMIT = 0.1;
    /** A single aggregation task must not exceed 150,000 records. */
    private static final int AGGREE_COUNT_LIMIT = 150000;
    /** No new task is started once this many executor threads are active. */
    private static final int MAX_ACTIVE_TASKS = 10;
    private static final String TEXT_RES = "textRes";
    /** Executor that runs the aggregation tasks. */
    private final ThreadPoolTaskExecutor aggreeExecutor;
    private final RedissonUtil redissonUtil;

    public TextHandlerService(RedissonUtil redissonUtil,
            @Qualifier("aggreeExecutor") ThreadPoolTaskExecutor aggreeExecutor) {
        super(redissonUtil, AggreeTaskType.COMMON);
        this.redissonUtil = redissonUtil;
        this.aggreeExecutor = aggreeExecutor;
    }

    /**
     * Generates the id (order number) of a new aggregation task.
     *
     * @return the new task id
     */
    public String generateAggreeOrder() {
        return redissonUtil.nextId(GenericAttribute.KEY_INCREMENT);
    }

    /**
     * Appends records to the source data of a task, unless the task would then exceed
     * {@link #AGGREE_COUNT_LIMIT} records.
     *
     * @param id   task id
     * @param list records to append
     * @return {@code true} when the records were stored, {@code false} when the limit would be exceeded
     */
    public boolean appendAggreeOrderNew(String id, List<AggreeDTO> list) {
        long existing = getDataSourceSize(null, id);
        // Check the projected total, not just what is already stored.
        long expected = existing + list.size();
        if (!checkLimit(expected)) {
            log.info("id:{},聚合任务超出上限:{},预期值:{}", id, AGGREE_COUNT_LIMIT, expected);
            return false;
        }
        addDataSource(null, id, list.stream().map(JSONObject::toJSONString).collect(Collectors.toList()));
        log.info("id:{},聚合任务添加{}条", id, list.size());
        return true;
    }

    /**
     * Starts aggregation with the default limit.
     *
     * @param id task id
     * @return whether the task was submitted
     */
    public boolean startAggree(String id) {
        return startAggree(id, DEFAULT_LIMIT);
    }

    /**
     * Starts aggregation of a task's source data on the aggregation executor.
     *
     * @param id    task id
     * @param limit limit passed to the clustering
     * @return {@code true} when the task was submitted; {@code false} when there is no
     *         source data handle or the executor already runs {@link #MAX_ACTIVE_TASKS} tasks
     */
    public boolean startAggree(String id, double limit) {
        List<String> source = getDataSource(null, id);
        if (null == source) {
            return false;
        }
        if (aggreeExecutor.getActiveCount() >= MAX_ACTIVE_TASKS) {
            return false;
        }
        aggreeExecutor.execute(() -> {
            try {
                runAggree(id, limit, source);
            } catch (Exception e) {
                // Without this the failure would vanish inside the executor thread.
                log.error("id:{}, aggregation task failed", id, e);
            }
        });
        return true;
    }

    /**
     * Gets the cached aggregation result together with the task's source data.
     *
     * @param id task id
     * @return cache object holding the result list and the source records keyed by id
     */
    public CommonAggreeCache getAggreeResult(String id) {
        CommonAggreeCache cache = new CommonAggreeCache(id);
        cache.setResults(redissonUtil.getList(Tools.assembleKey(TEXT_RES, id))
                .stream().map(e -> JSONObject.parseObject(e).toJavaObject(ResultInfo.class)).collect(Collectors.toList()));
        cache.setData(toDtoMap(getDataSource(null, id)));
        return cache;
    }

    /** Clusters the source records, sorts the clusters by size (descending) and caches them for 30 minutes. */
    private void runAggree(String id, double limit, List<String> source) {
        log.info("id:{},开始聚合任务", id);
        Map<String, AggreeDTO> dataGroup = toDtoMap(source);
        List<KResult<String>> kResultList = AggreeBootStarter.getKResult(
                dataGroup.values().stream().collect(Collectors.toMap(AggreeDTO::getId, AggreeDTO::getText)), limit);
        List<ResultInfo> res = new ArrayList<>(kResultList.size());
        kResultList.forEach(result -> res.add(packageResultInfo(result, dataGroup)));
        // Largest clusters first.
        res.sort((a, b) -> Integer.compare(b.getSize(), a.getSize()));
        String resKey = Tools.assembleKey(TEXT_RES, id);
        redissonUtil.setList(resKey, res.stream().map(JSONObject::toJSONString).collect(Collectors.toList()));
        dataSourceExpirable(null, id);
        redissonUtil.listExpirable(resKey, 30);
        log.info("id:{},聚合任务结束,缓存已生成", id);
    }

    /** Parses the stored JSON records into DTOs keyed by id; for a repeated id the first record wins. */
    private Map<String, AggreeDTO> toDtoMap(List<String> source) {
        return source.stream()
                .map(e -> JSONObject.parseObject(e).toJavaObject(AggreeDTO.class))
                .collect(Collectors.toMap(AggreeDTO::getId, dto -> dto, (first, duplicate) -> first));
    }

    /** Builds the result entry of one cluster: name, size, member records and a representative record. */
    private ResultInfo packageResultInfo(KResult<String> result, Map<String, AggreeDTO> sourceMap) {
        List<String> indexes = result.getDataPoints();
        AggreeDTO templateData;
        List<String> hitList = indexes.stream().filter(index -> BasicUtil
                .textComparisonByAggreeConfig(result.getClusterName(), sourceMap.get(index).getText()))
                .collect(Collectors.toList());
        if (hitList.isEmpty()) {
            // No record matched the cluster name; fall back to the first record of the cluster.
            log.info("未命中重要渠道选择第一条数据");
            templateData = sourceMap.get(indexes.get(0));
        } else {
            templateData = sourceMap.get(hitList.get(0));
        }
        // Hash-based membership test keeps the filter linear for large tasks.
        java.util.Set<String> memberIds = new java.util.HashSet<>(indexes);
        return new ResultInfo(result.getClusterName(), indexes.size(),
                sourceMap.values().stream().filter(dto -> memberIds.contains(dto.getId())).collect(Collectors.toList()),
                templateData);
    }

    /**
     * Checks that a task size stays within the limit.
     *
     * @param size projected number of records of the task
     * @return {@code true} when {@code size} does not exceed {@link #AGGREE_COUNT_LIMIT}
     */
    private boolean checkLimit(long size) {
        return size <= AGGREE_COUNT_LIMIT;
    }
}
package com.zhiwei.middleware.automatic.server.service.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.TemplateRecordDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import com.zhiwei.middleware.automatic.server.pojo.TemplateNum;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import com.zhiwei.middleware.automatic.server.pojo.enums.TemplateStatus;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateTitleVo;
import com.zhiwei.middleware.automatic.server.queue.TemplateNumQueue;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.AutoService;
import com.zhiwei.middleware.automatic.server.service.TemplateTitleService;
import com.zhiwei.middleware.automatic.server.util.CosineSimilarity;
import com.zhiwei.middleware.automatic.server.util.MarkInfoUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
@Service
public class AutoServiceImpl implements AutoService {
private static final Logger log = LogManager.getLogger(AutoServiceImpl.class);
private final RedissonUtil redissonUtil;
private final DubboHandler dubboHandler;
private final TemplateTitleService templateTitleService;
private final ThreadPoolTaskExecutor autoMarkExecutor;
private final TemplateNumQueue templateNumQueue;
/**
 * Creates the service with its collaborators.
 *
 * @param redissonUtil         Redis access helper
 * @param dubboHandler         remote handler used to obtain feature values
 * @param templateTitleService template lookup and record service
 * @param templateNumQueue     queue receiving template counter updates
 * @param autoMarkExecutor     executor (bean "autMarkExecutor") that runs the marking batches
 */
public AutoServiceImpl(RedissonUtil redissonUtil,
        DubboHandler dubboHandler, TemplateTitleService templateTitleService,
        TemplateNumQueue templateNumQueue,
        @Qualifier("autMarkExecutor") ThreadPoolTaskExecutor autoMarkExecutor) {
    this.autoMarkExecutor = autoMarkExecutor;
    this.templateNumQueue = templateNumQueue;
    this.templateTitleService = templateTitleService;
    this.dubboHandler = dubboHandler;
    this.redissonUtil = redissonUtil;
}
/**
 * Pulls a batch of single-project records from the Redis queue, parses them, groups them
 * by their {@code mgroup} field and runs automatic marking per project.
 * <p>
 * Records that cannot be parsed are logged (with the exception) and skipped. Records
 * without a usable {@code mgroup} are grouped under the empty string; a null group key
 * would make {@code Collectors.groupingBy} throw and abort the whole batch.
 */
@Override
public void asyncAutoMark() {
    List<String> infos = redissonUtil.pullQueue(GenericAttribute.REDIS_QUEUE_ONE_KEY, GenericAttribute.REDIS_QUEUE_LIMIT);
    if (Tools.isEmpty(infos)) {
        return;
    }
    List<MarkInfo> data = infos.stream()
            .map(e -> {
                try {
                    JSONObject jsonObject = JSONObject.parseObject(e);
                    return jsonObject.toJavaObject(MarkInfo.class);
                } catch (Exception exception) {
                    log.error("单项目自动标注失败,json转换异常,原数据:{}", e, exception);
                }
                return null;
            })
            .filter(Objects::nonNull)
            .collect(Collectors.toList());
    if (data.isEmpty()) {
        log.info("异步单项目自动标注,本次拉取数据为0");
        return;
    }
    // Group by project before marking.
    Map<String, List<MarkInfo>> groupMap = data.stream()
            .collect(Collectors.groupingBy(markInfo -> {
                try {
                    String mgroup = markInfo.getSourceObj().getString("mgroup");
                    return null == mgroup ? "" : mgroup;
                } catch (Exception e) {
                    return "";
                }
            }));
    projectAutoMark(groupMap);
}
/**
 * Pulls a batch of multi-project records from the Redis queue and runs automatic marking
 * once per target project of each record.
 * <p>
 * Each project gets its own shallow copy of the record's source object with
 * {@code mgroup} set to that project. NOTE(review): the copy guards against the same
 * JSONObject instance ending up in several MarkInfo objects if
 * MarkInfoUtil.transformToMarkInfo keeps a reference to its argument - confirm against
 * that helper.
 */
@Override
public void autoMarkMulti() {
    List<String> infos = redissonUtil.pullQueue(GenericAttribute.REDIS_QUEUE_MULTI_KEY, GenericAttribute.REDIS_QUEUE_LIMIT);
    // Same guard as asyncAutoMark: nothing pulled, nothing to do.
    if (Tools.isEmpty(infos)) {
        return;
    }
    List<MarkInfoMulti> data = infos.stream()
            .map(e -> {
                try {
                    return JSONObject.parseObject(e).toJavaObject(MarkInfoMulti.class);
                } catch (Exception exception) {
                    log.error("多项目自动标注失败,json转换异常,原数据:{}", e, exception);
                }
                return null;
            })
            .filter(Objects::nonNull)
            .collect(Collectors.toList());
    if (data.isEmpty()) {
        log.info("异步多项目自动标注,本次拉取数据为0");
        return;
    }
    Map<String, List<MarkInfo>> groupMap = new HashMap<>();
    data.forEach(markInfoMulti -> {
        // The incoming mark info does not carry a correct mgroup; it is set per project below.
        JSONObject example = markInfoMulti.getMarkInfo().getSourceObj();
        markInfoMulti.getProjects().forEach(project -> {
            JSONObject projectSource = new JSONObject(new LinkedHashMap<>(example));
            projectSource.put("mgroup", project);
            groupMap.computeIfAbsent(project, k -> new ArrayList<>())
                    .add(MarkInfoUtil.transformToMarkInfo(projectSource));
        });
    });
    projectAutoMark(groupMap);
}
/**
 * Marks noise templates automatically: for each template the value of {@code field} in
 * its example record is compared with the project's title templates; on a match the
 * example receives the robot marker fields and the matched template's tag.
 *
 * @param templateFatherVos noise templates to check
 * @param group             project
 * @param field             name of the example field to compare
 * @return number of templates that were marked
 */
@Override
public int noiseAutoMark(Set<TemplateFatherVo> templateFatherVos, String group, String field) {
    Map<String, TemplateTitleVo> template = templateTitleService.getTemplateTitleByProject(group);
    int marked = 0;
    for (TemplateFatherVo fatherVo : templateFatherVos) {
        String candidateTitle = fatherVo.getExample().getString(field);
        Map<String, Object> bestMatch = similarMapInfo(template, candidateTitle, group);
        if (!Tools.isEmpty(bestMatch)) {
            autoInfo(fatherVo.getExample());
            // Copy the tag of the matched template onto the example.
            String matchedTitle = String.valueOf(bestMatch.get("aggreeTitle"));
            TemplateTitleVo matchedTemplate = template.get(matchedTitle);
            fatherVo.getExample().put(GenericAttribute.ES_M_TAG, matchedTemplate.getMtag());
            marked++;
        }
    }
    return marked;
}
/**
 * Runs automatic marking for event data of one project; any failure is logged and swallowed.
 *
 * @param group project
 * @param data  records to mark
 */
@Override
public void autMarkByEvent(String group, List<MarkInfo> data) {
    try {
        Map<String, TemplateTitleVo> templates = templateTitleService.getTemplateTitleByProject(group);
        asyncTitleMark(group, data, templates);
    } catch (Exception e) {
        log.error("事件自动标注出错group:{},:", group, e);
    }
}
/**
 * Runs automatic marking project by project; a failure in one project is logged and does
 * not stop the others.
 *
 * @param groupMap records grouped by project name
 */
private void projectAutoMark(Map<String, List<MarkInfo>> groupMap) {
    groupMap.forEach((project, markInfos) -> {
        try {
            Map<String, TemplateTitleVo> template = templateTitleService.getTemplateTitleByProject(project);
            asyncTitleMark(project, markInfos, template);
        } catch (Exception e) {
            log.error("自动标注处理失败,项目:{}", project, e);
        }
    });
}
/**
 * Splits the records into batches of 1000, marks the batches in parallel on the marking
 * executor and waits until all batches have finished.
 * <p>
 * Waiting on the combined future is sufficient: it completes when every batch has
 * completed and fails if any batch failed.
 *
 * @param group      project
 * @param markInfos  records to mark
 * @param titleVoMap templates of the project
 * @throws Exception if waiting is interrupted or a batch failed
 */
private void asyncTitleMark(String group, List<MarkInfo> markInfos, Map<String, TemplateTitleVo> titleVoMap) throws Exception {
    List<List<MarkInfo>> splitList = Tools.spilt(markInfos, 1000);
    emptyTemplate(titleVoMap);
    List<CompletableFuture<Void>> futures = splitList.stream()
            .map(e -> CompletableFuture.runAsync(() -> oneTitleMark(group, e, titleVoMap), autoMarkExecutor)).collect(Collectors.toList());
    CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).get();
}
/**
 * Marks one batch of records: each record whose title is longer than 6 characters is
 * compared with the project's templates; on a match the record receives the template's
 * tag plus the robot marker fields, and the template's usage is recorded.
 * <p>
 * The matched title is read under the key {@code "aggreeTitle"}, which is the key
 * similarMapInfo writes.
 *
 * @param group      project
 * @param markInfos  records of the batch
 * @param titleVoMap templates of the project
 */
private void oneTitleMark(String group, List<MarkInfo> markInfos, Map<String, TemplateTitleVo> titleVoMap) {
    for (MarkInfo markInfo : markInfos) {
        JSONObject sourceObj = markInfo.getSourceObj();
        String rawTitle = sourceObj.getString(GenericAttribute.ES_TITLE);
        // Titles of 6 characters or fewer are not marked.
        if (null == rawTitle || rawTitle.length() <= 6) {
            continue;
        }
        String title = Tools.filterSymbol(rawTitle);
        Map<String, Object> similarMap = similarMapInfo(titleVoMap, title, group);
        if (similarMap.isEmpty()) {
            continue;
        }
        String aggreTitle = String.valueOf(similarMap.get("aggreeTitle"));
        TemplateTitleVo templateTitleVo = titleVoMap.get(aggreTitle);
        if (null == templateTitleVo) {
            log.warn("template not found for matched title:{}, group:{}", aggreTitle, group);
            continue;
        }
        // Fill in the marking fields.
        String aggreTag = templateTitleVo.getMtag();
        sourceObj.put(GenericAttribute.ES_M_TAG, aggreTag);
        sourceObj.put(GenericAttribute.ES_M_PERSON, "自动化机器人");
        sourceObj.put(GenericAttribute.ES_M_TIME, new Date().getTime());
        log.info("模板标题:{} MarkSum:{} Tag:{}被标注标题:{}相似度:{}", aggreTitle, templateTitleVo.getMarkSum(), aggreTag,
                title, similarMap.get("similar"));
        // Refresh the template's mark count/time and record the feature value.
        try {
            String[] updates = dubboHandler.getMupdates(markInfo.filterInfo());
            templateTitleVo.refreshMark();
            templateTitleService.insertTemplateRecord(new TemplateRecord(templateTitleVo.getId(), updates[0]));
            templateNumQueue.put(new TemplateNum(templateTitleVo.getTemplateTitle(), group));
        } catch (Exception e) {
            log.error("记录事件采集-标注数据特征值失败", e);
        }
    }
}
/**
 * Finds the template whose title is most similar to {@code title}.
 * Templates without an id get {@code group} assigned as id; templates that are reset or
 * have an empty tag are ignored.
 *
 * @return an empty map when no template reaches {@code GenericAttribute.SIMILAR_STANDARD};
 *         otherwise a map with exactly the keys {@code "similar"} (Double score) and
 *         {@code "aggreeTitle"} (template title) - readers must use these exact spellings
 */
private Map<String, Object> similarMapInfo(Map<String, TemplateTitleVo> titleVoMap, String title, String group) {
    boolean found = false;
    double bestScore = 0.0;
    String bestTitle = null;
    for (TemplateTitleVo candidate : titleVoMap.values()) {
        if (Objects.isNull(candidate.getId())) {
            candidate.setId(group);
        }
        // Skip templates that have been reset or carry no tag.
        boolean reset = candidate.getStatus() == TemplateStatus.已重置;
        if (reset || Tools.isEmpty(candidate.getMtag())) {
            continue;
        }
        String candidateTitle = candidate.getTemplateTitle();
        double score = CosineSimilarity.calculateTextSimWithBrand(candidateTitle, title);
        // Keep the highest score that reaches the similarity threshold.
        if (score >= GenericAttribute.SIMILAR_STANDARD && score > bestScore) {
            found = true;
            bestScore = score;
            bestTitle = candidateTitle;
        }
    }
    Map<String, Object> best = new HashMap<>();
    if (found) {
        best.put("similar", bestScore);
        best.put("aggreeTitle", bestTitle);
    }
    return best;
}
/** Stamps the record with the robot marker name and the current time in epoch milliseconds. */
private void autoInfo(JSONObject json) {
    long markedAt = System.currentTimeMillis();
    json.put(GenericAttribute.ES_M_PERSON, "自动化机器人");
    json.put(GenericAttribute.ES_M_TIME, markedAt);
}
/**
 * Resets the counter of every template by calling {@code emptyNum()} on it.
 *
 * @param titleVoMap template set
 */
private void emptyTemplate(Map<String, TemplateTitleVo> titleVoMap) {
    titleVoMap.values().forEach(TemplateTitleVo::emptyNum);
}
}
package com.zhiwei.middleware.automatic.server.service.impl;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB.TypeB;
import com.zhiwei.base.filter.FilterInfo;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.AggreInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.AggreeTaskType;
import com.zhiwei.middleware.automatic.server.pojo.enums.Fields;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.AutoService;
import com.zhiwei.middleware.automatic.server.service.handler.BaseTaskHandler;
import com.zhiwei.middleware.automatic.server.util.CosineSimilarity;
import com.zhiwei.middleware.automatic.server.util.DataCollectionUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import com.zhiwei.middleware.mark.vo.QueryResult;
import com.zhiwei.nlp.AggreeBootStarter;
import com.zhiwei.nlp.vo.KResult;
import io.micrometer.core.instrument.util.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.redisson.api.RFuture;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Component;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import static com.zhiwei.middleware.automatic.server.config.GenericAttribute.SON_ID;
import static com.zhiwei.middleware.automatic.server.pojo.enums.Fields.getFields;
import static com.zhiwei.middleware.automatic.server.util.Tools.assembleKey;
import static com.zhiwei.middleware.automatic.server.util.Tools.cutKeyword;
/**
 * @ClassName: DataCollection
 * @Description: Data collection
 * @author SJJ
 * @date 2020-04-08 18:33:15
 */
@Component
public class DataCollection extends BaseTaskHandler {
private static final Logger logger = LogManager.getLogger(DataCollection.class);
private static final String VAILD = "VAILD";
private static final String NOISE = "NOISE";
private static final String STATUS = "STATUS";
private static final double SIMILAR_STANDARD = 0.8;
/* 启动聚合处理线程池 */
private final ThreadPoolTaskExecutor START_SERVICE;
private final AutoService autoService;
private final DubboHandler dubboHandler;
private final RedissonUtil redissonUtil;
private final TypeB[] typeBs = new TypeB[] { TypeB.COMPLETE, TypeB.INCOMPLETE, TypeB.QA, TypeB.VIDEO };
/**
 * Wires the collaborators and registers this handler with task type {@code AggreeTaskType.DATA}.
 *
 * @param aggreeNoiseExecutor the bean named "aggreeNoiseExecutor"; kept as {@code START_SERVICE}
 *                            and used to run aggregation, insertion and batch noise moves
 */
public DataCollection(AutoService autoService, DubboHandler dubboHandler, RedissonUtil redissonUtil,
@Qualifier("aggreeNoiseExecutor")ThreadPoolTaskExecutor aggreeNoiseExecutor) {
super(redissonUtil, AggreeTaskType.DATA);
this.autoService = autoService;
this.dubboHandler = dubboHandler;
this.redissonUtil = redissonUtil;
this.START_SERVICE = aggreeNoiseExecutor;
}
/**
 * Removes the task info and, for every supported {@code TypeB}, deletes the valid set,
 * its status entry and the noise set, then waits for all deletions to finish.
 * Failures are logged and not propagated.
 */
public void cleanCache(String group, String id) {
    try {
        long startedAt = System.currentTimeMillis();
        List<RFuture<Boolean>> futureList = new ArrayList<>();
        removerInfo(group, id);
        for (TypeB typeB : typeBs) {
            futureList.addAll(redissonUtil.deleteListMapByType(vaildKey(group, id, typeB)));
            futureList.add(redissonUtil.deleteByData(vaildStatusKey(group, id, typeB)));
            futureList.addAll(redissonUtil.deleteListMapByType(noiseKey(group, id, typeB)));
        }
        // Convert explicitly instead of storing RFuture objects into a CompletableFuture[],
        // which only works while the RFuture implementation happens to extend CompletableFuture.
        CompletableFuture<?>[] pending = futureList.stream()
                .map(RFuture::toCompletableFuture)
                .toArray(CompletableFuture[]::new);
        CompletableFuture.allOf(pending).get();
        logger.info("id:{}-清理缓存完毕!耗时:{}ms", id, System.currentTimeMillis() - startedAt);
    } catch (InterruptedException e) {
        // Preserve the interrupt for the owning thread.
        Thread.currentThread().interrupt();
        logger.error("id:{}-清理缓存出错,e", id, e);
    } catch (Exception e) {
        // A failed cleanup is an error, not routine information.
        logger.error("id:{}-清理缓存出错,e", id, e);
    }
}
/**
 * Same as {@code cleanCache} except that the noise set is cleared through
 * {@code deleteListMapRetainNoiseRuleByData}, i.e. the noise rules are retained.
 * Failures are logged and not propagated.
 */
public void cleanCacheExceptNoise(String group, String id) {
    try {
        long startedAt = System.currentTimeMillis();
        List<RFuture<Boolean>> futureList = new ArrayList<>();
        removerInfo(group, id);
        for (TypeB typeB : typeBs) {
            futureList.addAll(redissonUtil.deleteListMapByType(vaildKey(group, id, typeB)));
            futureList.add(redissonUtil.deleteByData(vaildStatusKey(group, id, typeB)));
            futureList.add(redissonUtil.deleteListMapRetainNoiseRuleByData(noiseKey(group, id, typeB)));
        }
        // Convert explicitly instead of storing RFuture objects into a CompletableFuture[],
        // which only works while the RFuture implementation happens to extend CompletableFuture.
        CompletableFuture<?>[] pending = futureList.stream()
                .map(RFuture::toCompletableFuture)
                .toArray(CompletableFuture[]::new);
        CompletableFuture.allOf(pending).get();
        logger.info("id:{}-清理缓存(保留噪音部分)完毕!耗时:{}ms", id, System.currentTimeMillis() - startedAt);
    } catch (InterruptedException e) {
        // Preserve the interrupt for the owning thread.
        Thread.currentThread().interrupt();
        logger.error("id:{}-清理缓存(保留噪音部分)完毕出错,e", id, e);
    } catch (Exception e) {
        // A failed cleanup is an error, not routine information.
        logger.error("id:{}-清理缓存(保留噪音部分)完毕出错,e", id, e);
    }
}
/**
 * Appends compressed source records to the task's data source.
 * A null or empty list is logged and ignored; any failure is logged and swallowed.
 */
public void addDataCollection(String group, String id, List<String> compressedlist) {
    try {
        boolean hasPayload = null != compressedlist && !compressedlist.isEmpty();
        if (hasPayload) {
            addDataSource(group, id, compressedlist);
        } else {
            logger.info("id:{}-传入数据源为null或empty!", id);
        }
    } catch (Exception ex) {
        logger.error("addDataCollection-", ex);
    }
}
/**
 * Starts the aggregation of the cached source data asynchronously on {@code START_SERVICE}.
 * Does nothing when a task for (group, id) is already registered or no source data is cached.
 * On failure the task is stored with {@code aggreFinshed == null}, which
 * {@code getAggreResultNow} reports as -2.
 *
 * @param group     project
 * @param id        task id
 * @param highWords keyword expression handed to {@code AggreeHandler} for hit-word statistics
 */
public void startAggree(String group, String id, String highWords) {
    logger.info("DEBUG-group:{},id:{}", group, id);
    START_SERVICE.execute(() -> {
        AggreInfo aggreInfo = new AggreInfo(false, false);
        try {
            if (null != getAggreeTask(group, id)) {
                logger.info("id:{}-正在聚合请等待...", id);
                return;
            }
            // Records with this c5 value are excluded from aggregation.
            final Long excludedC5 = 143657937L;
            // Load all source records; the comparison is null-safe so that a record without
            // a c5 field is kept instead of aborting the whole task with a NullPointerException.
            List<JSONObject> sourceList = getDataSource(group, id).stream()
                    .map(e -> JSONObject.parseObject(Tools.gunzip(e)))
                    .filter(json -> !excludedC5.equals(json.getLong("c5")))
                    .collect(Collectors.toList());
            if (sourceList.isEmpty()) {
                logger.info("id:{}-缓存数据源未准备!", id);
                return;
            }
            addAggreeTask(group, id, aggreInfo);
            logger.info("id:{}启动聚合...", id);
            AtomicInteger noiseTotal = new AtomicInteger();
            AtomicInteger markedTotal = new AtomicInteger();
            AtomicInteger titleFatherTotal = new AtomicInteger();
            // Process every data format (TypeB) separately.
            divid(sourceList).forEach((typeB, data) -> {
                AggreeHandler handler = new AggreeHandler(data, id, group, highWords);
                // Remove records that match the noise rules.
                int noiseCount = handler.reduceByNoise();
                // Aggregate.
                Map<TemplateFatherVo, List<JSONObject>> aggreMap = handler.aggree();
                // Look up already existing mark tags through the mark service.
                handler.checkHasMarked(aggreMap);
                // Auto-mark against the existing auto-mark collections.
                int markCount = handler.autoMarkByCurrentCollection(aggreMap.keySet());
                // Statistics.
                handler.statisticsCache(markCount, aggreMap.size());
                noiseTotal.addAndGet(noiseCount);
                markedTotal.addAndGet(markCount);
                titleFatherTotal.addAndGet(aggreMap.size());
                // Cache the aggregation result.
                redissonUtil.setListMapAllWithGzipCustom(vaildKey(group, id, typeB), aggreMap);
            });
            // Publish the final counters.
            aggreInfo.setAll(true, false, sourceList.size(), noiseTotal.get(), titleFatherTotal.get(),
                    markedTotal.get());
            addAggreeTask(group, id, aggreInfo);
            logger.info("id:{}-数据采集聚合完成 ", id);
        } catch (Exception e) {
            aggreInfo.setAggreFinshed(null);
            addAggreeTask(group, id, aggreInfo);
            logger.error("id:{}-数据采集聚合异常", id, e);
        }
    });
}
/**
 * Applies {@code modifyFatherTag} to the given father ids in order and stops at the first
 * id that fails; later ids are then left untouched.
 *
 * @return false for a null/empty id list or when a modification failed, true otherwise
 */
public boolean batchModifyFatherTag(String group, String id, List<String> fatherIds, String mtag, String mperson,
        TypeB typeB) {
    if (null == fatherIds || fatherIds.isEmpty()) {
        return false;
    }
    for (String fatherId : fatherIds) {
        if (!modifyFatherTag(group, id, fatherId, mtag, mperson, typeB)) {
            return false;
        }
    }
    return true;
}
/**
 * Rewrites tag, mark time and marker on the example record of one father template and
 * stores the template back.
 *
 * @return false only when an exception occurred; a missing father template is logged and
 *         reported as true
 */
public boolean modifyFatherTag(String group, String id, String fatherId, String mtag, String mperson, TypeB typeB) {
    try {
        String redisKey = vaildKey(group, id, typeB);
        TemplateFatherVo father = getTemplateFatherVo(redisKey, fatherId);
        if (null != father) {
            Fields fields = getFields(typeB);
            JSONObject example = father.getExample();
            String previousTag = example.getString(fields.mtag);
            String title = example.getString(fields.title);
            example.put(fields.mtag, mtag);
            example.put(fields.mtime, System.currentTimeMillis());
            example.put(fields.mperson, mperson);
            redissonUtil.replaceListMapKeySetByScore(redisKey, fatherId, father);
            logger.info("id:{}-修改父模板标题{}条,oldMtag:{},newMtag:{},模板标题:{},标注人:{}", id, father.getTotalSon() + 1,
                    previousTag, mtag, title, mperson);
        } else {
            logger.info("id:{},fatherId:{}-父聚模板不存在!", id, fatherId);
        }
        return true;
    } catch (Exception ex) {
        logger.error("modifyFatherTag-", ex);
        return false;
    }
}
/**
 * Gives one child record a tag that differs from its father's tag by detaching it from the
 * father's collection and creating a new father template for it.
 *
 * @return false only when an exception occurred; "father not found", "tag equals father tag"
 *         and "child not found" are logged and reported as true
 */
public boolean modifySonTag(String group, String id, String fatherId, String sonId, String mtag, String mperson,
TypeB typeB) {
try {
String vaildKey = vaildKey(group, id, typeB);
TemplateFatherVo templateFatherVo = getTemplateFatherVo(vaildKey, fatherId);
List<JSONObject> list = redissonUtil.getListMapValueByFieldWithGunZipByData(vaildKey, fatherId, JSONObject.class);
if (null == templateFatherVo || list.isEmpty()) {
logger.info("id:{},fatherId:{}-父聚合集不存在!", id, fatherId);
return true;
}
Fields fields = getFields(typeB);
// The father's tag is the reference the new tag is compared against.
JSONObject templateVo = templateFatherVo.getExample();
String standardMtag = templateVo.getString(fields.mtag);
if (mtag.equals(standardMtag)) {
logger.info("id:{}-待修改子标签与父标签一致!mtag:{}", id, mtag);
return true;
}
JSONObject hitJSON = null;
for (int i = 0; i < list.size(); i++) {
JSONObject json = list.get(i);
// Locate the child by its sonId, re-tag it and take it out of the father's list.
if (sonId.equals(json.getString(SON_ID))) {
json.put(fields.mtag, mtag);
json.put(fields.mtime, System.currentTimeMillis());
json.put(fields.mperson, mperson);
hitJSON = json;
list.remove(i);
break;
}
}
if (null != hitJSON) {
JSONObject status = getStatus(vaildStatusKey(group, id, typeB));
// NOTE(review): a missing status entry or counter makes the next lines throw, which the
// catch below turns into a "false" result before anything is written - confirm intended.
Integer titleFatherCount = status.getInteger(AggreeHandler.TITLE_FATHER_COUNT);
// Create the new father template; its id is the current father count + 1.
TemplateFatherVo fatherVo = getNewInstance(hitJSON, fields, 1, String.valueOf(titleFatherCount + 1));
// Add the key mapping of the new father.
redissonUtil.addListMapKeyByData(vaildKey, fatherVo.getFatherId(), fatherVo);
// Add the value mapping (the example record) of the new father.
redissonUtil.addListMapValueOneByData(vaildKey, fatherVo.getFatherId(), fatherVo.getExample());
// Update the status: father count + 1, total count unchanged (listSize 0).
statusIncrease(vaildStatusKey(group, id, typeB), status, 0);
// Update the old father's key entry.
redissonUtil.reduceListMapKeyByScoreCustomByData(vaildKey, fatherId);
// Store the old father's list without the detached child.
redissonUtil.setListMapValueByFieldWithGZipByData(vaildKey, fatherId, list);
logger.info("id:{}-修改子模板标题并新建父集:{},oldMtag:{},newMtag:{}", id, hitJSON.getString(fields.title),
standardMtag, mtag);
} else {
logger.info("id:{}-未找到子模板标题,fatherId:{},sonId:{}", id, fatherId, sonId);
}
} catch (Exception e) {
logger.error("modifySonTag-", e);
return false;
}
return true;
}
/**
 * Moves several father templates into the noise set, one asynchronous task per father id on
 * {@code START_SERVICE}, and waits for all of them.
 *
 * @return true only when every individual move reported success; false when any move failed,
 *         {@code fatherIds} is null, or waiting failed
 */
public boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, TypeB typeB) {
    if (null == fatherIds) {
        logger.error("batchThrowIntoNoise-fatherIds is null, id:{}", id);
        return false;
    }
    try {
        // NOTE(review): each task updates the same two status entries through a non-atomic
        // read-modify-write (statusReduce / statusIncrease); concurrent tasks may lose counter
        // updates unless the executor serialises them - confirm.
        List<CompletableFuture<Boolean>> results = fatherIds.stream()
                .map(fatherId -> CompletableFuture.supplyAsync(() -> throwIntoNoise(group, id, fatherId, typeB), START_SERVICE))
                .collect(Collectors.toList());
        CompletableFuture.allOf(results.toArray(new CompletableFuture[0])).join();
        logger.info("id:{}-fatherIds:{},typeB:{}-丢入噪音集", id, fatherIds, typeB);
        // Previously the per-item results were discarded and the batch always reported success.
        return results.stream().allMatch(CompletableFuture::join);
    } catch (Exception e) {
        logger.error("batchThrowIntoNoise-", e);
        return false;
    }
}
/**
 * Moves one father template from the valid set to the noise set.
 *
 * @return false only when an exception occurred
 */
public boolean throwIntoNoise(String group, String id, String fatherId, TypeB typeB) {
    try {
        String validKey = vaildKey(group, id, typeB);
        boolean moved = vaildNoiseChange(fatherId, validKey, validKey, noiseKey(group, id, typeB),
                vaildStatusKey(group, id, typeB), noiseStatusKey(group, id, typeB));
        if (moved) {
            logger.info("id:{}-fatherId:{},typeB:{}-丢入噪音集", id, fatherId, typeB);
        }
        return true;
    } catch (Exception ex) {
        logger.error("throwIntoNoise-", ex);
        return false;
    }
}
/**
 * Moves one father template from the noise set back to the valid set.
 * NOTE(review): the existence check inside {@code vaildNoiseChange} reads from the valid key
 * even in this direction - confirm the entry is still readable there after it was moved.
 *
 * @return false only when an exception occurred
 */
public boolean restoreFromNoise(String group, String id, String fatherId, TypeB typeB) {
    try {
        String validKey = vaildKey(group, id, typeB);
        boolean moved = vaildNoiseChange(fatherId, validKey, noiseKey(group, id, typeB), validKey,
                noiseStatusKey(group, id, typeB), vaildStatusKey(group, id, typeB));
        if (moved) {
            logger.info("id:{}-fatherId:{},typeB:{}-从噪音集移除", id, fatherId, typeB);
        }
        return true;
    } catch (Exception ex) {
        logger.error("restoreFromNoise-", ex);
        return false;
    }
}
/** Returns one page of father templates from the valid set together with its statistics. */
public Map<String, Object> getFatherTitles(String group, String id, int page, int size, boolean isAsc,
        String keyword, TypeB typeB, boolean isTitle, int markFlag) {
    String dataKey = vaildKey(group, id, typeB);
    String statusKey = vaildStatusKey(group, id, typeB);
    return getFatherTitles(dataKey, statusKey, page, size, isAsc, keyword, isTitle, markFlag);
}
/** Returns one page of child records of a father template from the valid set. */
public Map<String, Object> getSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc,
        String keyword, TypeB typeB) {
    String dataKey = vaildKey(group, id, typeB);
    return getSonTitles(dataKey, fatherId, page, size, isAsc, keyword, typeB);
}
/** Returns one page of father templates from the noise set together with its statistics. */
public Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size, boolean isAsc,
        String keyword, TypeB typeB, boolean isTitle, int markFlag) {
    String dataKey = noiseKey(group, id, typeB);
    String statusKey = noiseStatusKey(group, id, typeB);
    return getFatherTitles(dataKey, statusKey, page, size, isAsc, keyword, isTitle, markFlag);
}
/**
 * Returns one page of child records of a father template stored in the noise set.
 */
public Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId, int page, int size,
        boolean isAsc, String keyword, TypeB typeB) {
    // Query the noise key, mirroring getNoiseFatherTitles. The previous delegation to the
    // public getSonTitles overload read the valid set instead of the noise set.
    return getSonTitles(noiseKey(group, id, typeB), fatherId, page, size, isAsc, keyword, typeB);
}
/**
 * Asynchronously writes every tagged aggregation group of the task into the mark store and
 * then flags the task as inserted. When the task has not been aggregated yet, a placeholder
 * task with null flags is stored instead; an already inserted task is left alone.
 */
public void checkedThenInsert(String group, String id) {
    START_SERVICE.execute(() -> {
        try {
            AggreInfo taskInfo = getAggreeTask(group, id);
            if (null == taskInfo) {
                logger.info("id:{}-尚未聚合,无法入库!", id);
                // Store a task whose flags are null.
                addAggreeTask(group, id, new AggreInfo(null, null));
                return;
            }
            if (Boolean.TRUE.equals(taskInfo.isInserted())) {
                logger.info("id:{}-已经入库完毕!", id);
                return;
            }
            int markedTotal = 0;
            for (TypeB typeB : typeBs) {
                String redisKey = vaildKey(group, id, typeB);
                Fields fields = getFields(typeB);
                List<TemplateFatherVo> fathers = redissonUtil.getScoredSortedList(redisKey, TemplateFatherVo.class);
                for (TemplateFatherVo father : fathers) {
                    JSONObject example = father.getExample();
                    List<JSONObject> members = redissonUtil.getListMapValueByFieldWithGunZipByData(redisKey,
                            father.getFatherId(), JSONObject.class);
                    // The example record is upserted together with the father's stored list.
                    members.add(example);
                    if (DataCollectionUtil.hasTag(father, fields)) {
                        markedTotal += dubboHandler.dataCollectionUpsert(members, group,
                                example.getString(fields.mtag), example.getString(fields.mperson));
                    }
                    if (StringUtils.isEmpty(example.getString(fields.mperson))) {
                        logger.info("id:{}-父标题:{}缺失mperson", id, father.getTitle());
                    }
                }
            }
            // Flag the task as inserted.
            taskInfo.setInserted(true);
            addAggreeTask(group, id, taskInfo);
            logger.info("id:{}-入库完毕,共入标注库{}条", id, markedTotal);
        } catch (Exception ex) {
            logger.error("id:{}-入库异常", id, ex);
        }
    });
}
/**
 * Reports the aggregation state of a task.
 *
 * @return -1 no task registered, -2 task flag is null or the lookup failed,
 *         0 aggregation not finished, 1 aggregation finished
 */
public int getAggreResultNow(String group, String id) {
    try {
        AggreInfo taskInfo = getAggreeTask(group, id);
        if (null == taskInfo) {
            return -1;
        }
        Boolean finished = taskInfo.isAggreFinshed();
        if (null == finished) {
            logger.info("id:{},聚合结果返回错误状态-2,info:{}", id, JSON.toJSONString(taskInfo));
            return -2;
        }
        return finished ? 1 : 0;
    } catch (Exception ex) {
        logger.error("id:{},getAggreResultNow-", id, ex);
        return -2;
    }
}
/**
 * Reports the insertion state of a task.
 *
 * @return -1 no task registered, -2 task flag is null or the lookup failed,
 *         0 not inserted yet, 1 inserted
 */
public int getInsertResultNow(String group, String id) {
    try {
        AggreInfo taskInfo = getAggreeTask(group, id);
        if (null == taskInfo) {
            return -1;
        }
        Boolean inserted = taskInfo.isInserted();
        if (null == inserted) {
            logger.info("id:{},入库结果返回错误状态-2,info:{}", id, JSON.toJSONString(taskInfo));
            return -2;
        }
        return inserted ? 1 : 0;
    } catch (Exception ex) {
        logger.error("id:{},getInsertResultNow-", id, ex);
        return -2;
    }
}
/** Loads the father template stored under {@code key} for the given father id. */
private TemplateFatherVo getTemplateFatherVo(String key, String fatherId) {
    TemplateFatherVo father = redissonUtil.getListMapKeySetByScore(key, fatherId, TemplateFatherVo.class);
    return father;
}
/** Loads the statistics object stored under the given status key. */
private JSONObject getStatus(String statuskey) {
    JSONObject status = redissonUtil.getStrByData(statuskey, JSONObject.class);
    return status;
}
/**
 * Builds a father template around a single record: the record becomes the example, the
 * son count is 0 and the record's hit-word list is moved from the record onto the template.
 */
@SuppressWarnings("unchecked")
private TemplateFatherVo getNewInstance(JSONObject json, Fields fields, Integer size, String fatherId) {
    String title = json.getString(fields.title);
    String content = json.getString(fields.content);
    TemplateFatherVo created = new TemplateFatherVo(title, content);
    created.setExample(json);
    created.setFatherId(fatherId);
    redissonUtil.generateScore(created, size);
    created.setTotalSon(0);
    // Removing the entry detaches the hit-word list from the record itself.
    List<Map<String, Integer>> hitWords = (List<Map<String, Integer>>) json.remove(AggreeHandler.HITWORD_RATE);
    created.setHitWordAndRate(hitWords);
    return created;
}
/**
 * Transfers one father entry from {@code oldkey} to {@code newKey} and adjusts both status
 * entries by the number of records found under {@code vaildkey} for that father.
 *
 * @return false when no records are stored under {@code vaildkey} for the father, true after
 *         the transfer
 */
private boolean vaildNoiseChange(String fatherId, String vaildkey, String oldkey, String newKey,
        String oldStatusKey, String newStatusKey) throws Exception {
    List<JSONObject> members = redissonUtil.getListMapValueByFieldWithGunZipByData(vaildkey, fatherId,
            JSONObject.class);
    if (!members.isEmpty()) {
        // Move the entry, then shift the counters from the old status to the new one.
        redissonUtil.transferListMapKeySetFromOld2New(oldkey, newKey, fatherId);
        int moved = members.size();
        statusReduce(oldStatusKey, moved);
        statusIncrease(newStatusKey, moved);
        return true;
    }
    logger.info("噪音集移动-搜索数据为空,fatherId:{},key:{}", fatherId, vaildkey);
    return false;
}
/**
 * Loads one page of father templates from {@code key}.
 * Without keyword and with {@code markFlag == 0} the page is read directly by index range;
 * otherwise the whole sorted list is filtered first and paged afterwards.
 *
 * @param isAsc not used by this method
 * @return a map that always holds {@code "status"}; on success also {@code "totalSize"},
 *         {@code "data"} and {@code "statistics"}. Invalid paging arguments or any failure
 *         yield only {@code status=false}
 */
private Map<String, Object> getFatherTitles(String key, String statusKey, int page, int size, boolean isAsc,
        String keyword, boolean isTitle, int markFlag) {
    Map<String, Object> res = new HashMap<>();
    res.put("status", false);
    if (page <= 0 || size <= 0) {
        return res;
    }
    try {
        int total;
        List<TemplateFatherVo> pageItems;
        if (StringUtils.isEmpty(keyword) && markFlag == 0) {
            total = redissonUtil.getListMapKeySetSize(key);
            int fromIndex = (page - 1) * size;
            int toIndex = page * size - 1;
            pageItems = redissonUtil.getListMapKeySet(key, fromIndex, toIndex, TemplateFatherVo.class);
        } else {
            // The total is the size of the filtered list, so the unfiltered size is not queried.
            List<TemplateFatherVo> matched = DataCollectionUtil.fuzzyMatch(
                    redissonUtil.getScoredSortedList(key, TemplateFatherVo.class), keyword, isTitle, markFlag);
            total = matched.size();
            pageItems = DataCollectionUtil.getList(matched, page, size);
        }
        // Resolve everything that can fail before touching the result, so that a failure can
        // never leave status=true behind without the accompanying data.
        Object payload = Tools.bean2JSON(pageItems);
        JSONObject statistics = redissonUtil.getStrByData(statusKey, JSONObject.class);
        res.put("totalSize", total);
        res.put("data", payload);
        res.put("statistics", statistics);
        res.put("status", true);
    } catch (Exception e) {
        logger.error("getFatherTitles-", e);
    }
    return res;
}
/**
 * Loads one page of child records of a father entry. Index 0 of the stored list is skipped
 * and the reported total is reduced by one accordingly.
 *
 * @param isAsc   not used by this method
 * @param keyword not used by this method
 * @return a map with {@code "status"}; on success also {@code "totalSize"} and {@code "data"},
 *         plus {@code "mtag"} / {@code "mperson"} when the father template exists
 */
private Map<String, Object> getSonTitles(String key, String fieldKey, int page, int size, boolean isAsc,
        String keyword, TypeB typeB) {
    Map<String, Object> res = new HashMap<>(3);
    res.put("status", false);
    boolean validPaging = page > 0 && size > 0;
    if (!validPaging) {
        return res;
    }
    try {
        int stored = redissonUtil.getListMapValueByFieldSize(key, fieldKey);
        // Shift the window by one to skip the first stored record (the template example).
        int firstIndex = (page - 1) * size + 1;
        int lastIndex = page * size;
        List<JSONObject> sons = redissonUtil.getRangeListMapValueByFieldWithGunZip(key, fieldKey, firstIndex,
                lastIndex, JSONObject.class);
        TemplateFatherVo father = getTemplateFatherVo(key, fieldKey);
        if (father != null) {
            Fields fields = getFields(typeB);
            JSONObject example = father.getExample();
            String mtag = example.getString(fields.mtag);
            String mperson = example.getString(fields.mperson);
            String mgroup = example.getString(fields.mgroup);
            res.put("mtag", mtag);
            res.put("mperson", mperson);
            DataCollectionUtil.supplementForInsert(sons, mgroup, mtag, mperson);
        }
        int visibleTotal = stored > 0 ? stored - 1 : 0;
        res.put("totalSize", visibleTotal);
        res.put("status", true);
        res.put("data", sons);
    } catch (Exception ex) {
        logger.error("getSonTitles-", ex);
    }
    return res;
}
/** Key of the valid set for (group, id, typeB). */
private String vaildKey(String group, String id, TypeB typeB) {
    String typeName = typeB.name();
    return assembleKey(getKeyPrefix(), group, id, typeName, VAILD);
}
/** Key of the valid set's status entry: the valid key followed by "|STATUS". */
private String vaildStatusKey(String group, String id, TypeB typeB) {
    return vaildKey(group, id, typeB) + "|" + STATUS;
}
/** Key of the noise set for (group, id, typeB). */
private String noiseKey(String group, String id, TypeB typeB) {
    String typeName = typeB.name();
    return assembleKey(getKeyPrefix(), group, id, typeName, NOISE);
}
/** Key of the noise set's status entry: the noise key followed by "|STATUS". */
private String noiseStatusKey(String group, String id, TypeB typeB) {
    return noiseKey(group, id, typeB) + "|" + STATUS;
}
/** Groups the records by the {@code TypeB} that {@code Tools.getTypeB} reports for each of them. */
private static Map<TypeB, List<JSONObject>> divid(List<JSONObject> sourceList) {
    Map<TypeB, List<JSONObject>> grouped = new HashMap<>(4);
    for (JSONObject record : sourceList) {
        grouped.computeIfAbsent(Tools.getTypeB(record), type -> new ArrayList<>()).add(record);
    }
    return grouped;
}
/** Decrements the father count by one and the total count by {@code listSize}, then stores the status. */
private void statusReduce(String statusKey, int listSize) {
    JSONObject status = redissonUtil.getStrByData(statusKey, JSONObject.class);
    if (status == null) {
        status = new JSONObject();
    }
    int fathers = status.getIntValue(AggreeHandler.TITLE_FATHER_COUNT);
    status.put(AggreeHandler.TITLE_FATHER_COUNT, fathers - 1);
    int total = status.getIntValue(AggreeHandler.TOTAL_COUNT);
    status.put(AggreeHandler.TOTAL_COUNT, total - listSize);
    redissonUtil.setStrByData(statusKey, status);
}
/** Loads the status stored under {@code statusKey} and increments it, see the three-argument overload. */
private void statusIncrease(String statusKey, int listSize) {
    statusIncrease(statusKey, redissonUtil.getStrByData(statusKey, JSONObject.class), listSize);
}
/**
 * Increments the father count by one and the total count by {@code listSize} on the given
 * status (a new one when {@code json} is null), then stores it under {@code statusKey}.
 */
private void statusIncrease(String statusKey, JSONObject json, int listSize) {
    JSONObject status = json;
    if (status == null) {
        status = new JSONObject();
    }
    int fathers = status.getIntValue(AggreeHandler.TITLE_FATHER_COUNT);
    status.put(AggreeHandler.TITLE_FATHER_COUNT, fathers + 1);
    int total = status.getIntValue(AggreeHandler.TOTAL_COUNT);
    status.put(AggreeHandler.TOTAL_COUNT, total + listSize);
    redissonUtil.setStrByData(statusKey, status);
}
class AggreeHandler {
public static final String HITWORD_RATE = "hitWordAndRate";
public static final String TOTAL_COUNT = "totalCount";
public static final String TITLE_FATHER_COUNT = "titleFatherCount";
public static final String MARK_COUNT = "markCount";
private final TypeB typeB;
private final Fields fields;
private final String id;
private final String group;
private final List<List<String>> highWordList;
private List<JSONObject> data;
/**
 * Creates a handler for one batch of records.
 * The {@code TypeB} (and with it the field names) is derived from the first record only.
 *
 * @param highWords keyword expression, split by {@code cutKeyword} into the hit-word groups
 * @throws IllegalArgumentException if {@code data} is null or empty
 */
public AggreeHandler(List<JSONObject> data, String id, String group, String highWords) {
if (null == data || data.isEmpty()) {
throw new IllegalArgumentException("data can not be null or empty!");
}
this.data = data;
this.id = id;
this.group = group;
this.highWordList = cutKeyword(highWords);
this.typeB = Tools.getTypeB(data.get(0));
this.fields = getFields(typeB);
}
/**
 * Removes the records that match the stored noise rules from this handler's data.
 * INCOMPLETE data is never filtered.
 *
 * @return the number of records removed
 */
public int reduceByNoise() {
    if (TypeB.INCOMPLETE == typeB) {
        logger.info("id:{}-【INCOMPLETE】-不做去噪处理,剩余聚合数据{}条", id, data.size());
        return 0;
    }
    int before = data.size();
    String noiseRedisKey = noiseKey(group, id, typeB);
    List<TemplateFatherVo> noiseRules = redissonUtil.getListMapNoiseRule(noiseRedisKey, TemplateFatherVo.class);
    data = reduceByNoiseTitles(noiseRules, data, SIMILAR_STANDARD, noiseRedisKey, redissonUtil, fields);
    int removed = before - data.size();
    logger.info("id:{}-【{}】-根据噪音规则移除源数据{}条,剩余聚合数据{}条", id, typeB.name(), removed, data.size());
    return removed;
}
/**
 * Auto-marks the given father templates through {@code autoService.noiseAutoMark}.
 * INCOMPLETE and QA data are never auto-marked.
 *
 * @return the number of hits reported by the auto-mark service, 0 when skipped
 */
public int autoMarkByCurrentCollection(Set<TemplateFatherVo> set) {
    boolean skipped = TypeB.INCOMPLETE == typeB || TypeB.QA == typeB;
    if (skipped) {
        logger.info("id:{}-【{}】-不做自动标注", id, typeB);
        return 0;
    }
    int hits = autoService.noiseAutoMark(set, group, fields.title);
    logger.info("id:{}-【{}】-根据现有自动标注聚合集命中{}条", id, typeB.name(), hits);
    return hits;
}
/**
 * Short-text aggregation: records with identical content form one group.
 *
 * @param res      receives one entry per group (father template -> member records)
 * @param fatherId running counter that supplies the father ids
 */
private void incompleteAggree(Map<TemplateFatherVo, List<JSONObject>> res, AtomicInteger fatherId) {
    List<String> contents = new ArrayList<>(data.size());
    for (JSONObject record : data) {
        contents.add(record.getString(fields.content));
    }
    Map<String, List<Integer>> clusters = incompleteTextAggree(contents);
    for (Map.Entry<String, List<Integer>> cluster : clusters.entrySet()) {
        TemplateFatherVo father = new TemplateFatherVo(null, cluster.getKey());
        List<JSONObject> members = new ArrayList<>();
        boolean first = true;
        for (Integer index : cluster.getValue()) {
            JSONObject record = data.get(index);
            // A missing flag counts as "not forwarded"; the last member's value wins.
            father.setForward(Boolean.TRUE.equals(record.getBoolean("is_forward")));
            List<Map<String, Integer>> hitWords = first
                    ? fatherVoInit(father, record, fatherId)
                    // every later member only gets its hit-word statistics
                    : getHitWordAndRate(record);
            record.put(AggreeHandler.HITWORD_RATE, hitWords);
            first = false;
            members.add(record);
        }
        res.put(father, members);
    }
}
/**
 * Groups list positions by identical text.
 *
 * @return text -> ascending indexes at which the text occurs; an empty map for null/empty input
 */
private Map<String, List<Integer>> incompleteTextAggree(List<String> list) {
    if (null == list || list.isEmpty()) {
        return Collections.emptyMap();
    }
    Map<String, List<Integer>> positions = new HashMap<>();
    int index = 0;
    for (String text : list) {
        positions.computeIfAbsent(text, key -> new ArrayList<>()).add(index);
        index++;
    }
    return positions;
}
/**
 * Splits the source records into valid records and noise. A record is noise when its
 * symbol-filtered title reaches {@code cosFreq} similarity with any noise title; noise records
 * are appended to the noise set under {@code noiseKey}, grouped by the first title they hit.
 *
 * @return the records that matched no noise title; {@code sourceList} itself when there are
 *         no noise titles
 */
private List<JSONObject> reduceByNoiseTitles(List<TemplateFatherVo> noiseTitles, List<JSONObject> sourceList,
        double cosFreq, String noiseKey, RedissonUtil redissonUtil, Fields fields) {
    // No noise rules generated yet.
    if (null == noiseTitles || noiseTitles.isEmpty()) {
        return sourceList;
    }
    // Filter the symbols of every noise title once instead of once per source record.
    List<String> rawNoiseTitles = new ArrayList<>(noiseTitles.size());
    List<String> cleanedNoiseTitles = new ArrayList<>(noiseTitles.size());
    for (TemplateFatherVo rule : noiseTitles) {
        String noiseTitle = rule.getTitle();
        if (null != noiseTitle) {
            rawNoiseTitles.add(noiseTitle);
            cleanedNoiseTitles.add(Tools.filterSymbol(noiseTitle));
        }
    }
    List<JSONObject> vaildList = new ArrayList<>();
    Map<String, List<JSONObject>> newNoiseMap = new HashMap<>();
    for (JSONObject json : sourceList) {
        String title = Tools.filterSymbol(json.getString(fields.title));
        String hitTitle = null;
        for (int i = 0; i < cleanedNoiseTitles.size(); i++) {
            // The first noise title that reaches the threshold classifies the record.
            if (CosineSimilarity.calculateTextSimWithBrand(title, cleanedNoiseTitles.get(i)) >= cosFreq) {
                hitTitle = rawNoiseTitles.get(i);
                break;
            }
        }
        if (null == hitTitle) {
            vaildList.add(json);
        } else {
            newNoiseMap.computeIfAbsent(hitTitle, key -> new ArrayList<>()).add(json);
        }
    }
    // Append the newly found noise records to the noise set.
    redissonUtil.addListMapWithGzip(noiseKey, newNoiseMap);
    return vaildList;
}
/**
 * Long-text aggregation: clusters the symbol-filtered titles with
 * {@code AggreeBootStarter.getKResult(titles, 0.1)}.
 *
 * @param res      receives one entry per cluster (father template -> member records)
 * @param fatherId running counter that supplies the father ids
 */
private void completeAggree(Map<TemplateFatherVo, List<JSONObject>> res, AtomicInteger fatherId) {
    List<String> titles = new ArrayList<>(data.size());
    for (JSONObject record : data) {
        titles.add(Tools.filterSymbol(record.getString(fields.title)));
    }
    for (KResult<Integer> cluster : AggreeBootStarter.getKResult(titles, 0.1)) {
        TemplateFatherVo father = new TemplateFatherVo(cluster.getClusterName());
        List<JSONObject> members = new ArrayList<>();
        boolean first = true;
        for (Integer index : cluster.getDataPoints()) {
            JSONObject record = data.get(index);
            List<Map<String, Integer>> hitWords = first
                    ? fatherVoInit(father, record, fatherId)
                    // every later member only gets its hit-word statistics
                    : getHitWordAndRate(record);
            record.put(AggreeHandler.HITWORD_RATE, hitWords);
            first = false;
            members.add(record);
        }
        res.put(father, members);
    }
}
/**
 * Aggregates this handler's data: content-based grouping for INCOMPLETE data, title
 * clustering for everything else. Afterwards every group is sorted and gets its example.
 *
 * @return father template -> member records
 */
public Map<TemplateFatherVo, List<JSONObject>> aggree() {
    Map<TemplateFatherVo, List<JSONObject>> groups = new HashMap<>(data.size());
    AtomicInteger nextFatherId = new AtomicInteger(1);
    long startedAt = System.currentTimeMillis();
    if (TypeB.INCOMPLETE != typeB) {
        // data with titles
        completeAggree(groups, nextFatherId);
    } else {
        // data without titles
        incompleteAggree(groups, nextFatherId);
    }
    // The logged duration covers the grouping only, not the sorting below.
    long elapsed = System.currentTimeMillis() - startedAt;
    sortByTimeAndStatistics(groups);
    logger.info("id:{}-【{}】-聚合前{}条,聚合组后{}条,耗时:{}ms", id, typeB.name(), data.size(), groups.size(), elapsed);
    return groups;
}
/**
 * Stores the statistics of this batch (total, marked, father count) under the valid
 * status key.
 */
public void statisticsCache(int markCount, int titleFatherCount) {
    JSONObject statistics = new JSONObject(3);
    statistics.put(TOTAL_COUNT, data.size());
    statistics.put(MARK_COUNT, markCount);
    statistics.put(TITLE_FATHER_COUNT, titleFatherCount);
    String statusKey = vaildStatusKey(group, id, typeB);
    redissonUtil.setStrByData(statusKey, statistics);
}
/**
 * Copies an already existing tag and marker onto the example of every group for which the
 * per-list check finds one.
 *
 * @param aggreMap father template -> member records
 */
public void checkHasMarked(Map<TemplateFatherVo, List<JSONObject>> aggreMap) {
    long startedAt = System.currentTimeMillis();
    int hits = 0;
    for (Map.Entry<TemplateFatherVo, List<JSONObject>> group : aggreMap.entrySet()) {
        String[] existing = checkHasMarked(group.getValue());
        if (null == existing) {
            continue;
        }
        JSONObject example = group.getKey().getExample();
        example.put(fields.mtag, existing[0]);
        example.put(fields.mperson, existing[1]);
        hits++;
    }
    logger.info("id:{}-【{}】-校验已存在标注标签{}条,耗时:{}ms", id, typeB.name(), hits, (System.currentTimeMillis() - startedAt));
}
/**
 * Asks the mark service, in slices of 100 records, whether any of the records already
 * carries a tag and stops at the first hit.
 *
 * @return {@code {mtag, mperson}} of the first result with a non-empty tag, or null when none
 *         is found
 */
public String[] checkHasMarked(List<JSONObject> list) {
    final int cutLimit = 100;
    List<FilterInfo> filterList = DataCollectionUtil.changeJSONList2FilterInfoList(list, group, typeB);
    // Query the service slice by slice.
    for (int start = 0; start < filterList.size(); start += cutLimit) {
        int end = Math.min(start + cutLimit, filterList.size());
        Map<String, QueryResult> mutiMap = dubboHandler.matchQueryResult(filterList.subList(start, end));
        if (null == mutiMap) {
            logger.info("匹配标签异常,group:{},id:{}", group, id);
            continue;
        }
        for (QueryResult qResult : mutiMap.values()) {
            if (null == qResult) {
                continue;
            }
            String mtag = qResult.getMtag();
            // Only a non-empty tag is a hit. Previously an empty tag ended the scan of the
            // slice, hiding usable tags that came after it.
            if (null != mtag && !"".equals(mtag)) {
                logger.info("命中匹配标签,group:{},QueryResult:{}", group, JSON.toJSONString(qResult));
                return new String[] { mtag, qResult.getMperson() };
            }
        }
    }
    return null;
}
/**
 * Sorts every group by the {@code "time"} field ascending, numbers the members with their
 * position as son id and then makes the earliest member the father's example.
 */
private void sortByTimeAndStatistics(Map<TemplateFatherVo, List<JSONObject>> aggreMap) {
    Comparator<JSONObject> byPublishTime = Comparator.comparingLong((JSONObject item) -> item.getLong("time"));
    for (List<JSONObject> members : aggreMap.values()) {
        members.sort(byPublishTime);
        // The son id is the position after sorting.
        int position = 0;
        for (JSONObject member : members) {
            member.put(SON_ID, position++);
        }
    }
    // Set the template example.
    for (Map.Entry<TemplateFatherVo, List<JSONObject>> group : aggreMap.entrySet()) {
        JSONObject earliest = group.getValue().get(0);
        TemplateFatherVo father = group.getKey();
        father.setContent(earliest.getString(fields.content));
        father.setExample(earliest);
    }
}
/**
 * Assigns the next father id and the record's hit-word statistics to the father template.
 *
 * @return a deep copy of the statistics, so that the record and the template do not share
 *         one list (avoids a circular reference)
 */
private List<Map<String, Integer>> fatherVoInit(TemplateFatherVo fatherVo, JSONObject json, AtomicInteger fatherId) {
    List<Map<String, Integer>> statistics = getHitWordAndRate(json);
    int assignedId = fatherId.getAndIncrement();
    fatherVo.setFatherId(String.valueOf(assignedId));
    fatherVo.setHitWordAndRate(statistics);
    return Tools.deepCopyByJson(statistics, List.class);
}
/**
 * Returns the hit-word statistics of a record: the list already stored on the record (which
 * is removed from it) or, when none is stored, a fresh match of title + content against the
 * keyword groups.
 */
@SuppressWarnings("unchecked")
private List<Map<String, Integer>> getHitWordAndRate(JSONObject json) {
    // An explicit type check replaces the former empty catch of ClassCastException.
    // A stored value that is not a list (including null) is dropped and recomputed.
    Object cached = json.remove(AggreeHandler.HITWORD_RATE);
    if (cached instanceof List) {
        return (List<Map<String, Integer>>) cached;
    }
    // NOTE(review): a missing title or content is concatenated as the text "null" - confirm
    // that this cannot produce false keyword hits.
    return highWordMatch(json.getString(fields.title) + json.getString(fields.content));
}
/**
 * Matches the text against every keyword group. A group hits only when all of its words
 * occur; its rate is the lowest rate among its words.
 *
 * @return one single-entry map ("word1 word2 ..." -> rate) per hit group, highest rate first
 */
private List<Map<String, Integer>> highWordMatch(String text) {
    List<Map<String, Integer>> hits = new ArrayList<>();
    for (List<String> words : highWordList) {
        int lowest = 0;
        for (String word : words) {
            int wordRate = DataCollectionUtil.calculateRate(word, text);
            if (wordRate == 0) {
                // one word missing: the whole group does not hit
                lowest = 0;
                break;
            }
            lowest = (lowest == 0) ? wordRate : Math.min(lowest, wordRate);
        }
        if (lowest > 0) {
            Map<String, Integer> hit = new HashMap<>(1);
            hit.put(String.join(" ", words), lowest);
            hits.add(hit);
        }
    }
    // Descending by the single rate each map holds.
    hits.sort((left, right) -> Integer.compare(right.values().iterator().next(),
            left.values().iterator().next()));
    return hits;
}
}
}
package com.zhiwei.middleware.automatic.server.service.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.AggreInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.AggreeTaskType;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.AutoService;
import com.zhiwei.middleware.automatic.server.service.handler.BaseTaskHandler;
import com.zhiwei.middleware.automatic.server.service.handler.KafkaSendHandler;
import com.zhiwei.middleware.automatic.server.util.EventCollectionUtil;
import com.zhiwei.middleware.automatic.server.util.MarkInfoUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import com.zhiwei.nlp.AggreeBootStarter;
import com.zhiwei.nlp.vo.KResult;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.stream.Collectors;
@Service
public class EventCollectionMark extends BaseTaskHandler {
private static final Logger logger = LogManager.getLogger(EventCollectionMark.class);
private static final String AGGREGATION_SUFFIX = "aggregation";
private static final String NOISE_SUFFIX = "noise";
private final ThreadPoolTaskExecutor eventAggreeEasyExecutor;
private final ThreadPoolTaskExecutor eventAggreeExecutor;
private final RedissonUtil redissonUtil;
private final KafkaSendHandler kafkaSendHandler;
private final DubboHandler dubboHandler;
private final AutoService autoService;
/**
 * Creates the handler with its collaborators injected by Spring.
 * <p>
 * {@code redissonUtil} and {@code AggreeTaskType.EVENT} are passed on to the
 * {@code BaseTaskHandler} constructor; all arguments are additionally kept in fields.
 *
 * @param eventAggreeEasyExecutor executor bean named "eventAggreeEasyExecutor"
 * @param eventAggreeExecutor     executor bean named "eventAggreeExecutor"
 * @param redissonUtil            Redis access helper
 * @param kafkaSendHandler        Kafka sender
 * @param dubboHandler            Dubbo facade
 * @param autoService             auto-marking service
 */
public EventCollectionMark(@Qualifier("eventAggreeEasyExecutor") ThreadPoolTaskExecutor eventAggreeEasyExecutor,
@Qualifier("eventAggreeExecutor") ThreadPoolTaskExecutor eventAggreeExecutor,
RedissonUtil redissonUtil, KafkaSendHandler kafkaSendHandler,
DubboHandler dubboHandler, AutoService autoService) {
super(redissonUtil, AggreeTaskType.EVENT);
this.eventAggreeEasyExecutor = eventAggreeEasyExecutor;
this.eventAggreeExecutor = eventAggreeExecutor;
this.redissonUtil = redissonUtil;
this.kafkaSendHandler = kafkaSendHandler;
this.dubboHandler = dubboHandler;
this.autoService = autoService;
}
/**
 * Appends a batch of records to the aggregation source data of one event.
 * <p>
 * The payload is gunzipped and parsed into mark infos, records rejected by
 * {@code MarkInfoUtil.filterTitleNon} are dropped, required fields are supplemented, and the
 * remaining records are stored as JSON strings. Any failure is logged and swallowed.
 *
 * @param group     mark group
 * @param id        event id
 * @param sourceStr gzip-compressed JSON array of serialized mark infos
 */
public void addEventCollectionAggreeSourceList(String group, String id, String sourceStr) {
    final String index = group + "-" + id;
    try {
        // Decompress and parse the payload.
        String unzipped = Tools.gunzip(sourceStr);
        List<String> cachedMarkInfos = JSONObject.parseArray(unzipped, String.class);
        List<MarkInfo> received = Tools.getMarkInfos(cachedMarkInfos);
        final int receivedCount = received.size();

        // Drop malformed records and records without a title, then fill in required fields.
        List<MarkInfo> usable = MarkInfoUtil.filterTitleNon(received);
        EventCollectionUtil.supplementForMarkInfoList(usable, group, "自动化机器人");

        final int usableCount = usable.size();
        if (receivedCount != usableCount) {
            logger.warn("原添加数据集{}条,移除标题为空的数据后{}条", receivedCount, usableCount);
        }

        List<String> serialized = new ArrayList<>(usableCount);
        for (MarkInfo info : usable) {
            serialized.add(JSONObject.toJSONString(info));
        }
        addDataSource(group, id, serialized);
        logger.info("{}添加聚合集{}条", index, usable.size());
    } catch (Exception e) {
        logger.error("addEventCollectionAggreSourceList", e);
    }
}
/**
 * Returns one page of aggregated template titles for an event.
 * <p>
 * While the aggregation task is missing or unfinished the result carries
 * {@code status=false}. Otherwise the aggregation set is filtered by keyword, ordered by the
 * number of children, filtered by mark state and paged; the page is returned as gzip-compressed
 * JSON under "data".
 *
 * @param group    mark group
 * @param id       event id
 * @param page     page number handed to {@code Tools.listPagedQuery}
 * @param size     page size handed to {@code Tools.listPagedQuery}
 * @param isAsc    {@code true} to order by child count ascending, otherwise descending
 * @param markFlag -1 keeps every title, 1 keeps tagged titles only, 0 keeps untagged titles
 *                 only; any other value keeps nothing
 * @param keyword  fuzzy-match keyword
 * @return result map with "status", "data", "totalSize" (plus "detail" for non-empty pages),
 *         or {@code null} when an exception occurred
 */
public Map<String, Object> getEventCollectionAggreeTemplate(String group, String id, int page, int size,
        boolean isAsc, int markFlag, String keyword) {
    try {
        Map<String, Object> result = new HashMap<>();
        AggreInfo aggreInfo = getAggreeTask(group, id);
        if (null == aggreInfo || !aggreInfo.isAggreFinshed()) {
            logger.info("正在聚合请等待....");
            result.put("status", false);
            result.put("data", null);
            result.put("totalSize", 0);
            return result;
        }

        List<JSONObject> fathers = new ArrayList<>();
        Map<String, List<JSONObject>> aggreeMap =
                getAggreeMap(Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id));
        // Fuzzy match on the keyword.
        Map<String, List<JSONObject>> matched = EventCollectionUtil.fuzzyMatch(aggreeMap, keyword);
        if (null != matched) {
            Comparator<Object> byChildCount = Comparator.comparing(key -> matched.get(key).size());
            if (!isAsc) {
                byChildCount = byChildCount.reversed();
            }
            List<String> orderedTitles = new ArrayList<>(matched.keySet());
            orderedTitles.sort(byChildCount);

            for (String title : orderedTitles) {
                List<JSONObject> children = matched.get(title);
                List<MarkInfo> sons = MarkInfoUtil.transformToMarkInfo(children);
                String mtag = children.get(0).getString("mtag");

                JSONObject father = new JSONObject();
                father.put("title", title);
                father.put("mtag", mtag);
                father.put("sonList", sons);

                boolean tagged = null != mtag && !"".equals(mtag);
                boolean wanted = -1 == markFlag
                        || (1 == markFlag && tagged)
                        || (0 == markFlag && !tagged);
                if (wanted) {
                    fathers.add(father);
                }
            }

            if (!fathers.isEmpty()) {
                // Regular, non-empty answer.
                int totalSize = fathers.size();
                List<JSONObject> pageData = Tools.listPagedQuery(fathers, page, size);
                result.put("status", true);
                result.put("data", Tools.gzip(JSONObject.toJSONString(pageData)));
                result.put("totalSize", totalSize);
                result.put("detail", aggreInfo.getPrintString());
                return result;
            }
        }

        // Aggregation set is empty, or nothing is left after filtering.
        result.put("status", true);
        result.put("data", Tools.gzip(JSONObject.toJSONString(fathers)));
        result.put("totalSize", 0);
        return result;
    } catch (Exception e) {
        logger.error("{}初次获取事件采集聚合模板标题结果失败", group);
        logger.error("getEventCollectionAggreTemplate", e);
        return null;
    }
}
/**
 * Looks up the mark tag of a template title.
 * <p>
 * The tag is read from the first child stored under the template title.
 *
 * @param group         mark group
 * @param id            event id
 * @param templateTitle template title
 * @return the tag, an empty string when the first child has no tag, or {@code null} when the
 *         lookup failed (for example because the title has no children)
 */
public String getEventCollectionMarkTagByTemplate(String group, String id, String templateTitle) {
    try {
        List<JSONObject> children = getAggreeMapByField(group, id, templateTitle);
        String mtag = children.get(0).getString("mtag");
        if (null == mtag) {
            return "";
        }
        return mtag;
    } catch (Exception e) {
        logger.error("getEventCollectionMarkTagByTemplate", e);
        return null;
    }
}
/**
 * Returns the children aggregated under a template title.
 *
 * @param group         mark group
 * @param id            event id
 * @param templateTitle template title
 * @return map with "status" (whether the aggregation has finished) and "data" (the children
 *         converted to mark infos), or {@code null} when an exception occurred, which
 *         includes the case where no aggregation task exists for the event
 */
public Map<String, Object> getEventCollectionAggreeSubTitle(String group, String id, String templateTitle) {
    Map<String, Object> resMap = new HashMap<>();
    // Names the stage that was running, so the error log shows where the failure happened.
    String error = "isEventCollectionAggreeOkMap";
    try {
        AggreInfo aggreInfo = getAggreeTask(group, id);
        if (!aggreInfo.isAggreFinshed()) {
            logger.info("正在聚合请等待....");
            resMap.put("status", false);
        } else {
            resMap.put("status", true);
        }
        error = "eventCollectionAggreeMap";
        resMap.put("data", MarkInfoUtil.transformToMarkInfo(getAggreeMapByField(group, id, templateTitle)));
        return resMap;
    } catch (Exception e) {
        // Pass the exception as the trailing argument so the stack trace is logged too.
        logger.error("{}根据模板标题获取其子集标注聚合结果{}", group, error, e);
        return null;
    }
}
/**
 * Changes the tag of every child aggregated under a template title.
 * <p>
 * Children without a mark time additionally receive the robot as marker and the current time
 * as mark time. The modified aggregation set is written back to Redis.
 *
 * @param group         mark group
 * @param id            event id
 * @param templateTitle template title whose children are re-tagged
 * @param modifyTag     new tag
 * @return {@code true} when the title was found and updated; {@code false} when the
 *         aggregation set or the title is missing, or when an exception occurred
 */
public boolean modifyEventCollectionAggreeTitleMarkTag(String group, String id, String templateTitle,
        String modifyTag) {
    try {
        String aggregationKey = Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id);
        Map<String, List<JSONObject>> markAggreeMap = getAggreeMap(aggregationKey);
        if (null == markAggreeMap) {
            return false;
        }
        List<JSONObject> children = markAggreeMap.get(templateTitle);
        if (null == children) {
            return false;
        }

        String oldTag = children.get(0).getString("mtag");
        children.forEach(child -> {
            child.put("mtag", modifyTag);
            // Fill in mark time and marker when they are still missing.
            if (null == child.get("mtime")) {
                child.put("mperson", "自动化机器人");
                child.put("mtime", new Date().getTime());
            }
        });
        upsetAggreeResult(group, id, markAggreeMap, null, null);
        logger.info("修改模板标题:{}-tag成功,oldTag:{} modifyTag:{}", templateTitle, oldTag, modifyTag);
        return true;
    } catch (Exception e) {
        logger.error("modifyEventCollectionAggreTitleMarkTag", e);
        return false;
    }
}
/**
 * Starts the aggregation of an event unless it has already finished.
 * <p>
 * The work runs asynchronously on {@code eventAggreeExecutor}:
 * <ol>
 * <li>send all source records to the cleaning Kafka topic,</li>
 * <li>drop records that match the noise set,</li>
 * <li>auto-mark the remaining records against the existing auto-mark aggregation set,</li>
 * <li>cluster the remaining titles,</li>
 * <li>restore the clusters into the aggregation set and persist set and task state.</li>
 * </ol>
 * Failures inside the asynchronous task are logged; they never reach the caller.
 * <p>
 * NOTE(review): {@code synchronized} only serialises the submission. The task state is not
 * marked as "running", so two calls before the first task finishes both submit a task.
 *
 * @param group mark group
 * @param id    event id
 */
public synchronized void startAggree(String group, String id) {
    try {
        String index = group + "-" + id;
        AggreInfo tempVo = getAggreeTask(group, id);
        AggreInfo aggreInfo = null != tempVo ? tempVo : new AggreInfo(false, false);
        if (aggreInfo.isAggreFinshed()) {
            return;
        }
        logger.info("启动事件采集聚合 id:{}", id);
        eventAggreeExecutor.execute(() -> {
            // The outer try/catch only covers the submission, so the task logs its own failures.
            try {
                List<MarkInfo> originSourceList = getDataSource(group, id)
                        .stream().map(e -> JSONObject.parseObject(e).toJavaObject(MarkInfo.class)).collect(Collectors.toList());
                // 1. All records go to the cleaning Kafka topic.
                kafkaSendHandler.insertDataByMarkInfo(originSourceList);
                // 2. Shrink the source data using the noise set.
                List<MarkInfo> sourceList = EventCollectionUtil.reduceByNoise(
                        getAggreeMap(Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id)),
                        originSourceList, GenericAttribute.SIMILAR_STANDARD);
                int noiseCount = originSourceList.size() - sourceList.size();
                logger.info("index:{} 根据噪音集移除源数据{}条", index, noiseCount);
                // 3. Aggregate and auto-mark against the existing auto-mark aggregation set.
                autoService.autMarkByEvent(group, sourceList);
                // 4. Cluster the titles of the remaining records.
                List<String> sourceTitles = sourceList.stream()
                        .map(info -> Tools.filterSymbol(info.getSourceObj().getString("title")))
                        .collect(Collectors.toList());
                List<KResult<Integer>> kResult = AggreeBootStarter.getKResult(sourceTitles, 0.1);
                // 5. Restore the entities into the aggregation set.
                Map<String, List<JSONObject>> aggreeMap =
                        getAggreeMap(Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id));
                if (null == aggreeMap) {
                    // No stored set yet: start from an empty one instead of failing on null.
                    aggreeMap = new HashMap<>();
                }
                int autoMaticMarkCount = EventCollectionUtil.restoreForAggreeTitleMap(kResult, aggreeMap,
                        sourceList);
                aggreInfo.setAll(true, originSourceList.size(), noiseCount, aggreeMap.keySet().size(),
                        autoMaticMarkCount);
                upsetAggreeResult(group, id, aggreeMap, aggreInfo, null);
                logger.info("事件采集聚合完成 index:{}", index);
            } catch (Exception e) {
                logger.error("事件采集聚合失败 index:{}", index, e);
            }
        });
    } catch (Exception e) {
        logger.error("startAggree异常", e);
    }
}
/**
 * Stores the marked part of an event collection, using "自动化机器人" as the marker.
 * <p>
 * Delegates to the four-argument overload.
 *
 * @param group   mark group
 * @param id      event id
 * @param markSum passed through to the four-argument overload
 * @return the result of the four-argument overload
 */
public boolean eventCollectionMarkedInsert(String group, String id, int markSum) {
return eventCollectionMarkedInsert(group, id, markSum, "自动化机器人");
}
/**
 * Stores the marked part of an event collection and files the rest as noise.
 * <p>
 * The work runs asynchronously on {@code eventAggreeEasyExecutor}. Titles whose children carry
 * a tag are upserted through Dubbo. Untagged titles with at least {@code markSum} children are
 * added to (or merged into) the noise set. After all titles are processed, the task is flagged
 * as inserted and the noise set is persisted once.
 *
 * @param group   mark group
 * @param id      event id
 * @param markSum minimum number of children an untagged title needs to be recorded as noise
 * @param mperson marker name attached to the upserted records
 * @return {@code true} when the work was submitted; {@code false} when the aggregation is
 *         missing or unfinished, or when the submission failed
 */
public boolean eventCollectionMarkedInsert(String group, String id, int markSum, String mperson) {
    AggreInfo aggreInfo = getAggreeTask(group, id);
    try {
        if (null == aggreInfo || !aggreInfo.isAggreFinshed()) {
            logger.info("正在聚合请等待....");
            return false;
        }
        eventAggreeEasyExecutor.execute(() -> {
            // Failures of the asynchronous task are logged here; the caller has already returned.
            try {
                Map<String, List<JSONObject>> aggreeMap =
                        getAggreeMap(Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id));
                Map<String, List<JSONObject>> noiseMap =
                        getAggreeMap(Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id));
                if (null != aggreeMap) {
                    for (Map.Entry<String, List<JSONObject>> entry : aggreeMap.entrySet()) {
                        String title = entry.getKey();
                        List<JSONObject> consumers = entry.getValue();
                        // Tagged titles are stored.
                        if (EventCollectionUtil.hasTag(consumers)) {
                            dubboHandler.eventCollectionUpsertWithSupplement(consumers, group, mperson);
                            logger.info("titile:{}已经入库{}条", title, consumers.size());
                            continue;
                        }
                        // Make sure a noise set exists before untagged titles are examined.
                        if (null == noiseMap) {
                            noiseMap = new HashMap<>();
                        }
                        // Skip titles that are not popular enough.
                        if (consumers.size() < markSum) {
                            continue;
                        }
                        if (!noiseMap.containsKey(title)) {
                            // New noise title.
                            noiseMap.put(title, consumers);
                            logger.info("title:{}新建噪音集{}条", title, consumers.size());
                        } else {
                            // Merge into the existing noise title.
                            List<JSONObject> originList = noiseMap.get(title);
                            List<JSONObject> newList = EventCollectionUtil.mergeNoiseList(originList, consumers);
                            noiseMap.put(title, newList);
                            logger.info("title:{}新增噪音{}条,并入噪音集{}条", title, consumers.size(),
                                    newList.size() - originList.size());
                        }
                    }
                }
                // Flag and persist once, after every title has been handled. Doing this inside
                // the loop rewrote the noise set per title and was skipped by "continue".
                aggreInfo.setInserted(true);
                upsetAggreeResult(group, id, null, aggreInfo, noiseMap);
                logger.info("id:{}已经入库完毕", id);
            } catch (Exception e) {
                logger.error("eventCollectionMarkedInsert", e);
            }
        });
        return true;
    } catch (Exception e) {
        logger.error("eventCollectionMarkedInsert", e);
        return false;
    }
}
/**
 * Deletes everything stored for an event: the aggregation set, the task info and the noise set.
 *
 * @param group mark group
 * @param id    event id
 */
public void cleanEventCollectionAllData(String group, String id) {
    cleanEventCollectionAggreeData(group, id);
    String noiseKey = Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id);
    redissonUtil.deleteBucket(noiseKey);
    logger.info("删除事件采集全部结果集,id:{}", id);
}
/**
 * Deletes the aggregation set of an event and removes its task info.
 *
 * @param group mark group
 * @param id    event id
 */
public void cleanEventCollectionAggreeData(String group, String id) {
    String aggregationKey = Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id);
    redissonUtil.deleteBucket(aggregationKey);
    removerInfo(group, id);
    logger.info("删除事件采集聚合结果集,id:{}", id);
}
/**
 * Returns one page of noise titles for an event.
 * <p>
 * The noise set is filtered by keyword, ordered by the number of children and paged.
 *
 * @param group   mark group
 * @param id      event id
 * @param page    page number handed to {@code Tools.listPagedQuery}
 * @param size    page size handed to {@code Tools.listPagedQuery}
 * @param isAsc   {@code true} to order by child count ascending, otherwise descending
 * @param keyword fuzzy-match keyword
 * @return map with "noiseList", "status" and "totalSize"; status is {@code false} when the
 *         fuzzy match yields no map
 */
public Map<String, Object> getEventCollectionNoiseTitles(String group, String id, int page, int size, boolean isAsc,
        String keyword) {
    Map<String, Object> resMap = new HashMap<>();
    String noiseKey = Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id);
    // Fuzzy match on the keyword.
    Map<String, List<JSONObject>> noiseMap = EventCollectionUtil.fuzzyMatch(getAggreeMap(noiseKey), keyword);
    if (null == noiseMap) {
        resMap.put("noiseList", null);
        resMap.put("status", false);
        resMap.put("totalSize", 0);
        return resMap;
    }

    Comparator<Object> byChildCount = Comparator.comparing(key -> noiseMap.get(key).size());
    List<String> orderedTitles = new ArrayList<>(noiseMap.keySet());
    orderedTitles.sort(isAsc ? byChildCount : byChildCount.reversed());

    // Assemble one father object per title.
    List<JSONObject> fathers = new ArrayList<>();
    for (String title : orderedTitles) {
        List<JSONObject> children = noiseMap.get(title);
        List<MarkInfo> sons = MarkInfoUtil.transformToMarkInfo(children);
        String mtag = children.get(0).getString("mtag");

        JSONObject father = new JSONObject();
        father.put("title", title);
        father.put("mtag", mtag);
        father.put("sonList", sons);
        fathers.add(father);
    }

    int total = fathers.size();
    List<JSONObject> pageData = Tools.listPagedQuery(fathers, page, size);
    resMap.put("noiseList", pageData);
    resMap.put("status", true);
    resMap.put("totalSize", total);
    return resMap;
}
/**
 * Returns the noise children stored under a template title.
 *
 * @param group         mark group
 * @param id            event id
 * @param templateTitle template title
 * @return map whose "data" entry holds the children (possibly {@code null} when the title is
 *         unknown), or {@code null} when an exception occurred
 */
public Map<String, Object> getEventCollectionNoiseSubTitle(String group, String id, String templateTitle) {
    Map<String, Object> resMap = new HashMap<>();
    try {
        String noiseKey = Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id);
        Map<String, List<JSONObject>> noiseMap = getAggreeMap(noiseKey);
        List<JSONObject> children = noiseMap.get(templateTitle);
        resMap.put("data", children);
        return resMap;
    } catch (Exception e) {
        logger.error("{}根据模板标题获取其子集标注聚合结果", group, e);
        return null;
    }
}
/**
 * Tells whether the marked part of an event has already been stored.
 *
 * @param group mark group
 * @param id    event id
 * @return the task's inserted flag; {@code false} when no task exists or the lookup failed
 */
public boolean markedHasInserted(String group, String id) {
    try {
        AggreInfo aggreInfo = getAggreeTask(group, id);
        // A missing task is a normal answer, not an error.
        return null != aggreInfo && aggreInfo.isInserted();
    } catch (Exception e) {
        // Still answer "not inserted", but leave a trace of the real failure.
        logger.warn("markedHasInserted group:{} id:{}", group, id, e);
        return false;
    }
}
/**
 * Synchronises the Redis caches of an event.
 * <p>
 * Each argument is optional: only non-null parts are written. The writes happen under a
 * class-wide lock.
 *
 * @param group     mark group
 * @param id        event id
 * @param aggreMap  aggregation set to store, or {@code null} to leave it untouched
 * @param aggreInfo task state to store, or {@code null} to leave it untouched
 * @param noiseMap  noise set to store, or {@code null} to leave it untouched
 */
private void upsetAggreeResult(String group, String id, Map<String, List<JSONObject>> aggreMap, AggreInfo aggreInfo,
        Map<String, List<JSONObject>> noiseMap) {
    synchronized (EventCollectionMark.class) {
        // Aggregation result set.
        if (aggreMap != null) {
            String aggregationKey = Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id);
            redissonUtil.setMapValue(aggregationKey, Tools.redisHmFormatWithGzip(aggreMap));
        }
        // Aggregation task state.
        if (aggreInfo != null) {
            addAggreeTask(group, id, aggreInfo);
        }
        // Noise set.
        if (noiseMap != null) {
            String noiseKey = Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id);
            redissonUtil.setMapValue(noiseKey, Tools.redisHmFormatWithGzip(noiseMap));
        }
    }
}
/**
 * Reads the children stored under one field of an event's aggregation set.
 *
 * @param group  mark group
 * @param id     event id
 * @param mapKey field (template title) to read
 * @return the field's gunzipped value parsed as a JSON array of objects
 */
private List<JSONObject> getAggreeMapByField(String group, String id, String mapKey) {
    String aggregationKey = Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id);
    String compressed = redissonUtil.getMapKeyValue(aggregationKey, mapKey);
    return JSONObject.parseArray(Tools.gunzip(compressed), JSONObject.class);
}
/**
 * Loads a cached set (aggregation or noise) from Redis.
 *
 * @param key Redis key of the set
 * @return the decoded set, or {@code null} when Redis returns no map for the key
 */
private Map<String, List<JSONObject>> getAggreeMap(String key) {
    Map<String, String> stored = redissonUtil.getMapValue(key);
    return stored == null ? null : EventCollectionUtil.parseFromRedisHmStrWithGunZip(stored);
}
}
package com.zhiwei.middleware.automatic.server.service.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.CompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.es.index.Index;
import com.zhiwei.es.util.IndexUtil;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import com.zhiwei.middleware.automatic.server.dao.TemplateRecordDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import com.zhiwei.middleware.automatic.server.pojo.enums.TemplateStatus;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateTitleVo;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.TemplateTitleService;
import com.zhiwei.middleware.automatic.server.util.CosineSimilarity;
import com.zhiwei.middleware.automatic.server.util.MarkInfoUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import com.zhiwei.nlp.AggreeBootStarter;
import com.zhiwei.nlp.vo.KResult;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.mongodb.core.query.Criteria;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
@Service
public class TemplateTitleServiceImpl implements TemplateTitleService {
private final Logger log = LogManager.getLogger(TemplateTitleServiceImpl.class);
private final RedissonUtil redissonUtil;
private final EsDao esDao;
private final IndexUtil.ESIndexes esIndexes;
private final TemplateRecordDao templateRecordDao;
private final DubboHandler dubboHandler;
private final ThreadPoolTaskExecutor executor;
/* 一天的秒数(为保留前一天文件) */
private static final int ONE_DAY = 60 * 60 * 24;
/**
 * Creates the service with its collaborators injected by Spring.
 *
 * @param redissonUtil      Redis access helper
 * @param esDao             Elasticsearch access object
 * @param esIndexes         provider of Elasticsearch index names
 * @param templateRecordDao DAO for template records
 * @param dubboHandler      Dubbo facade
 * @param executor          executor bean named "asyncExecutor"
 */
public TemplateTitleServiceImpl(RedissonUtil redissonUtil, EsDao esDao,
IndexUtil.ESIndexes esIndexes, TemplateRecordDao templateRecordDao,
DubboHandler dubboHandler,
@Qualifier("asyncExecutor") ThreadPoolTaskExecutor executor) {
this.redissonUtil = redissonUtil;
this.esDao = esDao;
this.esIndexes = esIndexes;
this.templateRecordDao = templateRecordDao;
this.dubboHandler = dubboHandler;
this.executor = executor;
}
/**
 * Refreshes the aggregation templates of each group from recently marked data.
 * <p>
 * Groups are processed one after another; a failure in one group is logged and does not stop
 * the remaining groups.
 *
 * @param groups    groups (projects) to process
 * @param startTime lower bound of the mark-time window
 * @param endTime   upper bound of the mark-time window
 */
@Override
public void schedulerHourAggregation(List<String> groups, Long startTime, Long endTime) {
    for (String group : groups) {
        try {
            // Source data of this group.
            List<Map<String, Object>> sourceList = findRecentTimeData(group, startTime, endTime);
            if (!sourceList.isEmpty()) {
                log.info("发现{}组数据{}条,聚合中...", group, sourceList.size());
                projectDataTemplate(group, sourceList);
            }
        } catch (Exception e) {
            log.error("自动聚合模板更新失败,项目:{}", group, e);
        }
    }
}
/**
 * Loads all template titles of a project from Redis.
 *
 * @param project project (group) name
 * @return template title mapped to its template; an empty map when nothing is stored
 */
@Override
public Map<String, TemplateTitleVo> getTemplateTitleByProject(String project) {
    String redisKey = GenericAttribute.REDIS_MAP_KEY + project;
    Map<String, String> stored = redissonUtil.getMapValue(redisKey);
    if (!Tools.isEmpty(stored)) {
        return Tools.restoreTMap(stored, TemplateTitleVo.class);
    }
    return new HashMap<>();
}
/**
 * Writes the given templates into the project's Redis map, one field per template title.
 * <p>
 * Fields not contained in {@code vos} are left untouched.
 *
 * @param project project (group) name
 * @param vos     template title mapped to the template to store
 */
@Override
public void setTemplateTitleByProject(String project, Map<String, TemplateTitleVo> vos) {
    final String redisKey = GenericAttribute.REDIS_MAP_KEY + project;
    vos.forEach((title, vo) -> redissonUtil.setMapValue(redisKey, title, JSONObject.toJSONString(vo)));
}
/**
 * Replaces the tag of a stored template.
 *
 * @param group         project (group) name
 * @param templateTitle template title; symbols are stripped before the lookup
 * @param fixTag        new tag
 * @return {@code true} when the template was found and updated; {@code false} when it does
 *         not exist or an exception occurred
 */
@Override
public boolean modifyTemplateTitle(String group, String templateTitle, String fixTag) {
    try {
        // Stored titles are symbol-free, so normalise the requested title the same way.
        templateTitle = Tools.filterSymbol(templateTitle);
        String key = GenericAttribute.REDIS_MAP_KEY + group;
        String mapKeyValue = redissonUtil.getMapKeyValue(key, templateTitle);
        if (Tools.isEmpty(mapKeyValue)) {
            return false;
        }
        TemplateTitleVo titleVo = JSONObject.parseObject(mapKeyValue, TemplateTitleVo.class);
        String oldTag = titleVo.getMtag();
        titleVo.setMtag(fixTag);
        redissonUtil.setMapValue(key, templateTitle, JSONObject.toJSONString(titleVo));
        log.info("修改模板标签成功: group:{} templateTitle:{} oldTag:{} fixTag:{}", group, templateTitle, oldTag,
                fixTag);
        return true;
    } catch (Exception e) {
        // Route the failure through the logger instead of printing to stderr.
        log.error("修改模板标签失败: group:{} templateTitle:{} fixTag:{}", group, templateTitle, fixTag, e);
    }
    return false;
}
/**
 * Adds {@code num} to the mark counter of a stored template.
 * <p>
 * Does nothing when the template does not exist.
 *
 * @param group project (group) name
 * @param title template title
 * @param num   amount added to the template's mark sum
 */
@Override
public void modifyTemplateNum(String group, String title, Long num) {
    String key = GenericAttribute.REDIS_MAP_KEY + group;
    String mapKeyValue = redissonUtil.getMapKeyValue(key, title);
    if (Tools.isEmpty(mapKeyValue)) {
        return;
    }
    TemplateTitleVo titleVo = JSONObject.parseObject(mapKeyValue, TemplateTitleVo.class);
    titleVo.getMarkSum().addAndGet(num);
    // Write back under the project's map key. The serialized template (mapKeyValue) was
    // previously used as the key, so the update landed in an unrelated Redis map.
    redissonUtil.setMapValue(key, title, JSONObject.toJSONString(titleVo));
}
/**
 * Lists the feature values (mupdate) recorded for a template.
 *
 * @param group         project (group) name
 * @param templateTitle template title
 * @return the mupdate of every record of the template; {@code null} when the template is
 *         unknown, has been reset, or an exception occurred
 */
@Override
public List<String> getMupdateByTemplateTitle(String group, String templateTitle) {
    try {
        TemplateTitleVo template = getTemplateTitleByProject(group).get(templateTitle);
        boolean unusable = null == template || TemplateStatus.已重置 == template.getStatus();
        if (unusable) {
            return null;
        }
        Query byTemplateId = new Query(Criteria.where("templateId").is(template.getId()));
        List<TemplateRecord> records = templateRecordDao.findTemplateRecord(byTemplateId);
        List<String> mupdates = new ArrayList<>(records.size());
        for (TemplateRecord record : records) {
            mupdates.add(record.getMupdate());
        }
        return mupdates;
    } catch (Exception e) {
        log.error("查看模板标题特征值失败:", e);
        return null;
    }
}
/**
 * Stores a template record by delegating to {@code templateRecordDao}.
 *
 * @param templateRecord record to insert
 */
@Override
public void insertTemplateRecord(TemplateRecord templateRecord) {
templateRecordDao.insertTemplateRecord(templateRecord);
}
/**
 * Finds the template that produced a given feature value.
 * <p>
 * Templates that are not reset and whose title is more similar to {@code title} than
 * {@code GenericAttribute.SIMILAR_STANDARD} are checked in order of decreasing similarity; the
 * first one that has a record with the given mupdate wins.
 *
 * @param group   project (group) name
 * @param title   title to compare the template titles with
 * @param mupdate feature value to look for
 * @return the matching template's title, or {@code null} when none matches
 */
@Override
public String tryGetTemplateTitleByMupdate(String group, String title, String mupdate) {
    Map<String, TemplateTitleVo> templates = getTemplateTitleByProject(group);

    // Collect sufficiently similar candidates together with their score.
    List<String> candidates = new ArrayList<>();
    Map<String, Double> scores = new HashMap<>();
    for (Map.Entry<String, TemplateTitleVo> entry : templates.entrySet()) {
        if (TemplateStatus.已重置 == entry.getValue().getStatus()) {
            continue;
        }
        String candidate = entry.getKey();
        double similar = CosineSimilarity.calculateTextSimWithBrand(candidate, title);
        if (similar > GenericAttribute.SIMILAR_STANDARD) {
            candidates.add(candidate);
            scores.put(candidate, similar);
        }
    }

    // Highest similarity first.
    candidates.sort((left, right) -> Double.compare(scores.get(right), scores.get(left)));

    for (String candidate : candidates) {
        TemplateTitleVo template = templates.get(candidate);
        Query byTemplateAndMupdate = new Query(Criteria.where("templateId").is(template.getId())
                .and("mupdate").is(mupdate));
        if (0 != templateRecordDao.count(byTemplateAndMupdate)) {
            return template.getTemplateTitle();
        }
    }
    return null;
}
/**
 * Compares a title with the project's templates and reports the best match.
 * <p>
 * Reset templates are ignored. A template matches when its similarity exceeds
 * {@code GenericAttribute.SIMILAR_STANDARD}; the most similar one is reported.
 *
 * @param project project (group) name
 * @param title   title to compare
 * @return map with "isMatched"; on a match also "data" (title, markSum, updateTime,
 *         createTime, mtag, similarity); on invalid input or a project without templates
 *         an "errorMessage"
 */
@Override
public Map<String, Object> compareWithTemplateTileOL(String project, String title) {
    Map<String, Object> res = new HashMap<>(3);
    res.put("isMatched", false);
    if (StringUtils.isEmpty(title) || StringUtils.isEmpty(project)) {
        res.put("errorMessage", "标题或项目为空");
        return res;
    }
    Map<String, TemplateTitleVo> templates = getTemplateTitleByProject(project);
    if (templates.isEmpty()) {
        res.put("errorMessage", "该项目组未有聚合集");
        return res;
    }

    // Search for the most similar, non-reset template above the threshold.
    TemplateTitleVo best = null;
    double bestSimilarity = 0.0;
    for (TemplateTitleVo candidate : templates.values()) {
        if (TemplateStatus.已重置 == candidate.getStatus()) {
            continue;
        }
        double current = CosineSimilarity.calculateTextSimWithBrand(candidate.getTemplateTitle(), title);
        boolean better = current > GenericAttribute.SIMILAR_STANDARD && current > bestSimilarity;
        if (better) {
            bestSimilarity = current;
            best = candidate;
        }
    }
    if (null == best) {
        return res;
    }

    Map<String, Object> dataMap = new HashMap<>();
    dataMap.put("title", best.getTemplateTitle());
    dataMap.put("markSum", best.getMarkSum());
    dataMap.put("updateTime", best.getUpdateTime().getTime());
    dataMap.put("createTime", best.getCreateTime().getTime());
    dataMap.put("mtag", best.getMtag());
    dataMap.put("similarity", bestSimilarity);
    res.put("isMatched", true);
    res.put("data", dataMap);
    return res;
}
/**
 * Resets a template.
 * <p>
 * Only templates without a status or in status 运行中 can be reset. The template is first
 * stored as 重置中. An asynchronous task then re-submits the data marked by the template,
 * removes the template's records, and stores the final status: 已重置 on success, 重置失败
 * on failure.
 *
 * @param group         project (group) name
 * @param templateTitle template title; symbols are stripped before the lookup
 * @return {@code true} when the reset was started; {@code false} when the template does not
 *         exist or is not in a resettable status
 */
@Override
public boolean resetTemplate(String group, String templateTitle) {
    // Stored titles are symbol-free, so normalise the requested title the same way.
    final String storedTitle = Tools.filterSymbol(templateTitle);
    TemplateTitleVo templateTitleVo = getTemplateTitleByProject(group).get(storedTitle);
    if (Objects.isNull(templateTitleVo)) {
        return false;
    }
    // Only a running template (or one without a status) may be reset.
    TemplateStatus status = templateTitleVo.getStatus();
    if (Objects.nonNull(status) && status != TemplateStatus.运行中) {
        return false;
    }
    // Persist only the touched template. Writing the whole snapshot back would overwrite
    // changes other writers made to sibling templates in the meantime.
    final Map<String, TemplateTitleVo> touched = Collections.singletonMap(storedTitle, templateTitleVo);
    templateTitleVo.setStatus(TemplateStatus.重置中);
    setTemplateTitleByProject(group, touched);
    // Re-submit the data marked by this template.
    executor.execute(() -> {
        try {
            modifyTemplateMarkerInfo(templateTitleVo, group);
            templateTitleVo.setStatus(TemplateStatus.已重置);
            templateRecordDao.removeTemplateRecord(new Query(Criteria.where("templateId").is(templateTitleVo.getId())));
        } catch (Exception e) {
            templateTitleVo.setStatus(TemplateStatus.重置失败);
            // The exception goes last so the stack trace is logged as well.
            log.error("重置模板:修改聚和集错误,title:{},以加入重试队列", templateTitleVo.getTemplateTitle(), e);
        } finally {
            setTemplateTitleByProject(group, touched);
        }
    });
    return true;
}
/**
 * Re-submits the historical data that was auto-marked by a template.
 * <p>
 * The template's records are read from MongoDB page by page
 * ({@code GenericAttribute.POINT_SIZE} per page). For each page the matching documents are
 * fetched from Elasticsearch, converted to mark infos and sent to the mark middleware.
 *
 * @param templateTitleVo template whose records are processed
 * @param project         project (group) name, used for logging only
 * @return always {@code true}; failures surface as exceptions
 * @throws Exception when a query or the upsert fails
 */
private boolean modifyTemplateMarkerInfo (TemplateTitleVo templateTitleVo, String project) throws Exception {
    long now = System.currentTimeMillis();
    long count = templateRecordDao.count(new Query(Criteria.where("templateId").is(templateTitleVo.getId())));
    if (count == 0) {
        return true;
    }
    // Ceiling division in long arithmetic; narrow to int only afterwards.
    int page = (int) ((count + GenericAttribute.POINT_SIZE - 1) / GenericAttribute.POINT_SIZE);
    for (int i = 0; i < page; i++) {
        Query query = new Query(Criteria.where("templateId").is(templateTitleVo.getId()));
        query.skip((long) i * GenericAttribute.POINT_SIZE)
                .limit(GenericAttribute.POINT_SIZE);
        // MongoDB: records of this page.
        List<TemplateRecord> templateRecord = templateRecordDao.findTemplateRecord(query);
        if (templateRecord.isEmpty()) {
            // Records vanished since the count. With no mupdate clause the Elasticsearch
            // query would match every auto-marked document, so stop here.
            break;
        }
        // Elasticsearch: documents carrying one of the page's feature values.
        SearchHits hits = findByMupdateInfos(templateRecord.stream().map(TemplateRecord::getMupdate).collect(Collectors.toList()));
        // Convert the hits to mark infos.
        List<MarkInfo> collect = Arrays.stream(hits.getHits())
                .map(e -> MarkInfoUtil.distinguishMarkInfo(e, null))
                .filter(Objects::nonNull).collect(Collectors.toList());
        // Hand over to the mark middleware.
        dubboHandler.markUpsert(collect);
    }
    log.info("自动标注模板:模板已改动,项目:{},模板标题:{},特征值数量:{}, 耗时:{}"
            , project, templateTitleVo.getTemplateTitle(), count, System.currentTimeMillis() - now);
    return true;
}
/**
 * Searches the mark2 indexes for auto-marked documents with one of the given feature values.
 * <p>
 * At most 1000 hits are requested.
 *
 * @param mupdate feature values; a document matches when it carries any of them
 * @return the Elasticsearch hits
 * @throws IOException when the search fails
 */
private SearchHits findByMupdateInfos(List<String> mupdate) throws IOException {
    String[] indexes = esIndexes.getMarkIndexes(Index.mark2.name()).toArray(new String[0]);

    // Marker must be the auto-mark robot ...
    BoolQueryBuilder query = QueryBuilders.boolQuery();
    query.must(QueryBuilders.termQuery("mperson", GenericAttribute.AUTO_PERSON));

    // ... and the feature value must be one of the requested ones.
    BoolQueryBuilder anyMupdate = QueryBuilders.boolQuery();
    for (String value : mupdate) {
        anyMupdate.should(QueryBuilders.termQuery("mupdate", value));
    }
    query.must(anyMupdate);

    return esDao.search(indexes, null, query, null, 0, 1000, null);
}
/**
 * Builds templates from a project's data and stores them merged with the kept stored ones.
 * <p>
 * Stored templates are dropped from the merge input when their last update is older than
 * seven days or when they are reset. Stored templates without an id get the group as id.
 * <p>
 * NOTE(review): the result is written with {@code setTemplateTitleByProject}, which only
 * sets fields, so templates dropped here appear to stay in Redis; confirm whether removal
 * happens elsewhere. The fresh aggregation is passed as {@code mergeTemplate}'s first
 * ("old") argument and the stored templates as its second ("new"); confirm this is intended.
 *
 * @param group      project (group) name
 * @param sourceList source documents of the project
 */
private void projectDataTemplate(String group, List<Map<String, Object>> sourceList) {
    // Freshly aggregated templates.
    Map<String, TemplateTitleVo> aggregation = aggregation(transferMark(sourceList));

    // Stored templates that are still valid.
    Map<String, TemplateTitleVo> keptTemplates = new HashMap<>();
    for (Map.Entry<String, TemplateTitleVo> entry : getTemplateTitleByProject(group).entrySet()) {
        String title = entry.getKey();
        TemplateTitleVo stored = entry.getValue();
        if (Objects.isNull(stored.getId())) {
            stored.setId(group);
        }
        long updateTime = stored.getUpdateTime().getTime();
        // Outside the seven-day validity window.
        if (System.currentTimeMillis() - updateTime > ONE_DAY * 7 * 1000) {
            log.info("{}-移除过期模板标题:{},最后更新时间:{}", group, title, updateTime);
            continue;
        }
        if (stored.getStatus() == TemplateStatus.已重置) {
            log.info("已重置的模板从内存中删除,模板title:{}", title);
            continue;
        }
        keptTemplates.put(title, stored);
    }

    // Merge both sets and store the result.
    setTemplateTitleByProject(group, mergeTemplate(aggregation, keptTemplates));
}
/**
 * Merges {@code newTemplate} into {@code oldTemplate}.
 * <p>
 * For every entry of {@code newTemplate}: when no title in {@code oldTemplate} reaches a
 * similarity of 0.96, the entry is added; otherwise every similar entry of
 * {@code oldTemplate} takes over the new entry's tag.
 *
 * @param oldTemplate map that is modified and returned
 * @param newTemplate entries merged into {@code oldTemplate}
 * @return {@code oldTemplate}
 */
private Map<String, TemplateTitleVo> mergeTemplate(Map<String, TemplateTitleVo> oldTemplate, Map<String, TemplateTitleVo> newTemplate) {
    for (Map.Entry<String, TemplateTitleVo> incoming : newTemplate.entrySet()) {
        String incomingTitle = incoming.getKey();
        TemplateTitleVo incomingVo = incoming.getValue();

        // Collect the existing templates similar to the incoming one before touching the map.
        List<TemplateTitleVo> similar = new ArrayList<>();
        for (Map.Entry<String, TemplateTitleVo> existing : oldTemplate.entrySet()) {
            if (CosineSimilarity.calculateTextSimWithBrand(incomingTitle, existing.getKey()) >= 0.96) {
                similar.add(existing.getValue());
            }
        }

        if (similar.isEmpty()) {
            // Nothing similar yet: add the template.
            oldTemplate.put(incomingTitle, incomingVo);
            continue;
        }
        // Otherwise propagate the tag to every similar template.
        for (TemplateTitleVo match : similar) {
            match.setMtag(incomingVo.getMtag());
        }
    }
    return oldTemplate;
}
/**
 * Clusters documents by title and turns every cluster of at least three documents into a
 * template.
 * <p>
 * The template's tag is the most frequent tag inside the cluster, its title is the
 * symbol-free cluster name and its URL comes from the cluster's first document.
 *
 * @param sourceList documents to cluster
 * @return symbol-free cluster name mapped to the generated template
 */
private Map<String, TemplateTitleVo> aggregation(List<CompleteTextMark> sourceList) {
    Map<String, TemplateTitleVo> templates = new ConcurrentHashMap<>();
    List<String> titles = sourceList.stream().map(CompleteTextMark::getTitle).collect(Collectors.toList());
    // Cluster the titles.
    List<KResult<Integer>> clusters = AggreeBootStarter.getKResult(titles, 0.1);
    for (KResult<Integer> cluster : clusters) {
        boolean tooSmall = cluster.getDataPoints().size() < 3;
        if (tooSmall) {
            continue;
        }
        // Count how often each tag occurs inside the cluster.
        Map<String, Long> tagCounts = cluster.getDataPoints().stream()
                .map(point -> sourceList.get(point).getMtag())
                .collect(Collectors.groupingBy(mtag -> mtag, Collectors.counting()));
        // The most frequent tag wins.
        String dominantTag = Collections.max(tagCounts.entrySet(), Map.Entry.comparingByValue()).getKey();
        // Build the template.
        String templateTitle = Tools.filterSymbol(cluster.getClusterName());
        String exampleUrl = sourceList.get(cluster.getDataPoints().get(0)).getUrl();
        templates.put(templateTitle, new TemplateTitleVo(templateTitle, dominantTag, exampleUrl));
    }
    return templates;
}
/**
 * Loads a project's recently marked documents from Elasticsearch.
 * <p>
 * A document qualifies when its mark time lies in [startTime, endTime], its publish time lies
 * between one day before now and endTime, it belongs to the group, it was not marked by the
 * auto-mark robot and its c2 is neither 25165824 nor 16777216.
 *
 * @param mgroup    project (group) name
 * @param startTime lower bound of the mark time
 * @param endTime   upper bound of the mark time and of the publish time
 * @return source maps of the matching documents
 * @throws IOException when the search fails
 */
private List<Map<String, Object>> findRecentTimeData(String mgroup, Long startTime, Long endTime) throws IOException {
    // Mark time inside the requested window.
    QueryBuilder markTimeRange = QueryBuilders.rangeQuery("mtime").from(startTime).to(endTime);

    // Publish time within the last day.
    Calendar oneDayAgo = Calendar.getInstance();
    oneDayAgo.add(Calendar.DAY_OF_MONTH, -1);
    QueryBuilder publishTimeRange = QueryBuilders.rangeQuery("time").from(oneDayAgo.getTime().getTime()).to(endTime);

    QueryBuilder groupMatch = QueryBuilders.matchPhraseQuery("mgroup", mgroup);

    // Exclude data marked by the auto-mark robot and the two excluded c2 categories.
    BoolQueryBuilder filter = QueryBuilders.boolQuery();
    filter.must(publishTimeRange);
    filter.must(markTimeRange);
    filter.must(groupMatch);
    filter.mustNot(autoRobotQueryBuilder());
    filter.mustNot(QueryBuilders.termQuery("c2", 25165824));
    filter.mustNot(QueryBuilders.termQuery("c2", 16777216));

    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.query(filter).size(10000)
            .fetchSource(new String[] { "ind_full_text", "mtime", "mtag", "mperson", "url","id"}, null);

    String[] indexes = esIndexes.getIndexes(Index.mark.name()).toArray(new String[]{});
    return esDao.afterSearch(indexes, sourceBuilder, 1000).stream()
            .map(SearchHit::getSourceAsMap)
            .collect(Collectors.toList());
}
/**
 * Converts Elasticsearch source maps into complete-text marks.
 *
 * @param sourceMap source maps, one per document
 * @return one mark per source map, in the same order
 */
private List<CompleteTextMark> transferMark(List<Map<String, Object>> sourceMap) {
    List<CompleteTextMark> marks = new ArrayList<>();
    for (Map<String, Object> source : sourceMap) {
        marks.add(CompleteTextMark.restoreFromEs(source));
    }
    return marks;
}
/**
 * Builds the query clause that matches documents marked by the auto-mark robot.
 *
 * @return term query on "mperson" for {@code GenericAttribute.AUTO_PERSON}
 */
private QueryBuilder autoRobotQueryBuilder() {
return QueryBuilders.termQuery("mperson", GenericAttribute.AUTO_PERSON);
}
}
package com.zhiwei.middleware.automatic.server.service.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.CompleteText;
import com.zhiwei.base.entity.subclass.IncompleteText;
import com.zhiwei.base.entity.subclass.QAText;
import com.zhiwei.base.entity.subclass.Video;
import com.zhiwei.base.entity.subclass.mark.*;
import com.zhiwei.middleware.automatic.server.base.BulkTemplate;
import com.zhiwei.middleware.automatic.server.base.MarkCommonTemplate;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.*;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.UploadService;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.concurrent.*;
import java.util.stream.Collectors;
import static com.alibaba.fastjson.JSON.*;
@Service
public class UploadServiceImpl implements UploadService {
private static final Logger log = LogManager.getLogger(UploadServiceImpl.class);
/*
 * Source data of the upload module (key: "group-id", value: records waiting to be processed).
 * NOTE(review): within this class the map is only ever removed from (cleanUploadResult);
 * nothing visible here puts entries into it - confirm whether it is still needed.
 */
private static final Map<String, List<MarkUploadInfo>> downLoadDataSourceMap = new ConcurrentHashMap<>();
/*
 * Fixed pool of 16 threads shared by the outer upload task (startUpload) and the per-batch
 * futures created in uploadRes.
 * NOTE(review): the outer task blocks on futures scheduled on this same pool, so 16
 * concurrent uploads could leave no thread for the batches - confirm this is acceptable.
 */
private static final ExecutorService UPLOAD_SERVICE = Executors.newFixedThreadPool(16);
private final RedissonUtil redissonUtil;
private final MarkCommonTemplate markCommonTemplate;
private final DubboHandler dubboHandler;
/**
 * Creates the service with its collaborators.
 *
 * @param redissonUtil       Redis access helper used for lists and status buckets
 * @param markCommonTemplate template that transforms and searches uploaded records
 * @param dubboHandler       handler used for existence checks against remote stores
 */
public UploadServiceImpl(RedissonUtil redissonUtil, MarkCommonTemplate markCommonTemplate,
DubboHandler dubboHandler) {
this.redissonUtil = redissonUtil;
this.markCommonTemplate = markCommonTemplate;
this.dubboHandler = dubboHandler;
}
/**
 * Decompresses an uploaded payload into a list of strings and stores it in Redis under
 * {@code Tools.assembleKey(GenericAttribute.SOURCE_DATA, "group-id")}.
 * Any failure is logged and swallowed.
 * <p>
 * NOTE(review): {@code uploadRes} reads its list with the plain "group-id" key - confirm
 * that both keys resolve to the same Redis entry.
 *
 * @param group     project the task belongs to
 * @param id        task id
 * @param sourceStr gzip-compressed JSON array of records
 */
@Override
public void addUploadList(String group, String id, String sourceStr) {
    try {
        // Unzip first, then persist the decoded records.
        List<String> decoded = parseArray(Tools.gunzip(sourceStr), String.class);
        String redisKey = Tools.assembleKey(GenericAttribute.SOURCE_DATA, listKey(group, id));
        redissonUtil.setList(redisKey, decoded);
    } catch (Exception e) {
        log.error("addUploadList", e);
    }
}
/**
 * Publishes a fresh upload status for the task and then runs the upload asynchronously
 * on {@code UPLOAD_SERVICE}.
 *
 * @param rule upload rule carrying group, id and insert type
 */
@Override
public void startUpload(MarkUploadRule rule) {
    final UploadStatus status = new UploadStatus();
    // Publish the initial status before the background work starts.
    upsetUploadResult(rule.getGroup(), rule.getId(), status);
    UPLOAD_SERVICE.submit(() -> runUpload(rule, status));
}

/**
 * Body of the background upload task: transforms and groups the records, caches the
 * grouped results, and on any exception stores status -1 and logs the error.
 *
 * @param rule   upload rule
 * @param status status object shared with the caller and updated in place
 */
private void runUpload(MarkUploadRule rule, UploadStatus status) {
    try {
        // Transform the records and group them by result type.
        Map<String, List<MarkUploadResult>> grouped = uploadRes(rule);
        int total = grouped.values().stream().mapToInt(List::size).sum();
        status.setTotalCount(total);
        // Transformation finished: cache every group.
        successUploadCache(rule.getGroup(), rule.getId(), status, grouped, rule.getInsertType());
        log.info("上传任务项目:{},已完成", rule.getGroup());
    } catch (Exception e) {
        // Mark the task as failed.
        status.setStatus(-1);
        upsetUploadResult(rule.getGroup(), rule.getId(), status);
        log.error("startUpload-error", e);
    }
}
/**
 * Reads the status bucket of an upload task and parses it as a JSON object.
 *
 * @param group project the task belongs to
 * @param id    task id
 * @return the parsed status, as produced by {@code parseObject} for the stored value
 */
@Override
public Map<String, Object> getUploadStatus(String group, String id) {
    String statusKey = assembleKey(group, id, GenericAttribute.STATUS_SUFFIX);
    return parseObject(redissonUtil.getBucket(statusKey));
}
/**
 * Returns one page of the cached upload results of the given type.
 * <p>
 * The result map always has the keys {@code status}, {@code data} and {@code totalSize}.
 * They default to {@code false}, {@code null} and {@code 0}; when the Redis list could be
 * read they become {@code true}, the JSON string of the page and the full list size.
 * Exceptions raised while reading are logged and the defaults are returned.
 * {@code isAsc}, {@code searchField} and {@code keyword} are not used by this method.
 *
 * @param group       project the task belongs to
 * @param id          task id
 * @param page        1-based page number
 * @param size        page size
 * @param isAsc       unused
 * @param searchField unused
 * @param keyword     unused
 * @param uploadType  which result list to read
 * @return map with {@code status}, {@code data} and {@code totalSize}
 * @throws IllegalArgumentException if {@code uploadType} has no matching suffix
 */
@Override
public Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc, String searchField, String keyword, UploadInfo.UploadType uploadType) {
    String suffix = matchSuffix(uploadType);
    Map<String, Object> response = new HashMap<>();
    response.put("status", false);
    response.put("data", null);
    response.put("totalSize", 0);
    try {
        String redisKey = assembleKey(group, id, suffix);
        // Inclusive index range of the requested page.
        int first = (page - 1) * size;
        int last = first + size - 1;
        List<String> slice = redissonUtil.getList(redisKey, first, last);
        long total = redissonUtil.getListSize(redisKey);
        if (null == slice) {
            return response;
        }
        response.put("status", true);
        response.put("data", toJSONString(slice));
        response.put("totalSize", total);
    } catch (Exception e) {
        log.error("getUploadInfoList", e);
    }
    return response;
}
/**
 * Determines where a record already exists: the mark store, the public-opinion store,
 * or neither.
 * <p>
 * The JSON is parsed into the mark class and then the plain class that belong to
 * {@code typeB}; each object's {@code filterInfo()} is passed to
 * {@code dubboHandler.contains}.
 *
 * @param json  record to look up
 * @param typeB content type selecting the entity classes
 * @return {@code MARK} if the mark lookup succeeds, else {@code DW} if the second lookup
 *         succeeds, else {@code EXTERNAL}
 * @throws IllegalArgumentException if {@code typeB} is not one of the four handled types
 */
@Override
public UploadInfo.DataType getDataType(JSONObject json, ClassB.TypeB typeB) {
    final Class<? extends CommonDO> markType;
    final Class<? extends CommonDO> plainType;
    switch (typeB) {
        case COMPLETE:
            markType = CompleteTextMark.class;
            plainType = CompleteText.class;
            break;
        case INCOMPLETE:
            markType = IncompleteTextMark.class;
            plainType = IncompleteText.class;
            break;
        case VIDEO:
            markType = VideoMark.class;
            plainType = Video.class;
            break;
        case QA:
            markType = QATextMark.class;
            plainType = QAText.class;
            break;
        default:
            throw new IllegalArgumentException("TypeB-传参异常");
    }
    String raw = json.toJSONString();
    // Present in the mark store?
    boolean inMarkStore = dubboHandler.contains(parseObject(raw, markType).filterInfo());
    if (inMarkStore) {
        return UploadInfo.DataType.MARK;
    }
    // Present in the public-opinion store?
    boolean inOpinionStore = dubboHandler.contains(parseObject(raw, plainType).filterInfo());
    return inOpinionStore ? UploadInfo.DataType.DW : UploadInfo.DataType.EXTERNAL;
}
/**
 * Removes everything cached for an upload task: the in-memory entry keyed "group-id" and
 * the Redis entries for the format-error, field-error, failed, success and status suffixes.
 *
 * @param group project the task belongs to
 * @param id    task id
 */
@Override
public void cleanUploadResult(String group, String id) {
    downLoadDataSourceMap.remove(group + "-" + id);
    String[] suffixes = {
            GenericAttribute.FORMAT_ERROR_SUFFIX,
            GenericAttribute.FIELD_ERROR_SUFFIX,
            GenericAttribute.FAILED_SUFFIX,
            GenericAttribute.SUCCESS_SUFFIX,
            GenericAttribute.STATUS_SUFFIX
    };
    for (String suffix : suffixes) {
        redissonUtil.deleteListByKey(assembleKey(group, id, suffix));
    }
    log.info("id:{}清理缓存完毕", id);
}
/**
 * Runs the transformation of an upload task and groups the results.
 * <p>
 * The pending records are read from the Redis list keyed "group-id", split into batches
 * of 100, and every batch is processed by {@link #asyncPoint} on {@code UPLOAD_SERVICE}.
 * The method blocks until all batches are done.
 *
 * @param markUploadRule upload rule
 * @return all results grouped by {@code MarkUploadResult.getInfoType()}
 * @throws IllegalStateException if no records were stored for the task
 * @throws ExecutionException    if a batch failed
 * @throws InterruptedException  if the wait was interrupted
 */
private Map<String, List<MarkUploadResult>> uploadRes(MarkUploadRule markUploadRule) throws ExecutionException, InterruptedException {
    String key = listKey(markUploadRule.getGroup(), markUploadRule.getId());
    List<MarkUploadInfo> pending = redissonUtil.getList(key).stream()
            .map(raw -> JSONObject.parseObject(raw).toJavaObject(MarkUploadInfo.class))
            .collect(Collectors.toList());
    if (pending.isEmpty()) {
        throw new IllegalStateException("please do this after [addUploadList] method!");
    }
    log.info("index:{},启动上传任务-{}条", key, pending.size());

    // One asynchronous task per batch of 100 records.
    List<CompletableFuture<List<MarkUploadResult>>> tasks = new ArrayList<>();
    for (List<MarkUploadInfo> batch : Tools.spilt(pending, 100)) {
        tasks.add(CompletableFuture.supplyAsync(() -> asyncPoint(batch, markUploadRule), UPLOAD_SERVICE));
    }

    // Wait for every batch, then collect the individual results.
    CompletableFuture<List<List<MarkUploadResult>>> gathered = CompletableFuture
            .allOf(tasks.toArray(new CompletableFuture[0]))
            .thenApply(ignored -> tasks.stream().map(CompletableFuture::join).collect(Collectors.toList()));
    return gathered.get().stream()
            .flatMap(Collection::stream)
            .collect(Collectors.groupingBy(MarkUploadResult::getInfoType, Collectors.toList()));
}
/**
 * Processes one batch of uploaded records.
 * <p>
 * The steps run in a fixed order and share the mutable {@code bulkTemplate} and
 * {@code results} objects, so the order matters.
 *
 * @param infos records of this batch
 * @param rule  upload rule passed on to the search steps
 * @return one result per transformed record (successful or not)
 */
private List<MarkUploadResult> asyncPoint(List<MarkUploadInfo> infos, MarkUploadRule rule) {
// Convert the raw records into result objects.
BulkTemplate<MarkUploadResult> bulkTemplate = new BulkTemplate<>(markCommonTemplate.dataTransform(infos), "初始化");
List<MarkUploadResult> results = new ArrayList<>(bulkTemplate.getSource());
// Reset the bulk template so that it only holds the records that are not successful yet.
bulkTemplate.clean(results.stream().filter(e -> !e.isSuccess()).collect(Collectors.toList()), "url查询");
// Run the text search for the records that failed the conversion.
markCommonTemplate.textSearch(bulkTemplate, rule);
// Fill in the data-source information.
markCommonTemplate.uploadType(results);
// Only successfully converted records take part in the next step.
List<MarkUploadResult> completeData = results.stream().filter(MarkUploadResult::isSuccess).collect(Collectors.toList());
// Second search, depending on the data source.
markCommonTemplate.secondarySearch(completeData, bulkTemplate, rule);
return results;
// Previous per-record implementation, kept for reference:
// return infos.stream().map(uploadInfo -> markCommonTemplate.uploadResult(uploadInfo, rule)).collect(Collectors.toList());
}
/**
 * Caches the grouped results of a finished upload and stores the final status.
 * <p>
 * For each of the four known result types the group is written to its own Redis list and
 * its size recorded on {@code uploadStatus}; the success group is additionally handed to
 * {@link #sendMarker}. Groups with any other key are ignored. Finally the status is set to
 * 1 and written to the status bucket.
 *
 * @param group        project the task belongs to
 * @param id           task id
 * @param uploadStatus status object updated in place
 * @param data         results grouped by result type
 * @param insertType   insert type forwarded to {@code sendMarker}
 */
private void successUploadCache(String group, String id, UploadStatus uploadStatus,
        Map<String, List<MarkUploadResult>> data, InsertType insertType) {
    for (Map.Entry<String, List<MarkUploadResult>> bucket : data.entrySet()) {
        String infoType = bucket.getKey();
        List<MarkUploadResult> results = bucket.getValue();
        if (GenericAttribute.FORMAT_ERROR_SUFFIX.equals(infoType)) {
            // Records with a format error.
            cacheUploadInfos(group, id, GenericAttribute.FORMAT_ERROR_SUFFIX, results);
            uploadStatus.setFormatErrorCount(results.size());
        } else if (GenericAttribute.FIELD_ERROR_SUFFIX.equals(infoType)) {
            // Records with a field error.
            cacheUploadInfos(group, id, GenericAttribute.FIELD_ERROR_SUFFIX, results);
            uploadStatus.setFieldErrorCount(results.size());
        } else if (GenericAttribute.SUCCESS_SUFFIX.equals(infoType)) {
            // Successfully uploaded records.
            cacheUploadInfos(group, id, GenericAttribute.SUCCESS_SUFFIX, results);
            uploadStatus.setSuccessCount(results.size());
            // Hand the successful records on.
            List<MarkInfo> markInfos = results.stream().map(MarkUploadResult::getMarkInfo).collect(Collectors.toList());
            sendMarker(markInfos, insertType);
        } else if (GenericAttribute.FAILED_SUFFIX.equals(infoType)) {
            cacheUploadInfos(group, id,GenericAttribute.FAILED_SUFFIX, results);
            uploadStatus.setFailedCount(results.size());
        }
    }
    // Persist the final status.
    uploadStatus.setStatus(1);
    redissonUtil.setBucket(assembleKey(group, id, GenericAttribute.STATUS_SUFFIX), toJSONString(uploadStatus));
}

/**
 * Converts results to {@code UploadInfo}, compresses them with
 * {@code Tools.gzipWithUploadInfoList} and stores them in the Redis list for {@code suffix}.
 */
private void cacheUploadInfos(String group, String id, String suffix, List<MarkUploadResult> results) {
    String redisKey = assembleKey(group, id, suffix);
    List<UploadInfo> infos = results.stream().map(this::setUploadInfo).collect(Collectors.toList());
    redissonUtil.setList(redisKey, Tools.gzipWithUploadInfoList(infos));
}
/**
 * Intended to send successfully uploaded records to the mark middleware in batches of 1000.
 * <p>
 * NOTE(review): the actual dubbo calls inside the {@code try} block are commented out, so
 * as written this method only splits the list and sends nothing; {@code insertType} is
 * unused. Confirm whether forwarding was disabled on purpose.
 *
 * @param markInfos  records to send
 * @param insertType insert type that selected the target call in the disabled code
 */
private void sendMarker(List<MarkInfo> markInfos, InsertType insertType) {
List<List<MarkInfo>> lists = Tools.spilt(markInfos, 1000);
for (List<MarkInfo> infos : lists) {
try {
// Disabled forwarding logic:
// if (InsertType.MARK.equals(insertType)) {
// dubboHandler.markUpsert(infos);
// } else {
// dubboHandler.pluginUpsertBack(infos);
// }
} catch (Exception e) {
// Logs the urls of the batch, joined with "|", together with the exception.
log.error("标注上传-数据url:{},发送到标注中间件错误:", infos.stream().map(json -> json.getSourceObj().getString("url")).collect(Collectors.joining("|")), e);
}
}
}
/**
 * Writes the upload status of a task to its Redis status bucket as JSON.
 * Does nothing when {@code uploadStatus} is {@code null}.
 *
 * @param group        project the task belongs to
 * @param id           task id
 * @param uploadStatus status to persist, may be {@code null}
 */
private void upsetUploadResult(String group, String id,
        UploadStatus uploadStatus) {
    if (null == uploadStatus) {
        return;
    }
    String statusKey = assembleKey(group, id, GenericAttribute.STATUS_SUFFIX);
    redissonUtil.setBucket(statusKey, toJSONString(uploadStatus));
}
/**
 * Converts an upload result into the {@code UploadInfo} that is cached for the client.
 *
 * @param result upload result
 * @return new {@code UploadInfo} carrying the result's data type, typeB, origin data,
 *         info type (as error), dw/mark pair and message
 */
private UploadInfo setUploadInfo(MarkUploadResult result) {
    UploadInfo view = new UploadInfo();
    view.setDataType(result.getDataType());
    view.setTypeB(result.getTypeB());
    view.setOriginData(result.getOriginData());
    view.setError(result.getInfoType());
    UploadInfo.CompoundCommonDO pair = new UploadInfo.CompoundCommonDO(result.getDw(), result.getMark());
    view.setCompound(pair);
    view.setErrorMsg(result.getMessage());
    return view;
}
/**
 * Builds a Redis key: {@code GenericAttribute.UNIFIED_PREFIX} followed by the given parts
 * joined with ":".
 *
 * @param keys key parts, in order
 * @return the assembled key
 */
private String assembleKey(String... keys) {
    StringBuilder key = new StringBuilder(GenericAttribute.UNIFIED_PREFIX);
    key.append(String.join(":", keys));
    return key.toString();
}
/**
 * Returns the "group-id" key of an upload task.
 *
 * @param group project the task belongs to
 * @param id    task id
 * @return {@code group + "-" + id}
 */
private String listKey(String group, String id) {
    return String.join("-", group, id);
}
/**
 * Maps an upload result type to the Redis key suffix of its list.
 *
 * @param uploadType result type
 * @return the matching {@code GenericAttribute} suffix
 * @throws IllegalArgumentException if the type has no suffix
 */
private String matchSuffix(UploadInfo.UploadType uploadType) {
    final String suffix;
    switch (uploadType) {
        case FORMAR_ERROR:
            suffix = GenericAttribute.FORMAT_ERROR_SUFFIX;
            break;
        case FIELD_ERROR:
            suffix = GenericAttribute.FIELD_ERROR_SUFFIX;
            break;
        case FAILED:
            suffix = GenericAttribute.FAILED_SUFFIX;
            break;
        case SUCCESS:
            suffix = GenericAttribute.SUCCESS_SUFFIX;
            break;
        default:
            throw new IllegalArgumentException("UploadType匹配异常");
    }
    return suffix;
}
}
package com.zhiwei.middleware.automatic.server.util;
import com.zhiwei.middleware.automatic.server.config.GlobalPojo;
import java.io.UnsupportedEncodingException;
import java.util.*;
public class CosineSimilarity {
/**
 * Computes the similarity of two texts, but only when they pass the length check and hit
 * the same set of brand words; otherwise the similarity is 0.0.
 *
 * @param doc1 first text
 * @param doc2 second text
 * @return the similarity value, or 0.0 when a pre-check fails
 */
public static double calculateTextSimWithBrand(String doc1, String doc2) {
    boolean comparable = handleByLength(doc1, doc2) && compareWithBrand(doc1, doc2);
    return comparable ? calculateSimilar(doc1, doc2) : 0.0;
}
/**
 * Computes the cosine similarity of two texts from their per-character frequencies.
 * <p>
 * Only characters accepted by {@link #isHanZi(char)} that also have a GB2312 id take part.
 * When either text is null or blank, or when either text contains no such character, the
 * result is 0.0 (the zero-norm case used to produce NaN through 0/0).
 *
 * @param doc1 first text
 * @param doc2 second text
 * @return cosine of the angle between the two frequency vectors, or 0.0 if undefined
 */
private static double calculateSimilar(String doc1, String doc2) {
    if (doc1 == null || doc2 == null || doc1.trim().isEmpty() || doc2.trim().isEmpty()) {
        return 0.0;
    }
    // GB2312 id -> {count in doc1, count in doc2}
    Map<Integer, int[]> frequencies = new HashMap<>();
    countHanZi(doc1, 0, frequencies);
    countHanZi(doc2, 1, frequencies);

    double dot = 0;
    double squaredNorm1 = 0;
    double squaredNorm2 = 0;
    for (int[] pair : frequencies.values()) {
        // Widen before multiplying so that large counts cannot overflow int.
        dot += (double) pair[0] * pair[1];
        squaredNorm1 += (double) pair[0] * pair[0];
        squaredNorm2 += (double) pair[1] * pair[1];
    }
    double norm = Math.sqrt(squaredNorm1 * squaredNorm2);
    if (norm == 0.0) {
        // At least one text has no countable character: similarity is undefined, report 0.
        return 0.0;
    }
    return dot / norm;
}

/**
 * Adds the character counts of {@code doc} to slot {@code slot} (0 or 1) of the table.
 */
private static void countHanZi(String doc, int slot, Map<Integer, int[]> frequencies) {
    for (int i = 0; i < doc.length(); i++) {
        char ch = doc.charAt(i);
        if (!isHanZi(ch)) {
            continue;
        }
        int charIndex = getGB2312Id(ch);
        if (charIndex == -1) {
            continue;
        }
        frequencies.computeIfAbsent(charIndex, key -> new int[2])[slot]++;
    }
}
/**
 * Length pre-check for the similarity computation: the two texts are only comparable when
 * their length difference is at most half the length of the longer one.
 *
 * @param doc1 first text
 * @param doc2 second text
 * @return {@code false} if either text is null or the lengths differ too much,
 *         {@code true} otherwise
 */
private static boolean handleByLength(String doc1, String doc2) {
    if (doc1 == null || doc2 == null) {
        return false;
    }
    int length1 = doc1.length();
    int length2 = doc2.length();
    // Half the length of the longer text.
    int halfOfLonger = Math.max(length1, length2) / 2;
    return Math.abs(length1 - length2) <= halfOfLonger;
}
/**
 * Maps a character to its position in the GB2312 table.
 * <p>
 * The character is encoded with the "GB2312" charset. Only two-byte encodings are
 * accepted; the position is {@code (byte0 - 0xA1) * 94 + (byte1 - 0xA1)}.
 *
 * @param ch character to look up
 * @return the position, or -1 if the encoding is not exactly two bytes or the charset is
 *         unavailable
 */
public static short getGB2312Id(char ch) {
    byte[] encoded;
    try {
        encoded = String.valueOf(ch).getBytes("GB2312");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        return -1;
    }
    if (encoded.length != 2) {
        // Not a two-byte GB2312 character: unknown.
        return -1;
    }
    // Both bytes start at 0xA1; every row holds 94 characters.
    int row = (encoded[0] & 0xFF) - 0xA1;
    int column = (encoded[1] & 0xFF) - 0xA1;
    return (short) (row * 94 + column);
}
/**
 * Checks whether two texts contain exactly the same brand words from
 * {@code GlobalPojo.BRAND_WORDS}.
 *
 * @param doc1 first text
 * @param doc2 second text
 * @return {@code true} if no brand words are configured or both texts hit the same set of
 *         brand words, {@code false} otherwise
 */
private static boolean compareWithBrand(String doc1, String doc2) {
    if (null == GlobalPojo.BRAND_WORDS || GlobalPojo.BRAND_WORDS.isEmpty()) {
        return true;
    }
    Set<String> brandsInFirst = new HashSet<>();
    Set<String> brandsInSecond = new HashSet<>();
    for (String brand : GlobalPojo.BRAND_WORDS) {
        if (doc1.contains(brand)) {
            brandsInFirst.add(brand);
        }
        if (doc2.contains(brand)) {
            brandsInSecond.add(brand);
        }
    }
    // Same size and same members.
    return brandsInFirst.equals(brandsInSecond);
}
/**
 * Tells whether a character lies in the range U+4E00..U+9FA5.
 *
 * @param ch character to test
 * @return {@code true} if {@code ch} is inside that range, {@code false} otherwise
 */
public static boolean isHanZi(char ch) {
    boolean outsideRange = ch < 0x4E00 || ch > 0x9FA5;
    return !outsideRange;
}
}
package com.zhiwei.middleware.automatic.server.util;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.CompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.IncompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.QATextMark;
import com.zhiwei.base.entity.subclass.mark.VideoMark;
import com.zhiwei.base.filter.FilterInfo;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.enums.Fields;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import static com.zhiwei.middleware.automatic.server.config.GenericAttribute.SON_ID;
public class DataCollectionUtil {
/**
 * Counts how often {@code word} occurs in {@code text}. Overlapping occurrences are
 * counted, e.g. "aa" occurs twice in "aaa".
 *
 * @param word word to look for
 * @param text text to search in
 * @return number of occurrences; 0 when {@code word} is null or empty or {@code text} is
 *         null
 */
public static int calculateRate(String word, String text) {
    // A null text used to raise a NullPointerException; treat it as "no occurrence".
    if (word == null || word.isEmpty() || text == null) {
        return 0;
    }
    int rate = 0;
    int from = 0;
    int hit;
    while ((hit = text.indexOf(word, from)) >= 0) {
        rate++;
        // Continue one character after the start of the hit, so overlaps are counted.
        from = hit + 1;
    }
    return rate;
}
/**
 * Converts JSON records into {@code FilterInfo} objects of the mark class that belongs to
 * {@code typeB}. Each input object gets its {@code mgroup} field set to {@code group}
 * before it is parsed, i.e. the input objects are modified.
 *
 * @param list  records to convert
 * @param group project written into {@code mgroup}
 * @param typeB content type selecting the mark class
 * @return one {@code FilterInfo} per record, in input order
 * @throws IllegalArgumentException if {@code typeB} is not one of the four handled types
 */
public static List<FilterInfo> changeJSONList2FilterInfoList(List<JSONObject> list, String group, ClassB.TypeB typeB) {
    final Class<? extends CommonDO> markType;
    switch (typeB) {
        case INCOMPLETE:
            markType = IncompleteTextMark.class;
            break;
        case COMPLETE:
            markType = CompleteTextMark.class;
            break;
        case QA:
            markType = QATextMark.class;
            break;
        case VIDEO:
            markType = VideoMark.class;
            break;
        default:
            throw new IllegalArgumentException("未能解析到的typeB类型:" + typeB);
    }
    List<FilterInfo> filters = new ArrayList<>(list.size());
    for (JSONObject json : list) {
        json.put("mgroup", group);
        filters.add(JSONObject.parseObject(json.toJSONString(), markType).filterInfo());
    }
    return filters;
}
/**
 * Prepares JSON records for insertion: removes the {@code SON_ID} field and sets
 * {@code mgroup}, {@code mtag}, {@code mperson}, {@code cid} and {@code cname}.
 * When {@code mperson} is null or empty, {@code GenericAttribute.AUTO_PERSON} is used.
 *
 * @param list    records to modify in place
 * @param group   value for {@code mgroup}
 * @param mtag    value for {@code mtag}
 * @param mperson marker name, may be null or empty
 */
public static void supplementForInsert(List<JSONObject> list, String group, String mtag, String mperson) {
    final boolean anonymous = StringUtils.isEmpty(mperson);
    list.forEach(record -> {
        record.remove(SON_ID);
        record.put("mgroup", group);
        record.put("mtag", mtag);
        record.put("mperson", anonymous ? GenericAttribute.AUTO_PERSON : mperson);
        // Fixed fields.
        record.put("cid", GenericAttribute.AUTO_CID);
        record.put("cname", GenericAttribute.AUTO_CNAME);
    });
}
/**
 * Tells whether the example record of a template entry carries a tag.
 *
 * @param fatherVo template entry, may be {@code null}
 * @param fields   field-name set whose {@code mtag} name is looked up in the example
 * @return {@code true} if {@code fatherVo} is not null and its example has a non-null
 *         value for the tag field
 */
public static boolean hasTag(TemplateFatherVo fatherVo, Fields fields) {
    return null != fatherVo
            && null != fatherVo.getExample().getString(fields.mtag);
}
/**
 * Filters template entries by mark state and keyword expression.
 * <p>
 * {@code markFlag} 0 accepts every entry, 1 only entries whose example has a non-empty
 * {@code Fields.COMPLETE.mtag} value, any other value only entries without one. The
 * keyword expression is parsed by {@link #cutKeyword(String)} and evaluated by
 * {@link #isHit(List, String)} against the title, or against title + content when
 * {@code isTitle} is false. A null or empty expression accepts every text.
 *
 * @param list      entries to filter, may be {@code null}
 * @param character keyword expression ("|" = or, " " = and)
 * @param isTitle   whether only the title is searched
 * @param markFlag  mark-state selector
 * @return matching entries in input order; an empty list when {@code list} is null
 */
public static List<TemplateFatherVo> fuzzyMatch(List<TemplateFatherVo> list, String character, boolean isTitle,
        int markFlag) {
    if (list == null) {
        return Collections.emptyList();
    }
    // null means "do not care about the mark state".
    final Boolean wantMarked = markFlag == 0 ? null : Boolean.valueOf(markFlag == 1);
    // Split the keyword expression once; null means "no keyword filter".
    final List<List<String>> groups = StringUtils.isEmpty(character) ? null : cutKeyword(character);

    List<TemplateFatherVo> matched = new ArrayList<>();
    for (TemplateFatherVo candidate : list) {
        // Mark state of this aggregated entry.
        boolean marked = !StringUtils.isEmpty(candidate.getExample().getString(Fields.COMPLETE.mtag));
        String haystack = isTitle
                ? candidate.getTitle()
                : candidate.getTitle() + candidate.getContent();
        boolean stateAccepted = wantMarked == null || wantMarked == marked;
        if (stateAccepted && isHit(groups, haystack)) {
            matched.add(candidate);
        }
    }
    return matched;
}
/**
 * Parses a keyword expression into OR-groups of AND-terms: "|" separates alternatives and
 * a space separates terms that must all occur.
 * <p>
 * Empty terms and empty alternatives are dropped. They arise from input such as
 * {@code "a||b"}, {@code "|a"} or a double space, and an empty term is contained in every
 * string, so keeping it would make the whole expression match everything.
 *
 * @param keyword keyword expression, may be null or blank
 * @return list of alternatives, each a non-empty list of non-empty terms; empty when
 *         {@code keyword} is blank or holds no term
 */
public static List<List<String>> cutKeyword(String keyword) {
    List<List<String>> fuzzyList = new ArrayList<>();
    if (StringUtils.isBlank(keyword)) {
        return fuzzyList;
    }
    // "|" separates alternatives (or).
    for (String alternative : keyword.split("\\|")) {
        List<String> terms = new ArrayList<>();
        // " " separates terms that must all be present (and).
        for (String term : alternative.split(" ")) {
            if (!term.isEmpty()) {
                terms.add(term);
            }
        }
        if (!terms.isEmpty()) {
            fuzzyList.add(terms);
        }
    }
    return fuzzyList;
}
/**
 * Evaluates a parsed keyword expression against a text.
 *
 * @param fuzzyList alternatives, each a list of terms that must all be contained;
 *                  {@code null} means "no filter"
 * @param title     text to test, may be {@code null}
 * @return {@code true} if {@code fuzzyList} is null, or if {@code title} is not null and
 *         at least one alternative has all of its terms contained in {@code title}
 */
public static boolean isHit(List<List<String>> fuzzyList, String title) {
    if (fuzzyList == null) {
        return true;
    }
    if (title == null) {
        return false;
    }
    // One alternative is enough; inside an alternative every term is required.
    return fuzzyList.stream()
            .anyMatch(terms -> terms.stream().allMatch(term -> title.contains(term)));
}
/**
 * Sorts the entries in place by their father id, interpreted as a number, in descending
 * order and returns the requested page via {@code Tools.listPagedQuery}.
 *
 * @param list entries; sorted in place
 * @param page page number passed to {@code Tools.listPagedQuery}
 * @param size page size passed to {@code Tools.listPagedQuery}
 * @return the requested page
 */
public static List<TemplateFatherVo> getList(List<TemplateFatherVo> list, int page, int size) {
    list.sort((left, right) -> {
        Double rightId = Double.valueOf(right.getFatherId());
        Double leftId = Double.valueOf(left.getFatherId());
        // Larger ids first.
        return rightId.compareTo(leftId);
    });
    return Tools.listPagedQuery(list, page, size);
}
}
package com.zhiwei.middleware.automatic.server.util;
import com.zhiwei.base.category.*;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.CompleteText;
import com.zhiwei.base.entity.subclass.IncompleteText;
import com.zhiwei.base.entity.subclass.QAText;
import com.zhiwei.base.entity.subclass.Video;
import com.zhiwei.base.utils.MessageTypeUtils;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
public class DataUploadUtil {
/**
 * Derives the C1-C5 classification for a record from the upload info and copies it onto
 * {@code commonDO}.
 * <p>
 * {@code commonDO} also gets {@code foreign} set to 1 when the platform is "外媒" (else 0)
 * and, when the derived object has no real source, the real source of the upload info.
 * <p>
 * NOTE(review): the fields are written to {@code commonDO}, but the object returned is the
 * derived one, which does not receive the {@code foreign}/{@code realSource} values set
 * here - confirm which object callers are meant to use.
 *
 * @param commonDO       record to classify; modified in place
 * @param markUploadInfo upload info providing platform, source and real source
 * @return the derived object produced by {@code selfAdaptionCTypeAll}
 */
public static CommonDO defaultCTypeAll(CommonDO commonDO, MarkUploadInfo markUploadInfo) {
    CommonDO adapted = selfAdaptionCTypeAll(commonDO, markUploadInfo.getPlatform(), markUploadInfo.getSource());
    commonDO.setC1(adapted.getC1());
    commonDO.setC2(adapted.getC2());
    commonDO.setC3(adapted.getC3());
    commonDO.setC4(adapted.getC4());
    commonDO.setC5(adapted.getC5());
    boolean foreignMedia = "外媒".equals(markUploadInfo.getPlatform());
    commonDO.setForeign(foreignMedia ? 1 : 0);
    boolean realSourceMissing = StringUtils.isEmpty(adapted.getRealSource());
    if (realSourceMissing) {
        commonDO.setRealSource(markUploadInfo.getRealSource());
    }
    return adapted;
}
/**
 * Restores the record as the entity type implied by platform/source and fills in its
 * C1-C5 classification.
 * <p>
 * The platform/source pair is encoded and decoded into a default class. Depending on its
 * typeB the record is restored as complete text, incomplete text, QA text or video and
 * passed to the matching {@code MessageTypeUtils.set...RealSourceByUrl} helper. Two urls
 * are handled directly: complete texts containing "maimai.cn" and videos containing
 * "weishi.qq.com" get a fixed class, real source and {@code foreign = 0}. If the result
 * has no C2 afterwards, the default class is applied.
 *
 * @param commonDO record to classify
 * @param platform platform name from the upload
 * @param source   source name from the upload
 * @return the restored and classified object
 * @throws IllegalStateException if the default class has an unhandled typeB
 */
private static CommonDO selfAdaptionCTypeAll(CommonDO commonDO, String platform, String source) {
    ClassD fallback = ClassCodec.decodeClassD(getEndoceByPlatformAndSource(platform, source));
    CommonDO adapted;
    switch (fallback.typeB()) {
        case COMPLETE: {
            CompleteText article = CompleteText.restoreFromEs(commonDO.toJSON());
            // Maimai needs special treatment.
            if (article.getUrl().contains("maimai.cn")) {
                ClassD maimai = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.COMPLETE).selectC(ClassC.TypeC.UGC)
                        .selectD(ClassD.TypeD.脉脉行业头条);
                article.setRealSource("脉脉");
                article.setForeign(0);
                copyClassCodes(article, maimai);
                adapted = article;
            } else {
                adapted = MessageTypeUtils.setCompleteRealSourceByUrl(CompleteText.restoreFromEs(commonDO.toJSON()));
            }
            break;
        }
        case INCOMPLETE:
            adapted = MessageTypeUtils.setIncompleteRealSourceByUrl(IncompleteText.restoreFromEs(commonDO.toJSON()));
            break;
        case QA:
            adapted = MessageTypeUtils.setQATextRealSourceByUrl(QAText.restoreFromEs(commonDO.toJSON()));
            break;
        case VIDEO: {
            Video clip = Video.restoreFromEs(commonDO.toJSON());
            if (clip.getUrl().contains("weishi.qq.com")) {
                ClassD weishi = ClassA.selectA(ClassA.TypeA.VIDEO).selectB(ClassB.TypeB.VIDEO).selectC(ClassC.TypeC.UGC)
                        .selectD(ClassD.TypeD.短视频);
                clip.setRealSource("微视");
                clip.setForeign(0);
                copyClassCodes(clip, weishi);
                adapted = clip;
            } else {
                adapted = MessageTypeUtils.setVideoRealSourceByUrl(Video.restoreFromEs(commonDO.toJSON()));
            }
            break;
        }
        default:
            throw new IllegalStateException("selfAdaptionCTypeAll-无法识别的c2类型");
    }
    if (null == adapted.getC2()) {
        // The url did not identify the platform: fall back to the default class.
        copyClassCodes(adapted, fallback);
    }
    return adapted;
}

/**
 * Writes the five encodings of {@code codes} into C1-C5 of {@code target}.
 */
private static void copyClassCodes(CommonDO target, ClassD codes) {
    target.setC1(codes.encodeA());
    target.setC2(codes.encodeB());
    target.setC3(codes.encodeC());
    target.setC4(codes.encodeD());
    target.setC5(codes.combineEncode());
}
/**
 * Maps a platform name (and, for some platforms, the source name) to a combined class
 * encoding. All encodings use {@code ClassC.TypeC.UGC}.
 * <p>
 * Platforms that are not listed, as well as "网媒", "平媒", "自媒体" and "外媒", yield the
 * default: TEXT / COMPLETE / 网媒.
 *
 * @param platform platform name
 * @param source   source name; only read for the "今日头条" and "知乎" platforms
 * @return the combined encoding
 */
public static int getEndoceByPlatformAndSource(String platform, String source) {
    // Default: web media.
    Integer encode = ugcEncode(ClassA.TypeA.TEXT, ClassB.TypeB.COMPLETE, ClassD.TypeD.网媒);
    switch (platform) {
        case "网媒":
        case "平媒":
        case "自媒体":
        case "外媒":
            // These platforms use the default.
            break;
        case "微博":
            encode = ugcEncode(ClassA.TypeA.TEXT, ClassB.TypeB.INCOMPLETE, ClassD.TypeD.新浪微博);
            break;
        case "微信":
            encode = ugcEncode(ClassA.TypeA.TEXT, ClassB.TypeB.COMPLETE, ClassD.TypeD.微信公众号);
            break;
        case "今日头条":
            if ("微头条".equals(source)) {
                encode = ugcEncode(ClassA.TypeA.TEXT, ClassB.TypeB.INCOMPLETE, ClassD.TypeD.微头条);
            } else if (source.contains("问答")) {
                encode = ugcEncode(ClassA.TypeA.TEXT, ClassB.TypeB.QA, ClassD.TypeD.悟空问答);
            } else if (source.contains("视频")) {
                encode = ugcEncode(ClassA.TypeA.VIDEO, ClassB.TypeB.VIDEO, ClassD.TypeD.西瓜视频);
            }
            break;
        case "知乎":
            encode = "知乎专栏".equals(source)
                    ? ugcEncode(ClassA.TypeA.TEXT, ClassB.TypeB.COMPLETE, ClassD.TypeD.知乎专栏)
                    : ugcEncode(ClassA.TypeA.TEXT, ClassB.TypeB.QA, ClassD.TypeD.知乎);
            break;
        case "问答":
            encode = ugcEncode(ClassA.TypeA.TEXT, ClassB.TypeB.QA, ClassD.TypeD.问答);
            break;
        case "贴吧论坛":
            encode = ugcEncode(ClassA.TypeA.TEXT, ClassB.TypeB.QA, ClassD.TypeD.论坛);
            break;
        case "视频":
            encode = ugcEncode(ClassA.TypeA.VIDEO, ClassB.TypeB.VIDEO, ClassD.TypeD.网媒);
            break;
        case "抖音":
            encode = ugcEncode(ClassA.TypeA.VIDEO, ClassB.TypeB.VIDEO, ClassD.TypeD.抖音);
            break;
        case "Twitter":
            encode = ugcEncode(ClassA.TypeA.TEXT, ClassB.TypeB.INCOMPLETE, ClassD.TypeD.TWITTER);
            break;
        case "短视频":
            encode = ugcEncode(ClassA.TypeA.VIDEO, ClassB.TypeB.VIDEO, ClassD.TypeD.短视频);
            break;
    }
    return encode;
}

/**
 * Returns the combined encoding of the class selected by the given A/B/D types with
 * {@code ClassC.TypeC.UGC}.
 */
private static Integer ugcEncode(ClassA.TypeA typeA, ClassB.TypeB typeB, ClassD.TypeD typeD) {
    return ClassA.selectA(typeA).selectB(typeB).selectC(ClassC.TypeC.UGC).selectD(typeD)
            .combineEncode();
}
}
package com.zhiwei.middleware.automatic.server.util;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.nlp.vo.KResult;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
public class EventCollectionUtil {
/**
 * Sets {@code mperson}, {@code mgroup}, {@code cid} (100040002) and {@code cname} on the
 * source object of every mark info.
 *
 * @param list    mark infos whose source objects are modified in place
 * @param group   value for {@code mgroup}
 * @param mperson value for {@code mperson}
 */
public static void supplementForMarkInfoList(List<MarkInfo> list, String group, String mperson) {
    list.forEach(info -> {
        JSONObject source = info.getSourceObj();
        source.put("mperson", mperson);
        source.put("mgroup", group);
        // Fixed fields.
        source.put("cid", 100040002);
        source.put("cname", "上传标注补充采集");
    });
}
/**
 * Sets {@code mperson}, {@code mgroup}, {@code cid} (100040002) and {@code cname} on every
 * JSON record.
 *
 * @param list    records modified in place
 * @param group   value for {@code mgroup}
 * @param mperson value for {@code mperson}
 */
public static void supplementForInsert(List<JSONObject> list, String group, String mperson) {
    list.forEach(record -> {
        record.put("mperson", mperson);
        record.put("mgroup", group);
        // Fixed fields.
        record.put("cid", 100040002);
        record.put("cname", "上传标注补充采集");
    });
}
/**
 * Merges the old and the new noise list into a new list.
 * <p>
 * The longer list (the first one on a tie) is taken completely; from the other list only
 * entries whose title is not already present are appended.
 *
 * @param list1 first noise list
 * @param list2 second noise list
 * @return new list with the merged entries
 */
public static List<JSONObject> mergeNoiseList(List<JSONObject> list1, List<JSONObject> list2) {
    boolean firstIsBase = list1.size() >= list2.size();
    List<JSONObject> base = firstIsBase ? list1 : list2;
    List<JSONObject> extra = firstIsBase ? list2 : list1;
    List<JSONObject> merged = new ArrayList<>();
    addMergeNoiseList(merged, base, extra);
    return merged;
}
/**
 * Appends all entries of {@code longList} to {@code resList}, followed by those entries
 * of {@code shortList} whose title does not occur in {@code longList}.
 * <p>
 * Titles are read with {@code getString} on both sides. The lookup used to call
 * {@code get}, so a title stored as a non-String value never matched the String titles
 * collected from {@code longList}.
 *
 * @param resList   list that receives the merged entries
 * @param longList  entries that are always added
 * @param shortList entries that are added only when their title is new
 */
private static void addMergeNoiseList(List<JSONObject> resList, List<JSONObject> longList,
        List<JSONObject> shortList) {
    resList.addAll(longList);
    Set<String> knownTitles = longList.stream()
            .map(json -> json.getString("title"))
            .collect(Collectors.toSet());
    for (JSONObject json : shortList) {
        if (!knownTitles.contains(json.getString("title"))) {
            resList.add(json);
        }
    }
}
/**
 * Tells whether a group of event records is tagged, judged by its first record only.
 *
 * @param list records of the group, may be {@code null} or empty
 * @return {@code true} if the list has a first record with a non-empty {@code mtag}
 */
public static boolean hasTag(List<JSONObject> list) {
    return null != list
            && !list.isEmpty()
            && !StringUtils.isEmpty(list.get(0).getString("mtag"));
}
/**
 * Filters a title-keyed map by a keyword expression: "|" separates alternatives and a
 * space separates terms that must all be contained in the title.
 *
 * @param map       map keyed by title, may be {@code null}
 * @param character keyword expression, may be null or empty
 * @return {@code map} itself when the expression is null/empty or {@code map} is null;
 *         otherwise a new map holding the entries whose title satisfies at least one
 *         alternative
 */
public static Map<String, List<JSONObject>> fuzzyMatch(Map<String, List<JSONObject>> map, String character) {
    if (StringUtils.isEmpty(character) || null == map) {
        return map;
    }
    // Split into alternatives ("|"), each a list of required terms (" ").
    List<List<String>> alternatives = new ArrayList<>();
    for (String alternative : character.split("\\|")) {
        alternatives.add(new ArrayList<>(Arrays.asList(alternative.split(" "))));
    }
    Map<String, List<JSONObject>> matched = new HashMap<>();
    for (Map.Entry<String, List<JSONObject>> entry : map.entrySet()) {
        String title = entry.getKey();
        // Every term of one alternative must be contained; any alternative suffices.
        boolean hit = alternatives.stream()
                .anyMatch(terms -> terms.stream().allMatch(term -> title.contains(term)));
        if (hit) {
            matched.put(title, entry.getValue());
        }
    }
    return matched;
}
/**
 * Decodes a hash whose values are compressed JSON arrays of JSON strings.
 * <p>
 * Each value is decompressed with {@code Tools.gunzip}, parsed as an array of strings, and
 * every string is parsed into a {@code JSONObject}.
 *
 * @param map field name to compressed value
 * @return field name to the decoded list of JSON objects
 */
public static Map<String, List<JSONObject>> parseFromRedisHmStrWithGunZip(Map<String, String> map) {
    Map<String, List<JSONObject>> decoded = new HashMap<>();
    for (Map.Entry<String, String> field : map.entrySet()) {
        List<String> rawItems = JSONArray.parseArray(Tools.gunzip(field.getValue()), String.class);
        List<JSONObject> items = new ArrayList<>();
        for (String raw : rawItems) {
            items.add(JSONObject.parseObject(raw));
        }
        decoded.put(field.getKey(), items);
    }
    return decoded;
}
/**
 * Removes from the source data every record whose title is similar to a known noise title.
 * <p>
 * Titles on both sides are normalised with {@code Tools.filterSymbol}. A record counts as
 * noise when {@code CosineSimilarity.calculateTextSimWithBrand} of its title and any noise
 * title is at least {@code cosFreq}.
 *
 * @param noiseMap   noise records grouped by some key; only their titles are used
 * @param sourceList records to filter
 * @param cosFreq    similarity threshold
 * @return {@code sourceList} itself when {@code noiseMap} is null or empty, otherwise a
 *         new list without the noise records
 */
public static List<MarkInfo> reduceByNoise(Map<String, List<JSONObject>> noiseMap, List<MarkInfo> sourceList,
        double cosFreq) {
    if (null == noiseMap || noiseMap.isEmpty()) {
        return sourceList;
    }
    Set<String> noiseTitles = new HashSet<>();
    noiseMap.values().forEach(group ->
            group.forEach(json -> noiseTitles.add(Tools.filterSymbol(json.getString("title")))));

    List<MarkInfo> kept = new ArrayList<>();
    for (MarkInfo info : sourceList) {
        String title = Tools.filterSymbol(info.getSourceObj().getString("title"));
        // Reaching the threshold for any noise title marks the record as noise.
        boolean noisy = noiseTitles.stream()
                .anyMatch(noiseTitle -> CosineSimilarity.calculateTextSimWithBrand(title, noiseTitle) >= cosFreq);
        if (!noisy) {
            kept.add(info);
        }
    }
    return kept;
}
/**
 * Registers every cluster in {@code aggreeMap} and spreads an existing tag over the
 * members of its cluster.
 * <p>
 * For each cluster an empty list is put into {@code aggreeMap} under the cluster name. If
 * any member already has a non-empty {@code mtag} (see {@code getMtag}), that tag is
 * written to all members of the cluster.
 *
 * @param kResult    clusters whose data points are indexes into {@code sourceList}
 * @param aggreeMap  map that receives one empty list per cluster name
 * @param sourceList records addressed by the data points; modified in place
 * @return total number of members in clusters that received a tag
 */
public static int restoreForAggreeTitleMap(List<KResult<Integer>> kResult, Map<String, List<JSONObject>> aggreeMap, List<MarkInfo> sourceList) {
    int tagged = 0;
    for (KResult<Integer> cluster : kResult) {
        aggreeMap.put(cluster.getClusterName(), new ArrayList<>());
        String mtag = getMtag(cluster.getDataPoints(), sourceList);
        if (Objects.isNull(mtag)) {
            continue;
        }
        for (Integer memberIndex : cluster.getDataPoints()) {
            sourceList.get(memberIndex).getSourceObj().put("mtag", mtag);
        }
        tagged += cluster.getDataPoints().size();
    }
    return tagged;
}
/**
 * Finds the first non-empty {@code mtag} among the records at the given indexes.
 *
 * @param indexes    indexes into {@code sourceList}
 * @param sourceList records; null entries are skipped
 * @return the first tag for which {@code Tools.isEmpty} is false, or {@code null} if there
 *         is none
 */
private static String getMtag(List<Integer> indexes, List<MarkInfo> sourceList) {
    for (Integer index : indexes) {
        MarkInfo candidate = sourceList.get(index);
        if (Objects.isNull(candidate)) {
            continue;
        }
        String mtag = candidate.getSourceObj().getString("mtag");
        if (!Tools.isEmpty(mtag)) {
            return mtag;
        }
    }
    return null;
}
}
package com.zhiwei.middleware.automatic.server.util;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.category.ClassB.TypeB;
import com.zhiwei.base.entity.subclass.mark.*;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.search.SearchHit;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
/**
*
* @ClassName: MarkInfoUtil
* @Description: MarkInfo工具类
* @author shenjunjie
* @date 2019年7月26日 下午7:06:11
*/
public class MarkInfoUtil {
/**
 * Wraps a JSON record in a {@code MarkInfo} of the mark type selected by its {@code c2}
 * field.
 *
 * @param json record to convert
 * @return the {@code MarkInfo}, or {@code null} when {@code c2} is 0 or does not map to
 *         COMPLETE, INCOMPLETE, QA or VIDEO
 */
public static MarkInfo transformToMarkInfo(JSONObject json) {
    int c2 = json.getIntValue("c2");
    if (c2 == 0) {
        return null;
    }
    TypeB typeB = TypeB.fromEncode(c2);
    if (TypeB.COMPLETE == typeB) {
        return new MarkInfo(CompleteTextMark.restoreFromEs(json));
    }
    if (TypeB.INCOMPLETE == typeB) {
        return new MarkInfo(IncompleteTextMark.restoreFromEs(json));
    }
    if (TypeB.QA == typeB) {
        return new MarkInfo(QATextMark.restoreFromEs(json));
    }
    if (TypeB.VIDEO == typeB) {
        return new MarkInfo(VideoMark.restoreFromEs(json));
    }
    return null;
}
/**
 * Builds a {@code MarkInfo} from a search hit, choosing the mark type by the hit's
 * {@code c2} field.
 * <p>
 * Before conversion the hit's source map is updated: {@code mtag} is removed when the
 * given tag is null, otherwise it is overwritten with the given tag.
 *
 * @param hit  search hit whose source map is read and modified
 * @param mtag tag to apply, or {@code null} to clear the tag
 * @return the {@code MarkInfo}, or {@code null} when {@code c2} maps to none of COMPLETE,
 *         INCOMPLETE, QA or VIDEO
 * @throws NumberFormatException if {@code c2} is missing or not an integer
 */
public static MarkInfo distinguishMarkInfo (SearchHit hit, String mtag) {
    int c2 = Integer.parseInt(String.valueOf(hit.getSourceAsMap().get("c2")));
    if (Objects.isNull(mtag)) {
        hit.getSourceAsMap().remove("mtag");
    } else {
        hit.getSourceAsMap().put("mtag", mtag);
    }
    switch (ClassB.TypeB.fromEncode(c2)) {
        case COMPLETE:
            return new MarkInfo(CompleteTextMark.restoreFromEs(hit.getSourceAsMap()));
        case INCOMPLETE:
            return new MarkInfo(IncompleteTextMark.restoreFromEs(hit.getSourceAsMap()));
        case QA:
            return new MarkInfo(QATextMark.restoreFromEs(hit.getSourceAsMap()));
        case VIDEO:
            // The result used to be discarded here (missing return), so the case fell
            // through to default and video hits always produced null.
            return new MarkInfo(VideoMark.restoreFromEs(hit.getSourceAsMap()));
        default:
            return null;
    }
}
public static List<MarkInfo> transformToMarkInfo(List<JSONObject> list) {
List<MarkInfo> resList = new ArrayList<>();
list.forEach(obj -> {
resList.add(transformToMarkInfo(obj));
});
return resList;
}
public static List<MarkInfo> filterTitleNon(List<MarkInfo> list) {
List<MarkInfo> resList = new ArrayList<>();
list.forEach(info -> {
TypeB typeB = info.getTypeB();
if (TypeB.COMPLETE == typeB || TypeB.VIDEO == typeB) {
if (StringUtils.isNotEmpty((info.getSourceObj().get("title") + ""))) {
resList.add(info);
}
}
});
return resList;
}
}
package com.zhiwei.middleware.automatic.server.util;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import org.apache.commons.lang3.time.FastDateFormat;
import java.text.ParseException;
import java.util.*;
/**
 * Helpers that derive index names from timestamps.
 */
public class TimeUtil {
    public static final FastDateFormat CONTENT_DF = FastDateFormat.getInstance("yyyy-MM-dd HH:mm");
    public static final FastDateFormat TIME_FORMAT = FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss");
    public static final FastDateFormat DW_INDEX = FastDateFormat.getInstance("yyyyMM");
    public static final FastDateFormat MARK_INDEX = FastDateFormat.getInstance("yyyy");
    private static final String MARK_PRE = "mark2_";
    private static final List<String> INDEX_DW = Arrays.asList("complete_text_", "incomplete_text_", "qa_text_", "video_");

    /**
     * Returns the index names covering the given instant.
     *
     * @param time   epoch milliseconds
     * @param typeB  currently ignored: all data-warehouse prefixes are always returned
     * @param isMark when {@code true}, return the yearly mark index instead of the
     *               monthly data-warehouse indexes
     * @return index names for the instant
     */
    public static String[] getAccurateIndex(Long time, ClassB.TypeB typeB, boolean isMark) {
        Calendar calendar = calendarOf(time);
        int year = calendar.get(Calendar.YEAR);
        if (isMark) {
            return getIndexWithMark(MARK_PRE + year);
        }
        String suffix = year + twoDigitMonth(calendar);
        List<String> allEsPrefix = new ArrayList<>(INDEX_DW.size());
        for (String prefix : INDEX_DW) {
            allEsPrefix.add(prefix + suffix);
        }
        return allEsPrefix.toArray(new String[0]);
    }

    /**
     * Returns the mark indexes of the instant's year and the two preceding years, plus
     * the four monthly data-warehouse indexes of the instant's month.
     *
     * @param time epoch milliseconds, may be {@code null}
     * @return index names, or {@code null} when {@code time} is {@code null}
     */
    public static String[] getWholeIndexInMonth(Long time) {
        if (null == time) {
            return null;
        }
        Calendar calendar = calendarOf(time);
        int year = calendar.get(Calendar.YEAR);
        String monthStr = twoDigitMonth(calendar);
        List<String> resList = new ArrayList<>();
        resList.add(MARK_PRE + year);
        resList.add(MARK_PRE + (year - 1));
        resList.add(MARK_PRE + (year - 2));
        resList.add(getEsPrefixByTypeB(ClassB.TypeB.COMPLETE) + year + monthStr);
        resList.add(getEsPrefixByTypeB(ClassB.TypeB.INCOMPLETE) + year + monthStr);
        resList.add(getEsPrefixByTypeB(ClassB.TypeB.QA) + year + monthStr);
        resList.add(getEsPrefixByTypeB(ClassB.TypeB.VIDEO) + year + monthStr);
        return getIndexWithMark(resList);
    }

    /** Formats epoch milliseconds as {@code yyyyMM}. */
    public static String getDwIndex(Long time) {
        return DW_INDEX.format(time);
    }

    /**
     * Formats a {@code yyyy-MM-dd HH:mm:ss} string as {@code yyyyMM}.
     *
     * @return the formatted value, or {@code null} when {@code time} cannot be parsed
     */
    public static String getDwIndex(String time) {
        try {
            return DW_INDEX.format(TIME_FORMAT.parse(time).getTime());
        } catch (ParseException e) {
            return null;
        }
    }

    /** Formats epoch milliseconds as {@code yyyy}. */
    public static String getMarkIndex(Long time) {
        return MARK_INDEX.format(time);
    }

    /**
     * Formats a {@code yyyy-MM-dd HH:mm:ss} string as {@code yyyy}.
     *
     * @return the formatted value, or {@code null} when {@code time} cannot be parsed
     */
    public static String getMarkIndex(String time) {
        try {
            return MARK_INDEX.format(TIME_FORMAT.parse(time).getTime());
        } catch (ParseException e) {
            return null;
        }
    }

    /**
     * Builds the cross product of prefixes and index suffixes.
     *
     * @param index  index suffixes
     * @param typeBS types whose prefixes are used; {@code null} selects the mark prefix
     * @return every prefix combined with every suffix
     */
    public static String[] getEsIndex(Set<String> index, Set<ClassB.TypeB> typeBS) {
        List<String> prefixs = new ArrayList<>();
        if (Objects.nonNull(typeBS)) {
            for (ClassB.TypeB typeB : typeBS) {
                prefixs.add(getEsPrefixByTypeB(typeB));
            }
        } else {
            prefixs.add(MARK_PRE);
        }
        List<String> indexes = new ArrayList<>();
        for (String prefix : prefixs) {
            for (String i : index) {
                indexes.add(prefix + i);
            }
        }
        return indexes.toArray(new String[0]);
    }

    /**
     * Creates a calendar positioned at the given instant. A fresh instance is used per
     * call because Calendar is mutable; the previous shared static instance could be
     * modified by another thread between setTime and get.
     */
    private static Calendar calendarOf(long time) {
        Calendar calendar = Calendar.getInstance();
        calendar.setTimeInMillis(time);
        return calendar;
    }

    /** Returns the calendar's month as a zero-padded two-digit string (01-12). */
    private static String twoDigitMonth(Calendar calendar) {
        int month = calendar.get(Calendar.MONTH) + 1;
        return month < 10 ? "0" + month : month + "";
    }

    /** Maps a type to its index prefix; {@code null} for types without one. */
    private static String getEsPrefixByTypeB(ClassB.TypeB typeB) {
        switch (typeB) {
            case COMPLETE:
                return "complete_text_";
            case INCOMPLETE:
                return "incomplete_text_";
            case QA:
                return "qa_text_";
            case VIDEO:
                return "video_";
            default:
                return null;
        }
    }

    private static String[] getIndexWithMark(List<String> list) {
        return getIndexWithMark(list.toArray(new String[0]));
    }

    /** Returns the given indexes, with "mark-test" appended when running in test mode. */
    private static String[] getIndexWithMark(String... indexes) {
        List<String> res = new ArrayList<>(Arrays.asList(indexes));
        if (GenericAttribute.IS_TEST) {
            res.add("mark-test");
        }
        return res.toArray(new String[0]);
    }
}
package com.zhiwei.middleware.automatic.server.util;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.*;
import com.zhiwei.base.filter.FilterInfo;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
public class Tools {
// Single-character class removed by filterSymbol: Unicode punctuation, the listed
// symbols, whitespace, and the explicitly escaped zero-width / no-break / ideographic spaces.
private static final Pattern SYMBOL_PATTERN = Pattern
.compile("[\\p{P}+~$`^=丨|<>~`$^+=|<>¥×\\s\u200B\u200C\u200D\u00A0\u0020\u3000]");
/**
 * Converts each bean to a {@link JSONObject} by serializing it to JSON text and parsing
 * that text back.
 *
 * @param list beans to convert, may be {@code null}
 * @return converted objects in input order; an empty list when {@code list} is {@code null}
 */
public static <T> List<JSONObject> bean2JSON(List<T> list) {
    if (list == null) {
        return Collections.emptyList();
    }
    List<JSONObject> converted = new ArrayList<>(list.size());
    for (T bean : list) {
        String serialized = JSONObject.toJSONString(bean);
        converted.add(JSONObject.parseObject(serialized));
    }
    return converted;
}
/**
 * Copies an object by serializing it to JSON and parsing the text as {@code t}.
 *
 * @param obj object to copy
 * @param t   target class
 * @return the parsed copy
 */
public static <T> T deepCopyByJson(T obj, Class<T> t) {
    return JSON.parseObject(JSON.toJSONString(obj), t);
}
/**
 * Serializes every value to JSON and compresses it with {@link #gzip(String)}, keeping
 * the keys unchanged.
 *
 * @param map values keyed by hash field, may be {@code null}
 * @return a new map of key to compressed JSON, or {@code null} when {@code map} is {@code null}
 */
public static <T> Map<String, String> redisHmFormatWithGzip(Map<String, T> map) {
    if (map == null) {
        return null;
    }
    Map<String, String> compressed = new HashMap<>();
    for (Map.Entry<String, T> entry : map.entrySet()) {
        String json = JSONObject.toJSONString(entry.getValue());
        compressed.put(entry.getKey(), Tools.gzip(json));
    }
    return compressed;
}
/**
 * Returns one page of a list.
 *
 * @param list source list, may be {@code null}
 * @param page 1-based page number
 * @param size page size
 * @return a new list holding the page's elements; empty when {@code page} or {@code size}
 *         is not positive or the page lies beyond the end; {@code null} when
 *         {@code list} is {@code null}
 */
public static <T> List<T> listPagedQuery(List<T> list, int page, int size) {
    if (list == null) {
        return null;
    }
    List<T> pageItems = new ArrayList<>();
    if (page <= 0 || size <= 0 || list.isEmpty()) {
        return pageItems;
    }
    int first = (page - 1) * size;
    int limit = page * size;
    for (int i = first; i < limit && i < list.size(); i++) {
        pageItems.add(list.get(i));
    }
    return pageItems;
}
/**
 * Parses every JSON string value into an instance of {@code clazz}.
 *
 * @param strMap JSON text keyed by name, may be {@code null}
 * @param clazz  target class of the values
 * @return a new map with the parsed values; empty when {@code strMap} is {@code null}
 */
public static <T> Map<String, T> restoreTMap(Map<String, String> strMap, Class<T> clazz) {
    Map<String, T> restored = new HashMap<>();
    if (strMap != null) {
        for (Map.Entry<String, String> entry : strMap.entrySet()) {
            restored.put(entry.getKey(), JSON.parseObject(entry.getValue(), clazz));
        }
    }
    return restored;
}
/**
 * Splits a list into consecutive chunks of at most {@code limit} elements.
 * The chunks are {@link List#subList} views of the source list, not copies.
 *
 * @param list  source data
 * @param limit maximum chunk size
 * @param <T>   element type
 * @return the chunks in order; empty when {@code list} is empty
 */
public static<T> List<List<T>> spilt(List<T> list, int limit) {
    int pageTotal = (list.size() + limit - 1) / limit;
    List<List<T>> pages = new ArrayList<>(pageTotal);
    int lastPage = pageTotal - 1;
    for (int page = 0; page <= lastPage; page++) {
        int from = page * limit;
        // The final chunk takes whatever is left over.
        int to = (page == lastPage) ? list.size() : from + limit;
        pages.add(list.subList(from, to));
    }
    return pages;
}
/**
 * Strips the leading "http" or "https" scheme name from a URL so that both variants of
 * the same address produce the same text (the "://" separator is kept).
 *
 * <p>Only the prefix is removed. The previous implementation replaced every occurrence
 * of the scheme name, which also corrupted URLs embedded in the path or query string.
 *
 * @param url URL text, may be {@code null}
 * @return the URL without its leading scheme name; the input itself when it does not
 *         start with "http"; {@code null} for {@code null}
 */
public static String urlReplace(String url) {
    if (url == null) {
        return null;
    }
    if (url.startsWith("https")) {
        return url.substring("https".length());
    }
    if (url.startsWith("http")) {
        return url.substring("http".length());
    }
    return url;
}
/**
 * Generates a random UUID rendered without the dash separators.
 *
 * @return 32 hexadecimal characters
 */
public static String randomUUID() {
    String dashed = UUID.randomUUID().toString();
    return dashed.replace("-", "");
}
/**
 * Joins the non-null key parts with ":".
 *
 * <p>Null parts are skipped entirely. The previous index-based implementation appended
 * the separator to every non-final part, so a null last part left a dangling ":".
 *
 * @param keys key parts, individual entries may be {@code null}
 * @return the combined key; empty when there are no non-null parts
 */
public static String assembleKey(String... keys) {
    return Arrays.stream(keys)
            .filter(Objects::nonNull)
            .collect(Collectors.joining(":"));
}
/**
 * Matches a keyword expression against a text and reports how often each alternative hits.
 *
 * <p>{@code highWords} is split with {@link #cutKeyword(String)} into alternatives, each a
 * list of terms that must all occur. An alternative's rate is the lowest occurrence count
 * among its terms; alternatives with any missing term are dropped.
 *
 * @param text      text to search
 * @param highWords keyword expression
 * @return one single-entry map per matching alternative (terms joined by a space mapped
 *         to the rate), sorted by rate in descending order
 */
public static List<Map<String, Integer>> highWordMatch(String text, String highWords) {
    List<Map<String, Integer>> matches = new ArrayList<>();
    for (List<String> group : cutKeyword(highWords)) {
        int lowest = 0;
        for (String term : group) {
            int hits = calculateRate(term, text);
            if (hits == 0) {
                // One required term is missing, so the whole alternative fails.
                lowest = 0;
                break;
            }
            lowest = (lowest == 0) ? hits : Math.min(lowest, hits);
        }
        if (lowest > 0) {
            Map<String, Integer> entry = new HashMap<>(1);
            entry.put(String.join(" ", group), lowest);
            matches.add(entry);
        }
    }
    // Each map holds exactly one value: its rate. Highest rate first.
    matches.sort((left, right) -> {
        Integer leftRate = left.values().iterator().next();
        Integer rightRate = right.values().iterator().next();
        return Integer.compare(rightRate, leftRate);
    });
    return matches;
}
/**
 * Converts JSON documents to {@link FilterInfo} objects of the mark type selected by
 * {@code typeB}. Each input document gets its "mgroup" field set to {@code group}
 * before conversion, so the input objects are modified.
 *
 * @param list  documents to convert
 * @param group value written to each document's "mgroup" field
 * @param typeB type selecting the mark class used for parsing
 * @return the filter infos in input order
 * @throws IllegalArgumentException when {@code typeB} has no associated mark class
 */
public static List<FilterInfo> changeJSONList2FilterInfoList(List<JSONObject> list, String group, ClassB.TypeB typeB) {
    Class<? extends CommonDO> markClass = resolveMarkClass(typeB);
    List<FilterInfo> filterInfos = new ArrayList<>(list.size());
    for (JSONObject json : list) {
        json.put("mgroup", group);
        CommonDO mark = JSONObject.parseObject(json.toJSONString(), markClass);
        filterInfos.add(mark.filterInfo());
    }
    return filterInfos;
}

/** Picks the mark class that documents of the given type are parsed into. */
private static Class<? extends CommonDO> resolveMarkClass(ClassB.TypeB typeB) {
    switch (typeB) {
        case COMPLETE:
            return CompleteTextMark.class;
        case INCOMPLETE:
            return IncompleteTextMark.class;
        case QA:
            return QATextMark.class;
        case VIDEO:
            return VideoMark.class;
        default:
            throw new IllegalArgumentException("未能解析到的typeB类型:" + typeB);
    }
}
/**
 * Counts how many times a word occurs in a text. Occurrences may overlap: the search
 * resumes one character after the start of each match.
 *
 * @param word word to look for; {@code null} or empty yields 0
 * @param text text to search; {@code null} yields 0 instead of throwing
 * @return the number of occurrences
 */
public static int calculateRate(String word, String text) {
    if (word == null || word.isEmpty() || text == null) {
        return 0;
    }
    int rate = 0;
    for (int at = text.indexOf(word); at >= 0; at = text.indexOf(word, at + 1)) {
        rate++;
    }
    return rate;
}
/**
 * Parses a keyword expression into alternatives of required terms: "|" separates
 * alternatives (OR) and a space separates the terms within one alternative (AND).
 *
 * <p>Empty terms caused by repeated, leading or trailing spaces are discarded, as are
 * alternatives left without any term. Previously such empty terms were kept and, being
 * unmatchable, silently disabled the whole alternative.
 *
 * @param keyword keyword expression, may be {@code null} or blank
 * @return the alternatives in order; empty when {@code keyword} is {@code null} or blank
 */
public static List<List<String>> cutKeyword(String keyword) {
    List<List<String>> fuzzyList = new ArrayList<>();
    if (keyword == null || keyword.chars().allMatch(Character::isWhitespace)) {
        return fuzzyList;
    }
    for (String any : keyword.split("\\|")) {
        List<String> terms = new ArrayList<>();
        for (String and : any.split(" ")) {
            if (!and.isEmpty()) {
                terms.add(and);
            }
        }
        if (!terms.isEmpty()) {
            fuzzyList.add(terms);
        }
    }
    return fuzzyList;
}
/**
 * Resolves the type encoded in a document's "c2" field.
 *
 * @param json document to inspect
 * @return the type that {@code ClassB.TypeB.fromEncode} returns for the "c2" value
 */
public static ClassB.TypeB getTypeB(JSONObject json) {
    Integer encoded = json.getInteger("c2");
    return ClassB.TypeB.fromEncode(encoded);
}
/**
 * Removes every character matched by {@code SYMBOL_PATTERN} from a title.
 *
 * @param title title text, may be {@code null}
 * @return the title without the matched characters, or {@code null} for {@code null}
 */
public static String filterSymbol(String title) {
    return title == null ? null : SYMBOL_PATTERN.matcher(title).replaceAll("");
}
/**
 * Tells whether a value is {@code null} or an empty container.
 *
 * <p>Character sequences, collections and maps are empty when they have no elements.
 * Any other non-null value is never empty. Compared with the previous version, which
 * only recognised {@code List} and {@code String}, this also covers other collections
 * (for example {@code Set}) and other character sequences.
 *
 * @param obj value to test
 * @return {@code true} when {@code obj} is {@code null} or an empty container
 */
public static boolean isEmpty(Object obj) {
    if (obj == null) {
        return true;
    }
    if (obj instanceof CharSequence) {
        return ((CharSequence) obj).length() == 0;
    }
    if (obj instanceof Collection) {
        return ((Collection<?>) obj).isEmpty();
    }
    if (obj instanceof Map) {
        return ((Map<?, ?>) obj).isEmpty();
    }
    return false;
}
/**
 * Pauses the current thread for the given time. If the thread is interrupted while
 * waiting, the method returns early and leaves the thread's interrupt flag set.
 *
 * @param millis time to sleep in milliseconds
 */
public static void sleep(long millis) {
    try {
        Thread.sleep(millis);
    } catch (InterruptedException interrupted) {
        // Re-assert the flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
    }
}
/**
 * Merges two tag strings: attributes of {@code latestTag} overwrite attributes with the
 * same key in {@code originTag}, while attributes present in only one of them are kept.
 *
 * @param originTag existing tag string
 * @param latestTag new tag string
 * @return an empty string when {@code latestTag} is {@code null} or empty;
 *         {@code latestTag} itself when {@code originTag} is {@code null} or empty;
 *         otherwise the merged tag string
 */
public static String partialUpdateTag(String originTag, String latestTag) {
    Map<String, String> incoming = change2MapTag(latestTag);
    if (incoming == null) {
        return "";
    }
    Map<String, String> merged = change2MapTag(originTag);
    if (merged == null) {
        return latestTag;
    }
    merged.putAll(incoming);
    return mapTag2String(merged);
}
/**
 * Collects every match of a regular expression in a line of text.
 *
 * @param line    text to scan
 * @param pattern regular expression
 * @return the matched substrings in order of appearance; empty when nothing matches
 */
public static List<String> patternMatchFind(String line, String pattern) {
    List<String> found = new ArrayList<>();
    Matcher matcher = Pattern.compile(pattern).matcher(line);
    while (matcher.find()) {
        found.add(matcher.group());
    }
    return found;
}
/**
 * Extracts the host name of a URL and normalizes it with {@code hostUnified}.
 *
 * @param url URL text
 * @return the normalized host name
 * @throws RuntimeException when {@code url} is not a valid URL; the parsing failure is
 *                          attached as the cause
 */
public static String getHost(String url) {
    try {
        return hostUnified(new URL(url).getHost());
    } catch (MalformedURLException e) {
        // Chain the cause instead of printing it so the caller's logging sees the full failure.
        throw new RuntimeException("url不合法获取域名出错!!!" + url, e);
    }
}
/**
 * Validates a millisecond timestamp.
 *
 * <p>A timestamp is accepted when its decimal text (including a minus sign, if any) is
 * exactly 13 characters long and it lies between the fixed lower bound
 * {@code -639129600000} (1949-10-01 00:00 at UTC+8) and one year after the current time.
 *
 * @param time epoch milliseconds, may be {@code null}
 * @return {@code true} when the timestamp passes both checks
 */
public static boolean isLegalTime(Long time) {
    if (time == null || String.valueOf(time).length() != 13) {
        return false;
    }
    long earliest = -639129600000L;
    // Anything later than one year from now is rejected.
    long latest = System.currentTimeMillis() + 365 * 24 * 60 * 60 * 1000L;
    return time >= earliest && time <= latest;
}
/**
 * Compresses text with GZIP and encodes the result as Base64.
 *
 * <p>The text is always encoded as UTF-8, independent of the JVM's default charset, so
 * values written on one machine can be read on another.
 *
 * @param primStr text to compress
 * @return Base64 text of the compressed bytes; {@code null} or empty input is returned
 *         unchanged
 */
public static String gzip(String primStr) {
    if (primStr == null || primStr.length() == 0) {
        return primStr;
    }
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // Closing the GZIP stream writes the trailer, so it must happen before reading 'out'.
    try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
        gzip.write(primStr.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    } catch (IOException e) {
        // Best effort, as before: report the failure and encode whatever was written.
        e.printStackTrace();
    }
    return new String(new Base64().encode(out.toByteArray()), java.nio.charset.StandardCharsets.UTF_8);
}

/**
 * Reverses {@link #gzip(String)}: decodes Base64 text, decompresses it and decodes the
 * bytes as UTF-8.
 *
 * @param compressedStr Base64 text produced by {@link #gzip(String)}
 * @return the original text; {@code null} when the input is {@code null} or cannot be
 *         decompressed; an empty string for empty input, mirroring {@code gzip("")}
 */
public static String gunzip(String compressedStr) {
    if (compressedStr == null) {
        return null;
    }
    if (compressedStr.isEmpty()) {
        // gzip passes the empty string through untouched, so do the same here.
        return compressedStr;
    }
    byte[] compressed = new Base64().decode(compressedStr);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (GZIPInputStream ginzip = new GZIPInputStream(new ByteArrayInputStream(compressed))) {
        byte[] buffer = new byte[8192];
        int read;
        while ((read = ginzip.read(buffer)) != -1) {
            out.write(buffer, 0, read);
        }
        return new String(out.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Serializes each upload info to JSON and compresses it with {@link #gzip(String)}.
 *
 * @param list upload infos, may be {@code null}
 * @return the compressed entries in input order, or {@code null} when {@code list} is
 *         {@code null} or empty
 */
public static List<String> gzipWithUploadInfoList(List<UploadInfo> list) {
    boolean nothingToPack = (list == null) || list.isEmpty();
    if (nothingToPack) {
        return null;
    }
    List<String> packed = new ArrayList<>();
    list.forEach(info -> packed.add(gzip(JSONObject.toJSONString(info))));
    return packed;
}
/**
 * Restores {@link MarkInfo} objects from their compressed cache representation.
 *
 * <p>Each entry is decompressed with {@link #gunzip(String)} and parsed as JSON; its
 * "typeB" field selects the mark class used to parse "sourceObj". Only "COMPLETE" and
 * "VIDEO" are supported. The result has one element per input entry so positions stay
 * aligned with the input; entries of any other type yield {@code null}. Previously such
 * entries re-added the mark restored for the preceding entry.
 *
 * @param cachedMarkInfos compressed cache entries
 * @return restored marks in input order, {@code null} for unsupported entries
 */
public static List<MarkInfo> getMarkInfos(List<String> cachedMarkInfos) {
    List<MarkInfo> list = new ArrayList<>(cachedMarkInfos.size());
    for (String s : cachedMarkInfos) {
        JSONObject jsonObject = JSONObject.parseObject(Tools.gunzip(s));
        String typeB = jsonObject.get("typeB").toString();
        String sourceObj = jsonObject.get("sourceObj").toString();
        // Scoped to the iteration so a value can never leak from one entry to the next.
        MarkInfo markInfo;
        switch (typeB) {
            case "COMPLETE":
                markInfo = new MarkInfo(JSON.parseObject(sourceObj, CompleteTextMark.class));
                break;
            case "VIDEO":
                markInfo = new MarkInfo(JSON.parseObject(sourceObj, VideoMark.class));
                break;
            default:
                markInfo = null;
                break;
        }
        list.add(markInfo);
    }
    return list;
}
/**
 * Maps alternative host names of the same service to one canonical host.
 *
 * @param host host name
 * @return the canonical host for known aliases, otherwise {@code host} unchanged
 */
private static String hostUnified(String host) {
    // WeChat articles reached through the Sogou search front end.
    if ("weixin.sogou.com".equals(host)) {
        return "mp.weixin.qq.com";
    }
    // Douyin links.
    if ("www.douyin.com".equals(host)) {
        return "www.iesdouyin.com";
    }
    return host;
}
/**
 * Parses a tag string of the form ",key=value,key=value" into a map.
 *
 * <p>Empty segments are ignored. A segment gets its value only when it splits into
 * exactly a key and a value; otherwise the value is the empty string. Segments made up
 * solely of "=" have no key and are skipped; they used to raise
 * {@link ArrayIndexOutOfBoundsException}.
 *
 * @param mtag tag string, may be {@code null} or empty
 * @return the parsed attributes, or {@code null} when {@code mtag} is {@code null} or empty
 */
private static Map<String, String> change2MapTag(String mtag) {
    if (null == mtag || "".equals(mtag)) {
        return null;
    }
    Map<String, String> originMap = new HashMap<>();
    for (String origin : mtag.split(",")) {
        if ("".equals(origin)) {
            continue;
        }
        String[] parts = origin.split("=");
        if (parts.length == 0) {
            // "=".split("=") returns an empty array: there is no key to store.
            continue;
        }
        originMap.put(parts[0], parts.length == 2 ? parts[1] : "");
    }
    return originMap;
}
/**
 * Renders tag attributes as ",key=value" segments concatenated in the map's iteration order.
 *
 * @param tagMap attributes to render
 * @return the tag string, or {@code null} when {@code tagMap} is {@code null} or empty
 */
private static String mapTag2String(Map<String, String> tagMap) {
    if (tagMap == null || tagMap.isEmpty()) {
        return null;
    }
    StringBuilder rendered = new StringBuilder();
    tagMap.forEach((key, value) -> rendered.append(",").append(key).append("=").append(value));
    return rendered.toString();
}
}
package com.zhiwei.middleware.automatic.server.util;
import com.zhiwei.wechat.search.WechatReal;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.Proxy.Type;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
/**
 * Resolves WeChat links through a fixed pool of HTTP proxies. A proxy whose request
 * fails is shielded (skipped) until the shield set is cleared, which happens hourly and
 * whenever every proxy is shielded.
 */
public class WechatUtil {
    private static final Logger logger = LogManager.getLogger(WechatUtil.class);
    private static final ScheduledExecutorService SCHEDULE = Executors.newScheduledThreadPool(1);

    private WechatUtil() {
    }

    private static final List<Proxy> SCOKET_ADDRESS_CACHE = new ArrayList<>(5);
    private static final Map<Proxy, WechatReal> PROXY_CACHE = new HashMap<>(5);
    /**
     * Indexes (into {@code SCOKET_ADDRESS_CACHE}) of proxies that are temporarily
     * disabled. A concurrent set is used because it is modified both by callers and by
     * the scheduler thread.
     */
    private static final java.util.Set<Integer> SHIELDED = java.util.concurrent.ConcurrentHashMap.newKeySet();

    // Huawei Cloud high-anonymity proxies.
    static {
        SCOKET_ADDRESS_CACHE.add(new Proxy(Type.HTTP, new InetSocketAddress("122.112.137.194", 31128)));
        SCOKET_ADDRESS_CACHE.add(new Proxy(Type.HTTP, new InetSocketAddress("122.112.163.207", 31128)));
        SCOKET_ADDRESS_CACHE.add(new Proxy(Type.HTTP, new InetSocketAddress("119.3.86.205", 31128)));
        SCOKET_ADDRESS_CACHE.add(new Proxy(Type.HTTP, new InetSocketAddress("119.3.38.9", 31128)));
        SCOKET_ADDRESS_CACHE.add(new Proxy(Type.HTTP, new InetSocketAddress("121.36.135.139", 31128)));
        SCOKET_ADDRESS_CACHE.forEach(proxy -> PROXY_CACHE.put(proxy, new WechatReal()));
        SCHEDULE.scheduleAtFixedRate(WechatUtil::cleanShield, 1, 1, TimeUnit.HOURS);
    }

    /**
     * Resolves the real link behind a WeChat URL using one of the pooled proxies.
     *
     * @param url URL to resolve
     * @return the resolved link, or {@code null} when the request failed (the proxy used
     *         is shielded in that case)
     */
    public static String wechatRealLinkget(String url) {
        int index = selectProxy();
        Proxy proxy = SCOKET_ADDRESS_CACHE.get(index);
        try {
            return PROXY_CACHE.get(proxy).getRealLink(url, proxy);
        } catch (Exception e) {
            // Pass the exception as the trailing argument so the cause is logged too.
            logger.error("代理ip:{}或失效,暂时停用!", proxy.address(), e);
            SHIELDED.add(index);
        }
        return null;
    }

    /**
     * Picks the index of a proxy to use: a random one if it is not shielded, otherwise
     * the first unshielded one. When all proxies are shielded the shield set is cleared
     * and the random pick is used.
     */
    private static int selectProxy() {
        final int limit = SCOKET_ADDRESS_CACHE.size();
        // The cast must apply to the product. "(int) Math.random() * limit" truncates the
        // random value to 0 first and therefore always selected proxy 0.
        int index = (int) (Math.random() * limit);
        if (!SHIELDED.contains(index)) {
            return index;
        }
        for (int i = 0; i < limit; i++) {
            if (!SHIELDED.contains(i)) {
                return i;
            }
        }
        cleanShield();
        return index;
    }

    /** Releases every shielded proxy. */
    private static void cleanShield() {
        SHIELDED.clear();
    }

    // TODO: caller-supplied proxies are not supported yet.
}
package com.zhiwei.middleware.automatic.server.util;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * Converts between Weibo numeric mids and the base-62 suffix used in Weibo URLs.
 *
 * <p>A URL suffix is read from the right in groups of 4 base-62 characters; each group
 * corresponds to a group of 7 decimal digits of the mid. Every group except the leftmost
 * is zero-padded to its full width.
 *
 * @author 0xff
 * @date 2019-08-29 15:07:15
 */
public class WeiboMidUrlDealUtil {
    private static final Logger logger = LogManager.getLogger(WeiboMidUrlDealUtil.class);
    /** Base-62 digits in value order: 0-9, a-z, A-Z. */
    private static final String BASE62_DIGITS = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    /** Number of base-62 characters per URL group. */
    private static final int URL_GROUP_LENGTH = 4;
    /** Number of decimal digits per mid group. */
    private static final int MID_GROUP_LENGTH = 7;

    private WeiboMidUrlDealUtil() {
    }

    /**
     * Encodes a non-negative number in base 62.
     *
     * @param int10 number to encode
     * @return the base-62 text; empty for 0
     */
    private static String intToEnode62(int int10) {
        StringBuilder s62 = new StringBuilder();
        while (int10 != 0) {
            s62.insert(0, BASE62_DIGITS.charAt(int10 % 62));
            int10 /= 62;
        }
        return s62.toString();
    }

    /**
     * Decodes base-62 text into its decimal representation.
     *
     * @param str62 base-62 text
     * @return the decimal value as text
     */
    private static String str62toInt(String str62) {
        long i64 = 0;
        for (int i = 0; i < str62.length(); i++) {
            i64 = i64 * 62 + findindex(str62.charAt(i));
        }
        return Long.toString(i64);
    }

    /**
     * Looks up the value of a base-62 digit.
     *
     * @param t digit character
     * @return the digit's value; 0 for characters that are not base-62 digits
     */
    private static int findindex(char t) {
        int index = BASE62_DIGITS.indexOf(t);
        return index < 0 ? 0 : index;
    }

    /** Prepends zeros until the text is at least {@code width} characters long. */
    private static String leftPadZeros(String text, int width) {
        StringBuilder padded = new StringBuilder(text);
        while (padded.length() < width) {
            padded.insert(0, '0');
        }
        return padded.toString();
    }

    /**
     * Converts a URL suffix to a mid, e.g. "z0JH2lOMb" to "3501756485200075".
     *
     * @param mid URL suffix (base-62 text)
     * @return the numeric mid as text
     */
    public static String uid2Mid(String mid) {
        StringBuilder id = new StringBuilder();
        // Walk from the end towards the start in groups of 4 characters.
        for (int i = mid.length() - URL_GROUP_LENGTH; i > -URL_GROUP_LENGTH; i -= URL_GROUP_LENGTH) {
            int offset = i < 0 ? 0 : i;
            int len = i < 0 ? mid.length() % URL_GROUP_LENGTH : URL_GROUP_LENGTH;
            String digits = str62toInt(mid.substring(offset, offset + len));
            if (offset > 0) {
                // Every group but the leftmost is zero-padded to 7 digits.
                digits = leftPadZeros(digits, MID_GROUP_LENGTH);
            }
            id.insert(0, digits);
        }
        return id.toString();
    }

    /**
     * Converts a mid to a URL suffix, e.g. "3501756485200075" to "z0JH2lOMb".
     *
     * <p>Every group except the leftmost is zero-padded to 4 characters. The previous
     * padding loop compared against a bound that shrank as the text grew, so groups
     * encoding to one or two characters were padded to only three; it also identified
     * the leftmost group by assuming there are always exactly three groups.
     *
     * @param str10 numeric mid as text
     * @return the URL suffix
     * @throws NumberFormatException when {@code str10} contains non-digit characters
     */
    public static String mid2Uid(String str10) {
        StringBuilder mid = new StringBuilder();
        // Walk from the end towards the start in groups of 7 digits.
        for (int i = str10.length() - MID_GROUP_LENGTH; i > -MID_GROUP_LENGTH; i -= MID_GROUP_LENGTH) {
            int offset = i < 0 ? 0 : i;
            int len = i < 0 ? str10.length() % MID_GROUP_LENGTH : MID_GROUP_LENGTH;
            String encoded = intToEnode62(Integer.parseInt(str10.substring(offset, offset + len)));
            if (offset > 0) {
                encoded = leftPadZeros(encoded, URL_GROUP_LENGTH);
            }
            mid.insert(0, encoded);
        }
        return mid.toString();
    }

    /**
     * Extracts the mid from a Weibo URL.
     *
     * <p>For "weibo.cn/status" URLs the mid is the path segment after "status/". For
     * "weibo.com" URLs the last path segment (without query and fragment) is truncated
     * to 9 characters and converted with {@link #uid2Mid(String)}. Any other input is
     * returned trimmed.
     *
     * @param url URL text
     * @return the mid, or {@code null} when the conversion fails
     */
    public static String urlToMid(String url) {
        try {
            String mid = url.trim();
            if (url.contains("weibo.cn/status")) {
                return url.split("status/")[1].split("\\?")[0];
            }
            if (url.contains("weibo.com")) {
                String suffix = url.replaceAll("\\?.*|#.*|.*/", "");
                if (suffix.length() >= 9) {
                    mid = uid2Mid(suffix.substring(0, 9));
                }
            }
            return mid;
        } catch (Exception e) {
            logger.debug("错误链接{}链接转换mid出错{}", url, e.getMessage());
            return null;
        }
    }

    /**
     * Converts a batch of URLs to mids, skipping URLs that yield no mid.
     *
     * @param urlList URLs to convert
     * @return the mids in input order; empty when the batch cannot be processed
     */
    public static List<String> weiboToMidToList(List<String> urlList) {
        try {
            List<String> midList = new ArrayList<>();
            for (String url : urlList) {
                try {
                    String mid = urlToMid(url);
                    if (Objects.nonNull(mid) && !mid.isEmpty()) {
                        midList.add(mid);
                    }
                } catch (Exception e) {
                    logger.error("exception:", e);
                }
            }
            return midList;
        } catch (Exception e) {
            logger.error(" 微博 转 mid 出错 :", e);
        }
        return Collections.emptyList();
    }

    /**
     * Converts a batch of URLs to a map from mid to URL, skipping URLs that yield no mid.
     *
     * @param urlList URLs to convert
     * @return mid mapped to its source URL; empty when the batch cannot be processed
     */
    public static Map<String, String> weiboToMidToMap(List<String> urlList) {
        try {
            Map<String, String> rMap = new HashMap<>();
            for (String url : urlList) {
                try {
                    String mid = urlToMid(url);
                    if (Objects.nonNull(mid) && !mid.isEmpty()) {
                        rMap.put(mid, url);
                    }
                } catch (Exception e) {
                    logger.error("exception:", e);
                }
            }
            return rMap;
        } catch (Exception e) {
            logger.error(" 微博 转 mid 出错 :", e);
        }
        return Collections.emptyMap();
    }
}
server.port=7778
#path to redisson.yaml or redisson.json
spring.redis.redisson.file=classpath:redisson-local.yaml
dubbo.application.name=automatic-provider
dubbo.application.qos.enable=false
dubbo.registry.address=zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181
#dubbo.config-center.timeout=60000
dubbo.protocol.name=dubbo
dubbo.protocol.port=7779
dubbo.provider.timeout=60000
dubbo.registry.timeout=60000
dubbo.registry.version=*
dubbo.provider.group=zhiwei-automatic
dubbo.scan.basePackages=com.zhiwei.middleware.automatic.server.dubbo.service.impl
dubbo.monitor.protocol=registry
dubbo.application.shutwait=30s
#mongo
#primary.uri.marker=mongodb://qbjc:asSADf5ffs@115.236.59.88:30001/qbjc?authSource=admin
primary.uri.marker=mongodb://localhost:27017/localhost
primary.uri.hangzhou=mongodb://qbjc:asSADf5ffs@115.236.59.88:30001/qbjc?authSource=admin
#primary.uri=mongodb://qbjc:asSADf5ffs@202.107.192.94:17150/qbjc?authSource=admin
mongo.connectTimeout=30000
mongo.maxWaitTime=50000
mongo.dataBaseMarker=marker
mongo.hangzhouMarker=qbjc
# es
es.esClientAddresses=202.107.192.94:1443:qbjc-back:yuqing.zhiweidata.com,202.107.192.94:29400:elastic:qWxZRW42OHkuOhmF5AXX
es.clusterNodes=202.107.192.94:1443
es.clusterName=zhiweidata-new-es
es.httpClusterNodes=202.107.192.94:1443:middleware-automaticmark:auto.zhiweidata.com
#es.username=middleware-automaticmark
#es.password=auto.zhiweidata.com
es.username=joker
es.password=jokerdevops
middleware.zookeeperAddress=zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181
middleware.appName=automatic-provider
middleware.markGroup=zhiwei-mark-local-liuyu
middleware.filterGroup=local-filter
#kafka topic
crawler.topic = crawler-test_1
#kafka配置
# kafka服务器地址(可以多个)
spring.kafka.bootstrap-servers=192.168.0.11:9092,192.168.0.30:9092,192.168.0.35:9092
#生产者
spring.kafka.producer.key-serializer=org.apache.kafka.common.serialization.StringSerializer
spring.kafka.producer.value-serializer=org.apache.kafka.common.serialization.StringSerializer
spring.kafka.producer.compression-type=snappy
\ No newline at end of file
#spring.profiles.active=prod
#spring.profiles.active=dev
spring.profiles.active=local
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE xml>
<!-- log4j2 自身的日志级别 -->
<Configuration status="WARN">
<Properties> <!-- 配置日志文件输出目录 -->
<Property name="LOG_HOME">./log/</Property>
<property name="APP_NAME">automatic-center-server</property>
</Properties>
<Appenders>
<!-- 定义日志输出地 -->
<Console name="Console" target="SYSTEM_OUT">
<PatternLayout pattern="%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger{36} %L [%t] - %msg%n"/>
</Console>
<!--[%t]-->
<!-- INFO级别日志 -->
<RollingFile name="info_appender" fileName="${LOG_HOME}/${APP_NAME}.log"
filePattern="${LOG_HOME}/${APP_NAME}-%d{yyyy-MM-dd}-%i.log">
<PatternLayout pattern="%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger{36} %L [%t] - %msg%n"/>
<Filters>
<PushLogFilter onMatch="NEUTRAL" onMismatch="DENY"/>
</Filters>
<Policies>
<TimeBasedTriggeringPolicy/>
<SizeBasedTriggeringPolicy size="20 MB"/>
</Policies>
</RollingFile>
</Appenders>
<Loggers>
<Logger name="org.apache.curator" additivity="false"
level="trace">
<AppenderRef ref="Console" />
</Logger>
<Logger name="org.apache.zookeeper" additivity="false"
level="trace">
<AppenderRef ref="Console" />
</Logger>
<AsyncRoot level="info">
<AppenderRef ref="Console"/>
<AppenderRef ref="info_appender"/>
</AsyncRoot>
<Logger name="mylog" level="error" additivity="false">
<AppenderRef ref="Console"/>
</Logger>
</Loggers>
</Configuration>
prod.robot.push.address=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=e2218c6e-af6a-4296-9d75-7178b941a3b5
prod.robot.push.enable=false
prod.robot.push.filterclass=org.apache.dubbo.common.Version,com.alibaba.dubbo.common.Version,org.apache.dubbo.monitor.dubbo.DubboMonitor,com.alibaba.dubbo.monitor.dubbo.DubboMonitor
prod.robot.push.level=error
prod.robot.push.app.name=automatic-server-prod
dev.robot.push.address=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=e2218c6e-af6a-4296-9d75-7178b941a3b5
dev.robot.push.enable=false
dev.robot.push.filterclass=org.apache.dubbo.common.Version,com.alibaba.dubbo.common.Version,org.apache.dubbo.monitor.dubbo.DubboMonitor,com.alibaba.dubbo.monitor.dubbo.DubboMonitor
dev.robot.push.level=error
dev.robot.push.app.name=automatic-server-dev
local.robot.push.address=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=e2218c6e-af6a-4296-9d75-7178b941a3b5
local.robot.push.enable=false
local.robot.push.filterclass=org.apache.dubbo.common.Version,com.alibaba.dubbo.common.Version,org.apache.dubbo.monitor.dubbo.DubboMonitor,com.alibaba.dubbo.monitor.dubbo.DubboMonitor
local.robot.push.level=error
local.robot.push.app.name=automatic-center-local
\ No newline at end of file
---
singleServerConfig:
idleConnectionTimeout: 10000
connectTimeout: 10000
timeout: 3000
retryAttempts: 3
retryInterval: 1500
subscriptionsPerConnection: 5
address: "redis://192.168.0.225:6379"
subscriptionConnectionMinimumIdleSize: 1
subscriptionConnectionPoolSize: 50
connectionMinimumIdleSize: 32
connectionPoolSize: 64
database: 4
dnsMonitoringInterval: 5000
threads: 0
nettyThreads: 0
codec: !<org.redisson.codec.JsonJacksonCodec> {}
transportMode: "NIO"
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>middleware-automatic-center</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>
<module>middleware-automatic-center-client</module>
<module>middleware-automatic-center-webapi</module>
<module>middleware-automatic-center-client-autoconfigure</module>
<module>middleware-automatic-center-server</module>
</modules>
<name>middleware-automatic</name>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<spring-boot.version>2.7.0</spring-boot.version>
<log4j.version>2.11.2</log4j.version>
<slf4j.version>1.8.0-beta4</slf4j.version>
<dubbo.version>2.7.16</dubbo.version>
<zookeeper.version>3.4.12</zookeeper.version>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>${zookeeper.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.dubbo/dubbo -->
<dependency>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
<version>${dubbo.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- 日志依赖 -->
<!-- https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-1.2-api -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
<version>${log4j.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</dependencyManagement>
<!-- 打包管理 -->
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.4</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment