Commit 720a2127 by liuyu

2023年3月2日 第一次提交

parents
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.zhiwei</groupId>
<artifactId>middleware-automatic-center</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>middleware-automatic-center-autoconfigure</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
<automatic.version>1.0-SNAPSHOT</automatic.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-autoconfigure</artifactId>
<version>${spring-boot.version}</version>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>middleware-automatic-center-client</artifactId>
<version>${automatic.version}</version>
<exclusions>
<exclusion>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.zookeeper/zookeeper -->
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</dependency>
<dependency>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package com.zhiwei.middleware.automatic.configuration;
import com.zhiwei.middleware.automatic.server.core.*;
import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService;
import com.zhiwei.middleware.automatic.server.dubbo.service.CommonService;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataCollectionService;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataUploadService;
import org.springframework.boot.autoconfigure.AutoConfigureAfter;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring Boot auto-configuration that registers the automatic-center client beans.
 *
 * <p>The configuration is guarded by the {@code auto.matic.center.client.enable} property and
 * still applies when that property is missing ({@code matchIfMissing = true}). Each client bean
 * is only created when the application has not already defined a bean of the same type.
 * Every client wraps a service proxy obtained from {@link AutoMaticClientFactory}.
 */
@ConditionalOnProperty(prefix = "auto.matic.center.client", name = "enable", matchIfMissing = true)
@EnableConfigurationProperties(AutoMaticClientConfigurationProperties.class)
@AutoConfigureAfter(AutoMaticClientConfigurationProperties.class)
@Configuration
public class AutoMaticClientConfiguration {

    /**
     * Client wrapping {@link AutoMaticService}. The bean is named {@code authClient}.
     */
    @Bean
    @ConditionalOnMissingBean(AutoMaticClient.class)
    public AutoMaticClient authClient(AutoMaticClientConfigurationProperties properties) {
        AutoMaticService service = remoteService(AutoMaticService.class, properties);
        return new AutoMaticClient(service);
    }

    /**
     * Client wrapping {@link CommonService}.
     */
    @Bean
    @ConditionalOnMissingBean(CommonClient.class)
    public CommonClient commonClient(AutoMaticClientConfigurationProperties properties) {
        CommonService service = remoteService(CommonService.class, properties);
        return new CommonClient(service);
    }

    /**
     * Client wrapping {@link DataCollectionService}.
     */
    @Bean
    @ConditionalOnMissingBean(DataCollectionClient.class)
    public DataCollectionClient dataCollectionClient(AutoMaticClientConfigurationProperties properties) {
        DataCollectionService service = remoteService(DataCollectionService.class, properties);
        return new DataCollectionClient(service);
    }

    /**
     * Client wrapping {@link DataUploadService}.
     */
    @Bean
    @ConditionalOnMissingBean(DataUploadClient.class)
    public DataUploadClient dataUploadClient(AutoMaticClientConfigurationProperties properties) {
        DataUploadService service = remoteService(DataUploadService.class, properties);
        return new DataUploadClient(service);
    }

    /**
     * Asks the factory for a proxy of {@code serviceType} using the application, registry and
     * consumer settings bound on {@code properties}.
     *
     * @return whatever {@link AutoMaticClientFactory#createInstance} returns for these settings
     */
    private static <T> T remoteService(Class<T> serviceType, AutoMaticClientConfigurationProperties properties) {
        return AutoMaticClientFactory.createInstance(
                serviceType,
                properties.getApplication(),
                properties.getRegistry(),
                properties.getConsumer());
    }
}
package com.zhiwei.middleware.automatic.configuration;
import org.apache.dubbo.config.ApplicationConfig;
import org.apache.dubbo.config.ConsumerConfig;
import org.apache.dubbo.config.RegistryConfig;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Configuration properties bound under the {@code auto.matic.center.client} prefix.
 *
 * <p>Holds the three Dubbo configuration objects that {@code AutoMaticClientConfiguration}
 * passes on when it creates the client beans. All three are plain mutable properties with no
 * defaults; a section that is not configured stays {@code null}.
 */
@ConfigurationProperties("auto.matic.center.client")
public class AutoMaticClientConfigurationProperties {

    /** Dubbo application settings ({@code auto.matic.center.client.application.*}). */
    private ApplicationConfig application;

    /** Dubbo registry settings ({@code auto.matic.center.client.registry.*}). */
    private RegistryConfig registry;

    /** Dubbo consumer settings ({@code auto.matic.center.client.consumer.*}). */
    private ConsumerConfig consumer;

    // ---- accessors ----

    public ApplicationConfig getApplication() {
        return this.application;
    }

    public RegistryConfig getRegistry() {
        return this.registry;
    }

    public ConsumerConfig getConsumer() {
        return this.consumer;
    }

    // ---- mutators ----

    public void setApplication(ApplicationConfig application) {
        this.application = application;
    }

    public void setRegistry(RegistryConfig registry) {
        this.registry = registry;
    }

    public void setConsumer(ConsumerConfig consumer) {
        this.consumer = consumer;
    }
}
org.springframework.boot.autoconfigure.EnableAutoConfiguration=com.zhiwei.middleware.automatic.configuration.AutoMaticClientConfiguration
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.zhiwei</groupId>
<artifactId>middleware-automatic-center</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>middleware-automatic-center-client</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
<curator.version>2.12.0</curator.version>
<base.version>2.0.0-SNAPSHOT</base.version>
<easyexcel.version>2.1.2</easyexcel.version>
<json.version>1.2.58</json.version>
</properties>
<dependencies>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${json.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/easyexcel -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>${easyexcel.version}</version>
<scope>provided</scope>
</dependency>
<!-- Logging dependencies -->
<!-- https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-1.2-api -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
<!-- <scope>provided</scope>-->
<scope>compile</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<!-- <scope>provided</scope>-->
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>${curator.version}</version>
<!-- <scope>provided</scope>-->
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo-spring-boot-starter</artifactId>
<version>${dubbo.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
<!-- <scope>provided</scope>-->
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.zhiwei.base</groupId>
<artifactId>base-objects-application</artifactId>
<version>${base.version}</version>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package com.zhiwei.middleware.automatic.server.core;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import java.util.List;
import java.util.Map;
/**
 * Thin client facade over {@link AutoMaticService}.
 *
 * <p>Every method forwards its arguments unchanged to the wrapped service and returns the
 * service's result unchanged; no validation or error handling is added here.
 */
public class AutoMaticClient {

    /** The service every call is forwarded to. */
    private final AutoMaticService delegate;

    /**
     * @param autoMaticService service (typically a remote proxy) that performs the actual work
     */
    public AutoMaticClient(AutoMaticService autoMaticService) {
        this.delegate = autoMaticService;
    }

    /** Forwards to {@link AutoMaticService#autoMark(List)}. */
    public void autoMark(List<MarkInfo> infos) {
        this.delegate.autoMark(infos);
    }

    /** Forwards to {@link AutoMaticService#autoMarkMulti(List)}. */
    public void autoMarkMulti(List<MarkInfoMulti> infos) {
        this.delegate.autoMarkMulti(infos);
    }

    /** Forwards to {@link AutoMaticService#modifyTemplateTitle(String, String, String)}. */
    public boolean modifyTemplateTitle(String group, String templateTitle, String fixTag) {
        boolean modified = this.delegate.modifyTemplateTitle(group, templateTitle, fixTag);
        return modified;
    }

    /** Forwards to {@link AutoMaticService#getMupdateByTemplateTitle(String, String)}. */
    public List<String> getMupdateByTemplateTitle(String group, String templateTitle) {
        List<String> mupdates = this.delegate.getMupdateByTemplateTitle(group, templateTitle);
        return mupdates;
    }

    /** Forwards to {@link AutoMaticService#tryGetTemplateTitleByMupdate(String, String, String)}. */
    public String tryGetTemplateTitleByMupdate(String group, String title, String mupdate) {
        String templateTitle = this.delegate.tryGetTemplateTitleByMupdate(group, title, mupdate);
        return templateTitle;
    }

    /** Forwards to {@link AutoMaticService#compareWithTemplateTileOL(String, String)}. */
    public Map<String, Object> compareWithTemplateTileOL(String project, String title) {
        Map<String, Object> comparison = this.delegate.compareWithTemplateTileOL(project, title);
        return comparison;
    }

    /** Forwards to {@link AutoMaticService#resetTemplate(String, String)}. */
    public boolean resetTemplate(String group, String templateTitle) {
        boolean reset = this.delegate.resetTemplate(group, templateTitle);
        return reset;
    }
}
package com.zhiwei.middleware.automatic.server.core;
import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService;
import org.apache.dubbo.common.utils.StringUtils;
import org.apache.dubbo.config.ApplicationConfig;
import org.apache.dubbo.config.ConsumerConfig;
import org.apache.dubbo.config.ReferenceConfig;
import org.apache.dubbo.config.RegistryConfig;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.locks.ReentrantLock;
import static java.util.Objects.*;
import static java.util.Objects.isNull;
/**
 * Static factory that creates Dubbo service references and caches them for reuse.
 *
 * <p>References are cached in a process-wide map keyed by
 * {@code interface name | registry group | registry address | consumer group | consumer version}.
 * All creation and lookup happens while holding a single {@link ReentrantLock}.
 */
public class AutoMaticClientFactory {
    private static final Logger logger = LogManager.getLogger(AutoMaticClientFactory.class);
    private static final ReentrantLock lock = new ReentrantLock();
    private static final Map<String, ReferenceConfig<?>> REFERENCES = new HashMap<>();

    /**
     * Timeout value forced onto both the registry and the consumer configuration of every
     * newly created reference (presumably milliseconds, i.e. ten minutes - confirm against the
     * Dubbo version in use).
     */
    private static final int FORCED_TIMEOUT = 600000;

    private AutoMaticClientFactory() {
    }

    /**
     * Returns a proxy for {@code clazz}, reusing a cached reference when one exists for the
     * same cache key and otherwise building, caching and resolving a new one.
     *
     * <p>Note that the supplied configuration objects are mutated when a new reference is
     * built: QoS and the consumer check are switched off if unset, and both timeouts are
     * overwritten.
     *
     * @param clazz       service interface to obtain a proxy for
     * @param application Dubbo application configuration; must not be {@code null}
     * @param registry    Dubbo registry configuration; must not be {@code null}
     * @param consumer    Dubbo consumer configuration; must not be {@code null}
     * @return the service proxy, or {@code null} when anything fails (including a
     *         {@code null} configuration argument); the failure is logged, not rethrown
     */
    @SuppressWarnings("unchecked")
    public static <T> T createInstance(Class<T> clazz, ApplicationConfig application, RegistryConfig registry, ConsumerConfig consumer) {
        lock.lock();
        try {
            if (application == null) {
                throw new NullPointerException("获取dubbo配置文件失败");
            }
            // Validate the remaining configuration (registry address, consumer settings).
            requireNonNull(registry);
            requireNonNull(consumer);

            // Build the unique cache key.
            String cacheKey = StringUtils.join(
                    new String[] {
                        clazz.getName(),
                        registry.getGroup(),
                        registry.getAddress(),
                        consumer.getGroup(),
                        consumer.getVersion()
                    },
                    "|");

            ReferenceConfig<?> cached = REFERENCES.get(cacheKey);
            if (cached != null) {
                logger.info("{}实例已存在,返回复用实例", clazz.getSimpleName());
                return (T) cached.get();
            }

            ReferenceConfig<T> created = buildReference(clazz, application, registry, consumer);
            // The reference is cached before it is resolved, exactly as before.
            REFERENCES.put(cacheKey, created);
            // Resolve the target interface.
            return created.get();
        } catch (Exception e) {
            logger.error("创建{}实例出错", clazz.getName(), e);
            return null;
        } finally {
            lock.unlock();
        }
    }

    /**
     * Assembles (but does not resolve) a new reference for {@code clazz} from the given
     * configuration, applying the factory's defaults along the way.
     */
    private static <T> ReferenceConfig<T> buildReference(Class<T> clazz, ApplicationConfig application, RegistryConfig registry, ConsumerConfig consumer) {
        // QoS defaults to off when the application did not configure it.
        if (isNull(application.getQosEnable())) {
            application.setQosEnable(false);
        }
        ReferenceConfig<T> reference = new ReferenceConfig<>();
        reference.setApplication(application);

        // Registry settings.
        registry.setTimeout(FORCED_TIMEOUT);
        reference.setRegistry(registry);

        // The check defaults to off when the consumer did not configure it; the
        // reference falls back to the consumer configuration when its own is unset.
        if (isNull(consumer.isCheck())) {
            consumer.setCheck(false);
            reference.setCheck(false);
        }

        // Consumer settings.
        consumer.setTimeout(FORCED_TIMEOUT);
        reference.setConsumer(consumer);
        reference.setInterface(clazz);
        return reference;
    }

    /**
     * Convenience overload that builds the Dubbo configuration objects from plain strings and
     * delegates to {@link #createInstance(Class, ApplicationConfig, RegistryConfig, ConsumerConfig)}.
     *
     * @param clazz    service interface to obtain a proxy for
     * @param registry registry address passed to the {@link RegistryConfig} constructor
     * @param group    consumer group
     * @param appName  application name
     * @return the service proxy, or {@code null} on failure
     */
    public static <T> T createInstance(Class<T> clazz, String registry, String group, String appName) {
        ApplicationConfig applicationConfig = new ApplicationConfig();
        RegistryConfig registryConfig = new RegistryConfig(registry);
        applicationConfig.setName(appName);
        ConsumerConfig consumerConfig = new ConsumerConfig();
        consumerConfig.setGroup(group);
        return createInstance(clazz, applicationConfig, registryConfig, consumerConfig);
    }

    /**
     * Wraps a service proxy in an auto-marking client.
     *
     * @param autoMaticService service proxy to wrap
     * @return a new {@link AutoMaticClient} around {@code autoMaticService}
     */
    public static AutoMaticClient getAutoMaticClient(AutoMaticService autoMaticService) {
        return new AutoMaticClient(autoMaticService);
    }
}
package com.zhiwei.middleware.automatic.server.core;
import com.zhiwei.middleware.automatic.server.dubbo.service.CommonService;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import java.util.List;
/**
 * Thin client facade over {@link CommonService}.
 *
 * <p>Each method hands its arguments straight to the wrapped service and returns the service's
 * answer as-is.
 */
public class CommonClient {

    /** The service every call is forwarded to. */
    private final CommonService delegate;

    /**
     * @param commonService service (typically a remote proxy) that performs the actual work
     */
    public CommonClient(CommonService commonService) {
        this.delegate = commonService;
    }

    /** Forwards to {@link CommonService#generateAggreeOrder()}. */
    public String generateAggreeOrder() {
        String orderId = this.delegate.generateAggreeOrder();
        return orderId;
    }

    /** Forwards to {@link CommonService#appendAggreeOrder(String, List)}. */
    public boolean appendAggreeOrder(String id, List<AggreeDTO> list) {
        boolean appended = this.delegate.appendAggreeOrder(id, list);
        return appended;
    }

    /** Forwards to {@link CommonService#startAggree(String)}. */
    public boolean startAggree(String id) {
        boolean started = this.delegate.startAggree(id);
        return started;
    }

    /** Forwards to {@link CommonService#startAggree(String, double)}. */
    public boolean startAggree(String id, double limit) {
        boolean started = this.delegate.startAggree(id, limit);
        return started;
    }

    /** Forwards to {@link CommonService#getAggreeResult(String)}. */
    public CommonAggreeResult getAggreeResult(String id) {
        CommonAggreeResult result = this.delegate.getAggreeResult(id);
        return result;
    }

    /** Forwards to {@link CommonService#getAggreeResult(String, int, int)}. */
    public CommonAggreeResult getAggreeResult(String id, int page, int pageLimit) {
        CommonAggreeResult result = this.delegate.getAggreeResult(id, page, pageLimit);
        return result;
    }
}
package com.zhiwei.middleware.automatic.server.core;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataCollectionService;
import java.util.List;
import java.util.Map;
/**
 * Thin client facade over {@link DataCollectionService}.
 *
 * <p>Each method hands its arguments straight to the wrapped service, in the same order, and
 * returns the service's answer as-is.
 */
public class DataCollectionClient {

    /** The service every call is forwarded to. */
    private final DataCollectionService delegate;

    /**
     * @param dataCollectionService service (typically a remote proxy) that performs the actual work
     */
    public DataCollectionClient(DataCollectionService dataCollectionService) {
        this.delegate = dataCollectionService;
    }

    // ---- cache and data-set lifecycle ----

    /** Forwards to {@code DataCollectionService.cleanCache}. */
    public void cleanCache(String group, String id) {
        this.delegate.cleanCache(group, id);
    }

    /** Forwards to {@code DataCollectionService.cleanCacheExceptNoise}. */
    public void cleanCacheExceptNoise(String group, String id) {
        this.delegate.cleanCacheExceptNoise(group, id);
    }

    /** Forwards to {@code DataCollectionService.addDataCollection}. */
    public void addDataCollection(String group, String id, List<String> compressedlist) {
        this.delegate.addDataCollection(group, id, compressedlist);
    }

    /** Forwards to {@code DataCollectionService.startAggree}. */
    public void startAggree(String group, String id, String highWords) {
        this.delegate.startAggree(group, id, highWords);
    }

    // ---- tag modification ----

    /** Forwards to {@code DataCollectionService.batchModifyFatherTag}. */
    public boolean batchModifyFatherTag(String group, String id, List<String> fatherIds, String mtag, String mperson,
            ClassB.TypeB typeB) {
        boolean modified = this.delegate.batchModifyFatherTag(group, id, fatherIds, mtag, mperson, typeB);
        return modified;
    }

    /** Forwards to {@code DataCollectionService.modifyFatherTag}. */
    public boolean modifyFatherTag(String group, String id, String fatherId, String mtag, String mperson, ClassB.TypeB typeB) {
        boolean modified = this.delegate.modifyFatherTag(group, id, fatherId, mtag, mperson, typeB);
        return modified;
    }

    /** Forwards to {@code DataCollectionService.modifySonTag}. */
    public boolean modifySonTag(String group, String id, String fatherId, String sonId, String mtag, String mperson,
            ClassB.TypeB typeB) {
        boolean modified = this.delegate.modifySonTag(group, id, fatherId, sonId, mtag, mperson, typeB);
        return modified;
    }

    // ---- noise set ----

    /** Forwards to {@code DataCollectionService.throwIntoNoise}. */
    public boolean throwIntoNoise(String group, String id, String fatherId, ClassB.TypeB typeB) {
        boolean moved = this.delegate.throwIntoNoise(group, id, fatherId, typeB);
        return moved;
    }

    /** Forwards to {@code DataCollectionService.batchThrowIntoNoise}. */
    public boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, ClassB.TypeB typeB) {
        boolean moved = this.delegate.batchThrowIntoNoise(group, id, fatherIds, typeB);
        return moved;
    }

    /** Forwards to {@code DataCollectionService.restoreFromNoise}. */
    public boolean restoreFromNoise(String group, String id, String fatherId, ClassB.TypeB typeB) {
        boolean restored = this.delegate.restoreFromNoise(group, id, fatherId, typeB);
        return restored;
    }

    // ---- paged queries ----

    /** Forwards to {@code DataCollectionService.getFatherTitles}. */
    public Map<String, Object> getFatherTitles(String group, String id, int page, int size, boolean isAsc,
            String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag) {
        Map<String, Object> titles =
                this.delegate.getFatherTitles(group, id, page, size, isAsc, keyword, typeB, isTitle, markFlag);
        return titles;
    }

    /** Forwards to {@code DataCollectionService.getSonTitles}. */
    public Map<String, Object> getSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc,
            String keyword, ClassB.TypeB typeB) {
        Map<String, Object> titles =
                this.delegate.getSonTitles(group, id, fatherId, page, size, isAsc, keyword, typeB);
        return titles;
    }

    /** Forwards to {@code DataCollectionService.getNoiseFatherTitles}. */
    public Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size, boolean isAsc,
            String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag) {
        Map<String, Object> titles =
                this.delegate.getNoiseFatherTitles(group, id, page, size, isAsc, keyword, typeB, isTitle, markFlag);
        return titles;
    }

    /** Forwards to {@code DataCollectionService.getNoiseSonTitles}. */
    public Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId, int page, int size,
            boolean isAsc, String keyword, ClassB.TypeB typeB) {
        Map<String, Object> titles =
                this.delegate.getNoiseSonTitles(group, id, fatherId, page, size, isAsc, keyword, typeB);
        return titles;
    }

    // ---- insertion and progress ----

    /** Forwards to {@code DataCollectionService.checkedThenInsert}. */
    public void checkedThenInsert(String group, String id) {
        this.delegate.checkedThenInsert(group, id);
    }

    /** Forwards to {@code DataCollectionService.getAggreResultNow}. */
    public int getAggreResultNow(String group, String id) {
        int status = this.delegate.getAggreResultNow(group, id);
        return status;
    }

    /** Forwards to {@code DataCollectionService.getInsertResultNow}. */
    public int getInsertResultNow(String group, String id) {
        int status = this.delegate.getInsertResultNow(group, id);
        return status;
    }
}
package com.zhiwei.middleware.automatic.server.core;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataUploadService;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import java.util.Map;
/**
 * Thin client facade over {@link DataUploadService}.
 *
 * <p>Each method hands its arguments straight to the wrapped service, in the same order, and
 * returns the service's answer as-is.
 */
public class DataUploadClient {

    /** The service every call is forwarded to. */
    private final DataUploadService delegate;

    /**
     * @param dataUploadService service (typically a remote proxy) that performs the actual work
     */
    public DataUploadClient(DataUploadService dataUploadService) {
        this.delegate = dataUploadService;
    }

    /** Forwards to {@code DataUploadService.addUploadList}. */
    public void addUploadList(String group, String id, String sourceStr) {
        this.delegate.addUploadList(group, id, sourceStr);
    }

    /** Forwards to {@code DataUploadService.startUpload}. */
    public void startUpload(String group, String id, String mperson,
            UploadInfo.MtagType mtagType, UploadInfo.FilterType filterType, String projectId, InsertType insertType) {
        this.delegate.startUpload(group, id, mperson, mtagType, filterType, projectId, insertType);
    }

    /** Forwards to {@code DataUploadService.getUploadStatus}. */
    public Map<String, Object> getUploadStatus(String group, String id) {
        Map<String, Object> status = this.delegate.getUploadStatus(group, id);
        return status;
    }

    /** Forwards to {@code DataUploadService.getUploadInfoList}. */
    public Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc,
            String searchField, String keyword, UploadInfo.UploadType uploadType) {
        Map<String, Object> infoPage =
                this.delegate.getUploadInfoList(group, id, page, size, isAsc, searchField, keyword, uploadType);
        return infoPage;
    }

    /** Forwards to {@code DataUploadService.getDataType}. */
    public UploadInfo.DataType getDataType(JSONObject json, ClassB.TypeB typeB) {
        UploadInfo.DataType dataType = this.delegate.getDataType(json, typeB);
        return dataType;
    }

    /** Forwards to {@code DataUploadService.cleanUploadResult}. */
    public void cleanUploadResult(String group, String id) {
        this.delegate.cleanUploadResult(group, id);
    }
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import java.util.List;
import java.util.Map;
/**
 * Service contract for the auto-marking operations exposed to clients.
 */
public interface AutoMaticService {

    /**
     * Submits a list of mark infos.
     * NOTE(review): undocumented in the original; presumably triggers automatic marking - confirm.
     *
     * @param markInfos the mark infos to process
     */
    void autoMark(List<MarkInfo> markInfos);

    /**
     * Submits a list of multi mark infos.
     * NOTE(review): undocumented in the original; presumably the multi-value variant of
     * {@link #autoMark(List)} - confirm.
     *
     * @param markInfoMultis the multi mark infos to process
     */
    void autoMarkMulti(List<MarkInfoMulti> markInfoMultis);

    /**
     * Corrects the markTag of a template title; the entry is added when it does not exist yet.
     *
     * @param group         project group
     * @param templateTitle template title
     * @param fixTag        the correct tag
     * @return not documented in the original; presumably whether the change succeeded
     */
    boolean modifyTemplateTitle(String group, String templateTitle, String fixTag);

    /**
     * Fetches data by template title (only the latest 100 entries).
     *
     * @param group         project
     * @param templateTitle template title
     * @return feature values
     */
    List<String> getMupdateByTemplateTitle(String group, String templateTitle);

    /**
     * Tries to look up a template title from a title and a feature value.
     *
     * @param group   project
     * @param title   title
     * @param mupdate feature value
     * @return template title
     */
    String tryGetTemplateTitleByMupdate(String group, String title, String mupdate);

    /**
     * Matches a title online against the existing aggregated titles of a project group.
     *
     * @param project project
     * @param title   title
     * @return the match result
     */
    Map<String, Object> compareWithTemplateTileOL(String project, String title);

    /**
     * Resets an auto-marking template.
     *
     * @param group         project
     * @param templateTitle template title
     * @return whether the reset succeeded
     */
    boolean resetTemplate(String group, String templateTitle);
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import java.util.List;
/**
 * Service contract for the generic aggregation workflow: obtain a task id, append data to it,
 * start the aggregation, then read the result.
 */
public interface CommonService {

    /**
     * Obtains a task id (new API).
     *
     * @return the task id
     */
    String generateAggreeOrder();

    /**
     * Adds data to the task with the given id (new API).
     *
     * @param id   task id
     * @param list data to append
     * @return not documented in the original; presumably whether the data was accepted
     */
    boolean appendAggreeOrder(String id, List<AggreeDTO> list);

    /**
     * Aggregates the task's data using bisecting k-means.
     *
     * @param id task id
     * @return not documented in the original; presumably whether aggregation was started
     */
    boolean startAggree(String id);

    /**
     * Aggregates the task's data using bisecting k-means.
     *
     * @param id    task id
     * @param limit not documented in the original; presumably a clustering threshold - confirm
     * @return not documented in the original; presumably whether aggregation was started
     */
    boolean startAggree(String id, double limit);

    /**
     * Gets the aggregation result (returns the first page by default).
     *
     * @param id task id
     * @return the aggregation result
     */
    CommonAggreeResult getAggreeResult(String id);

    /**
     * Gets the aggregation result (paged).
     *
     * @param id        task id
     * @param page      page to fetch
     * @param pageLimit page size limit
     * @return the aggregation result for the requested page
     */
    CommonAggreeResult getAggreeResult(String id, int page, int pageLimit);
}
\ No newline at end of file
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.category.ClassB.TypeB;
import java.util.List;
import java.util.Map;
/**
 * Data collection module service.
 *
 * <p>Parameters named {@code group} and {@code id} appear on every method; the original
 * documentation does not describe them further (presumably the project group and the
 * data-set/task id - confirm with the implementation).
 *
 * @author SJJ
 * @date 2020-04-07 15:02:05
 */
public interface DataCollectionService {

    /**
     * Clears all caches.
     *
     * @param group group
     * @param id    id
     */
    void cleanCache(String group, String id);

    /**
     * Clears all caches but keeps the noise set.
     *
     * @param group group
     * @param id    id
     */
    void cleanCacheExceptNoise(String group, String id);

    /**
     * Adds the base data set.
     *
     * @param group          group
     * @param id             id
     * @param compressedlist data entries to add (presumably compressed strings - confirm)
     */
    void addDataCollection(String group, String id, List<String> compressedlist);

    /**
     * Starts aggregation.
     *
     * @param group     group
     * @param id        id
     * @param highWords not documented in the original
     */
    void startAggree(String group, String id, String highWords);

    /**
     * Batch-modifies parent template tags (the child tags belonging to them are modified too).
     *
     * @param group     group
     * @param id        id
     * @param fatherIds ids of the parent entries
     * @param mtag      tag to apply
     * @param mperson   not documented in the original
     * @param typeB     not documented in the original
     * @return not documented in the original; presumably whether the change succeeded
     */
    boolean batchModifyFatherTag(String group, String id, List<String> fatherIds, String mtag, String mperson,
            TypeB typeB);

    /**
     * Modifies a parent template tag (the child tags belonging to it are modified too).
     *
     * @param group    group
     * @param id       id
     * @param fatherId id of the parent entry
     * @param mtag     tag to apply
     * @param mperson  not documented in the original
     * @param typeB    not documented in the original
     * @return not documented in the original; presumably whether the change succeeded
     */
    boolean modifyFatherTag(String group, String id, String fatherId, String mtag, String mperson, TypeB typeB);

    /**
     * Modifies a child tag.
     *
     * @param group    group
     * @param id       id
     * @param fatherId id of the parent entry
     * @param sonId    id of the child entry
     * @param mtag     tag to apply
     * @param mperson  not documented in the original
     * @param typeB    not documented in the original
     * @return not documented in the original; presumably whether the change succeeded
     */
    boolean modifySonTag(String group, String id, String fatherId, String sonId, String mtag, String mperson,
            TypeB typeB);

    /**
     * Moves an entry into the noise set.
     *
     * @param group    group
     * @param id       id
     * @param fatherId id of the parent entry
     * @param typeB    not documented in the original
     * @return not documented in the original; presumably whether the move succeeded
     */
    boolean throwIntoNoise(String group, String id, String fatherId, TypeB typeB);

    /**
     * Moves several entries into the noise set.
     *
     * @param group     group
     * @param id        id
     * @param fatherIds ids of the parent entries
     * @param typeB     not documented in the original
     * @return not documented in the original; presumably whether the move succeeded
     */
    boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, TypeB typeB);

    /**
     * Restores an entry from the noise set.
     *
     * @param group    group
     * @param id       id
     * @param fatherId id of the parent entry
     * @param typeB    not documented in the original
     * @return not documented in the original; presumably whether the restore succeeded
     */
    boolean restoreFromNoise(String group, String id, String fatherId, TypeB typeB);

    /**
     * Gets a page of parent title information.
     *
     * @param group    group
     * @param id       id
     * @param page     page to fetch
     * @param size     page size
     * @param isAsc    sort direction flag
     * @param keyword  search keyword
     * @param typeB    not documented in the original
     * @param isTitle  not documented in the original
     * @param markFlag not documented in the original
     * @return the requested page
     */
    Map<String, Object> getFatherTitles(String group, String id, int page, int size, boolean isAsc,
            String keyword, TypeB typeB, boolean isTitle, int markFlag);

    /**
     * Gets a page of child information for a parent id.
     *
     * @param group    group
     * @param id       id
     * @param fatherId id of the parent entry
     * @param page     page to fetch
     * @param size     page size
     * @param isAsc    sort direction flag
     * @param keyword  search keyword
     * @param typeB    not documented in the original
     * @return the requested page
     */
    Map<String, Object> getSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc,
            String keyword, TypeB typeB);

    /**
     * Gets a page of parent title information from the noise set.
     *
     * @param group    group
     * @param id       id
     * @param page     page to fetch
     * @param size     page size
     * @param isAsc    sort direction flag
     * @param keyword  search keyword
     * @param typeB    not documented in the original
     * @param isTitle  not documented in the original
     * @param markFlag not documented in the original
     * @return the requested page
     */
    Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size, boolean isAsc,
            String keyword, TypeB typeB, boolean isTitle, int markFlag);

    /**
     * Gets a page of child information from the noise set for a parent id.
     *
     * @param group    group
     * @param id       id
     * @param fatherId id of the parent entry
     * @param page     page to fetch
     * @param size     page size
     * @param isAsc    sort direction flag
     * @param keyword  search keyword
     * @param typeB    not documented in the original
     * @return the requested page
     */
    Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId, int page, int size,
            boolean isAsc, String keyword, TypeB typeB);

    /**
     * Inserts the data into storage once checking is complete.
     *
     * @param group group
     * @param id    id
     */
    void checkedThenInsert(String group, String id);

    /**
     * Gets the interim aggregation result immediately.
     *
     * @param group group
     * @param id    id
     * @return -2: failed to get the result; -1: not aggregated; 0: aggregating; 1: aggregated
     */
    int getAggreResultNow(String group, String id);

    /**
     * Gets the interim insertion result immediately.
     *
     * @param group group
     * @param id    id
     * @return -2: failed to get the result; -1: not inserted; 0: inserting; 1: inserted
     */
    int getInsertResultNow(String group, String id);
}
\ No newline at end of file
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB.TypeB;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import java.util.Map;
/**
 * Data upload service.
 *
 * <p>Parameters named {@code group} and {@code id} appear on most methods; the original
 * documentation does not describe them further (presumably the project group and the upload
 * task id - confirm with the implementation).
 *
 * @author SJJ
 * @date 2020-02-25 18:02:26
 */
public interface DataUploadService {

    /**
     * Adds the source data set.
     *
     * @param group     group
     * @param id        id
     * @param sourceStr source data as a string (format not documented in the original)
     */
    void addUploadList(String group, String id, String sourceStr);

    /**
     * Starts the upload.
     *
     * @param group      group
     * @param id         id
     * @param mperson    not documented in the original
     * @param mtagType   not documented in the original
     * @param filterType not documented in the original
     * @param projectId  not documented in the original
     * @param insertType not documented in the original
     */
    void startUpload(String group, String id, String mperson,
            UploadInfo.MtagType mtagType, UploadInfo.FilterType filterType, String projectId, InsertType insertType);

    /**
     * Gets the upload status (progress).
     *
     * @param group group
     * @param id    id
     * @return the status as a map
     */
    Map<String, Object> getUploadStatus(String group, String id);

    /**
     * Gets the data set for an upload type.
     *
     * @param group       group
     * @param id          id
     * @param page        page to fetch
     * @param size        page size
     * @param isAsc       sort direction flag
     * @param searchField field to search in
     * @param keyword     search keyword
     * @param uploadType  upload type to fetch
     * @return the requested page as a map
     */
    Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc,
            String searchField, String keyword, UploadInfo.UploadType uploadType);

    /**
     * Gets the data type.
     *
     * @param json  the JSON object to classify
     * @param typeB not documented in the original
     * @return the data type
     */
    UploadInfo.DataType getDataType(JSONObject json, TypeB typeB);

    /**
     * Clears the data set.
     *
     * @param group group
     * @param id    id
     */
    void cleanUploadResult(String group, String id);
}
package com.zhiwei.middleware.automatic.server.graphs;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
/**
 * Keyword matching graph.
 *
 * <p>Keywords are stored character by character as a prefix tree: each level maps a character
 * to a {@code Node}, a node is flagged as an end when a keyword stops there, and a node's
 * {@code next} map holds the following level. Keywords and searched text are both lower-cased,
 * so matching is case-insensitive.
 *
 * <p>Only {@link #addGraph(String)} is synchronized; {@link #find(String)} reads the graph
 * without locking.
 *
 * @author SJJ
 * @date 2020-08-11 18:31:57
 */
public class Graphs {

    /** Root level of the graph: first character of a keyword mapped to its node. */
    protected Map<Character, Node<Character>> innerPoint = new HashMap<>();

    /**
     * Adds every keyword of the list to the graph; a {@code null} list is ignored.
     *
     * @param keywords keywords to add
     */
    public void addGraph(List<String> keywords) {
        if (keywords != null) {
            keywords.forEach(this::addGraph);
        }
    }

    /**
     * Adds one keyword to the graph; {@code null} and empty keywords are ignored.
     *
     * @param keyword keyword to add
     */
    public synchronized void addGraph(String keyword) {
        if (StringUtils.isEmpty(keyword)) {
            return;
        }
        final String lowered = StringUtils.lowerCase(keyword);
        final int lastIndex = lowered.length() - 1;

        // Start at the root level and walk one level deeper per character.
        Map<Character, Node<Character>> level = innerPoint;
        for (int i = 0; i <= lastIndex; i++) {
            final char ch = lowered.charAt(i);

            // Reuse the node for this character on the current level, or create it.
            Node<Character> node = level.get(ch);
            if (node == null) {
                node = new Node<>();
                node.setKey(ch);
                level.put(ch, node);
            }

            if (i == lastIndex) {
                // The last character marks a complete keyword.
                node.setEnd(true);
                continue;
            }
            if (node.getNext() == null) {
                node.setNext(new HashMap<>());
            }
            level = node.getNext();
        }
    }

    /**
     * Finds every occurrence of every stored keyword in {@code text}, including overlapping
     * and nested occurrences. Positions refer to the lower-cased text and {@code end} is
     * inclusive.
     *
     * @param text text to search; {@code null} or empty yields an empty list
     * @return the matches in the order they were completed while scanning left to right
     */
    public List<Keyword> find(String text) {
        List<Keyword> hits = new LinkedList<>();
        if (StringUtils.isEmpty(text)) {
            return hits;
        }
        final String lowered = StringUtils.lowerCase(text);

        // Active partial matches: start index in the text -> graph level to continue on.
        Map<Integer, Map<Character, Node<Character>>> cursors = new TreeMap<>();

        for (int pos = 0; pos < lowered.length(); pos++) {
            final char ch = lowered.charAt(pos);

            // Step 2: advance every partial match that was started at an earlier position.
            for (Iterator<Map.Entry<Integer, Map<Character, Node<Character>>>> it = cursors.entrySet().iterator();
                    it.hasNext();) {
                Map.Entry<Integer, Map<Character, Node<Character>>> cursor = it.next();
                Node<Character> step = cursor.getValue().get(ch);
                if (step == null) {
                    // Characters must match consecutively; this attempt failed.
                    it.remove();
                    continue;
                }
                if (step.isEnd()) {
                    // A complete keyword ends here.
                    hits.add(newKeyword(lowered, cursor.getKey(), pos));
                }
                Map<Character, Node<Character>> deeper = step.getNext();
                if (deeper == null) {
                    // Path exhausted (a keyword may fully contain another, e.g. AB and ABC).
                    it.remove();
                } else {
                    // Keep descending on the next character.
                    cursor.setValue(deeper);
                }
            }

            // Step 1: the current character may itself start a keyword at the root level.
            Node<Character> root = innerPoint.get(ch);
            if (root != null) {
                // Single-character keywords are supported (added 2020-07-24).
                if (root.isEnd()) {
                    hits.add(newKeyword(lowered, pos, pos));
                }
                if (root.getNext() != null) {
                    cursors.put(pos, root.getNext());
                }
            }
        }
        return hits;
    }

    /**
     * Builds the match record for {@code text[start..end]} (both bounds inclusive).
     */
    private static Keyword newKeyword(String text, int start, int end) {
        Keyword keyword = new Keyword();
        keyword.setStart(start);
        keyword.setEnd(end);
        keyword.setKey(text.substring(keyword.getStart(), keyword.getEnd() + 1));
        return keyword;
    }

    /**
     * Converts matches into keyword-frequency statistics.
     *
     * @param keywords matches to count
     * @return lower-cased keyword text mapped to its number of occurrences
     */
    public Map<String, Integer> change2Statistics(List<Keyword> keywords) {
        Map<String, Integer> frequencies = new HashMap<>();
        for (Keyword keyword : keywords) {
            frequencies.merge(StringUtils.lowerCase(keyword.getKey()), 1, Integer::sum);
        }
        return frequencies;
    }
}
package com.zhiwei.middleware.automatic.server.graphs;
import java.util.List;
import java.util.Map;
/**
 * Contract for a keyword-matching component that is loaded with definitions of type
 * {@code T} and reports matches of type {@code O}.
 *
 * @param <T> type of the definitions the graph is built from
 * @param <O> type of the match results
 */
public interface GraphsServer<T, O> {

    /**
     * Loads the given definitions into the graph.
     * NOTE(review): whether this replaces or extends previously loaded definitions is up to
     * the implementation.
     *
     * @param t definitions to load
     */
    void addGraph(List<T> t);

    /**
     * Searches {@code text} with the loaded graph.
     *
     * @param text text to search
     * @return the matches found
     */
    List<O> find(String text);
}
package com.zhiwei.middleware.automatic.server.graphs;
import java.io.Serializable;
public class Keyword implements Serializable {
private static final long serialVersionUID = 6917681073354631602L;
private String key;
private int start;
private int end;
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
Keyword keyword = (Keyword) o;
if (start != keyword.start)
return false;
return end == keyword.end;
}
@Override
public int hashCode() {
int result = start;
result = 31 * result + end;
return result;
}
@Override
public String toString() {
return "Keyword{key=" + key + ", start=" + start + ", end=" +
end + '}';
}
public String getKey() {
return key;
}
public void setKey(String key) {
this.key = key;
}
public int getStart() {
return start;
}
public void setStart(int start) {
this.start = start;
}
public int getEnd() {
return end;
}
public void setEnd(int end) {
this.end = end;
}
}
\ No newline at end of file
package com.zhiwei.middleware.automatic.server.graphs;
import com.zhiwei.middleware.automatic.server.pojo.GroupTerm;
import com.zhiwei.middleware.automatic.server.pojo.MonitorKeyword;
import com.zhiwei.middleware.automatic.server.pojo.QbjcRuleMatchedInfo;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.stream.Collectors;
/**
 * {@link GraphsServer} implementation that matches monitoring keyword rules against text.
 *
 * <p>A rule keyword string is split on {@code |} into alternatives, and each alternative is
 * split on runs of spaces into words that must all occur (AND). Every word is stored in a
 * {@link Graphs} instance for matching, and in {@link #terms} so that a matched word can be
 * traced back to the AND-groups (and rules) it belongs to.
 */
public class MonitorGraphsImpl implements GraphsServer<MonitorKeyword, QbjcRuleMatchedInfo> {
    /**
     * Matching graph.
     */
    private Graphs graphs;
    /**
     * Binding information for the matching graph: word -> AND-groups containing that word.
     */
    private Map<String, List<GroupTerm>> terms;

    public MonitorGraphsImpl() {
        this.graphs = new Graphs();
        this.terms = new HashMap<>();
    }

    /**
     * Rebuilds the graph and the term table from scratch and then swaps them in, replacing
     * whatever was loaded before. Rules whose monitor level is {@code null} are skipped.
     *
     * @param monitorKeyword rules to load
     */
    @Override
    public void addGraph(List<MonitorKeyword> monitorKeyword) {
        Graphs tempGraphs = new Graphs();
        Map<String, List<GroupTerm>> tempTerms = new HashMap<>();
        monitorKeyword.forEach(keyword -> {
            if (null != keyword.getMonitorLevel()) {
                preGraphs(keyword.getKeywords(), tempGraphs);
                preTerms(keyword, tempTerms);
            }
        });
        graphs = tempGraphs;
        terms = tempTerms;
    }

    /**
     * Matches {@code text} against the loaded rules.
     *
     * <p>For every AND-group that contains a matched word, the group counts as hit when all of
     * its words occur in the text; the reported count is the smallest occurrence count among
     * the group's words. A group is evaluated only once even though it is registered under each
     * of its words.
     *
     * @param text text to search
     * @return one result per hit AND-group
     * @throws IllegalStateException if a matched word has no entry in the term table
     */
    @Override
    public List<QbjcRuleMatchedInfo> find(String text) {
        text = StringUtils.lowerCase(text);
        // Return value.
        List<QbjcRuleMatchedInfo> res = new ArrayList<>();
        // Match.
        List<Keyword> results = graphs.find(text);
        // Group the occurrences by matched text.
        Map<String, List<Keyword>> kResults = results.stream().collect(Collectors.groupingBy(Keyword::getKey));
        // Words already processed, so that an A&B group is not reported twice.
        Set<String> hasMatched = new HashSet<>();
        // Occurrence count per word.
        Map<String, Integer> statis = graphs.change2Statistics(results);
        statis.forEach((keyword, rate) -> {
            List<GroupTerm> list = terms.get(keyword);
            if (null == list) {
                throw new IllegalStateException("keyword不存在:" + keyword);
            }
            // Candidate groups for this word.
            list.forEach(groupTerm -> {
                // Collected per group: sharing one set across groups would leak the hits of
                // other (possibly rejected) groups into this group's HitInfo, and every
                // HitInfo would alias the same mutable set.
                Set<Keyword> hitKeywords = new HashSet<>();
                int count = -1;
                for (String checkWord : groupTerm.getAndKeywords()) {
                    // The group was already evaluated under an earlier word; skip it.
                    if (hasMatched.contains(checkWord)) {
                        count = -1;
                        break;
                    }
                    int current = statis.getOrDefault(checkWord, 0);
                    if (current > 0) {
                        hitKeywords.addAll(kResults.get(checkWord));
                    }
                    // Track the minimum occurrence count (-1 means "not set yet").
                    count = (count == -1 || current < count) ? current : count;
                }
                // A positive minimum means every word of the group occurred.
                if (count > 0) {
                    res.add(toMatchedInfo(groupTerm, hitKeywords, count));
                }
            });
            hasMatched.add(keyword);
        });
        return res;
    }

    /**
     * Builds the result record for one hit AND-group.
     */
    private QbjcRuleMatchedInfo toMatchedInfo(GroupTerm groupTerm, Set<Keyword> hitKeywords, int count) {
        MonitorKeyword monitorKeyword = groupTerm.getMonitorKeyword();
        QbjcRuleMatchedInfo ruleMatchedInfo = new QbjcRuleMatchedInfo();
        // Basic rule information.
        ruleMatchedInfo.setId(monitorKeyword.getId());
        ruleMatchedInfo.setProject(monitorKeyword.getProject());
        ruleMatchedInfo.setRuleType(QbjcRuleMatchedInfo.RuleType.getByName(monitorKeyword.getType()));
        ruleMatchedInfo.setChannels(monitorKeyword.getChannels());
        ruleMatchedInfo.setPlatforms(monitorKeyword.getPlatforms());
        ruleMatchedInfo.setMonitorLevel(monitorKeyword.getMonitorLevel());
        // Information about the matched keywords.
        List<QbjcRuleMatchedInfo.HitInfo> infos = new ArrayList<>();
        infos.add(new QbjcRuleMatchedInfo.HitInfo(hitKeywords, groupTerm.getFullName(), count));
        ruleMatchedInfo.setHitInfos(infos);
        return ruleMatchedInfo;
    }

    /**
     * Adds the rule's words to the word -> AND-group table.
     */
    private void preTerms(MonitorKeyword monitorKeyword, Map<String, List<GroupTerm>> terms) {
        List<String> usedKeywords = monitorKeyword.getKeywords();
        // Parse each keyword string.
        usedKeywords.forEach(usedKeyword -> {
            usedKeyword = StringUtils.lowerCase(usedKeyword);
            // Split the OR alternatives (each word of an alternative can act as lookup key).
            String[] andStrs = usedKeyword.split("\\|");
            for (String andStr : andStrs) {
                String[] ands = andStr.trim().split(" +");
                for (String str : ands) {
                    // Create the list on first use, then register the group under this word.
                    terms.computeIfAbsent(str, k -> new ArrayList<>())
                            .add(new GroupTerm(Arrays.asList(ands), usedKeyword, monitorKeyword));
                }
            }
        });
    }

    /**
     * Adds every individual word of the keyword strings to the matching graph.
     *
     * @param usedKeywords keyword strings of one rule
     * @param graphs       graph to add the words to
     */
    private void preGraphs(List<String> usedKeywords, Graphs graphs) {
        // Split the keyword strings down to single words.
        usedKeywords.forEach(usedKeyword -> {
            usedKeyword = StringUtils.lowerCase(usedKeyword);
            String[] andStrs = usedKeyword.split("\\|");
            for (String andStr : andStrs) {
                for (String str : andStr.trim().split(" +")) {
                    graphs.addGraph(str);
                }
            }
        });
    }
}
package com.zhiwei.middleware.automatic.server.graphs;
import java.util.Map;
/**
 * Graph node keyed by {@code K}.
 *
 * @param <K> key type
 */
public class Node<K> {

    /** Primary key. */
    private K key;

    /** Whether this node satisfies the condition for ending any path. */
    private boolean end;

    /** Next nodes; {@code null} means the path is completely finished. */
    private Map<K, Node<K>> next;

    /** @return the key of this node */
    public K getKey() {
        return this.key;
    }

    /** @param key the key of this node */
    public void setKey(K key) {
        this.key = key;
    }

    /** @return the end flag of this node */
    public boolean isEnd() {
        return this.end;
    }

    /** @param end the end flag of this node */
    public void setEnd(boolean end) {
        this.end = end;
    }

    /** @return the map of next nodes, possibly {@code null} */
    public Map<K, Node<K>> getNext() {
        return this.next;
    }

    /** @param next the map of next nodes */
    public void setNext(Map<K, Node<K>> next) {
        this.next = next;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
/**
 * Serializable result holder made of a {@link Status}, a total page count and a map from
 * string keys to integer lists.
 */
public class AggreeResult implements Serializable {

    private static final long serialVersionUID = 8971968054515154622L;

    private Status status;
    private int totalPage;
    private Map<String, List<Integer>> data;

    /**
     * Creates a result that only carries a status.
     *
     * @param status result status
     */
    public AggreeResult(Status status) {
        this.status = status;
    }

    /**
     * Creates a fully populated result.
     *
     * @param status    result status
     * @param totalPage total number of pages
     * @param data      result data
     */
    public AggreeResult(Status status, int totalPage, Map<String, List<Integer>> data) {
        this(status);
        this.totalPage = totalPage;
        this.data = data;
    }

    public Status getStatus() {
        return status;
    }

    public void setStatus(Status status) {
        this.status = status;
    }

    public int getTotalPage() {
        return totalPage;
    }

    public void setTotalPage(int totalPage) {
        this.totalPage = totalPage;
    }

    public Map<String, List<Integer>> getData() {
        return data;
    }

    public void setData(Map<String, List<Integer>> data) {
        this.data = data;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable result holder made of a {@link Status} and a page of {@link ResultInfo} entries.
 */
public class CommonAggreeResult implements Serializable {
    private static final long serialVersionUID = 8971968054515154622L;

    private Status status;
    private PageData<CommonAggreeResult.ResultInfo> results;

    /**
     * Creates a result that only carries a status.
     *
     * @param status result status
     */
    public CommonAggreeResult(Status status) {
        this.status = status;
    }

    /**
     * Creates a fully populated result.
     *
     * @param status  result status
     * @param results page of result entries
     */
    public CommonAggreeResult(Status status, PageData<CommonAggreeResult.ResultInfo> results) {
        this.status = status;
        this.results = results;
    }

    public Status getStatus() {
        return this.status;
    }

    public PageData<CommonAggreeResult.ResultInfo> getResults() {
        return this.results;
    }

    public void setStatus(Status status) {
        this.status = status;
    }

    public void setResults(PageData<CommonAggreeResult.ResultInfo> results) {
        this.results = results;
    }

    /**
     * One result entry: a cluster name, a size, a list of index entries and a template entry.
     */
    public static class ResultInfo implements Serializable {
        private static final long serialVersionUID = -3656509880731033198L;

        private String clusterName;
        private Integer size;
        private List<AggreeDTO> indexes;
        private AggreeDTO templateData;

        public ResultInfo(String clusterName, Integer size, List<AggreeDTO> indexes, AggreeDTO templateData) {
            this.clusterName = clusterName;
            this.size = size;
            this.indexes = indexes;
            this.templateData = templateData;
        }

        public String getClusterName() {
            return this.clusterName;
        }

        public Integer getSize() {
            return this.size;
        }

        public List<AggreeDTO> getIndexes() {
            return this.indexes;
        }

        public AggreeDTO getTemplateData() {
            return this.templateData;
        }

        public void setClusterName(String clusterName) {
            this.clusterName = clusterName;
        }

        public void setSize(Integer size) {
            this.size = size;
        }

        public void setIndexes(List<AggreeDTO> indexes) {
            this.indexes = indexes;
        }

        public void setTemplateData(AggreeDTO templateData) {
            this.templateData = templateData;
        }

        /**
         * Two entries are equal when all four properties are equal (null-safe) and the other
         * object accepts this one through {@link #canEqual(Object)}.
         */
        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof CommonAggreeResult.ResultInfo)) {
                return false;
            }
            CommonAggreeResult.ResultInfo other = (CommonAggreeResult.ResultInfo) o;
            return other.canEqual(this)
                    && java.util.Objects.equals(this.getClusterName(), other.getClusterName())
                    && java.util.Objects.equals(this.getSize(), other.getSize())
                    && java.util.Objects.equals(this.getIndexes(), other.getIndexes())
                    && java.util.Objects.equals(this.getTemplateData(), other.getTemplateData());
        }

        /**
         * Hash code over the same properties used by {@link #equals(Object)}; it was missing,
         * so equal entries could land in different buckets of hash-based collections.
         */
        @Override
        public int hashCode() {
            return java.util.Objects.hash(this.getClusterName(), this.getSize(), this.getIndexes(),
                    this.getTemplateData());
        }

        /** Hook that lets subclasses refuse equality with instances of other types. */
        protected boolean canEqual(Object other) {
            return other instanceof CommonAggreeResult.ResultInfo;
        }

        @Override
        public String toString() {
            return "CommonAggreeResult.ResultInfo(clusterName=" + this.getClusterName() + ", size=" + this.getSize() + ", indexes=" + this.getIndexes() + ", templateData=" + this.getTemplateData() + ")";
        }
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import java.util.List;
/**
 * Links a group of keywords that must be hit together with the full keyword expression and
 * the monitor keyword definition they come from.
 */
public class GroupTerm {

    /** Combined keywords. */
    private List<String> andKeywords;

    /** Full name of the keyword expression. */
    private String fullName;

    /** Monitor keyword this group belongs to. */
    private MonitorKeyword monitorKeyword;

    /**
     * @param asList         combined keywords
     * @param usedKeyword    full keyword expression
     * @param monitorKeyword owning monitor keyword
     */
    public GroupTerm(List<String> asList, String usedKeyword, MonitorKeyword monitorKeyword) {
        this.monitorKeyword = monitorKeyword;
        this.fullName = usedKeyword;
        this.andKeywords = asList;
    }

    public List<String> getAndKeywords() {
        return this.andKeywords;
    }

    public void setAndKeywords(List<String> andKeywords) {
        this.andKeywords = andKeywords;
    }

    public String getFullName() {
        return this.fullName;
    }

    public void setFullName(String fullName) {
        this.fullName = fullName;
    }

    public MonitorKeyword getMonitorKeyword() {
        return this.monitorKeyword;
    }

    public void setMonitorKeyword(MonitorKeyword monitorKeyword) {
        this.monitorKeyword = monitorKeyword;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable pair of a {@link MarkInfo} and a list of project names.
 */
public class MarkInfoMulti implements Serializable {

    private static final long serialVersionUID = 124627162986379948L;

    private MarkInfo markInfo;
    private List<String> projects;

    /**
     * @param markInfo mark information
     * @param projects projects associated with the mark information
     */
    public MarkInfoMulti(MarkInfo markInfo, List<String> projects) {
        this.projects = projects;
        this.markInfo = markInfo;
    }

    public MarkInfo getMarkInfo() {
        return this.markInfo;
    }

    public void setMarkInfo(MarkInfo markInfo) {
        this.markInfo = markInfo;
    }

    public List<String> getProjects() {
        return this.projects;
    }

    public void setProjects(List<String> projects) {
        this.projects = projects;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
/**
 * Mutable bean describing how a mark upload is handled: id, group, marking person, tag mode,
 * filter mode, project id and insert type.
 */
public class MarkUploadRule {

    private String id;
    private String group;
    private String mperson;
    private UploadInfo.MtagType mtagType;
    private UploadInfo.FilterType filterType;
    private String projectId;
    private InsertType insertType;

    /** Creates a rule with every property set. */
    public MarkUploadRule(String id, String group, String mperson,
                          UploadInfo.MtagType mtagType, UploadInfo.FilterType filterType,
                          String projectId, InsertType insertType) {
        this.insertType = insertType;
        this.projectId = projectId;
        this.filterType = filterType;
        this.mtagType = mtagType;
        this.mperson = mperson;
        this.group = group;
        this.id = id;
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getGroup() {
        return this.group;
    }

    public void setGroup(String group) {
        this.group = group;
    }

    public String getMperson() {
        return this.mperson;
    }

    public void setMperson(String mperson) {
        this.mperson = mperson;
    }

    public UploadInfo.MtagType getMtagType() {
        return this.mtagType;
    }

    public void setMtagType(UploadInfo.MtagType mtagType) {
        this.mtagType = mtagType;
    }

    public UploadInfo.FilterType getFilterType() {
        return this.filterType;
    }

    public void setFilterType(UploadInfo.FilterType filterType) {
        this.filterType = filterType;
    }

    public String getProjectId() {
        return this.projectId;
    }

    public void setProjectId(String projectId) {
        this.projectId = projectId;
    }

    public InsertType getInsertType() {
        return this.insertType;
    }

    public void setInsertType(InsertType insertType) {
        this.insertType = insertType;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import java.util.List;
/**
 * Mutable bean for one monitor keyword definition: id, name, type, keyword expressions,
 * channels, platforms, project and monitor level.
 */
public class MonitorKeyword {

    private String id;
    private String name;
    private String type;
    private List<String> keywords;
    private List<String> channels;
    private List<String> platforms;
    private String project;
    private MonitorLevel monitorLevel;

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getType() {
        return this.type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public List<String> getKeywords() {
        return this.keywords;
    }

    public void setKeywords(List<String> keywords) {
        this.keywords = keywords;
    }

    public List<String> getChannels() {
        return this.channels;
    }

    public void setChannels(List<String> channels) {
        this.channels = channels;
    }

    public List<String> getPlatforms() {
        return this.platforms;
    }

    public void setPlatforms(List<String> platforms) {
        this.platforms = platforms;
    }

    public String getProject() {
        return this.project;
    }

    public void setProject(String project) {
        this.project = project;
    }

    public MonitorLevel getMonitorLevel() {
        return this.monitorLevel;
    }

    public void setMonitorLevel(MonitorLevel monitorLevel) {
        this.monitorLevel = monitorLevel;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
/**
 * Monitoring tier level, composed of a first tier ({@link LevelOne}) and a second tier
 * ({@link LevelTwo}).
 *
 * @author shentao
 * @since 2021/9/16 18:17
 */
public class MonitorLevel {

    /**
     * Id, e.g. 1111000000 1112000000 1113000000 1114000000 1115000000 1211000000
     * 1212000000 1213000000 1214000000 1215000000 1311000000 1312000000 1313000000
     * 1314000000 1315000000.
     */
    private Integer id;

    /** Combined level name: first-tier name followed by second-tier name. */
    private String level;

    /** First-tier name. */
    private String levelOne;

    /** First-tier weight, e.g. 1100000000 1200000000 1300000000. */
    private Integer levelOneWeights;

    /** Second-tier name, e.g. I II III IV V. */
    private String levelTwo;

    /** Second-tier weight, e.g. 11000000 12000000 13000000 14000000 15000000. */
    private Integer levelTwoWeights;

    /**
     * Builds a level from its two tiers: the id is the sum of both weights and the level
     * name is the concatenation of both tier names.
     *
     * @param levelOne first tier
     * @param levelTwo second tier
     */
    public MonitorLevel(LevelOne levelOne, LevelTwo levelTwo) {
        String firstName = levelOne.name();
        String secondName = levelTwo.name();
        this.levelOne = firstName;
        this.levelTwo = secondName;
        this.level = firstName + secondName;
        this.levelOneWeights = levelOne.weights;
        this.levelTwoWeights = levelTwo.weights;
        this.id = levelOne.weights + levelTwo.weights;
    }

    public Integer getId() {
        return this.id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getLevel() {
        return this.level;
    }

    public void setLevel(String level) {
        this.level = level;
    }

    public String getLevelOne() {
        return this.levelOne;
    }

    public void setLevelOne(String levelOne) {
        this.levelOne = levelOne;
    }

    public Integer getLevelOneWeights() {
        return this.levelOneWeights;
    }

    public void setLevelOneWeights(Integer levelOneWeights) {
        this.levelOneWeights = levelOneWeights;
    }

    public String getLevelTwo() {
        return this.levelTwo;
    }

    public void setLevelTwo(String levelTwo) {
        this.levelTwo = levelTwo;
    }

    public Integer getLevelTwoWeights() {
        return this.levelTwoWeights;
    }

    public void setLevelTwoWeights(Integer levelTwoWeights) {
        this.levelTwoWeights = levelTwoWeights;
    }

    /**
     * Second tier.
     */
    public enum LevelTwo {
        I(11000000), II(12000000), III(13000000), IV(14000000), V(15000000);

        private final Integer weights;

        LevelTwo(Integer weights) {
            this.weights = weights;
        }

        /** @return the weight of this tier */
        public Integer getWeights() {
            return this.weights;
        }
    }

    /**
     * First tier.
     */
    public enum LevelOne {
        红色(1100000000), 黄色(1200000000), 蓝色(1300000000);

        private final Integer weights;

        LevelOne(Integer weights) {
            this.weights = weights;
        }

        /** @return the weight of this tier */
        public Integer getWeights() {
            return this.weights;
        }
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import java.io.Serializable;
import java.util.List;
/**
 * Serializable page of items together with its paging counters.
 *
 * @param <T> serializable item type
 */
public class PageData<T extends Serializable> implements Serializable {

    private static final long serialVersionUID = -9171451550170066449L;

    private int page;
    private int total;
    private int totalPage;
    private int pageLimit;
    private List<T> list;

    /** Creates an empty page with all counters at zero and no list. */
    public PageData() {
    }

    /**
     * Creates a fully populated page.
     *
     * @param page      page number
     * @param total     total number of items
     * @param totalPage total number of pages
     * @param pageLimit page size limit
     * @param list      items of this page
     */
    public PageData(int page, int total, int totalPage, int pageLimit, List<T> list) {
        this.list = list;
        this.pageLimit = pageLimit;
        this.totalPage = totalPage;
        this.total = total;
        this.page = page;
    }

    public int getPage() {
        return page;
    }

    public void setPage(int page) {
        this.page = page;
    }

    public int getTotal() {
        return total;
    }

    public void setTotal(int total) {
        this.total = total;
    }

    public int getTotalPage() {
        return totalPage;
    }

    public void setTotalPage(int totalPage) {
        this.totalPage = totalPage;
    }

    public int getPageLimit() {
        return pageLimit;
    }

    public void setPageLimit(int pageLimit) {
        this.pageLimit = pageLimit;
    }

    public List<T> getList() {
        return list;
    }

    public void setList(List<T> list) {
        this.list = list;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.middleware.automatic.server.graphs.Keyword;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Describes one rule matched by the monitor: the rule's basic data, the ids collected for the
 * match and the detailed hit information.
 */
public class QbjcRuleMatchedInfo {

    /** Unique id (the rule id is used). */
    private String id;

    /** Project name. */
    private String project;

    /** Rule type. */
    private RuleType ruleType;

    /** Channel list. */
    private List<String> channels;

    /** Platform list. */
    private List<String> platforms;

    /** Monitor level. */
    private MonitorLevel monitorLevel;

    /** Matched id set. */
    private Set<String> hitIds;

    /** Hit information list. */
    private List<HitInfo> hitInfos;

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getProject() {
        return this.project;
    }

    public void setProject(String project) {
        this.project = project;
    }

    public RuleType getRuleType() {
        return this.ruleType;
    }

    public void setRuleType(RuleType ruleType) {
        this.ruleType = ruleType;
    }

    public List<String> getChannels() {
        return this.channels;
    }

    public void setChannels(List<String> channels) {
        this.channels = channels;
    }

    public List<String> getPlatforms() {
        return this.platforms;
    }

    public void setPlatforms(List<String> platforms) {
        this.platforms = platforms;
    }

    public MonitorLevel getMonitorLevel() {
        return this.monitorLevel;
    }

    public void setMonitorLevel(MonitorLevel monitorLevel) {
        this.monitorLevel = monitorLevel;
    }

    public Set<String> getHitIds() {
        return this.hitIds;
    }

    public void setHitIds(Set<String> hitIds) {
        this.hitIds = hitIds;
    }

    public List<HitInfo> getHitInfos() {
        return this.hitInfos;
    }

    public void setHitInfos(List<HitInfo> hitInfos) {
        this.hitInfos = hitInfos;
    }

    /**
     * Adds this object's own id to the hit id set.
     *
     * @return this instance
     */
    public QbjcRuleMatchedInfo appendhitIds() {
        return appendhitIds(this.id);
    }

    /**
     * Adds the given id to the hit id set while holding this object's monitor. When the set
     * does not exist yet it is created and seeded with this object's own id first.
     *
     * @param id id to add
     * @return this instance
     */
    public QbjcRuleMatchedInfo appendhitIds(String id) {
        synchronized (this) {
            if (this.hitIds == null) {
                Set<String> created = new HashSet<>();
                created.add(this.id);
                this.hitIds = created;
            }
            this.hitIds.add(id);
            return this;
        }
    }

    /**
     * Details of one hit: the keywords that were hit, the full keyword name and the hit count.
     */
    public static class HitInfo {

        /** Information about the keywords that were hit. */
        private Set<Keyword> hitKeywords;

        /** Full keyword name. */
        private String fullName;

        /** Hit count (the lowest value is taken). */
        private int rate;

        public HitInfo() {
        }

        public HitInfo(Set<Keyword> hitKeywords, String fullName, int count) {
            this.rate = count;
            this.fullName = fullName;
            this.hitKeywords = hitKeywords;
        }

        public Set<Keyword> getHitKeywords() {
            return this.hitKeywords;
        }

        public void setHitKeywords(Set<Keyword> hitKeywords) {
            this.hitKeywords = hitKeywords;
        }

        public String getFullName() {
            return this.fullName;
        }

        public void setFullName(String fullName) {
            this.fullName = fullName;
        }

        public int getRate() {
            return this.rate;
        }

        public void setRate(int rate) {
            this.rate = rate;
        }
    }

    /**
     * Rule type with its display name.
     */
    public enum RuleType {
        KEYWORD("关键词"), CHANNEL("渠道");

        private String name;
        private static RuleType[] values = RuleType.values();

        private RuleType(String name) {
            this.name = name;
        }

        public String getName() {
            return this.name;
        }

        /**
         * Looks a rule type up by its display name.
         *
         * @param name display name to look for
         * @return the matching type, or {@code null} when no type has that name
         */
        public static RuleType getByName(String name) {
            RuleType found = null;
            for (int i = 0; i < values.length && found == null; i++) {
                if (values[i].getName().equals(name)) {
                    found = values[i];
                }
            }
            return found;
        }
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
/**
 * Processing status values.
 */
public enum Status {
    START, RUN, END, ERROR
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.base.category.ClassB.TypeB;
import com.zhiwei.base.entity.CommonDO;
import java.io.Serializable;
/**
 * Data upload entity.
 *
 * @author SJJ
 * @since 2020-02-17 10:19:08
 */
public class UploadInfo implements Serializable {

    private static final long serialVersionUID = -1339177542820210256L;

    /** Original data. */
    MarkUploadInfo originData;

    /** Base data. */
    CompoundCommonDO compound;

    /** Message type. */
    TypeB typeB;

    /** Data type. */
    DataType dataType;

    /** Upload result for the DW store. */
    Boolean dwResult;

    /** Upload result for the mark store. */
    Boolean markResult;

    /** Whether the operation ended in an error. */
    boolean isError;

    /** Error description. */
    String errorMsg;

    public UploadInfo() {
    }

    public UploadInfo(MarkUploadInfo originData) {
        this.originData = originData;
    }

    public UploadInfo(MarkUploadInfo originData, CompoundCommonDO compound, TypeB typeB) {
        this(originData);
        this.compound = compound;
        this.typeB = typeB;
    }

    /**
     * Marks this upload as failed and records the reason.
     *
     * @param errorMsg error description
     */
    public void setError(String errorMsg) {
        this.isError = true;
        this.errorMsg = errorMsg;
    }

    /** Clears the error flag and the error description. */
    public void removeError() {
        this.isError = false;
        this.errorMsg = null;
    }

    public MarkUploadInfo getOriginData() {
        return this.originData;
    }

    public void setOriginData(MarkUploadInfo originData) {
        this.originData = originData;
    }

    public CompoundCommonDO getCompound() {
        return this.compound;
    }

    public void setCompound(CompoundCommonDO compound) {
        this.compound = compound;
    }

    public TypeB getTypeB() {
        return this.typeB;
    }

    public void setTypeB(TypeB typeB) {
        this.typeB = typeB;
    }

    public DataType getDataType() {
        return this.dataType;
    }

    public void setDataType(DataType dataType) {
        this.dataType = dataType;
    }

    public Boolean getDwResult() {
        return this.dwResult;
    }

    public void setDwResult(Boolean dwResult) {
        this.dwResult = dwResult;
    }

    public Boolean getMarkResult() {
        return this.markResult;
    }

    public void setMarkResult(Boolean markResult) {
        this.markResult = markResult;
    }

    public boolean isError() {
        return this.isError;
    }

    public void setError(boolean error) {
        this.isError = error;
    }

    public String getErrorMsg() {
        return this.errorMsg;
    }

    public void setErrorMsg(String errorMsg) {
        this.errorMsg = errorMsg;
    }

    public enum FilterType {
        /** Filter. */
        FILTER,
        /** Filter and complete. */
        FILTER_COMPLETE
    }

    public enum MtagType {
        /** Overwrite the historical tags. */
        INDEX,
        /** Update the historical tags. */
        UPDATE
    }

    public enum DataType {
        /** New data not yet matched in any store. */
        EXTERNAL,
        /** Present in the DW (public-opinion) store. */
        DW,
        /** Present in the mark store. */
        MARK
    }

    public enum UploadType {
        /** Format validation error. */
        FORMAR_ERROR,
        /** Field format error. */
        FIELD_ERROR,
        /** Uploaded successfully. */
        SUCCESS,
        /** Upload failed. */
        FAILED
    }

    /**
     * Serializable pair of the DW record and the mark record of one piece of data.
     */
    public static class CompoundCommonDO implements Serializable {

        private static final long serialVersionUID = -657894841924114949L;

        CommonDO dw;
        CommonDO mark;

        public CompoundCommonDO() {
        }

        public CompoundCommonDO(CommonDO dw, CommonDO mark) {
            this.mark = mark;
            this.dw = dw;
        }

        public CommonDO getDw() {
            return this.dw;
        }

        public void setDw(CommonDO dw) {
            this.dw = dw;
        }

        public CommonDO getMark() {
            return this.mark;
        }

        public void setMark(CommonDO mark) {
            this.mark = mark;
        }
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
/**
 * Counters describing the outcome of an upload, plus an integer status code.
 */
public class UploadStatus {

    int status = 0;

    /** Total number of uploaded records. */
    int totalCount;

    /** Number of format errors. */
    int formatErrorCount;

    /** Number of field errors. */
    int fieldErrorCount;

    /** Number of successes. */
    int successCount;

    /** Number of failures. */
    int failedCount;

    public UploadStatus() {
    }

    /**
     * Renders the five counters as one summary line; the status code is not included.
     */
    @Override
    public String toString() {
        return "共上传数据条数:" + totalCount
                + ",格式错误数:" + formatErrorCount
                + ",字段错误数:" + fieldErrorCount
                + ",成功数:" + successCount
                + ",失败数:" + failedCount;
    }

    public int getStatus() {
        return this.status;
    }

    public void setStatus(int status) {
        this.status = status;
    }

    public int getTotalCount() {
        return this.totalCount;
    }

    public void setTotalCount(int totalCount) {
        this.totalCount = totalCount;
    }

    public int getFormatErrorCount() {
        return this.formatErrorCount;
    }

    public void setFormatErrorCount(int formatErrorCount) {
        this.formatErrorCount = formatErrorCount;
    }

    public int getFieldErrorCount() {
        return this.fieldErrorCount;
    }

    public void setFieldErrorCount(int fieldErrorCount) {
        this.fieldErrorCount = fieldErrorCount;
    }

    public int getSuccessCount() {
        return this.successCount;
    }

    public void setSuccessCount(int successCount) {
        this.successCount = successCount;
    }

    public int getFailedCount() {
        return this.failedCount;
    }

    public void setFailedCount(int failedCount) {
        this.failedCount = failedCount;
    }
}
package com.zhiwei.middleware.automatic.server.pojo.dto;
/**
 * Id/text pair.
 *
 * <p>The class now implements {@link java.io.Serializable}: it already declared a
 * {@code serialVersionUID} and it is held by the serializable
 * {@code CommonAggreeResult.ResultInfo}, so without the interface Java serialization of a
 * populated result fails with {@link java.io.NotSerializableException}.
 */
public class AggreeDTO implements java.io.Serializable {
    private static final long serialVersionUID = -2649288545116289667L;

    private String id;
    private String text;

    /**
     * @param id   identifier
     * @param text text
     */
    public AggreeDTO(String id, String text) {
        this.id = id;
        this.text = text;
    }

    /** @return the identifier */
    public String getId() {
        return this.id;
    }

    /** @return the text */
    public String getText() {
        return this.text;
    }

    /** @param id the identifier */
    public void setId(String id) {
        this.id = id;
    }

    /** @param text the text */
    public void setText(String text) {
        this.text = text;
    }
}
package com.zhiwei.middleware.automatic.server.pojo.enums;
/**
 * Insert type values.
 */
public enum InsertType {
    MARK, ALL
}
package com.zhiwei.middleware.automatic.server.pojo.enums;
/**
 * Template status values, in declaration order: running, resetting, reset done, reset failed,
 * pending deletion.
 */
public enum TemplateStatus {
    运行中, 重置中, 已重置, 重置失败, 待删除
}
package com.zhiwei.middleware.automatic.server;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Spring Boot entry point of the automatic marking middleware.
 */
@SpringBootApplication
public class Server {
    private static final Logger log = LogManager.getLogger(Server.class);

    /**
     * Starts the Spring application.
     *
     * <p>The success message is logged only when {@code SpringApplication.run} returns
     * normally; previously it was logged even after a startup failure, and the failure itself
     * was reduced to its message on standard output.
     *
     * @param args command line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        try {
            SpringApplication.run(Server.class, args);
            log.info("时间:,自动标注中间件启动成功");
        } catch (Exception e) {
            // Keep the original behaviour of not rethrowing, but record the full stack trace.
            log.error("自动标注中间件启动失败", e);
        }
    }
}
package com.zhiwei.middleware.automatic.server.base;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.search.SearchHit;
/**
 * Operations used while uploading marked data: looking up existing DW data, converting upload
 * rows into upload entities, building search conditions and producing {@link MarkInfo}.
 */
public interface BaseDataUploadService {
/**
 * Returns the {@link ClassB.TypeB} of this service.
 *
 * @return the type
 */
ClassB.TypeB getTypeB();
/**
 * Searches the DW store for data by its text.
 *
 * @param info upload information
 * @return CommonDO
 */
CommonDO searchDwByContentNew(MarkUploadResult info);
/**
 * Converts an uploaded table-row entity into a data upload entity.
 *
 * @param info upload information
 * @return UploadInfo
 * @throws Exception
 */
UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info)
throws Exception;
/**
 * Builds the URL search condition.
 * @param result mark information
 * @return BoolQueryBuilder
 */
BoolQueryBuilder urlSearchQuery(MarkUploadResult result);
/**
 * Builds the text search condition.
 * @param result mark information
 * @return BoolQueryBuilder
 */
BoolQueryBuilder textSearchQuery(MarkUploadResult result);
/**
 * Converts ES data into a base entity.
 * @param hit ES data
 * @return base entity
 */
CommonDO getCommonDOBySearchHit(SearchHit hit);
/**
 * Converts a mark result into a MarkInfo.
 * @param result mark result
 * @param mperson marking person
 * @param group project
 * @param originMtag tags
 * @return MarkInfo
 */
MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag);
}
package com.zhiwei.middleware.automatic.server.base;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.functional.*;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import java.util.*;
/**
 * Two-step helper for bulk operations on a list of rows: first accumulate a combined
 * Elasticsearch query (plus the indexes and types involved), then merge the search hits back
 * into each row.
 *
 * @param <T> row type
 */
public class BulkTemplate<T> {

    private static final Logger log = LogManager.getLogger(BulkTemplate.class);

    private List<T> source;
    private String state;
    private boolean isNext;
    private BoolQueryBuilder queryBuilder;
    private Set<String> indexSet;
    private Set<ClassB.TypeB> typeSet;

    /**
     * @param source rows to process
     * @param state  label prepended to the stage name when a row error is reported
     */
    public BulkTemplate(List<T> source, String state) {
        this.source = source;
        this.state = state;
        this.isNext = false;
        this.queryBuilder = QueryBuilders.boolQuery();
        this.indexSet = new HashSet<>();
        this.typeSet = new HashSet<>();
    }

    /**
     * Adds one {@code should} clause per row and collects the index and type of each row when
     * the corresponding resolver is given. A row whose processing throws is reported through
     * {@code exception}. The merge step is enabled only when at least one index was collected.
     *
     * @param esRowQuery builds the query of one row
     * @param esIndex    resolves the index of one row, may be {@code null}
     * @param classType  resolves the type of one row, may be {@code null}
     * @param exception  receives per-row failures
     */
    public void bulkQuery(EsRowQuery<T> esRowQuery, EsIndex<T> esIndex, DataClassType<T> classType, UploadRowException<T> exception) {
        final boolean collectIndex = esIndex != null;
        final boolean collectType = classType != null;
        for (T row : source) {
            try {
                queryBuilder.should(esRowQuery.rowQuery(row));
                if (collectIndex) {
                    indexSet.add(esIndex.getIndex(row));
                }
                if (collectType) {
                    typeSet.add(classType.getClassType(row));
                }
            } catch (Exception e) {
                exception.rowException(row, state + "-构建查询条件", e.getMessage());
            }
        }
        isNext = !indexSet.isEmpty();
        if (!isNext) {
            log.error("批量操作-构建查询条件阶段 es索引为空");
        }
    }

    /**
     * Merges the hits found for each row into that row. Does nothing unless the preceding
     * {@link #bulkQuery} call enabled this step. A row whose merge throws is reported through
     * {@code exception}.
     *
     * @param hitMap    search hits grouped by row key
     * @param rule      upload rule handed to the merge
     * @param rowKey    computes the key of one row
     * @param dataMerge merges hits into one row
     * @param exception receives per-row failures
     */
    public void searchCallback(Map<String, List<SearchHit>> hitMap, MarkUploadRule rule, RowKey<T> rowKey, DataMerge<T> dataMerge, UploadRowException<T> exception) {
        if (isNext) {
            for (T row : source) {
                try {
                    List<SearchHit> hits = hitMap.get(rowKey.getRowKey(row));
                    dataMerge.dataMerge(hits, row, rule);
                } catch (Exception e) {
                    exception.rowException(row, state + "-es数据合并", e.getMessage());
                }
            }
        }
    }

    public Set<String> getIndexSet() {
        return this.indexSet;
    }

    public Set<ClassB.TypeB> getTypeSet() {
        return this.typeSet;
    }

    public BoolQueryBuilder getQueryBuilder() {
        return this.queryBuilder;
    }

    public List<T> getSource() {
        return this.source;
    }

    /**
     * Resets this template for a new batch: replaces the rows and the label and discards the
     * accumulated query, indexes and types.
     *
     * @param source rows of the new batch
     * @param state  label of the new batch
     */
    public void clean(List<T> source, String state) {
        this.source = source;
        this.state = state;
        this.isNext = false;
        this.queryBuilder = QueryBuilders.boolQuery();
        this.indexSet = new HashSet<>();
        this.typeSet = new HashSet<>();
    }
}
package com.zhiwei.middleware.automatic.server.base;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.serializer.ValueFilter;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.*;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import java.util.ArrayList;
import java.util.List;
/**
 * Shared state and helpers for data upload services: the handled type, the DW and mark entity
 * classes and the Dubbo handler, plus conversion and query-building utilities.
 */
public class DataUploadCommon {

    private final ClassB.TypeB typeB;
    private final Class<? extends CommonDO> dwClazz;
    private final Class<? extends CommonDO> markClazz;
    private final DubboHandler dubboHandler;

    public DataUploadCommon(ClassB.TypeB typeB, Class<? extends CommonDO> dwClazz, Class<? extends CommonDO> markClazz,
                            DubboHandler dubboHandler) {
        this.typeB = typeB;
        this.dwClazz = dwClazz;
        this.markClazz = markClazz;
        this.dubboHandler = dubboHandler;
    }

    public ClassB.TypeB getTypeB() {
        return this.typeB;
    }

    public Class<? extends CommonDO> getDwClazz() {
        return this.dwClazz;
    }

    public Class<? extends CommonDO> getMarkClazz() {
        return this.markClazz;
    }

    public DubboHandler getDubboHandler() {
        return this.dubboHandler;
    }

    /**
     * Determines the data type of an upload and stores it on the upload.
     *
     * <p>The mark record is checked first, then the DW record; when neither is reported as
     * contained by the Dubbo handler the type is {@code EXTERNAL}.
     *
     * @param info upload whose compound records are checked
     * @return the determined data type
     */
    public final UploadInfo.DataType getDataType(UploadInfo info) throws Exception {
        UploadInfo.DataType resolved;
        if (dubboHandler.contains(info.getCompound().getMark().filterInfo())) {
            // Present in the mark store
            resolved = UploadInfo.DataType.MARK;
        } else if (dubboHandler.contains(info.getCompound().getDw().filterInfo())) {
            // Present in the DW (public-opinion) store
            resolved = UploadInfo.DataType.DW;
        } else {
            resolved = UploadInfo.DataType.EXTERNAL;
        }
        info.setDataType(resolved);
        return resolved;
    }

    /**
     * Converts a DW record into a mark record by serialising it to JSON, adding the mark group
     * and parsing the JSON as the mark class.
     *
     * @param dw     record to convert
     * @param mgroup mark group to set
     * @return the record as an instance of the mark class
     */
    public final CommonDO convert2Mark(CommonDO dw, String mgroup) {
        JSONObject asJson = dw.toJSON();
        asJson.put(GenericAttribute.ES_M_GROUP, mgroup);
        String serialized = asJson.toJSONString();
        return JSONObject.parseObject(serialized, markClazz);
    }

    /**
     * Builds a bool query whose {@code should} clauses are term queries on {@code urlName}
     * for the given URL and its variants: the same URL with the other scheme (http/https)
     * and, for URLs containing {@code toutiao.com}, a canonical toutiao URL built from the
     * first run of digits found in the URL.
     *
     * @param url     URL to search for
     * @param urlName name of the field holding the URL
     * @return the bool query
     */
    public static BoolQueryBuilder urlQuery(String url, String urlName) {
        List<String> candidates = new ArrayList<>(2);
        candidates.add(url);
        if (url.contains("https:")) {
            candidates.add(url.replaceFirst("https", "http"));
        } else if (url.contains("http")) {
            candidates.add(url.replaceFirst("http", "https"));
        }
        if (url.contains("toutiao.com")) {
            List<String> digitRuns = Tools.patternMatchFind(url, "[\\d]+");
            if (!digitRuns.isEmpty()) {
                candidates.add("https://www.toutiao.com/a" + digitRuns.get(0));
            }
        }
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        for (String candidate : candidates) {
            query.should(QueryBuilders.termQuery(urlName, candidate));
        }
        return query;
    }

    /**
     * Fills in required fields that may be missing and returns the record parsed as
     * {@code clazz}.
     *
     * <p>The creation time, cid and cname get defaults when absent (cid also when it is -1);
     * the mark group, mark person and merged tag are always written. Empty-string values are
     * dropped during serialisation.
     *
     * @param commonDO  record to complete
     * @param mperson   mark person
     * @param mgroup    mark group
     * @param originTag existing tag
     * @param mtag      new tag merged into the existing one
     * @param clazz     target class
     * @return the completed record
     */
    public CommonDO addDefault(CommonDO commonDO, String mperson, String mgroup, String originTag,
                               String mtag, Class<? extends CommonDO> clazz) {
        JSONObject record = commonDO.toJSON();
        // Supply ctime, cid and cname when they are missing
        if (record.get(GenericAttribute.ES_C_TIME) == null) {
            record.put(GenericAttribute.ES_C_TIME, System.currentTimeMillis());
        }
        Long cid = record.getLong(GenericAttribute.ES_CID);
        boolean cidMissing = cid == null || cid == -1;
        if (cidMissing) {
            record.put(GenericAttribute.ES_CID, GenericAttribute.ES_CID_DEFAULT);
        }
        if (!record.containsKey(GenericAttribute.ES_C_NAME)) {
            record.put(GenericAttribute.ES_C_NAME, GenericAttribute.AUTO_CNAME);
        }
        record.put(GenericAttribute.ES_M_GROUP, mgroup);
        record.put(GenericAttribute.ES_M_PERSON, mperson);
        record.put(GenericAttribute.ES_M_TAG, Tools.partialUpdateTag(originTag, mtag));
        // Replace empty strings by null so they are left out of the serialised JSON
        ValueFilter dropEmptyStrings = (object, name, value) -> "".equals(value) ? null : value;
        return JSONObject.parseObject(JSON.toJSONString(record, dropEmptyStrings), clazz);
    }
}
package com.zhiwei.middleware.automatic.server.base;
/**
 * Checked exception carrying a message about a field error.
 */
public class FieldErrorException extends Exception {

    private static final long serialVersionUID = 6671756541874479047L;

    /**
     * @param msg detail message
     */
    public FieldErrorException(String msg) {
        super(msg);
    }
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.CompleteText;
import com.zhiwei.base.entity.subclass.mark.CompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Upload service for complete-text records: {@link CompleteText} is the stored entity and
 * {@link CompleteTextMark} the annotated (marked) variant.
 */
@Service
public class CompleteTextServiceImpl extends DataUploadCommon implements BaseDataUploadService {

    private static final Logger log = LogManager.getLogger(CompleteTextServiceImpl.class);

    private final EsDao esDao;

    public CompleteTextServiceImpl(DubboHandler dubboHandler, EsDao esDao) {
        super(ClassB.TypeB.COMPLETE, CompleteText.class, CompleteTextMark.class, dubboHandler);
        this.esDao = esDao;
    }

    /**
     * Searches ES for an already stored text that matches the uploaded one by content rules:
     * same source, same time as formatted by {@code TimeUtil.CONTENT_DF} (minute precision
     * according to the original author's note) and same URL host.
     *
     * @param info upload result whose {@code getDw()} is a {@link CompleteText}
     * @return the first matching stored record, or {@code null} when nothing matches or the
     *         ES search fails
     */
    @Override
    public CommonDO searchDwByContentNew(MarkUploadResult info) {
        CompleteText dw = (CompleteText) info.getDw();
        // Values used for content de-duplication.
        String ruleTime = TimeUtil.CONTENT_DF.format(dw.getTime());
        String host = Tools.getHost(dw.getUrl());

        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_SOURCE, dw.getSource()));

        List<Map<String, Object>> allResults;
        try {
            // NOTE(review): "0, 1000" looks like from/size, i.e. only the first 1000 hits of the
            // source are inspected -- confirm against EsDao.search.
            allResults = Arrays.stream(esDao.search(TimeUtil.getAccurateIndex(dw.getTime(), getTypeB(), false),
                            bool, null, null, 0, 1000, null).getHits())
                    .map(SearchHit::getSourceAsMap)
                    .collect(Collectors.toList());
        } catch (IOException e) {
            log.error("es文本搜索失败:", e);
            return null;
        }

        for (Map<String, Object> map : allResults) {
            try {
                CompleteText text = CompleteText.restoreFromEs(map);
                // A hit counts only when BOTH the formatted time and the host are equal.
                if (ruleTime.equals(TimeUtil.CONTENT_DF.format(text.getTime()))
                        && host.equals(Tools.getHost(text.getUrl()))) {
                    return text;
                }
            } catch (Exception e) {
                // A single unreadable hit must not abort the scan, but keep the reason in the log
                // (the trailing Throwable is rendered as a stack trace by Log4j 2).
                log.info("debug-esMap:{}", JSONObject.toJSONString(map), e);
            }
        }

        log.info("文本匹配任未找到!title:{},source:{},time:{},host:{}", dw.getTitle(), dw.getSource(), ruleTime, host);
        return null;
    }

    /**
     * Converts the raw upload payload into a {@link CompleteTextMark} / {@link CompleteText} pair.
     *
     * @param info raw upload payload
     * @return upload info wrapping the stored entity and its mark
     * @throws FieldErrorException when the time field is illegal or the mark feature fields
     *                             cannot be obtained (the original failure is kept as cause)
     */
    @Override
    public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
        CompleteTextMark mark = JSONObject.parseObject(JSONObject.toJSONString(info), CompleteTextMark.class);
        // Fill in the category fields when c5 is absent.
        if (null == mark.getC5()) {
            DataUploadUtil.defaultCTypeAll(mark, info);
        }
        if (!Tools.isLegalTime(mark.getTime())) {
            throw new FieldErrorException("time字段不符合规则");
        }
        // Reset userId from the payload's uid.
        mark.setUserId(info.getUid());
        try {
            String[] mupdates = getDubboHandler().getMupdates(mark.filterInfo());
            // Mark feature fields.
            mark.setMupdate(mupdates[0]);
            if (mupdates.length == 2) {
                mark.setMupdateTwo(mupdates[1]);
            }
        } catch (Exception e) {
            log.error("parseMarkUploadInfo2UploadInfo-getMupdates", e);
            throw fieldErrorCausedBy(e);
        }
        CompleteText dw = JSONObject.parseObject(mark.toJSON().toJSONString(), CompleteText.class);
        return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
    }

    /** Builds the URL lookup query from the original data's URL. */
    @Override
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        return urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_URL);
    }

    /**
     * Builds the mark lookup query (same mark group and same URL) and stores the
     * de-duplication key {@code mgroup + urlReplace(url)} on {@code result}.
     */
    @Override
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        CompleteTextMark mark = (CompleteTextMark) result.getMark();
        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));
        bool.must(urlQuery(mark.getUrl(), GenericAttribute.ES_URL));
        result.setKey(mark.getMgroup() + Tools.urlReplace(mark.getUrl()));
        return bool;
    }

    /** Restores the stored entity type of this service from an ES hit. */
    @Override
    public CommonDO getCommonDOBySearchHit(SearchHit hit) {
        return CommonDO.restoreFromEs(hit.getSourceAsMap(), getDwClazz());
    }

    /**
     * Wraps the completed mark of {@code result} into a {@link MarkInfo}.
     *
     * @param originMtag optional; only the first element is used as the existing tag
     */
    @Override
    public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
        // Tolerate an explicit null varargs array as "no existing tag".
        String originTag = (originMtag != null && originMtag.length > 0) ? originMtag[0] : null;
        return new MarkInfo((CompleteTextMark) addDefault(result.getMark(), mperson, group,
                originTag, result.getOriginData().getMtag(), CompleteTextMark.class));
    }

    /** Creates a {@link FieldErrorException} carrying the message of {@code cause} and the cause itself. */
    private static FieldErrorException fieldErrorCausedBy(Exception cause) {
        FieldErrorException error = new FieldErrorException(cause.getMessage());
        error.initCause(cause);
        return error;
    }
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.IncompleteText;
import com.zhiwei.base.entity.subclass.mark.IncompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import com.zhiwei.middleware.automatic.server.util.WeiboMidUrlDealUtil;
import io.micrometer.core.instrument.util.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
import java.util.Objects;
@Service
public class IncompleteTextServiceImpl extends DataUploadCommon implements BaseDataUploadService {
private static final Logger log = LogManager.getLogger(IncompleteTextServiceImpl.class);
private final DubboHandler dubboHandler;
public IncompleteTextServiceImpl(DubboHandler dubboHandler) {
super(ClassB.TypeB.INCOMPLETE, IncompleteText.class, IncompleteTextMark.class, dubboHandler);
this.dubboHandler = dubboHandler;
}
@Override
public CommonDO searchDwByContentNew(MarkUploadResult info) {
return null;
}
@Override
public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
if (null == info.getMgroup()) {
// TODO 测试打印
log.info("出现mgroup为空数据,data:{}", JSONObject.toJSONString(info));
}
IncompleteTextMark mark = JSONObject.parseObject(JSONObject.toJSONString(info),
IncompleteTextMark.class);
// 若没有c1-c5字段则自动补全
if (null == mark.getC5()) {
DataUploadUtil.defaultCTypeAll(mark, info);
}
if (!Tools.isLegalTime(mark.getTime())) {
throw new FieldErrorException("time字段不符合规则");
}
// 文本为空-选用标题数据
if (StringUtils.isEmpty(mark.getContent())) {
mark.setContent(info.getTitle());
}
// 粉丝数
if (null != info.getFans()) {
mark.setFollowersNum(Integer.valueOf(info.getFans()));
}
// 还原认证类型
String vtype = info.getAuthenticationType();
// 微博必须要有vtype
if (null != vtype) {
mark.setVtype(restoreVtype(vtype));
}
// 是否原发(默认值:原创)
if (StringUtils.isEmpty(info.getPrimary())) {
mark.setIsForward(false);
} else {
mark.setIsForward(!"原创".equals(info.getPrimary()));
}
// source也为screenName
mark.setScreenName(info.getSource());
// rootSource意为rootScreenName
mark.setRootScreenName(info.getRootSource());
if ("微博".equals(info.getPlatform())) {
// 去重信息需要携带c4
mark.setC4(1020);
if (null == mark.getMid()) {
String mid = WeiboMidUrlDealUtil.urlToMid(mark.getUrl());
if (null == mid) {
throw new FieldErrorException("转换mid出错");
} else {
mark.setMid(mid);
}
}
}
try {
String[] mupdates = dubboHandler.getMupdates(mark.filterInfo());
// 设置标注特征字段
mark.setMupdate(mupdates[0]);
if (mupdates.length == 2) {
mark.setMupdateTwo(mupdates[1]);
}
} catch (Exception e) {
log.error("parseMarkUploadInfo2UploadInfo-getMupdates",e);
throw new FieldErrorException(e.getMessage());
}
IncompleteText dw = JSONObject.parseObject(mark.toJSON().toJSONString(), IncompleteText.class);
return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
}
@Override
public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
if (Objects.nonNull(result.getOriginData().getMid())) {
boolQueryBuilder.should(QueryBuilders.termQuery(GenericAttribute.ES_MID, result.getOriginData().getMid()));
}
return boolQueryBuilder.should(urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_URL));
}
@Override
public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
// 还原数据
IncompleteTextMark mark = (IncompleteTextMark) result.getMark();
BoolQueryBuilder bool = QueryBuilders.boolQuery();
bool.must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));
if (Objects.nonNull(mark.getMid())) {
bool.must(QueryBuilders.termQuery(GenericAttribute.ES_MID, mark.getMid()));
}
bool.must(urlQuery(mark.getUrl(), GenericAttribute.ES_URL));
result.setKey(mark.getMgroup() + Tools.urlReplace(mark.getUrl()));
return bool;
}
@Override
public CommonDO getCommonDOBySearchHit(SearchHit hit) {
return CommonDO.restoreFromEs(hit.getSourceAsMap(), getDwClazz());
}
@Override
public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
String originTag = originMtag.length > 0 ? originMtag[0] : null;
return new MarkInfo((IncompleteTextMark) addDefault(result.getMark(), mperson, group,
originTag, result.getOriginData().getMtag(), IncompleteTextMark.class));
}
/**
*
* 根据微博规则还原认证类型
*
* @param vtypeStr
*
* @return int
*/
private int restoreVtype(String vtypeStr) {
if (null == vtypeStr) {
throw new IllegalArgumentException("微博必须要有vtype!!!");
}
switch (vtypeStr) {
case "未知":
return -2;
case "普通用户":
return -1;
case "名人":
return 0;
case "政府":
return 1;
case "企业":
return 2;
case "媒体":
return 3;
case "校园":
return 4;
case "网站":
return 5;
case "应用":
return 6;
case "团体":
return 7;
case "微博女郎":
return 10;
default:
// 其中 "达人" 对应200和220,返回默认值
// 默认返回-2(未知)
return -2;
}
}
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.QAText;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.base.entity.subclass.mark.QATextMark;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
/**
 * Upload service for question/answer records: {@link QAText} is the stored entity and
 * {@link QATextMark} the annotated (marked) variant.
 */
@Service
public class QATextServiceImpl extends DataUploadCommon implements BaseDataUploadService {

    private static final Logger log = LogManager.getLogger(QATextServiceImpl.class);

    private final DubboHandler dubboHandler;

    public QATextServiceImpl(DubboHandler dubboHandler) {
        super(ClassB.TypeB.QA, QAText.class, QATextMark.class, dubboHandler);
        this.dubboHandler = dubboHandler;
    }

    /** Content-based lookup is not performed for this type; always returns {@code null}. */
    @Override
    public CommonDO searchDwByContentNew(MarkUploadResult info) {
        return null;
    }

    /**
     * Converts the raw upload payload into a {@link QATextMark} / {@link QAText} pair.
     *
     * <p>The payload's title and URL always become the question title and URL. A zhihu.com URL
     * that does not contain {@code "answer"} is treated as a question; everything else is
     * treated as an answer.
     *
     * @param info raw upload payload
     * @return upload info wrapping the stored entity and its mark
     * @throws FieldErrorException when the time field cannot be parsed or is illegal, or the
     *                             mark feature fields cannot be obtained (the original failure
     *                             is kept as cause where one exists)
     */
    @Override
    public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
        JSONObject json = JSONObject.parseObject(JSONObject.toJSONString(info));
        String url = info.getUrl();
        String title = info.getTitle();
        String content = info.getContent();
        String source = info.getSource();

        // An unparsable (or missing) time is a field error, not a system failure; report it the
        // same way as the isLegalTime check further down.
        Long time;
        try {
            time = TimeUtil.TIME_FORMAT.parse(info.getTime()).getTime();
        } catch (Exception e) {
            throw fieldErrorCausedBy("time字段不符合规则", e);
        }

        // Forum-style data: the payload URL/title are taken as the question URL/title.
        json.put("questionTitle", title);
        json.put("questionUrl", url);
        // Simple question/answer discrimination.
        if ("www.zhihu.com".equals(Tools.getHost(url)) && !url.contains("answer")) {
            json.put("questionTime", time);
            json.put("questionUsername", source);
            json.put("questionContent", content);
        } else {
            json.put("answerTime", time);
            json.put("answerUrl", url);
            json.put("answerUsername", source);
            json.put("answerContent", content);
        }
        QATextMark mark = JSONObject.parseObject(json.toJSONString(), QATextMark.class);
        // Fill in c1-c5 automatically when they are absent.
        if (null == mark.getC5()) {
            DataUploadUtil.defaultCTypeAll(mark, info);
        }
        if (!Tools.isLegalTime(mark.getTime())) {
            throw new FieldErrorException("time字段不符合规则");
        }
        try {
            String[] mupdates = dubboHandler.getMupdates(mark.filterInfo());
            // Mark feature fields.
            mark.setMupdate(mupdates[0]);
            if (mupdates.length == 2) {
                mark.setMupdateTwo(mupdates[1]);
            }
        } catch (Exception e) {
            log.error("parseMarkUploadInfo2UploadInfo-getMupdates", e);
            throw fieldErrorCausedBy(e.getMessage(), e);
        }
        QAText dw = JSONObject.parseObject(mark.toJSON().toJSONString(), QAText.class);
        return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
    }

    /**
     * Builds the lookup query for the original data: same source AND the URL equal to either
     * the question URL or the answer URL.
     */
    @Override
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        String url = result.getOriginData().getUrl();
        BoolQueryBuilder eitherUrl = QueryBuilders.boolQuery()
                .should(urlQuery(url, GenericAttribute.ES_QA_QUESTION_URL))
                .should(urlQuery(url, GenericAttribute.ES_QA_ANSWER_URL));
        return QueryBuilders.boolQuery()
                .must(QueryBuilders.termQuery(GenericAttribute.ES_SOURCE, result.getOriginData().getSource()))
                .must(eitherUrl);
    }

    /**
     * Builds the mark lookup query (same mark group and a URL match) and stores the
     * de-duplication key {@code mgroup + urlReplace(originUrl)} on {@code result}.
     */
    @Override
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        QATextMark mark = (QATextMark) result.getMark();
        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));

        // NOTE(review): the first alternative requires the question URL to match AND an answer
        // URL to exist, so a question-only record (no answer_url) matches neither alternative.
        // Possibly mustNot(exists) was intended -- confirm with the data owners.
        BoolQueryBuilder questionWithAnswer = QueryBuilders.boolQuery()
                .must(urlQuery(mark.getQuestionUrl(), GenericAttribute.ES_QA_QUESTION_URL))
                .must(QueryBuilders.existsQuery(GenericAttribute.ES_QA_ANSWER_URL));
        BoolQueryBuilder urlMatch = QueryBuilders.boolQuery();
        urlMatch.should(questionWithAnswer);
        urlMatch.should(urlQuery(mark.getQuestionUrl(), GenericAttribute.ES_QA_ANSWER_URL));
        bool.must(urlMatch);

        result.setKey(mark.getMgroup() + Tools.urlReplace(result.getOriginData().getUrl()));
        return bool;
    }

    /** Restores the stored entity type of this service from an ES hit. */
    @Override
    public CommonDO getCommonDOBySearchHit(SearchHit hit) {
        return CommonDO.restoreFromEs(hit.getSourceAsMap(), getDwClazz());
    }

    /**
     * Wraps the completed mark of {@code result} into a {@link MarkInfo}.
     *
     * @param originMtag optional; only the first element is used as the existing tag
     */
    @Override
    public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
        // Tolerate an explicit null varargs array as "no existing tag".
        String originTag = (originMtag != null && originMtag.length > 0) ? originMtag[0] : null;
        return new MarkInfo((QATextMark) addDefault(result.getMark(), mperson, group,
                originTag, result.getOriginData().getMtag(), QATextMark.class));
    }

    /** Creates a {@link FieldErrorException} with the given message that keeps {@code cause} attached. */
    private static FieldErrorException fieldErrorCausedBy(String message, Throwable cause) {
        FieldErrorException error = new FieldErrorException(message);
        error.initCause(cause);
        return error;
    }
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.Video;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.base.entity.subclass.mark.VideoMark;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Upload service for video records: {@link Video} is the stored entity and
 * {@link VideoMark} the annotated (marked) variant.
 */
@Service
public class VideoServiceImpl extends DataUploadCommon implements BaseDataUploadService {

    private static final Logger log = LogManager.getLogger(VideoServiceImpl.class);

    private final DubboHandler dubboHandler;
    private final EsDao esDao;

    public VideoServiceImpl(DubboHandler dubboHandler, EsDao esDao) {
        super(ClassB.TypeB.VIDEO, Video.class, VideoMark.class, dubboHandler);
        this.dubboHandler = dubboHandler;
        this.esDao = esDao;
    }

    /**
     * Searches ES for an already stored video that matches the uploaded one by content rules:
     * same source, same time as formatted by {@code TimeUtil.CONTENT_DF} (minute precision
     * according to the original author's note) and same URL host.
     *
     * @param info upload result whose {@code getDw()} is a {@link Video}
     * @return the first matching stored record, or {@code null} when nothing matches or the
     *         ES search fails
     */
    @Override
    public CommonDO searchDwByContentNew(MarkUploadResult info) {
        Video dw = (Video) info.getDw();

        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_SOURCE, dw.getSource()));
        // Values used for content de-duplication.
        String ruleTime = TimeUtil.CONTENT_DF.format(dw.getTime());
        String host = Tools.getHost(dw.getUrl());

        List<Map<String, Object>> allResults;
        try {
            // NOTE(review): "0, 1000" looks like from/size, i.e. only the first 1000 hits of the
            // source are inspected -- confirm against EsDao.search.
            allResults = Arrays.stream(esDao.search(TimeUtil.getAccurateIndex(dw.getTime(), getTypeB(), false),
                            bool, null, null, 0, 1000, null).getHits())
                    .map(SearchHit::getSourceAsMap)
                    .collect(Collectors.toList());
        } catch (IOException e) {
            log.error("es文本搜索失败:", e);
            return null;
        }

        for (Map<String, Object> map : allResults) {
            try {
                Video text = Video.restoreFromEs(map);
                // A hit counts only when BOTH the formatted time and the host are equal.
                if (ruleTime.equals(TimeUtil.CONTENT_DF.format(text.getTime()))
                        && host.equals(Tools.getHost(text.getUrl()))) {
                    return text;
                }
            } catch (Exception e) {
                // Same policy as the complete-text service: one unreadable hit must not abort
                // the whole scan, but the reason is kept in the log.
                log.info("debug-esMap:{}", JSONObject.toJSONString(map), e);
            }
        }

        log.info("文本匹配任未找到!title:{},source:{},time:{},host:{}", dw.getTitle(), dw.getSource(), ruleTime, host);
        return null;
    }

    /**
     * Converts the raw upload payload into a {@link VideoMark} / {@link Video} pair.
     *
     * @param info raw upload payload
     * @return upload info wrapping the stored entity and its mark
     * @throws FieldErrorException when the time field is illegal or the mark feature fields
     *                             cannot be obtained (the original failure is kept as cause)
     */
    @Override
    public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
        VideoMark mark = JSONObject.parseObject(JSONObject.toJSONString(info), VideoMark.class);
        // Fill in c1-c5 automatically when they are absent.
        if (null == mark.getC5()) {
            DataUploadUtil.defaultCTypeAll(mark, info);
        }
        if (!Tools.isLegalTime(mark.getTime())) {
            throw new FieldErrorException("time字段不符合规则");
        }
        try {
            String[] mupdates = dubboHandler.getMupdates(mark.filterInfo());
            // Mark feature fields.
            mark.setMupdate(mupdates[0]);
            if (mupdates.length == 2) {
                mark.setMupdateTwo(mupdates[1]);
            }
        } catch (Exception e) {
            log.error("parseMarkUploadInfo2UploadInfo-getMupdates", e);
            // Keep the original failure reachable through getCause().
            FieldErrorException error = new FieldErrorException(e.getMessage());
            error.initCause(e);
            throw error;
        }
        // The stored entity is the plain Video (the class registered in the constructor and the
        // type searchDwByContentNew casts to), not a second VideoMark.
        Video dw = JSONObject.parseObject(mark.toJSON().toJSONString(), Video.class);
        return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
    }

    /** Builds the URL lookup query from the original data's URL. */
    @Override
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        return QueryBuilders.boolQuery().must(urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_URL));
    }

    /**
     * Builds the mark lookup query (same mark group and same URL) and stores the
     * de-duplication key {@code mgroup + urlReplace(url)} on {@code result}.
     */
    @Override
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        VideoMark mark = (VideoMark) result.getMark();
        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));
        bool.must(urlQuery(mark.getUrl(), GenericAttribute.ES_URL));
        result.setKey(mark.getMgroup() + Tools.urlReplace(mark.getUrl()));
        return bool;
    }

    /** Restores the stored entity type of this service from an ES hit. */
    @Override
    public CommonDO getCommonDOBySearchHit(SearchHit hit) {
        return CommonDO.restoreFromEs(hit.getSourceAsMap(), this.getDwClazz());
    }

    /**
     * Wraps the completed mark of {@code result} into a {@link MarkInfo}.
     *
     * @param originMtag optional; only the first element is used as the existing tag
     */
    @Override
    public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
        // Tolerate an explicit null varargs array as "no existing tag".
        String originTag = (originMtag != null && originMtag.length > 0) ? originMtag[0] : null;
        return new MarkInfo((VideoMark) addDefault(result.getMark(), mperson, group,
                originTag, result.getOriginData().getMtag(), VideoMark.class));
    }
}
package com.zhiwei.middleware.automatic.server.config;
import com.zhiwei.es.pojo.Address;
import com.zhiwei.es.util.IndexUtil;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@Configuration
public class EsClientConfig {
private static final Logger log = LogManager.getLogger(EsClientConfig.class);
private static final String COLON = ":";
private static final String COMMA = ",";
private final EsProperties esProperties;
public EsClientConfig(EsProperties esProperties) {
this.esProperties = esProperties;
}
@Bean(name = "esIndexes")
public IndexUtil.ESIndexes esIndexes() {
try {
List<Address> addresses = new ArrayList<>();
Assert.hasText(esProperties.getEsClientAddresses(), "Http Cluster nodes source must not be null or empty!");
String[] nodes = StringUtils.delimitedListToStringArray(esProperties.getEsClientAddresses(), COMMA);
Arrays.stream(nodes).forEach(node -> {
String[] segments = StringUtils.delimitedListToStringArray(node, COLON);
Assert.isTrue(segments.length == 2 || segments.length == 4,
() -> String.format("Invalid cluster node %s in %s! Must be in the format host:port or " +
"host:port:username:password!", node, esProperties.getEsClientAddresses()));
String host = segments[0].trim();
String port = segments[1].trim();
Assert.hasText(host, () -> String.format("No host name given cluster node %s!", node));
Assert.hasText(port, () -> String.format("No port given in cluster node %s!", node));
if (segments.length == 2) {
addresses.add(new Address(host, Integer.parseInt(port)));
} else {
String username = segments[2].trim();
String password = segments[3].trim();
Assert.hasText(username, () -> String.format("No username given cluster node %s!", node));
Assert.hasText(password, () -> String.format("No password given in cluster node %s!", node));
addresses.add(new Address(host, Integer.parseInt(port), username, password));
}
});
return IndexUtil.create(addresses);
} catch (Exception e) {
log.error("esIndexes初始化异常", e);
return null;
}
}
@Bean("restHighLevelClient")
public RestHighLevelClient restHighLevelClient() {
return buildRestHighLevelClient(esProperties.getClusterNodes(), esProperties.getUsername(),
esProperties.getPassword());
}
private RestHighLevelClient buildRestHighLevelClient(String clusterNodes, String esUsername, String esPassword) {
List<HttpHost> httpHostList = new ArrayList<>();
try {
Assert.hasText(clusterNodes, "Cluster nodes source must not be null or empty!");
String[] nodes = StringUtils.delimitedListToStringArray(clusterNodes, COMMA);
Arrays.stream(nodes).forEach(node -> {
String[] segments = StringUtils.delimitedListToStringArray(node, COLON);
Assert.isTrue(segments.length == 2,
() -> String.format("Invalid cluster node %s in %s! Must be in the format host:port!", node,
clusterNodes));
String host = segments[0].trim();
String port = segments[1].trim();
Assert.hasText(host, () -> String.format("No host name given cluster node %s!", node));
Assert.hasText(port, () -> String.format("No port given in cluster node %s!", node));
httpHostList.add(new HttpHost(host, Integer.parseInt(port)));
});
HttpHost[] httpHosts = httpHostList.toArray(new HttpHost[httpHostList.size()]);
//判断,如果未配置用户名,则进行无用户名密码连接,配置了用户名,则进行用户名密码连接
if (StringUtils.isEmpty(esUsername)) {
RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(httpHosts));
return client;
} else {
final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
credentialsProvider.setCredentials(AuthScope.ANY,
//es账号密码
new UsernamePasswordCredentials(esUsername, esPassword));
RestHighLevelClient client = new RestHighLevelClient(
RestClient.builder(httpHosts)
.setHttpClientConfigCallback((httpClientBuilder) -> {
//这里可以设置一些参数,比如cookie存储、代理等等
httpClientBuilder.disableAuthCaching();
return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
})
);
return client;
}
} catch (Exception e) {
log.error("es client初始化异常", e);
}
return null;
}
}
package com.zhiwei.middleware.automatic.server.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
* @Description:
* @Author: shentao
* @Date: 2020/4/27 13:47
*/
@Component
@ConfigurationProperties(prefix = "es")
public class EsProperties {

    // Plain property holder: every value is a raw string that is stored and returned unchanged.

    /** Address list used to build the {@code esIndexes} bean. */
    private String esClientAddresses;

    /** HTTP cluster node list. */
    private String httpClusterNodes;

    /** Name of the cluster. */
    private String clusterName;

    /** Cluster node list used to build the REST client. */
    private String clusterNodes;

    /** User name for the REST client. */
    private String username;

    /** Password for the REST client. */
    private String password;

    public String getEsClientAddresses() {
        return this.esClientAddresses;
    }

    public void setEsClientAddresses(String esClientAddresses) {
        this.esClientAddresses = esClientAddresses;
    }

    public String getHttpClusterNodes() {
        return this.httpClusterNodes;
    }

    public void setHttpClusterNodes(String httpClusterNodes) {
        this.httpClusterNodes = httpClusterNodes;
    }

    public String getClusterName() {
        return this.clusterName;
    }

    public void setClusterName(String clusterName) {
        this.clusterName = clusterName;
    }

    public String getClusterNodes() {
        return this.clusterNodes;
    }

    public void setClusterNodes(String clusterNodes) {
        this.clusterNodes = clusterNodes;
    }

    public String getUsername() {
        return this.username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public String getPassword() {
        return this.password;
    }

    public void setPassword(String password) {
        this.password = password;
    }
}
package com.zhiwei.middleware.automatic.server.config;
/**
 * Shared constants of the automatic-center server: name prefixes/suffixes, cache and lock
 * keys, similarity thresholds and Elasticsearch field names.
 */
public class GenericAttribute {

    public static final boolean IS_TEST = false;

    // ---- data-upload name parts ----
    public static final String UNIFIED_PREFIX = "dataUpload";
    public static final String SOURCE_DATA = "sourceData";
    public static final String FORMAT_ERROR_SUFFIX = "formatError";
    public static final String FIELD_ERROR_SUFFIX = "fieldError";
    /**
     * Suffix for system errors. It used to be a copy of {@link #FIELD_ERROR_SUFFIX}
     * ("fieldError"), which made system errors indistinguishable from field errors.
     * NOTE(review): make sure consumers of the old shared value are updated.
     */
    public static final String SYSTEM_ERROR_SUFFIX = "systemError";
    public static final String SUCCESS_SUFFIX = "successed";
    public static final String FAILED_SUFFIX = "failed";
    public static final String STATUS_SUFFIX = "status";

    // ---- data-collection key parts ----
    public static final String REDIS_PREFIX = "DATA-COLLECTION";
    public static final String SOURCE = "SOURCE";
    public static final String STATUS = "STATUS";
    public static final String NOISE = "NOISE";
    public static final String MAP_SET = "|MAP_SET";
    public static final String KEY_SET = "|KEY_SET";
    public static final String NOISE_SET = "|NOISE_SET";
    public static final String HIT_WORD_RATE = "hitWordAndRate";
    public static final double SIMILAR_STANDARD_NOISE = 0.8;
    public static final String KEY_INCREMENT = "increment";

    // ---- queue / map keys ----
    public static final String REDIS_QUEUE_ONE_KEY = "autoDataOneQueue";
    public static final String REDIS_QUEUE_MULTI_KEY = "autoDataMultiQueue";
    public static final String REDIS_MAP_KEY = "autoDataMap";
    public static final int REDIS_QUEUE_LIMIT = 1000;
    public static final double SIMILAR_STANDARD = 0.7;
    public static final String SON_ID = "sonId";

    /**
     * Maximum number of records processed when modifying template tags.
     */
    public static final int POINT_SIZE = 100;

    public static final String AUTO_PERSON = "自动化机器人";
    // Same numeric value as ES_CID_DEFAULT below.
    public static final long AUTO_CID = 100040002;

    // ---- template lock keys ----
    public static final String LOCK_TEMPLATE_HOUR = "lock:template:hour";
    public static final String LOCK_TEMPLATE_DAY = "lock:template:day";
    public static final String LOCK_TEMPLATE_NUMBER = "lock:template:number";

    // ---- Elasticsearch field names and their defaults ----
    public static final String ES_C_TIME = "ctime";
    public static final String ES_M_TIME = "mtime";
    public static final String ES_CID = "cid";
    public static final long ES_CID_DEFAULT = 100040002L;
    public static final String ES_C_NAME = "cname";
    public static final String AUTO_CNAME = "上传标注补充采集";
    public static final String ES_M_GROUP = "mgroup";
    public static final String ES_M_PERSON = "mperson";
    public static final String ES_M_TAG = "mtag";
    public static final String ES_URL = "url";
    public static final String ES_MID = "mid";
    public static final String ES_QA_QUESTION_URL = "question_url";
    public static final String ES_QA_ANSWER_URL = "answer_url";
    public static final String ES_SOURCE = "source";
    public static final String ES_TITLE = "title";
    public static final String ES_CONTENT = "content";
}
package com.zhiwei.middleware.automatic.server.config;
import java.util.List;
import java.util.Set;
// Holder for process-wide mutable state shared through public static fields.
// NOTE(review): the fields are neither final nor volatile and nothing in this class initialises
// them, so readers may observe null and writes from another thread are not guaranteed to be
// visible -- confirm how and when they are populated.
public class GlobalPojo {
// Presumably the set of known brand words; populated elsewhere -- verify against the writer.
public static Set<String> BRAND_WORDS;
// Presumably the list of all group names; populated elsewhere -- verify against the writer.
public static List<String> ALL_GROUP;
}
package com.zhiwei.middleware.automatic.server.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
@Component
@ConfigurationProperties(prefix = "middleware")
public class MiddlewareProperties {

    // Plain property holder: every value is a raw string that is stored and returned unchanged.

    /** ZooKeeper address. */
    private String zookeeperAddress;

    /** Application name. */
    private String appName;

    /** Mark group. */
    private String markGroup;

    /** Filter group. */
    private String filterGroup;

    public String getZookeeperAddress() {
        return this.zookeeperAddress;
    }

    public void setZookeeperAddress(String zookeeperAddress) {
        this.zookeeperAddress = zookeeperAddress;
    }

    public String getAppName() {
        return this.appName;
    }

    public void setAppName(String appName) {
        this.appName = appName;
    }

    public String getMarkGroup() {
        return this.markGroup;
    }

    public void setMarkGroup(String markGroup) {
        this.markGroup = markGroup;
    }

    public String getFilterGroup() {
        return this.filterGroup;
    }

    public void setFilterGroup(String filterGroup) {
        this.filterGroup = filterGroup;
    }
}
package com.zhiwei.middleware.automatic.server.config;
import com.mongodb.ConnectionString;
import com.mongodb.MongoClientSettings;
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import org.springframework.data.mongodb.MongoDatabaseFactory;
import org.springframework.data.mongodb.SpringDataMongoDB;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.SimpleMongoClientDatabaseFactory;
import org.springframework.data.mongodb.core.convert.DbRefResolver;
import org.springframework.data.mongodb.core.convert.DefaultDbRefResolver;
import org.springframework.data.mongodb.core.convert.DefaultMongoTypeMapper;
import org.springframework.data.mongodb.core.convert.MappingMongoConverter;
import org.springframework.data.mongodb.core.mapping.MongoMappingContext;
import java.util.concurrent.TimeUnit;
/**
 * Declares the two {@link MongoTemplate} beans of the service: the primary
 * {@code markerMongoTemplate} and the {@code hangzhouMongoTemplate}. Both share the same
 * connect-timeout and pool wait-time settings and neither writes the {@code _class} field.
 *
 * @author liu-yu
 */
@Configuration
public class MongoConfig {

    /** Socket connect timeout; treated as milliseconds. */
    @Value("${mongo.connectTimeout}")
    private int connectTimeout;

    /** Maximum time to wait for a pooled connection; treated as milliseconds. */
    @Value("${mongo.maxWaitTime}")
    private int maxWaitTime;

    @Value("${mongo.dataBaseMarker}")
    private String dataBaseMarker;

    @Value("${primary.uri.marker}")
    private String uriMarker;

    @Value("${primary.uri.hangzhou}")
    private String uriHangZhou;

    @Value("${mongo.hangzhouMarker}")
    private String dataBaseHangZhou;

    private MongoDatabaseFactory mongoDbMarkerFactory() {
        return createFactory(uriMarker, dataBaseMarker);
    }

    @Primary
    @Bean(name = "markerMongoTemplate")
    public MongoTemplate getMongoTemplateMarker() {
        return createTemplate(mongoDbMarkerFactory());
    }

    private MongoDatabaseFactory mongoDbHangZhouFactory() {
        return createFactory(uriHangZhou, dataBaseHangZhou);
    }

    @Bean(name = "hangzhouMongoTemplate")
    public MongoTemplate getMongoTemplateHangZhou() {
        return createTemplate(mongoDbHangZhouFactory());
    }

    /**
     * Creates a client for {@code uri} with the shared timeout settings and binds it to
     * {@code database}.
     *
     * <p>The timeouts were previously applied as {@code TimeUnit.MICROSECONDS}, which shrinks a
     * millisecond-style value by a factor of 1000 (sub-millisecond values end up as 0).
     * NOTE(review): confirm the configured values are meant as milliseconds.
     */
    private MongoDatabaseFactory createFactory(String uri, String database) {
        MongoClientSettings.Builder builder = MongoClientSettings.builder();
        builder.applyConnectionString(new ConnectionString(uri));
        builder.applyToConnectionPoolSettings(pool -> pool.maxWaitTime(maxWaitTime, TimeUnit.MILLISECONDS));
        builder.applyToSocketSettings(socket -> socket.connectTimeout(connectTimeout, TimeUnit.MILLISECONDS));
        MongoClient client = MongoClients.create(builder.build(), SpringDataMongoDB.driverInformation());
        return new SimpleMongoClientDatabaseFactory(client, database);
    }

    /**
     * Builds a template on top of {@code factory}. The same factory instance backs both the
     * DBRef resolver and the template, so only one {@link MongoClient} exists per database
     * (the previous code invoked the factory method twice and created two clients).
     */
    private MongoTemplate createTemplate(MongoDatabaseFactory factory) {
        DbRefResolver dbRefResolver = new DefaultDbRefResolver(factory);
        MappingMongoConverter converter = new MappingMongoConverter(dbRefResolver, new MongoMappingContext());
        // Do not insert the _class field.
        converter.setTypeMapper(new DefaultMongoTypeMapper(null));
        return new MongoTemplate(factory, converter);
    }
}
package com.zhiwei.middleware.automatic.server.config;
import org.springframework.context.annotation.Bean;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Component;
import java.util.concurrent.ThreadPoolExecutor;
/**
 * Declares the named {@link ThreadPoolTaskExecutor} beans used by the server.
 *
 * <p>Every pool uses the caller-runs rejection policy: when the pool and its queue are full, the
 * task is executed on the submitting thread instead of being dropped.
 */
@Component
public class TaskPoolConfig {

    /** Pool registered as {@code autMarkExecutor}: core 5, max 10, queue 20. */
    @Bean("autMarkExecutor")
    public ThreadPoolTaskExecutor autMarkExecutor() {
        return newExecutor(5, 10, 20, "autoMark-executor-");
    }

    /** Pool registered as {@code asyncExecutor}: core 4, max 5, queue 20. */
    @Bean("asyncExecutor")
    public ThreadPoolTaskExecutor asyncExecutor() {
        return newExecutor(4, 5, 20, "async-executor-");
    }

    /** Pool registered as {@code aggreeNoiseExecutor}: core 32, max 64, queue 50. */
    @Bean("aggreeNoiseExecutor")
    public ThreadPoolTaskExecutor aggreeNoiseExecutor() {
        return newExecutor(32, 64, 50, "aggree-noise-executor-");
    }

    /** Pool registered as {@code aggreeExecutor}: core 5, max 10, queue 50. */
    @Bean("aggreeExecutor")
    public ThreadPoolTaskExecutor aggreeExecutor() {
        return newExecutor(5, 10, 50, "aggree-executor-");
    }

    /** Pool registered as {@code eventAggreeEasyExecutor}: core 6, max 8, queue 20. */
    @Bean("eventAggreeEasyExecutor")
    public ThreadPoolTaskExecutor eventAggreeEasyExecutor() {
        return newExecutor(6, 8, 20, "event-easy-aggree-executor-");
    }

    /** Pool registered as {@code eventAggreeExecutor}: core 60, max 100, queue 50. */
    @Bean("eventAggreeExecutor")
    public ThreadPoolTaskExecutor eventAggreeExecutor() {
        return newExecutor(60, 100, 50, "event-aggree-executor-");
    }

    /**
     * Creates and initializes an executor with the caller-runs rejection policy.
     *
     * @param coreSize      core pool size
     * @param maxSize       maximum pool size
     * @param queueCapacity capacity of the task queue
     * @param namePrefix    prefix for the names of the pool's threads
     * @return the initialized executor
     */
    private static ThreadPoolTaskExecutor newExecutor(int coreSize, int maxSize, int queueCapacity,
                                                      String namePrefix) {
        ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setCorePoolSize(coreSize);
        pool.setMaxPoolSize(maxSize);
        pool.setThreadNamePrefix(namePrefix);
        pool.setQueueCapacity(queueCapacity);
        // CALLER_RUNS: a rejected task runs on the submitting thread rather than a pool thread.
        pool.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
        pool.initialize();
        return pool;
    }
}
package com.zhiwei.middleware.automatic.server.dao;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.FieldSortBuilder;
import java.io.IOException;
import java.util.List;
/**
* Data-access contract for Elasticsearch queries.
*/
public interface EsDao {
/**
* Runs an Elasticsearch search.
*
* <p>In the EsDaoImpl implementation, {@code sort} and {@code highlighter} are applied only
* when non-null, and {@code from} / {@code size} are applied only when they are not negative.
*
* @param indexes indices to search
* @param postFilter post-filter condition
* @param query query condition
* @param sort sort order
* @param from starting offset
* @param size number of hits to return
* @param highlighter highlight settings
* @return the hits of the search response
* @throws IOException if the search request fails
*/
SearchHits search(String[] indexes, QueryBuilder postFilter, QueryBuilder query, FieldSortBuilder sort, int from, int size, HighlightBuilder highlighter) throws IOException;
/**
* Deep pagination: collects the hits of successive pages into one list.
*
* <p>The EsDaoImpl implementation adds an ascending sort on {@code _id} to the supplied builder
* and keeps requesting pages with search-after until a page comes back empty.
*
* @param indexes indices to search
* @param searchSourceBuilder source builder describing the search; EsDaoImpl modifies it
* @param size number of hits per request; not applied when negative (EsDaoImpl)
* @return all collected hits
* @throws IOException if a search request fails
*/
List<SearchHit> afterSearch(String [] indexes, SearchSourceBuilder searchSourceBuilder, int size) throws IOException;
/**
* Runs a search with the given bool query.
*
* <p>The EsDaoImpl implementation sets the request size to 5000.
*
* @param indexes indices to search
* @param bool bool query to execute
* @return the hits of the search response
* @throws IOException if the search request fails
*/
SearchHits searchHitsByQuery(String[] indexes, BoolQueryBuilder bool) throws IOException;
}
package com.zhiwei.middleware.automatic.server.dao;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import org.springframework.data.mongodb.core.query.Query;
import java.util.List;
/**
* Data-access contract for {@link TemplateRecord} documents.
*/
public interface TemplateRecordDao {
/**
* Finds the template records matching the given criteria.
* @param query query criteria
* @return the matching records
*/
List<TemplateRecord> findTemplateRecord (Query query);
/**
* Inserts a new template record.
* @param templateRecord the template record to insert
*/
void insertTemplateRecord (TemplateRecord templateRecord);
/**
* Counts the template records matching the given criteria.
* @param query query criteria
* @return the number of matching records
*/
long count(Query query);
/**
* Deletes the template records matching the given criteria.
* @param query query criteria
*/
void removeTemplateRecord (Query query);
}
package com.zhiwei.middleware.automatic.server.dao.impl;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link EsDao} implementation backed by a {@link RestHighLevelClient}.
 */
@Component
public class EsDaoImpl implements EsDao {
    private final RestHighLevelClient esClient;

    public EsDaoImpl(RestHighLevelClient esClient) {
        this.esClient = esClient;
    }

    /**
     * Builds a search request from the given parts and returns the hits of the response.
     *
     * <p>{@code sort} and {@code highlighter} are applied only when non-null; {@code from} and
     * {@code size} are applied only when they are not negative.
     */
    @Override
    public SearchHits search(String[] indexes, QueryBuilder postFilter, QueryBuilder query, FieldSortBuilder sort, int from, int size, HighlightBuilder highlighter) throws IOException {
        SearchSourceBuilder source = new SearchSourceBuilder();
        source.postFilter(postFilter);
        source.query(query);
        if (sort != null) {
            source.sort(sort);
        }
        if (from >= 0) {
            source.from(from);
        }
        if (size >= 0) {
            source.size(size);
        }
        if (highlighter != null) {
            source.highlighter(highlighter);
        }

        SearchRequest request = new SearchRequest();
        request.indices(indexes);
        request.source(source);
        return esClient.search(request, RequestOptions.DEFAULT).getHits();
    }

    /**
     * Collects the hits of successive pages using search-after on an ascending {@code _id} sort.
     *
     * <p>The supplied builder is modified: the page size (when not negative), the {@code _id}
     * sort and the search-after cursor are set on it. Paging stops at the first empty page.
     */
    @Override
    public List<SearchHit> afterSearch(String [] indexes, SearchSourceBuilder builder, int size) throws IOException {
        SearchRequest request = new SearchRequest();
        request.indices(indexes);
        if (size >= 0) {
            builder.size(size);
        }
        builder.sort("_id", SortOrder.ASC);

        List<SearchHit> collected = new ArrayList<>();
        // Sort values of the last hit of the previous page; empty before the first request.
        Object[] cursor = new Object[0];
        while (true) {
            if (cursor.length != 0) {
                builder.searchAfter(cursor);
            }
            request.source(builder);
            SearchHit[] page = esClient.search(request, RequestOptions.DEFAULT).getHits().getHits();
            if (page.length == 0) {
                break;
            }
            for (SearchHit hit : page) {
                collected.add(hit);
            }
            cursor = page[page.length - 1].getSortValues();
        }
        return collected;
    }

    /**
     * Runs the given bool query with a request size of 5000 and returns the hits.
     */
    @Override
    public SearchHits searchHitsByQuery(String[] indexes, BoolQueryBuilder bool) throws IOException {
        SearchSourceBuilder source = new SearchSourceBuilder().size(5000).query(bool);
        SearchRequest request = new SearchRequest();
        request.indices(indexes);
        request.source(source);
        SearchResponse response = esClient.search(request, RequestOptions.DEFAULT);
        return response.getHits();
    }
}
package com.zhiwei.middleware.automatic.server.dao.impl;
import com.zhiwei.middleware.automatic.server.dao.TemplateRecordDao;
import com.zhiwei.middleware.automatic.server.pojo.TemplateNum;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * {@link TemplateRecordDao} implementation that delegates to the {@code markerMongoTemplate} bean.
 */
@Component
public class TemplateRecordDaoImpl implements TemplateRecordDao {
    /** Template injected under the {@code markerMongoTemplate} qualifier. */
    private final MongoTemplate markerTemplate;

    public TemplateRecordDaoImpl(@Qualifier("markerMongoTemplate") MongoTemplate mongoTemplate) {
        markerTemplate = mongoTemplate;
    }

    /** Returns the template records matching {@code query}. */
    @Override
    public List<TemplateRecord> findTemplateRecord(Query query) {
        List<TemplateRecord> records = markerTemplate.find(query, TemplateRecord.class);
        return records;
    }

    /** Inserts the given template record. */
    @Override
    public void insertTemplateRecord(TemplateRecord templateRecord) {
        markerTemplate.insert(templateRecord);
    }

    /** Returns the number of template records matching {@code query}. */
    @Override
    public long count(Query query) {
        long total = markerTemplate.count(query, TemplateRecord.class);
        return total;
    }

    /** Removes the template records matching {@code query}. */
    @Override
    public void removeTemplateRecord(Query query) {
        markerTemplate.remove(query, TemplateRecord.class);
    }
}
package com.zhiwei.middleware.automatic.server.dubbo.handle;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.base.filter.FilterInfo;
import com.zhiwei.middleware.automatic.server.config.MiddlewareProperties;
import com.zhiwei.middleware.automatic.server.util.DataCollectionUtil;
import com.zhiwei.middleware.automatic.server.util.EventCollectionUtil;
import com.zhiwei.middleware.automatic.server.util.MarkInfoUtil;
import com.zhiwei.middleware.cleaner.filter.UnifiedFilterClient;
import com.zhiwei.middleware.mark.service.MarkerClient;
import com.zhiwei.middleware.mark.vo.QueryResult;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Map;
/**
 * Wraps the unified-filter client and the marker client, both obtained from the middleware
 * properties at construction time.
 */
@Component
public class DubboHandler {
    private static final Logger log = LogManager.getLogger(DubboHandler.class);

    private final UnifiedFilterClient unifiedFilterClient;
    private final MarkerClient markerClient;

    /**
     * Obtains both clients using the application name, ZooKeeper address and groups from
     * {@code properties}.
     */
    public DubboHandler(MiddlewareProperties properties) {
        this.unifiedFilterClient = UnifiedFilterClient.getClient(
                properties.getAppName(),
                properties.getZookeeperAddress(),
                properties.getFilterGroup());
        this.markerClient = MarkerClient.getService(
                properties.getZookeeperAddress(),
                properties.getMarkGroup(),
                properties.getAppName());
    }

    /** Delegates to the filter client's {@code contains}, passing {@code true} as the second argument. */
    public boolean contains(FilterInfo filterInfo) {
        return this.unifiedFilterClient.contains(filterInfo, true);
    }

    /** Returns the update info reported by the filter client for {@code filterInfo}. */
    public String[] getMupdates(FilterInfo filterInfo) {
        return this.unifiedFilterClient.getUpdateInfo(filterInfo);
    }

    /** Upserts the given mark infos through the marker client. */
    public void markUpsert(List<MarkInfo> collect) {
        this.markerClient.upsert(collect);
    }

    /**
     * Queries mark results in batch.
     *
     * @param list filter infos to look up
     * @return the marker client's result map
     */
    public Map<String, QueryResult> matchQueryResult(List<FilterInfo> list) {
        return this.markerClient.matchQueryResult(list);
    }

    /**
     * Data-collection marking entry point: supplements the required fields on {@code list},
     * converts the entries to {@link MarkInfo} and upserts them through the marker client.
     *
     * @return the number of converted mark infos
     */
    public int dataCollectionUpsert(List<JSONObject> list, String mgroup, String mtag, String mperson) {
        // Fill in the required fields before conversion.
        DataCollectionUtil.supplementForInsert(list, mgroup, mtag, mperson);
        final List<MarkInfo> converted = MarkInfoUtil.transformToMarkInfo(list);
        this.markerClient.eventCollectionUpsert(converted);
        log.info("数据采集-调用标注中间件插入数据{}条", list.size());
        return converted.size();
    }

    /**
     * Event-collection marking entry point: supplements the required fields on {@code list},
     * converts the entries to {@link MarkInfo} and upserts them through the marker client.
     *
     * @return the converted mark infos
     */
    public List<MarkInfo> eventCollectionUpsertWithSupplement(List<JSONObject> list, String mgroup, String mperson) {
        // Fill in the required fields before conversion.
        EventCollectionUtil.supplementForInsert(list, mgroup, mperson);
        final List<MarkInfo> converted = MarkInfoUtil.transformToMarkInfo(list);
        this.markerClient.eventCollectionUpsert(converted);
        log.info("调用标注中间件插入数据{}条", list.size());
        return converted;
    }
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import java.util.List;
import java.util.Map;
/**
* Service contract for automatic marking and its templates.
*/
public interface AutoMaticService {
/**
* Auto-marks the given mark infos.
* NOTE(review): the exact behavior is defined by the implementation, which is not visible here.
*
* @param markInfos mark infos to process
*/
void autoMark(List<MarkInfo> markInfos);
/**
* Auto-marks the given multi mark infos.
* NOTE(review): the exact behavior is defined by the implementation, which is not visible here.
*
* @param markInfoMultis multi mark infos to process
*/
void autoMarkMulti(List<MarkInfoMulti> markInfoMultis);
/**
* Corrects the mark tag of a template title; the entry is added if it does not exist.
*
* @param group project group
* @param templateTitle template title
* @param fixTag the correct tag
* @return presumably whether the modification succeeded - TODO confirm
*/
boolean modifyTemplateTitle(String group, String templateTitle, String fixTag);
/**
* Gets data by template title (only the latest 100 entries).
*
* @param group project
* @param templateTitle template title
* @return feature values
*/
List<String> getMupdateByTemplateTitle(String group, String templateTitle);
/**
* Tries to find the template title from a title and a feature value.
*
* @param group project
* @param title title
* @param mupdate feature value
* @return template title
*/
String tryGetTemplateTitleByMupdate(String group, String title, String mupdate);
/**
* Matches a title online against the existing aggregated titles of a project group.
*
* @param project project
* @param title title
* @return the match result; the map's keys are not specified here
*/
public Map<String, Object> compareWithTemplateTileOL(String project, String title);
/**
* Resets an auto-marking template.
* @param group project
* @param templateTitle template title
* @return whether the reset succeeded
*/
boolean resetTemplate (String group, String templateTitle);
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import java.util.List;
/**
* Service contract for generic aggregation tasks: create a task, append data, start the
* aggregation and read its result.
*/
public interface CommonService {
/**
* Obtains a task id (new).
*
* @return the task id
*/
String generateAggreeOrder();
/**
* Appends data to the task with the given id (new).
*
* @param id task id
* @param list data to append
* @return presumably whether the append succeeded - TODO confirm
*/
boolean appendAggreeOrder(String id, List<AggreeDTO> list);
/**
* Aggregates the task's data using bisecting k-means.
*
* @param id task id
* @return presumably whether the aggregation was started successfully - TODO confirm
*/
boolean startAggree(String id);
/**
* Aggregates the task's data using bisecting k-means.
*
* @param id task id
* @param limit NOTE(review): meaning not documented; presumably a threshold for the aggregation - confirm
* @return presumably whether the aggregation was started successfully - TODO confirm
*/
boolean startAggree(String id, double limit);
/**
* Gets the aggregation result (returns the first page by default).
*
* @param id task id
* @return the aggregation result
*/
CommonAggreeResult getAggreeResult(String id);
/**
* Gets the aggregation result (paged).
*
* @param id task id
* @param page page to return
* @param pageLimit page size limit
* @return the aggregation result
*/
CommonAggreeResult getAggreeResult(String id, int page, int pageLimit);
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.category.ClassB;
import java.util.List;
import java.util.Map;
/**
* Service contract for data collection: caching a data set, aggregating it into parent/child
* titles, tagging, noise-set handling and final persistence.
*/
public interface DataCollectionService {
/**
* Clears all cached data.
*
* @param group project
* @param id id
*/
void cleanCache(String group, String id);
/**
* Clears all cached data (the noise set is kept).
*
* @param group project
* @param id id
*/
void cleanCacheExceptNoise(String group, String id);
/**
* Adds the base data set.
*
* @param group project
* @param id id
* @param compressedList data set
*/
void addDataCollection(String group, String id, List<String> compressedList);
/**
* Starts the aggregation.
*
* @param group project
* @param id id
* @param highWords NOTE(review): not documented in the original - confirm meaning
*/
void startAggree(String group, String id, String highWords);
/**
* Batch-modifies the tags of parent templates (also batch-modifies the child tags belonging to them).
*
* @param group project
* @param id id
* @param fatherIds parent ids
* @param mtag tag
* @param mperson person doing the marking
* @param typeB typeB
* @return whether the operation succeeded
*/
boolean batchModifyFatherTag(String group, String id, List<String> fatherIds, String mtag, String mperson,
ClassB.TypeB typeB);
/**
* Modifies the tag of a parent template (also batch-modifies the child tags belonging to it).
*
* @param group project
* @param id id
* @param fatherId parent id
* @param mtag tag
* @param mperson person doing the marking
* @param typeB typeB
* @return presumably whether the operation succeeded - TODO confirm
*/
boolean modifyFatherTag(String group, String id, String fatherId, String mtag, String mperson, ClassB.TypeB typeB);
/**
* Modifies a child tag.
*
* @param group project
* @param id id
* @param fatherId parent id
* @param sonId child id
* @param mtag tag
* @param mperson person doing the marking
* @param typeB typeB
* @return presumably whether the operation succeeded - TODO confirm
*/
boolean modifySonTag(String group, String id, String fatherId, String sonId, String mtag, String mperson,
ClassB.TypeB typeB);
/**
* Moves a parent entry into the noise set.
*
* @param group project
* @param id id
* @param fatherId parent id
* @param typeB typeB
* @return presumably whether the operation succeeded - TODO confirm
*/
boolean throwIntoNoise(String group, String id, String fatherId, ClassB.TypeB typeB);
/**
* Batch-moves parent entries into the noise set.
*
* @param group project
* @param id id
* @param fatherIds parent ids
* @param typeB typeB
* @return presumably whether the operation succeeded - TODO confirm
*/
boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, ClassB.TypeB typeB);
/**
* Restores a parent entry from the noise set.
*
* @param group project
* @param id id
* @param fatherId parent id
* @param typeB typeB
* @return presumably whether the operation succeeded - TODO confirm
*/
boolean restoreFromNoise(String group, String id, String fatherId, ClassB.TypeB typeB);
/**
* Gets a page of parent title information.
*
* @param group project
* @param id id
* @param page page to return
* @param size page size
* @param isAsc whether to sort ascending
* @param keyword keyword
* @param typeB typeB
* @param isTitle NOTE(review): not documented in the original - confirm meaning
* @param markFlag NOTE(review): not documented in the original - confirm meaning
* @return the page data; the map's keys are not specified here
*/
Map<String, Object> getFatherTitles(String group, String id, int page, int size, boolean isAsc,
String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag);
/**
* Gets a page of child information for the given parent id.
*
* @param group project
* @param id id
* @param fatherId parent id
* @param page page to return
* @param size page size
* @param isAsc whether to sort ascending
* @param keyword keyword
* @param typeB typeB
* @return the page data; the map's keys are not specified here
*/
Map<String, Object> getSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc,
String keyword, ClassB.TypeB typeB);
/**
* Gets a page of parent title information from the noise set.
*
* @param group project
* @param id id
* @param page page to return
* @param size page size
* @param isAsc whether to sort ascending
* @param keyword keyword
* @param typeB typeB
* @param isTitle NOTE(review): not documented in the original - confirm meaning
* @param markFlag NOTE(review): not documented in the original - confirm meaning
* @return the page data; the map's keys are not specified here
*/
Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size, boolean isAsc,
String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag);
/**
* Gets a page of child information from the noise set for the given parent id.
*
* @param group project
* @param id id
* @param fatherId parent id
* @param page page to return
* @param size page size
* @param isAsc whether to sort ascending
* @param keyword keyword
* @param typeB typeB
* @return the page data; the map's keys are not specified here
*/
Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId, int page, int size,
boolean isAsc, String keyword, ClassB.TypeB typeB);
/**
* Persists the data once checking is complete.
*
* @param group project
* @param id id
*/
void checkedThenInsert(String group, String id);
/**
* Immediately gets the interim aggregation status.
*
* @param group project
* @param id id
* @return -2: error while getting the result; -1: not aggregated; 0: aggregating; 1: aggregated
*/
int getAggreResultNow(String group, String id);
/**
* Immediately gets the interim persistence status.
*
* @param group project
* @param id id
* @return -2: error while getting the result; -1: not persisted; 0: persisting; 1: persisted
*/
int getInsertResultNow(String group, String id);
}
This diff is collapsed. Click to expand it.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment