Commit 401c8e2e by liuyu

2023年3月22日 server - son模式

parent 720a2127
...@@ -3,9 +3,6 @@ package com.zhiwei.middleware.automatic.configuration; ...@@ -3,9 +3,6 @@ package com.zhiwei.middleware.automatic.configuration;
import com.zhiwei.middleware.automatic.server.core.*; import com.zhiwei.middleware.automatic.server.core.*;
import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService; import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService;
import com.zhiwei.middleware.automatic.server.dubbo.service.CommonService;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataCollectionService;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataUploadService;
import org.springframework.boot.autoconfigure.AutoConfigureAfter; import org.springframework.boot.autoconfigure.AutoConfigureAfter;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
...@@ -27,28 +24,4 @@ public class AutoMaticClientConfiguration { ...@@ -27,28 +24,4 @@ public class AutoMaticClientConfiguration {
properties.getConsumer())); properties.getConsumer()));
} }
@Bean
@ConditionalOnMissingBean(CommonClient.class)
public CommonClient commonClient(AutoMaticClientConfigurationProperties properties) {
return new CommonClient(AutoMaticClientFactory.createInstance(CommonService.class, properties.getApplication(),properties.getRegistry(),
properties.getConsumer()));
}
@Bean
@ConditionalOnMissingBean(DataCollectionClient.class)
public DataCollectionClient dataCollectionClient(AutoMaticClientConfigurationProperties properties) {
return new DataCollectionClient(AutoMaticClientFactory.createInstance(DataCollectionService.class, properties.getApplication(),properties.getRegistry(),
properties.getConsumer()));
}
@Bean
@ConditionalOnMissingBean(DataUploadClient.class)
public DataUploadClient dataUploadClient(AutoMaticClientConfigurationProperties properties) {
return new DataUploadClient(AutoMaticClientFactory.createInstance(DataUploadService.class, properties.getApplication(),properties.getRegistry(),
properties.getConsumer()));
}
} }
...@@ -19,19 +19,10 @@ ...@@ -19,19 +19,10 @@
<curator.version>2.12.0</curator.version> <curator.version>2.12.0</curator.version>
<base.version>2.0.0-SNAPSHOT</base.version> <base.version>2.0.0-SNAPSHOT</base.version>
<easyexcel.version>2.1.2</easyexcel.version> <easyexcel.version>2.1.2</easyexcel.version>
<json.version>1.2.58</json.version>
</properties> </properties>
<dependencies> <dependencies>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${json.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/easyexcel --> <!-- https://mvnrepository.com/artifact/com.alibaba/easyexcel -->
<dependency> <dependency>
<groupId>com.alibaba</groupId> <groupId>com.alibaba</groupId>
......
package com.zhiwei.middleware.automatic.server.common;
/**
 * Shared constants for the automatic-marking server module.
 *
 * <p>The values are compile-time constants; the grouping below follows the
 * constant names and is for readability only.
 */
public class GenericAttribute {

    // ---- switches and thresholds ----

    /** Test-mode flag; disabled. */
    public static final boolean IS_TEST = false;

    /** Similarity threshold (name suggests it is the cut-off for "similar" — confirm with callers). */
    public static final double SIMILAR_STANDARD = 0.7d;

    /**
     * Maximum number of records processed when a template tag is modified.
     */
    public static final int POINT_SIZE = 100;

    // ---- cache keys (Redis, judging by the constant names) ----

    public static final String REDIS_QUEUE_ONE_KEY = "autoDataOneQueue";
    public static final String REDIS_QUEUE_MULTI_KEY = "autoDataMultiQueue";
    public static final String REDIS_MAP_KEY = "autoDataMap";

    // ---- identity used for automated operations ----

    public static final String AUTO_PERSON = "自动化机器人";
    public static final long AUTO_CID = 100_040_002L;
    public static final String AUTO_CNAME = "上传标注补充采集";

    // ---- document field names (the ES_ prefix suggests Elasticsearch fields) ----

    public static final String ES_M_TIME = "mtime";
    public static final String ES_M_PERSON = "mperson";
    public static final String ES_M_TAG = "mtag";
    public static final String ES_TITLE = "title";
    public static final String SON_ID = "sonId";

    // ---- parameter names ----

    public static final String GROUP_PARAM = "group";
    public static final String START_PARAM = "startTime";
    public static final String END_PARAM = "endTime";
    public static final String TEMPLATE_TITLE = "templateTitle";
    public static final String FIX_TAG = "fixTag";
    public static final String KEY = "task";
}
...@@ -3,6 +3,7 @@ package com.zhiwei.middleware.automatic.server.core; ...@@ -3,6 +3,7 @@ package com.zhiwei.middleware.automatic.server.core;
import com.zhiwei.base.entity.subclass.mark.MarkInfo; import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService; import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti; import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import com.zhiwei.middleware.automatic.server.pojo.TemplateTitleVo;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -23,8 +24,16 @@ public class AutoMaticClient { ...@@ -23,8 +24,16 @@ public class AutoMaticClient {
autoMaticService.autoMarkMulti(infos); autoMaticService.autoMarkMulti(infos);
} }
public boolean modifyTemplateTitle(String group, String templateTitle, String fixTag) { public void modifyTemplateTitle(String group, String templateTitle, String fixTag) {
return autoMaticService.modifyTemplateTitle(group, templateTitle, fixTag); autoMaticService.modifyTemplateTitle(group, templateTitle, fixTag);
}
public void resetTemplate(String group, String templateTitle) {
autoMaticService.resetTemplate(group, templateTitle);
}
public Map<String, TemplateTitleVo> getTemplateTitleByProject(String project) {
return autoMaticService.getTemplateTitleByProject(project);
} }
public List<String> getMupdateByTemplateTitle(String group, String templateTitle) { public List<String> getMupdateByTemplateTitle(String group, String templateTitle) {
...@@ -38,8 +47,4 @@ public class AutoMaticClient { ...@@ -38,8 +47,4 @@ public class AutoMaticClient {
public Map<String, Object> compareWithTemplateTileOL(String project, String title) { public Map<String, Object> compareWithTemplateTileOL(String project, String title) {
return autoMaticService.compareWithTemplateTileOL(project, title); return autoMaticService.compareWithTemplateTileOL(project, title);
} }
public boolean resetTemplate(String group, String templateTitle) {
return autoMaticService.resetTemplate(group, templateTitle);
}
} }
package com.zhiwei.middleware.automatic.server.core;
import com.zhiwei.middleware.automatic.server.dubbo.service.CommonService;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import java.util.List;
/**
 * Thin wrapper around a {@link CommonService} instance.
 *
 * <p>Every method forwards its arguments unchanged to the wrapped service and
 * returns the service's result as-is.
 */
public class CommonClient {

    /** Service that all calls are forwarded to. */
    private final CommonService commonService;

    /**
     * Creates a client bound to the given service.
     *
     * @param commonService service to delegate to; stored without a null check
     */
    public CommonClient(CommonService commonService) {
        this.commonService = commonService;
    }

    /**
     * Forwards to {@link CommonService#generateAggreeOrder()}.
     *
     * @return the value returned by the service
     */
    public String generateAggreeOrder() {
        final String orderId = this.commonService.generateAggreeOrder();
        return orderId;
    }

    /**
     * Forwards to {@link CommonService#appendAggreeOrder(String, List)}.
     *
     * @param id   id passed through to the service
     * @param list entries passed through to the service
     * @return the value returned by the service
     */
    public boolean appendAggreeOrder(String id, List<AggreeDTO> list) {
        final boolean appended = this.commonService.appendAggreeOrder(id, list);
        return appended;
    }

    /**
     * Forwards to {@link CommonService#startAggree(String)}.
     *
     * @param id id passed through to the service
     * @return the value returned by the service
     */
    public boolean startAggree(String id) {
        final boolean started = this.commonService.startAggree(id);
        return started;
    }

    /**
     * Forwards to {@link CommonService#startAggree(String, double)}.
     *
     * @param id    id passed through to the service
     * @param limit limit passed through to the service
     * @return the value returned by the service
     */
    public boolean startAggree(String id, double limit) {
        final boolean started = this.commonService.startAggree(id, limit);
        return started;
    }

    /**
     * Forwards to {@link CommonService#getAggreeResult(String)}.
     *
     * @param id id passed through to the service
     * @return the result object returned by the service
     */
    public CommonAggreeResult getAggreeResult(String id) {
        final CommonAggreeResult result = this.commonService.getAggreeResult(id);
        return result;
    }

    /**
     * Forwards to {@link CommonService#getAggreeResult(String, int, int)}.
     *
     * @param id        id passed through to the service
     * @param page      page argument passed through to the service
     * @param pageLimit page-size argument passed through to the service
     * @return the result object returned by the service
     */
    public CommonAggreeResult getAggreeResult(String id, int page, int pageLimit) {
        final CommonAggreeResult result = this.commonService.getAggreeResult(id, page, pageLimit);
        return result;
    }
}
package com.zhiwei.middleware.automatic.server.core;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataCollectionService;
import java.util.List;
import java.util.Map;
/**
 * Thin wrapper around a {@link DataCollectionService} instance.
 *
 * <p>Each method passes its arguments straight through to the same-named
 * service method and returns whatever that method returns.
 */
public class DataCollectionClient {

    /** Service that all calls are forwarded to. */
    private final DataCollectionService dataCollectionService;

    /**
     * Creates a client bound to the given service.
     *
     * @param dataCollectionService service to delegate to; stored without a null check
     */
    public DataCollectionClient(DataCollectionService dataCollectionService) {
        this.dataCollectionService = dataCollectionService;
    }

    /** Forwards to {@code DataCollectionService.cleanCache}. */
    public void cleanCache(String group, String id) {
        this.dataCollectionService.cleanCache(group, id);
    }

    /** Forwards to {@code DataCollectionService.cleanCacheExceptNoise}. */
    public void cleanCacheExceptNoise(String group, String id) {
        this.dataCollectionService.cleanCacheExceptNoise(group, id);
    }

    /** Forwards to {@code DataCollectionService.addDataCollection}. */
    public void addDataCollection(String group, String id, List<String> compressedlist) {
        this.dataCollectionService.addDataCollection(group, id, compressedlist);
    }

    /** Forwards to {@code DataCollectionService.startAggree}. */
    public void startAggree(String group, String id, String highWords) {
        this.dataCollectionService.startAggree(group, id, highWords);
    }

    /**
     * Forwards to {@code DataCollectionService.batchModifyFatherTag}.
     *
     * @return the value returned by the service
     */
    public boolean batchModifyFatherTag(
            String group,
            String id,
            List<String> fatherIds,
            String mtag,
            String mperson,
            ClassB.TypeB typeB) {
        final boolean outcome =
                this.dataCollectionService.batchModifyFatherTag(group, id, fatherIds, mtag, mperson, typeB);
        return outcome;
    }

    /**
     * Forwards to {@code DataCollectionService.modifyFatherTag}.
     *
     * @return the value returned by the service
     */
    public boolean modifyFatherTag(
            String group, String id, String fatherId, String mtag, String mperson, ClassB.TypeB typeB) {
        final boolean outcome =
                this.dataCollectionService.modifyFatherTag(group, id, fatherId, mtag, mperson, typeB);
        return outcome;
    }

    /**
     * Forwards to {@code DataCollectionService.modifySonTag}.
     *
     * @return the value returned by the service
     */
    public boolean modifySonTag(
            String group,
            String id,
            String fatherId,
            String sonId,
            String mtag,
            String mperson,
            ClassB.TypeB typeB) {
        final boolean outcome =
                this.dataCollectionService.modifySonTag(group, id, fatherId, sonId, mtag, mperson, typeB);
        return outcome;
    }

    /**
     * Forwards to {@code DataCollectionService.throwIntoNoise}.
     *
     * @return the value returned by the service
     */
    public boolean throwIntoNoise(String group, String id, String fatherId, ClassB.TypeB typeB) {
        final boolean outcome = this.dataCollectionService.throwIntoNoise(group, id, fatherId, typeB);
        return outcome;
    }

    /**
     * Forwards to {@code DataCollectionService.batchThrowIntoNoise}.
     *
     * @return the value returned by the service
     */
    public boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, ClassB.TypeB typeB) {
        final boolean outcome = this.dataCollectionService.batchThrowIntoNoise(group, id, fatherIds, typeB);
        return outcome;
    }

    /**
     * Forwards to {@code DataCollectionService.restoreFromNoise}.
     *
     * @return the value returned by the service
     */
    public boolean restoreFromNoise(String group, String id, String fatherId, ClassB.TypeB typeB) {
        final boolean outcome = this.dataCollectionService.restoreFromNoise(group, id, fatherId, typeB);
        return outcome;
    }

    /**
     * Forwards to {@code DataCollectionService.getFatherTitles}.
     *
     * @return the map returned by the service
     */
    public Map<String, Object> getFatherTitles(
            String group,
            String id,
            int page,
            int size,
            boolean isAsc,
            String keyword,
            ClassB.TypeB typeB,
            boolean isTitle,
            int markFlag) {
        final Map<String, Object> titles = this.dataCollectionService.getFatherTitles(
                group, id, page, size, isAsc, keyword, typeB, isTitle, markFlag);
        return titles;
    }

    /**
     * Forwards to {@code DataCollectionService.getSonTitles}.
     *
     * @return the map returned by the service
     */
    public Map<String, Object> getSonTitles(
            String group,
            String id,
            String fatherId,
            int page,
            int size,
            boolean isAsc,
            String keyword,
            ClassB.TypeB typeB) {
        final Map<String, Object> titles = this.dataCollectionService.getSonTitles(
                group, id, fatherId, page, size, isAsc, keyword, typeB);
        return titles;
    }

    /**
     * Forwards to {@code DataCollectionService.getNoiseFatherTitles}.
     *
     * @return the map returned by the service
     */
    public Map<String, Object> getNoiseFatherTitles(
            String group,
            String id,
            int page,
            int size,
            boolean isAsc,
            String keyword,
            ClassB.TypeB typeB,
            boolean isTitle,
            int markFlag) {
        final Map<String, Object> titles = this.dataCollectionService.getNoiseFatherTitles(
                group, id, page, size, isAsc, keyword, typeB, isTitle, markFlag);
        return titles;
    }

    /**
     * Forwards to {@code DataCollectionService.getNoiseSonTitles}.
     *
     * @return the map returned by the service
     */
    public Map<String, Object> getNoiseSonTitles(
            String group,
            String id,
            String fatherId,
            int page,
            int size,
            boolean isAsc,
            String keyword,
            ClassB.TypeB typeB) {
        final Map<String, Object> titles = this.dataCollectionService.getNoiseSonTitles(
                group, id, fatherId, page, size, isAsc, keyword, typeB);
        return titles;
    }

    /** Forwards to {@code DataCollectionService.checkedThenInsert}. */
    public void checkedThenInsert(String group, String id) {
        this.dataCollectionService.checkedThenInsert(group, id);
    }

    /**
     * Forwards to {@code DataCollectionService.getAggreResultNow}.
     *
     * @return the status code returned by the service
     */
    public int getAggreResultNow(String group, String id) {
        final int status = this.dataCollectionService.getAggreResultNow(group, id);
        return status;
    }

    /**
     * Forwards to {@code DataCollectionService.getInsertResultNow}.
     *
     * @return the status code returned by the service
     */
    public int getInsertResultNow(String group, String id) {
        final int status = this.dataCollectionService.getInsertResultNow(group, id);
        return status;
    }
}
package com.zhiwei.middleware.automatic.server.core;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataUploadService;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import java.util.Map;
/**
 * Thin wrapper around a {@link DataUploadService} instance.
 *
 * <p>Each method passes its arguments straight through to the same-named
 * service method and returns whatever that method returns.
 */
public class DataUploadClient {

    /** Service that all calls are forwarded to. */
    private final DataUploadService dataUploadService;

    /**
     * Creates a client bound to the given service.
     *
     * @param dataUploadService service to delegate to; stored without a null check
     */
    public DataUploadClient(DataUploadService dataUploadService) {
        this.dataUploadService = dataUploadService;
    }

    /** Forwards to {@code DataUploadService.addUploadList}. */
    public void addUploadList(String group, String id, String sourceStr) {
        this.dataUploadService.addUploadList(group, id, sourceStr);
    }

    /** Forwards to {@code DataUploadService.startUpload}. */
    public void startUpload(
            String group,
            String id,
            String mperson,
            UploadInfo.MtagType mtagType,
            UploadInfo.FilterType filterType,
            String projectId,
            InsertType insertType) {
        this.dataUploadService.startUpload(group, id, mperson, mtagType, filterType, projectId, insertType);
    }

    /**
     * Forwards to {@code DataUploadService.getUploadStatus}.
     *
     * @return the map returned by the service
     */
    public Map<String, Object> getUploadStatus(String group, String id) {
        final Map<String, Object> status = this.dataUploadService.getUploadStatus(group, id);
        return status;
    }

    /**
     * Forwards to {@code DataUploadService.getUploadInfoList}.
     *
     * @return the map returned by the service
     */
    public Map<String, Object> getUploadInfoList(
            String group,
            String id,
            int page,
            int size,
            boolean isAsc,
            String searchField,
            String keyword,
            UploadInfo.UploadType uploadType) {
        final Map<String, Object> listing = this.dataUploadService.getUploadInfoList(
                group, id, page, size, isAsc, searchField, keyword, uploadType);
        return listing;
    }

    /**
     * Forwards to {@code DataUploadService.getDataType}.
     *
     * @return the data type returned by the service
     */
    public UploadInfo.DataType getDataType(JSONObject json, ClassB.TypeB typeB) {
        final UploadInfo.DataType dataType = this.dataUploadService.getDataType(json, typeB);
        return dataType;
    }

    /** Forwards to {@code DataUploadService.cleanUploadResult}. */
    public void cleanUploadResult(String group, String id) {
        this.dataUploadService.cleanUploadResult(group, id);
    }
}
...@@ -2,6 +2,7 @@ package com.zhiwei.middleware.automatic.server.dubbo.service; ...@@ -2,6 +2,7 @@ package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.entity.subclass.mark.MarkInfo; import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti; import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import com.zhiwei.middleware.automatic.server.pojo.TemplateTitleVo;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -18,7 +19,21 @@ public interface AutoMaticService { ...@@ -18,7 +19,21 @@ public interface AutoMaticService {
* @param templateTitle 模板标题 * @param templateTitle 模板标题
* @param fixTag 正确的标签 * @param fixTag 正确的标签
*/ */
boolean modifyTemplateTitle(String group, String templateTitle, String fixTag); void modifyTemplateTitle(String group, String templateTitle, String fixTag);
/**
* 重置自动标注模板
* @param group 项目
* @param templateTitle 模板标题
*/
void resetTemplate (String group, String templateTitle);
/**
* 获取项目文本模板
* @param project 项目
* @return 模板集
*/
Map<String, TemplateTitleVo> getTemplateTitleByProject(String project);
/** /**
* 根据模板标题获取数据(仅最新100条) * 根据模板标题获取数据(仅最新100条)
...@@ -46,13 +61,5 @@ public interface AutoMaticService { ...@@ -46,13 +61,5 @@ public interface AutoMaticService {
* @param title 标题 * @param title 标题
* @return 返回值 * @return 返回值
*/ */
public Map<String, Object> compareWithTemplateTileOL(String project, String title); Map<String, Object> compareWithTemplateTileOL(String project, String title);
/**
* 重置自动标注模板
* @param group 项目
* @param templateTitle 模板标题
* @return 是否成功
*/
boolean resetTemplate (String group, String templateTitle);
} }
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import java.util.List;
/**
 * Service contract for the generic aggregation workflow: obtain a task id,
 * append data to it, start the aggregation, then read the result.
 */
public interface CommonService {
/**
 * Obtains a task id (new-style API).
 *
 * @return the task id
 */
String generateAggreeOrder();
/**
 * Adds data to the task identified by {@code id} (new-style API).
 *
 * @param id task id
 * @param list entries to add
 * @return NOTE(review): meaning of the flag is not documented here —
 *         presumably whether the data was accepted; confirm with the implementation
 */
boolean appendAggreeOrder(String id, List<AggreeDTO> list);
/**
 * Aggregates the task's data using bisecting k-means.
 *
 * @param id task id
 * @return NOTE(review): meaning of the flag is not documented here —
 *         presumably whether aggregation was started; confirm with the implementation
 */
boolean startAggree(String id);
/**
 * Aggregates the task's data using bisecting k-means.
 *
 * @param id task id
 * @param limit NOTE(review): undocumented — presumably a threshold applied
 *        during aggregation; confirm with the implementation
 * @return NOTE(review): meaning of the flag is not documented here —
 *         presumably whether aggregation was started; confirm with the implementation
 */
boolean startAggree(String id, double limit);
/**
 * Gets the aggregation result (returns the first page by default).
 *
 * @param id task id
 * @return the aggregation result
 */
CommonAggreeResult getAggreeResult(String id);
/**
 * Gets the aggregation result (paged).
 *
 * @param id task id
 * @param page page to fetch (whether numbering starts at 0 or 1 is not stated here)
 * @param pageLimit page size limit
 * @return the aggregation result for the requested page
 */
CommonAggreeResult getAggreeResult(String id, int page, int pageLimit);
}
\ No newline at end of file
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.category.ClassB.TypeB;
import java.util.List;
import java.util.Map;
/**
 * Service for the data collection module.
 *
 * <p>Every operation is addressed by a {@code group} and an {@code id}. The
 * operations cover caching, aggregation, tagging of parent ("father") and
 * child ("son") entries, a noise set, and insertion into storage.
 *
 * <p>Created 2020-04-07.
 *
 * @author SJJ
 */
public interface DataCollectionService {

    /**
     * Clears all cached data.
     *
     * @param group group the request applies to
     * @param id    id the request applies to
     */
    void cleanCache(String group, String id);

    /**
     * Clears all cached data but keeps the noise set.
     *
     * @param group group the request applies to
     * @param id    id the request applies to
     */
    void cleanCacheExceptNoise(String group, String id);

    /**
     * Adds the base data set.
     *
     * @param group          group the request applies to
     * @param id             id the request applies to
     * @param compressedlist data entries to add; the name suggests each entry is
     *                       compressed — NOTE(review): format is not documented here
     */
    void addDataCollection(String group, String id, List<String> compressedlist);

    /**
     * Starts the aggregation.
     *
     * @param group     group the request applies to
     * @param id        id the request applies to
     * @param highWords NOTE(review): undocumented; confirm meaning and format with the implementation
     */
    void startAggree(String group, String id, String highWords);

    /**
     * Modifies the tag of several parent templates at once; the child tags that
     * belong to them are modified in bulk as well.
     *
     * @param group     group the request applies to
     * @param id        id the request applies to
     * @param fatherIds ids of the parent entries to modify
     * @param mtag      tag to apply
     * @param mperson   person recorded for the modification
     * @param typeB     data category
     * @return NOTE(review): meaning of the flag is not documented — presumably success; confirm
     */
    boolean batchModifyFatherTag(String group, String id, List<String> fatherIds, String mtag, String mperson,
            TypeB typeB);

    /**
     * Modifies the tag of one parent template; the child tags that belong to it
     * are modified in bulk as well.
     *
     * @param group    group the request applies to
     * @param id       id the request applies to
     * @param fatherId id of the parent entry to modify
     * @param mtag     tag to apply
     * @param mperson  person recorded for the modification
     * @param typeB    data category
     * @return NOTE(review): meaning of the flag is not documented — presumably success; confirm
     */
    boolean modifyFatherTag(String group, String id, String fatherId, String mtag, String mperson, TypeB typeB);

    /**
     * Modifies the tag of a single child entry.
     *
     * @param group    group the request applies to
     * @param id       id the request applies to
     * @param fatherId id of the parent entry
     * @param sonId    id of the child entry to modify
     * @param mtag     tag to apply
     * @param mperson  person recorded for the modification
     * @param typeB    data category
     * @return NOTE(review): meaning of the flag is not documented — presumably success; confirm
     */
    boolean modifySonTag(String group, String id, String fatherId, String sonId, String mtag, String mperson,
            TypeB typeB);

    /**
     * Moves a parent entry into the noise set.
     *
     * @param group    group the request applies to
     * @param id       id the request applies to
     * @param fatherId id of the parent entry
     * @param typeB    data category
     * @return NOTE(review): meaning of the flag is not documented — presumably success; confirm
     */
    boolean throwIntoNoise(String group, String id, String fatherId, TypeB typeB);

    /**
     * Moves several parent entries into the noise set.
     *
     * @param group     group the request applies to
     * @param id        id the request applies to
     * @param fatherIds ids of the parent entries
     * @param typeB     data category
     * @return NOTE(review): meaning of the flag is not documented — presumably success; confirm
     */
    boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, TypeB typeB);

    /**
     * Restores a parent entry from the noise set.
     *
     * @param group    group the request applies to
     * @param id       id the request applies to
     * @param fatherId id of the parent entry
     * @param typeB    data category
     * @return NOTE(review): meaning of the flag is not documented — presumably success; confirm
     */
    boolean restoreFromNoise(String group, String id, String fatherId, TypeB typeB);

    /**
     * Fetches one page of parent title information.
     *
     * @param group    group the request applies to
     * @param id       id the request applies to
     * @param page     page to fetch
     * @param size     page size
     * @param isAsc    whether to sort ascending
     * @param keyword  search keyword
     * @param typeB    data category
     * @param isTitle  NOTE(review): undocumented; confirm with the implementation
     * @param markFlag NOTE(review): undocumented; confirm with the implementation
     * @return result map; its keys are not documented here
     */
    Map<String, Object> getFatherTitles(String group, String id, int page, int size, boolean isAsc,
            String keyword, TypeB typeB, boolean isTitle, int markFlag);

    /**
     * Fetches one page of child information for the given parent id.
     *
     * @param group    group the request applies to
     * @param id       id the request applies to
     * @param fatherId id of the parent entry
     * @param page     page to fetch
     * @param size     page size
     * @param isAsc    whether to sort ascending
     * @param keyword  search keyword
     * @param typeB    data category
     * @return result map; its keys are not documented here
     */
    Map<String, Object> getSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc,
            String keyword, TypeB typeB);

    /**
     * Fetches one page of parent title information from the noise set.
     *
     * @param group    group the request applies to
     * @param id       id the request applies to
     * @param page     page to fetch
     * @param size     page size
     * @param isAsc    whether to sort ascending
     * @param keyword  search keyword
     * @param typeB    data category
     * @param isTitle  NOTE(review): undocumented; confirm with the implementation
     * @param markFlag NOTE(review): undocumented; confirm with the implementation
     * @return result map; its keys are not documented here
     */
    Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size, boolean isAsc,
            String keyword, TypeB typeB, boolean isTitle, int markFlag);

    /**
     * Fetches one page of child information from the noise set for the given parent id.
     *
     * @param group    group the request applies to
     * @param id       id the request applies to
     * @param fatherId id of the parent entry
     * @param page     page to fetch
     * @param size     page size
     * @param isAsc    whether to sort ascending
     * @param keyword  search keyword
     * @param typeB    data category
     * @return result map; its keys are not documented here
     */
    Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId, int page, int size,
            boolean isAsc, String keyword, TypeB typeB);

    /**
     * Inserts the data into storage once checking is complete.
     *
     * @param group group the request applies to
     * @param id    id the request applies to
     */
    void checkedThenInsert(String group, String id);

    /**
     * Returns the current (interim) aggregation status immediately.
     *
     * @param group group the request applies to
     * @param id    id the request applies to
     * @return -2: error while fetching the result; -1: not aggregated;
     *         0: aggregation in progress; 1: aggregated
     */
    int getAggreResultNow(String group, String id);

    /**
     * Returns the current (interim) insertion status immediately.
     *
     * @param group group the request applies to
     * @param id    id the request applies to
     * @return -2: error while fetching the result; -1: not inserted;
     *         0: insertion in progress; 1: inserted
     */
    int getInsertResultNow(String group, String id);
}
\ No newline at end of file
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB.TypeB;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import java.util.Map;
/**
 * Data upload service.
 *
 * <p>Created 2020-02-25.
 *
 * @author SJJ
 */
public interface DataUploadService {

    /**
     * Adds a source data set.
     *
     * @param group     group the request applies to
     * @param id        id the request applies to
     * @param sourceStr source data as a string — NOTE(review): format is not documented here
     */
    void addUploadList(String group, String id, String sourceStr);

    /**
     * Starts the upload.
     *
     * @param group      group the request applies to
     * @param id         id the request applies to
     * @param mperson    person recorded for the upload
     * @param mtagType   tag handling mode — NOTE(review): undocumented; see {@code UploadInfo.MtagType}
     * @param filterType filter mode — NOTE(review): undocumented; see {@code UploadInfo.FilterType}
     * @param projectId  project id
     * @param insertType insert mode — NOTE(review): undocumented; see {@code InsertType}
     */
    void startUpload(String group, String id, String mperson,
            UploadInfo.MtagType mtagType, UploadInfo.FilterType filterType, String projectId, InsertType insertType);

    /**
     * Gets the upload status (progress).
     *
     * @param group group the request applies to
     * @param id    id the request applies to
     * @return status map; its keys are not documented here
     */
    Map<String, Object> getUploadStatus(String group, String id);

    /**
     * Gets the data set for the given {@code UploadType}.
     *
     * @param group       group the request applies to
     * @param id          id the request applies to
     * @param page        page to fetch
     * @param size        page size
     * @param isAsc       whether to sort ascending
     * @param searchField field the keyword is matched against
     * @param keyword     search keyword
     * @param uploadType  upload type whose data set is requested
     * @return result map; its keys are not documented here
     */
    Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc,
            String searchField, String keyword, UploadInfo.UploadType uploadType);

    /**
     * Gets the {@code DataType} for a JSON record.
     *
     * @param json  record to classify
     * @param typeB data category
     * @return the data type
     */
    UploadInfo.DataType getDataType(JSONObject json, TypeB typeB);

    /**
     * Clears the data set.
     *
     * @param group group the request applies to
     * @param id    id the request applies to
     */
    void cleanUploadResult(String group, String id);
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.alibaba.fastjson.JSONObject;
/**
 * Mutable bean pairing a task type string with a JSON parameter object.
 */
public class AutoTask {

    /** Parameters of the task; stays {@code null} when the no-arg constructor is used. */
    private JSONObject paramSource;

    /** Task type string. */
    private String type;

    /** Creates an empty task; both fields are left {@code null}. */
    public AutoTask() {
    }

    /**
     * Creates a task of the given type with an empty parameter object.
     *
     * @param type task type string
     */
    public AutoTask(String type) {
        this.paramSource = new JSONObject();
        this.type = type;
    }

    /** @return the task type string */
    public String getType() {
        return this.type;
    }

    /** @param type task type string to store */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the parameter object, possibly {@code null} */
    public JSONObject getParamSource() {
        return this.paramSource;
    }

    /** @param paramSource parameter object to store */
    public void setParamSource(JSONObject paramSource) {
        this.paramSource = paramSource;
    }
}
package com.zhiwei.middleware.automatic.server.pojo; package com.zhiwei.middleware.automatic.server.pojo;
import org.springframework.data.mongodb.core.mapping.Document;
/**
* @ClassName
* @Description 模板标注信息记录
* @Author ${"liu-yu"}
* @Date 2022/5/6 17:05
**/
@Document("automaticmark_template_record")
public class TemplateRecord { public class TemplateRecord {
/** /**
* id * id
*/ */
......
package com.zhiwei.middleware.automatic.server.pojo.vo; package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.middleware.automatic.server.pojo.enums.TemplateStatus; import com.zhiwei.middleware.automatic.server.pojo.enums.TemplateStatus;
...@@ -15,7 +15,7 @@ public class TemplateTitleVo implements Serializable { ...@@ -15,7 +15,7 @@ public class TemplateTitleVo implements Serializable {
private String templateTitle; private String templateTitle;
private Date updateTime; private Date updateTime;
private Date createTime; private Date createTime;
private AtomicLong markSum; private Long markSum;
private String mtag; private String mtag;
private String url; private String url;
private TemplateStatus status; private TemplateStatus status;
...@@ -61,11 +61,11 @@ public class TemplateTitleVo implements Serializable { ...@@ -61,11 +61,11 @@ public class TemplateTitleVo implements Serializable {
this.createTime = createTime; this.createTime = createTime;
} }
public AtomicLong getMarkSum() { public Long getMarkSum() {
return markSum; return markSum;
} }
public void setMarkSum(AtomicLong markSum) { public void setMarkSum(Long markSum) {
this.markSum = markSum; this.markSum = markSum;
} }
...@@ -93,23 +93,18 @@ public class TemplateTitleVo implements Serializable { ...@@ -93,23 +93,18 @@ public class TemplateTitleVo implements Serializable {
this.status = status; this.status = status;
} }
public void emptyNum() {
this.markSum = new AtomicLong(0);
}
public TemplateTitleVo(String templateTitle, String mtag, String url) { public TemplateTitleVo(String templateTitle, String mtag, String url) {
this.updateTime = new Date(); this.updateTime = new Date();
this.createTime = new Date(); this.createTime = new Date();
this.templateTitle = templateTitle; this.templateTitle = templateTitle;
this.markSum = new AtomicLong(); this.markSum = 0L;
this.mtag = mtag; this.mtag = mtag;
this.url = url; this.url = url;
this.status = TemplateStatus.运行中; this.status = TemplateStatus.运行中;
} }
public void refreshMark() { public void refreshMark() {
this.getMarkSum().getAndIncrement();
this.updateTime = new Date(); this.updateTime = new Date();
} }
} }
package com.zhiwei.middleware.automatic.server.pojo.enums;
/**
 * Task kinds, each carrying a type string, a name and a cache id.
 */
public enum TaskType {

    COMMON_ONE("common_one", "common", "commonCache"),
    COMMON_TWO("common_two", "common", "commonCache"),
    TEMPLATE("template", "template", ""),
    TEMPLATE_MODIFY("template_modify", "template", ""),
    TEMPLATE_RESET("template_reset", "template", "");

    /** Type string; this is what {@link #create(String)} matches against. */
    final String type;

    /** Name shared by related task kinds. */
    final String name;

    /** Cache id; empty for the template kinds. */
    final String cacheId;

    TaskType(String type, String name, String cacheId) {
        this.type = type;
        this.name = name;
        this.cacheId = cacheId;
    }

    /** @return the type string of this constant */
    public String getType() {
        return type;
    }

    /** @return the name of this constant */
    public String getName() {
        return name;
    }

    /** @return the cache id of this constant */
    public String getCacheId() {
        return cacheId;
    }

    /**
     * Looks up the constant whose type string equals {@code type}.
     *
     * @param type type string to look for; {@code null} matches nothing
     * @return the first matching constant in declaration order, or {@code null} when none matches
     */
    public static TaskType create(String type) {
        final TaskType[] candidates = TaskType.values();
        int index = 0;
        while (index < candidates.length) {
            final TaskType candidate = candidates[index];
            if (candidate.type.equals(type)) {
                return candidate;
            }
            index++;
        }
        return null;
    }
}
...@@ -18,47 +18,36 @@ ...@@ -18,47 +18,36 @@
<json.version>1.2.47</json.version> <json.version>1.2.47</json.version>
<push-log.version>2.17.0-SNAPSHOT</push-log.version> <push-log.version>2.17.0-SNAPSHOT</push-log.version>
<curator.version>2.12.0</curator.version> <curator.version>2.12.0</curator.version>
<es.version>7.9.2</es.version>
<es-client.version>0.0.4-SNAPSHOT</es-client.version>
<filter.version>1.1.6-SNAPSHOT</filter.version>
<qbjc-bean.version>1.1.4.1-SNAPSHOT</qbjc-bean.version> <qbjc-bean.version>1.1.4.1-SNAPSHOT</qbjc-bean.version>
<nlp-aggree.version>0.0.5-SNAPSHOT</nlp-aggree.version> <nlp-aggree.version>0.0.5-SNAPSHOT</nlp-aggree.version>
<dubbo-server.version>2.7.4.1</dubbo-server.version> <dubbo-server.version>2.7.4.1</dubbo-server.version>
<automatic.version>1.0-SNAPSHOT</automatic.version> <automatic.version>1.0-SNAPSHOT</automatic.version>
<base.version>2.0.0-SNAPSHOT</base.version> <base.version>2.0.0-SNAPSHOT</base.version>
<marker.version>1.2.3-SNAPSHOT</marker.version>
<kafka.version>2.4.1.RELEASE</kafka.version>
</properties> </properties>
<dependencies> <dependencies>
<!-- kafka -->
<dependency> <dependency>
<groupId>org.springframework.kafka</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-kafka</artifactId> <artifactId>spring-boot-starter-data-mongodb</artifactId>
<version>${kafka.version}</version> <version>${spring-boot.version}</version>
<exclusions> <exclusions>
<exclusion> <exclusion>
<groupId>org.springframework</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-context</artifactId> <artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</exclusion>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion> </exclusion>
</exclusions> </exclusions>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.zhiwei</groupId>
<artifactId>wechat</artifactId>
<version>1.3.7-SNAPSHOT</version>
</dependency>
<!-- 标注客户端 -->
<dependency>
<groupId>com.zhiwei.middleware</groupId>
<artifactId>marker-client</artifactId>
<version>${marker.version}</version>
</dependency>
<dependency>
<groupId>com.zhiwei.base</groupId> <groupId>com.zhiwei.base</groupId>
<artifactId>base-objects-application</artifactId> <artifactId>base-objects-application</artifactId>
<version>${base.version}</version> <version>${base.version}</version>
...@@ -97,63 +86,12 @@ ...@@ -97,63 +86,12 @@
<dependency> <dependency>
<groupId>com.zhiwei.nlp</groupId>
<artifactId>nlp-aggree</artifactId>
<version>${nlp-aggree.version}</version>
</dependency>
<!-- 日志依赖使用crawler-filter -->
<dependency>
<groupId>com.zhiwei.middleware</groupId>
<artifactId>cleaner-unified-filter</artifactId>
<version>${filter.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>qbjc-bean</artifactId> <artifactId>qbjc-bean</artifactId>
<version>${qbjc-bean.version}</version> <version>${qbjc-bean.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>${es.version}</version>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>es-client</artifactId>
<version>${es-client.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-mongodb</artifactId>
<version>${spring-boot.version}</version>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</exclusion>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId> <groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId> <artifactId>curator-recipes</artifactId>
<version>${curator.version}</version> <version>${curator.version}</version>
......
package com.zhiwei.middleware.automatic.server.base;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.search.SearchHit;
/**
 * Per-category contract used by the data upload flow: searching existing
 * data, building queries, and converting between upload, search-hit and
 * mark entities.
 */
public interface BaseDataUploadService {
/**
 * Returns the data category this implementation handles.
 *
 * @return the category
 */
ClassB.TypeB getTypeB();
/**
 * Searches the main data store by text content.
 *
 * @param info upload information
 * @return CommonDO
 */
CommonDO searchDwByContentNew(MarkUploadResult info);
/**
 * Converts an uploaded spreadsheet-row entity into a data upload entity.
 *
 * @param info upload information
 * @return UploadInfo
 * @throws Exception if the conversion fails (the concrete causes are not documented here)
 */
UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info)
throws Exception;
/**
 * Builds the URL query condition.
 * @param result mark information
 * @return BoolQueryBuilder
 */
BoolQueryBuilder urlSearchQuery(MarkUploadResult result);
/**
 * Builds the text query condition.
 * @param result mark information
 * @return BoolQueryBuilder
 */
BoolQueryBuilder textSearchQuery(MarkUploadResult result);
/**
 * Converts an Elasticsearch hit into a base entity.
 * @param hit Elasticsearch hit
 * @return base entity
 */
CommonDO getCommonDOBySearchHit(SearchHit hit);
/**
 * Converts a mark result into a MarkInfo.
 * @param result mark result
 * @param mperson person who made the mark
 * @param group project
 * @param originMtag tag(s)
 * @return MarkInfo
 */
MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag);
}
package com.zhiwei.middleware.automatic.server.base;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.functional.*;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import java.util.*;
/**
 * Helper for a two-step bulk operation over a list of rows: first build one
 * combined Elasticsearch query from all rows, then merge the search hits back
 * into each row.
 *
 * <p>Failures on a single row are reported through the supplied
 * {@code UploadRowException} callback and do not stop the remaining rows.
 *
 * @param <T> row type
 */
public class BulkTemplate<T> {

    private static final Logger log = LogManager.getLogger(BulkTemplate.class);

    /** Rows being processed. */
    private List<T> source;

    /** Label prefixed to the stage name in per-row error reports. */
    private String state;

    /** Whether the query stage collected at least one index, i.e. the merge stage may run. */
    private boolean isNext;

    /** Combined query; one {@code should} clause is added per row. */
    private BoolQueryBuilder queryBuilder;

    /** Indices collected from the rows. */
    private Set<String> indexSet;

    /** Categories collected from the rows. */
    private Set<ClassB.TypeB> typeSet;

    /**
     * Creates a template for the given rows.
     *
     * @param source rows to process
     * @param state  label used in per-row error reports
     */
    public BulkTemplate(List<T> source, String state) {
        resetState(source, state);
    }

    /**
     * Builds the combined query and collects the indices and categories of all rows.
     *
     * <p>After the loop the template is marked ready for {@link #searchCallback}
     * only if at least one index was collected; otherwise an error is logged.
     *
     * @param esRowQuery produces the query clause for a row
     * @param esIndex    produces the index for a row; skipped when {@code null}
     * @param classType  produces the category for a row; skipped when {@code null}
     * @param exception  receives the row, the stage label and the message of any per-row failure
     */
    public void bulkQuery(EsRowQuery<T> esRowQuery, EsIndex<T> esIndex, DataClassType<T> classType, UploadRowException<T> exception) {
        for (T row : source) {
            try {
                queryBuilder.should(esRowQuery.rowQuery(row));
                if (esIndex != null) {
                    indexSet.add(esIndex.getIndex(row));
                }
                if (classType != null) {
                    typeSet.add(classType.getClassType(row));
                }
            } catch (Exception e) {
                exception.rowException(row, state + "-构建查询条件", e.getMessage());
            }
        }
        isNext = !indexSet.isEmpty();
        if (!isNext) {
            log.error("批量操作-构建查询条件阶段 es索引为空");
        }
    }

    /**
     * Merges search hits into every row; does nothing unless the preceding
     * {@link #bulkQuery} call collected at least one index.
     *
     * @param hitMap    hits grouped by row key
     * @param rule      rule handed to the merge callback
     * @param rowKey    produces the key used to look a row up in {@code hitMap}
     * @param dataMerge merges the hits found for a row (possibly {@code null}) into that row
     * @param exception receives the row, the stage label and the message of any per-row failure
     */
    public void searchCallback(Map<String, List<SearchHit>> hitMap, MarkUploadRule rule, RowKey<T> rowKey, DataMerge<T> dataMerge, UploadRowException<T> exception) {
        if (isNext) {
            for (T row : source) {
                try {
                    final String key = rowKey.getRowKey(row);
                    final List<SearchHit> hits = hitMap.get(key);
                    dataMerge.dataMerge(hits, row, rule);
                } catch (Exception e) {
                    exception.rowException(row, state + "-es数据合并", e.getMessage());
                }
            }
        }
    }

    /** @return the rows being processed */
    public List<T> getSource() {
        return source;
    }

    /** @return the combined query built so far */
    public BoolQueryBuilder getQueryBuilder() {
        return queryBuilder;
    }

    /** @return the indices collected so far */
    public Set<String> getIndexSet() {
        return indexSet;
    }

    /** @return the categories collected so far */
    public Set<ClassB.TypeB> getTypeSet() {
        return typeSet;
    }

    /**
     * Re-initialises the template for a new batch, discarding the query and the
     * collected indices and categories.
     *
     * @param source rows to process
     * @param state  label used in per-row error reports
     */
    public void clean(List<T> source, String state) {
        resetState(source, state);
    }

    /** Puts every field into its initial state for the given rows and label. */
    private void resetState(List<T> rows, String label) {
        this.source = rows;
        this.state = label;
        this.isNext = false;
        this.indexSet = new HashSet<>();
        this.typeSet = new HashSet<>();
        this.queryBuilder = QueryBuilders.boolQuery();
    }
}
package com.zhiwei.middleware.automatic.server.base;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.serializer.ValueFilter;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.*;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import java.util.ArrayList;
import java.util.List;
/**
 * Shared base of the per-type upload services: keeps the handled {@link ClassB.TypeB}, the entity
 * classes of the main store and the mark store, and the {@link DubboHandler} used for existence checks.
 */
public class DataUploadCommon {

    private final ClassB.TypeB typeB;
    private final Class<? extends CommonDO> dwClazz;
    private final Class<? extends CommonDO> markClazz;
    private final DubboHandler dubboHandler;

    /**
     * @param typeB        data type handled by this service
     * @param dwClazz      entity class of the main ("dw") store
     * @param markClazz    entity class of the mark store
     * @param dubboHandler handler used for remote checks
     */
    public DataUploadCommon(ClassB.TypeB typeB, Class<? extends CommonDO> dwClazz, Class<? extends CommonDO> markClazz,
                            DubboHandler dubboHandler) {
        this.typeB = typeB;
        this.dwClazz = dwClazz;
        this.markClazz = markClazz;
        this.dubboHandler = dubboHandler;
    }

    public ClassB.TypeB getTypeB() {
        return typeB;
    }

    public Class<? extends CommonDO> getDwClazz() {
        return dwClazz;
    }

    public Class<? extends CommonDO> getMarkClazz() {
        return markClazz;
    }

    public DubboHandler getDubboHandler() {
        return dubboHandler;
    }

    /**
     * Classifies the record and stores the classification on {@code info}.
     * The mark entity is checked first; the dw entity is only checked when the mark check is negative.
     *
     * @param info upload record carrying both entities
     * @return {@code MARK}, {@code DW}, or {@code EXTERNAL} when neither check succeeds
     */
    public final UploadInfo.DataType getDataType(UploadInfo info) throws Exception {
        final UploadInfo.DataType resolved;
        if (dubboHandler.contains(info.getCompound().getMark().filterInfo())) {
            // Already present in the mark store.
            resolved = UploadInfo.DataType.MARK;
        } else if (dubboHandler.contains(info.getCompound().getDw().filterInfo())) {
            // Already present in the main store.
            resolved = UploadInfo.DataType.DW;
        } else {
            resolved = UploadInfo.DataType.EXTERNAL;
        }
        info.setDataType(resolved);
        return resolved;
    }

    /**
     * Converts a dw entity to this service's mark entity class, setting the mark group on the way.
     *
     * @param dw     source entity
     * @param mgroup mark group to write into the converted entity
     * @return entity parsed as {@link #getMarkClazz()}
     */
    public final CommonDO convert2Mark(CommonDO dw, String mgroup) {
        JSONObject payload = dw.toJSON();
        payload.put(GenericAttribute.ES_M_GROUP, mgroup);
        String serialized = payload.toJSONString();
        return JSONObject.parseObject(serialized, markClazz);
    }

    /**
     * Builds a bool query whose {@code should} clauses are term queries on {@code urlName}, one per
     * URL variant: the URL itself, the same URL with the other of http/https, and, for URLs
     * containing {@code toutiao.com}, a {@code https://www.toutiao.com/a<digits>} form built from the
     * first digit run that {@code Tools.patternMatchFind} returns.
     *
     * @param url     URL to match
     * @param urlName name of the ES field holding the URL
     * @return the bool query
     */
    public static BoolQueryBuilder urlQuery(String url, String urlName) {
        List<String> variants = new ArrayList<>(2);
        variants.add(url);
        if (url.contains("https:")) {
            variants.add(url.replaceFirst("https", "http"));
        } else if (url.contains("http")) {
            variants.add(url.replaceFirst("http", "https"));
        }
        if (url.contains("toutiao.com")) {
            List<String> digitRuns = Tools.patternMatchFind(url, "[\\d]+");
            if (!digitRuns.isEmpty()) {
                variants.add("https://www.toutiao.com/a" + digitRuns.get(0));
            }
        }
        BoolQueryBuilder anyVariant = QueryBuilders.boolQuery();
        for (String variant : variants) {
            anyVariant.should(QueryBuilders.termQuery(urlName, variant));
        }
        return anyVariant;
    }

    /**
     * Fills in mandatory fields that may be missing and returns the entity re-parsed as {@code clazz}.
     * Creation time, cid and cname are only written when absent (cid also when it is {@code -1});
     * mark group, mark person and mark tag are always overwritten. Empty-string values are replaced
     * by {@code null} while serializing.
     *
     * @param commonDO  entity to complete
     * @param mperson   mark person to set
     * @param mgroup    mark group to set
     * @param originTag existing tag, combined with {@code mtag} by {@code Tools.partialUpdateTag}
     * @param mtag      uploaded tag
     * @param clazz     target entity class
     * @return the completed entity
     */
    public CommonDO addDefault(CommonDO commonDO, String mperson, String mgroup, String originTag,
                               String mtag, Class<? extends CommonDO> clazz) {
        JSONObject payload = commonDO.toJSON();

        // Defaults for ctime / cid / cname.
        if (payload.get(GenericAttribute.ES_C_TIME) == null) {
            payload.put(GenericAttribute.ES_C_TIME, System.currentTimeMillis());
        }
        Long cid = payload.getLong(GenericAttribute.ES_CID);
        if (cid == null || cid == -1) {
            payload.put(GenericAttribute.ES_CID, GenericAttribute.ES_CID_DEFAULT);
        }
        if (!payload.containsKey(GenericAttribute.ES_C_NAME)) {
            payload.put(GenericAttribute.ES_C_NAME, GenericAttribute.AUTO_CNAME);
        }

        payload.put(GenericAttribute.ES_M_GROUP, mgroup);
        payload.put(GenericAttribute.ES_M_PERSON, mperson);
        payload.put(GenericAttribute.ES_M_TAG, Tools.partialUpdateTag(originTag, mtag));

        ValueFilter blankToNull = (owner, name, value) -> "".equals(value) ? null : value;
        String serialized = JSON.toJSONString(payload, blankToNull);
        return JSONObject.parseObject(serialized, clazz);
    }
}
package com.zhiwei.middleware.automatic.server.base;
/**
 * Checked exception signalling that an uploaded record carries an invalid or unconvertible field.
 */
public class FieldErrorException extends Exception {

    private static final long serialVersionUID = 6671756541874479047L;

    /**
     * @param msg description of the offending field or rule
     */
    public FieldErrorException(String msg) {
        super(msg);
    }

    /**
     * Variant that keeps the original failure attached, so the stack trace of the underlying
     * problem is not lost when another exception is translated into this type.
     *
     * @param msg   description of the offending field or rule
     * @param cause underlying failure; may be {@code null}
     */
    public FieldErrorException(String msg, Throwable cause) {
        super(msg, cause);
    }
}
package com.zhiwei.middleware.automatic.server.base;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.category.ClassCodec;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.functional.FunctionalImpl;
import com.zhiwei.middleware.automatic.server.listener.BaseServiceContext;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.text.ParseException;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Upload template: converts uploaded mark records into {@link MarkUploadResult}s and enriches them
 * through batched Elasticsearch lookups.
 */
@Service
public class MarkCommonTemplate extends FunctionalImpl {

    private static final Logger log = LogManager.getLogger(MarkCommonTemplate.class);

    protected final DubboHandler dubboHandler;
    private final EsDao esDao;

    public MarkCommonTemplate(DubboHandler dubboHandler, EsDao esDao) {
        this.dubboHandler = dubboHandler;
        this.esDao = esDao;
    }

    /**
     * Converts uploaded records into upload results.
     * A record whose type cannot be resolved is returned with a format-error marker; a record whose
     * conversion fails is returned with the error set by {@link #setTransformInfo}.
     *
     * @param infos uploaded records
     * @return one result per input record, in input order
     */
    public List<MarkUploadResult> dataTransform(List<MarkUploadInfo> infos) {
        List<MarkUploadResult> result = new ArrayList<>();
        for (MarkUploadInfo info : infos) {
            ClassB.TypeB typeB = getInfoTypeB(info);
            MarkUploadResult uploadResult = new MarkUploadResult(info);
            if (Objects.isNull(typeB)) {
                uploadResult.setInfo(GenericAttribute.FORMAT_ERROR_SUFFIX, "解析用户上传c2出错");
                result.add(uploadResult);
                continue;
            }
            BaseDataUploadService dataUploadService = BaseServiceContext.getInstance().getDataUploadService(typeB);
            uploadResult.setTypeB(typeB);
            UploadInfo uploadInfo = setTransformInfo(dataUploadService, uploadResult, info);
            if (Objects.nonNull(uploadInfo)) {
                uploadResult.setCommonDO(uploadInfo.getCompound().getDw(), uploadInfo.getCompound().getMark());
            }
            result.add(uploadResult);
        }
        return result;
    }

    /**
     * Batched URL search against the main-store indices, merging hits into the template's rows.
     *
     * @param template batch template holding the rows
     * @param rule     upload rule forwarded to the merge step
     */
    public void textSearch(BulkTemplate<MarkUploadResult> template, MarkUploadRule rule) {
        if (template.getSource().isEmpty()) {
            return;
        }
        try {
            template.bulkQuery(this::urlSearchQuery, this::getDwIndex, this::getTypeB, this::rowException);
            if (template.getIndexSet().isEmpty()) {
                // bulkQuery has already logged this; searchCallback would ignore any hits, so skip the query.
                return;
            }
            SearchHits searchHits = esDao.searchHitsByQuery(TimeUtil.getEsIndex(template.getIndexSet(), template.getTypeSet()), template.getQueryBuilder());
            template.searchCallback(groupHitsByNormalizedUrl(searchHits), rule, this::getTextSearchRowKey, this::searchHitMerge, this::rowException);
        } catch (IOException e) {
            log.error("标注上传-批量url查询出错:", e);
        }
    }

    /**
     * Determines the data type of every successful result. A failure of the duplicate check is
     * recorded on the result as a field error.
     *
     * @param results upload results
     */
    public void uploadType(List<MarkUploadResult> results) {
        for (MarkUploadResult result : results) {
            if (!result.isSuccess()) {
                continue;
            }
            // Duplicate check.
            try {
                result.setDataType(getDataType(result.getMark(), result.getDw()));
            } catch (Exception e) {
                log.error("数据url:{},判重失败:", result.getOriginData().getUrl(), e);
                result.setInfo(GenericAttribute.FIELD_ERROR_SUFFIX, "判重失败:" + e.getMessage());
            }
        }
    }

    /**
     * Second search pass: results are grouped by data type and each group goes to its handler.
     * Results without a data type are left untouched.
     *
     * @param results      upload results
     * @param bulkTemplate batch template, re-initialised per group
     * @param rule         upload rule forwarded to the merge step
     */
    public void secondarySearch(List<MarkUploadResult> results, BulkTemplate<MarkUploadResult> bulkTemplate, MarkUploadRule rule) {
        try {
            // groupingBy rejects null keys; uploadType() does not set a type on rows that failed
            // earlier or whose duplicate check threw, so drop untyped rows before grouping.
            Map<UploadInfo.DataType, List<MarkUploadResult>> resGroup = results.stream()
                    .filter(r -> Objects.nonNull(r.getDataType()))
                    .collect(Collectors.groupingBy(MarkUploadResult::getDataType));
            for (Map.Entry<UploadInfo.DataType, List<MarkUploadResult>> entry : resGroup.entrySet()) {
                switch (entry.getKey()) {
                    case MARK:
                        markHandle(entry.getValue(), bulkTemplate, rule);
                        break;
                    case DW:
                        dwHandle(entry.getValue(), bulkTemplate, rule);
                        break;
                    case EXTERNAL:
                        externalHandle(entry.getValue());
                        break;
                    default:
                        break;
                }
            }
        } catch (Exception e) {
            log.error("标注上传-批量二次搜索查询出错:", e);
        }
    }

    /**
     * Runs the type-specific conversion and records the outcome on {@code result}.
     *
     * @return the converted upload info, or {@code null} when conversion failed
     */
    private UploadInfo setTransformInfo(BaseDataUploadService dataUploadService, MarkUploadResult result, MarkUploadInfo info) {
        try {
            UploadInfo uploadInfo = dataUploadService.parseMarkUploadInfo2UploadInfo(info);
            result.setSuccess(true);
            return uploadInfo;
        } catch (FieldErrorException e) {
            // Recorded as a format error.
            // NOTE(review): the suffix names look swapped relative to the exception types — confirm intent.
            result.setInfo(GenericAttribute.FORMAT_ERROR_SUFFIX, e.getMessage());
        } catch (Exception e) {
            // Recorded as a field error.
            result.setInfo(GenericAttribute.FIELD_ERROR_SUFFIX, e.getMessage());
        }
        log.error("上传数据转标注数据失败,数据url:{}, 错误信息:{}, {}", result.getOriginData().getUrl(), result.getInfoType(), result.getMessage());
        return null;
    }

    /** Handles results classified as DW: batched URL search against the main-store indices. */
    private void dwHandle(List<MarkUploadResult> res, BulkTemplate<MarkUploadResult> template, MarkUploadRule rule) {
        template.clean(res, "大库url查询");
        if (template.getSource().isEmpty()) {
            return;
        }
        try {
            template.bulkQuery(this::urlSearchQuery, this::getDwIndex, this::getTypeB, this::rowException);
            if (template.getIndexSet().isEmpty()) {
                // Nothing to search; the callback would be a no-op anyway.
                return;
            }
            SearchHits searchHits = esDao.searchHitsByQuery(TimeUtil.getEsIndex(template.getIndexSet(), template.getTypeSet()), template.getQueryBuilder());
            template.searchCallback(groupHitsByNormalizedUrl(searchHits), rule, this::getTextSearchRowKey, this::dwSearchHitMerge, this::rowException);
        } catch (IOException e) {
            log.error("标注上传-批量url查询出错:", e);
        }
    }

    /**
     * Handles results classified as MARK: batched search against the mark indices. Every hit is
     * registered under {@code mgroup + normalized URL} for each of its URL fields that is present.
     */
    private void markHandle(List<MarkUploadResult> res, BulkTemplate<MarkUploadResult> template, MarkUploadRule rule) {
        template.clean(res, "标注库url查询");
        if (template.getSource().isEmpty()) {
            return;
        }
        template.bulkQuery(this::textSearchQuery, this::getMarkIndex, null, this::rowException);
        if (template.getIndexSet().isEmpty()) {
            // Nothing to search; the callback would be a no-op anyway.
            return;
        }
        try {
            SearchHits searchHits = esDao.searchHitsByQuery(TimeUtil.getEsIndex(template.getIndexSet(), null), template.getQueryBuilder());
            Map<String, List<SearchHit>> hitGroup = new HashMap<>();
            for (SearchHit hit : searchHits) {
                Map<String, Object> sourceAsMap = hit.getSourceAsMap();
                String group = String.valueOf(sourceAsMap.get("mgroup"));
                registerHit(hitGroup, group, sourceAsMap.get("url"), hit);
                registerHit(hitGroup, group, sourceAsMap.get("question_url"), hit);
                registerHit(hitGroup, group, sourceAsMap.get("answer_url"), hit);
            }
            template.searchCallback(hitGroup, rule, this::markHandleRowKey, this::markSearchHitMerge, this::rowException);
        } catch (IOException e) {
            log.error("大库文本搜索失败:", e);
        }
    }

    /** Handles results classified as EXTERNAL: completes the classification fields of the mark entity. */
    private void externalHandle(List<MarkUploadResult> res) {
        for (MarkUploadResult result : res) {
            try {
                // Fill in the C1-C5 type fields.
                CommonDO wholeMark = DataUploadUtil.defaultCTypeAll(result.getMark(), result.getOriginData());
                result.setMark(wholeMark);
            } catch (Exception e) {
                log.error("externalHandle-", e);
                result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, "externalHandle处理异常");
            }
        }
    }

    /**
     * Resolves the type of an uploaded record: decoded from {@code c2} when it is set and non-zero,
     * otherwise derived by {@link #selfAdaptionTypeB}.
     *
     * @param info uploaded record
     * @return the type, or {@code null} when it cannot be resolved
     */
    private ClassB.TypeB getInfoTypeB(MarkUploadInfo info) {
        Integer c2 = info.getC2();
        try {
            return null == c2 || 0 == c2 ? selfAdaptionTypeB(info.getPlatform(), info.getClientFrom(), info.getTime(), info.getUrl())
                    : ClassB.TypeB.fromEncode(c2);
        } catch (Exception e) {
            // Include the record URL so the failing row can be identified from the log.
            log.error("解析上传数据类型失败,数据url:{}", info.getUrl(), e);
            return null;
        }
    }

    /**
     * Derives the type when {@code c2} is not usable. With a blank or "未知" platform the type is
     * read from the {@code c2} field of an ES document with the same URL; otherwise it is decoded
     * from platform and source.
     *
     * @param platform platform name
     * @param source   client source
     * @param timeStr  record time, parsed with {@code TimeUtil.TIME_FORMAT}
     * @param url      record URL
     * @return the type, or {@code null} when the ES lookup finds nothing
     * @throws ParseException when {@code timeStr} cannot be parsed
     * @throws IOException    when the ES query fails
     */
    private ClassB.TypeB selfAdaptionTypeB(String platform, String source, String timeStr, String url) throws ParseException, IOException {
        if (StringUtils.isBlank(platform) || "未知".equals(platform)) {
            SearchHits search = esDao.search(TimeUtil.getWholeIndexInMonth(TimeUtil.TIME_FORMAT.parse(timeStr).getTime()),
                    QueryBuilders.termQuery(GenericAttribute.ES_URL, url), null, null, 0, 1, null);
            if (0 != search.getTotalHits().value) {
                return ClassB.TypeB.fromEncode(Integer.parseInt(search.getAt(0).getSourceAsMap().get("c2") + ""));
            }
            return null;
        }
        return ClassCodec.decodeClassB(DataUploadUtil.getEndoceByPlatformAndSource(platform, source)).typeB();
    }

    /**
     * Classifies a record by where it already exists; the mark entity is checked before the dw entity.
     *
     * @param markDO mark entity, may be {@code null}
     * @param dwDo   dw entity, may be {@code null}
     * @return {@code MARK}, {@code DW}, or {@code EXTERNAL} when neither check succeeds
     */
    private UploadInfo.DataType getDataType(CommonDO markDO, CommonDO dwDo) {
        UploadInfo.DataType dataType = UploadInfo.DataType.EXTERNAL;
        if (Objects.nonNull(markDO) && dubboHandler.contains(markDO.filterInfo())) {
            dataType = UploadInfo.DataType.MARK;
        } else if (Objects.nonNull(dwDo) && dubboHandler.contains(dwDo.filterInfo())) {
            dataType = UploadInfo.DataType.DW;
        }
        return dataType;
    }

    /**
     * Groups hits by normalized URL, taking the first present of {@code answer_url},
     * {@code question_url} and {@code url}.
     */
    private Map<String, List<SearchHit>> groupHitsByNormalizedUrl(SearchHits searchHits) {
        return Arrays.stream(searchHits.getHits()).collect(Collectors.groupingBy(hit -> {
            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
            Object raw = sourceAsMap.get("answer_url");
            if (Objects.isNull(raw)) {
                raw = sourceAsMap.get("question_url");
            }
            if (Objects.isNull(raw)) {
                raw = sourceAsMap.get("url");
            }
            return Tools.urlReplace(String.valueOf(raw));
        }));
    }

    /** Adds {@code hit} under {@code group + normalized url}; does nothing when {@code url} is null. */
    private static void registerHit(Map<String, List<SearchHit>> hitGroup, String group, Object url, SearchHit hit) {
        if (Objects.nonNull(url)) {
            String key = group + Tools.urlReplace(String.valueOf(url));
            hitGroup.computeIfAbsent(key, k -> new ArrayList<>()).add(hit);
        }
    }
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.CompleteText;
import com.zhiwei.base.entity.subclass.mark.CompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Upload service for {@code ClassB.TypeB.COMPLETE} records, backed by {@link CompleteText} and
 * {@link CompleteTextMark}.
 */
@Service
public class CompleteTextServiceImpl extends DataUploadCommon implements BaseDataUploadService {

    private static final Logger log = LogManager.getLogger(CompleteTextServiceImpl.class);

    private final EsDao esDao;

    public CompleteTextServiceImpl(DubboHandler dubboHandler, EsDao esDao) {
        super(ClassB.TypeB.COMPLETE, CompleteText.class, CompleteTextMark.class, dubboHandler);
        this.esDao = esDao;
    }

    /**
     * Looks for a main-store document with the same source whose formatted time
     * ({@code TimeUtil.CONTENT_DF}) and URL host equal those of the uploaded record.
     * Up to 1000 candidates with the same source are inspected.
     *
     * @param info upload result whose dw entity is a {@link CompleteText}
     * @return the first matching document, or {@code null} when none matches or the search fails
     */
    @Override
    public CommonDO searchDwByContentNew(MarkUploadResult info) {
        CompleteText dw = (CompleteText) info.getDw();
        // Matching keys: formatted time and host.
        String ruleTime = TimeUtil.CONTENT_DF.format(dw.getTime());
        String host = Tools.getHost(dw.getUrl());
        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_SOURCE, dw.getSource()));

        List<Map<String, Object>> allResults;
        try {
            allResults = Arrays.stream(esDao.search(TimeUtil.getAccurateIndex(dw.getTime(), getTypeB(), false), bool, null, null, 0, 1000, null).getHits())
                    .map(SearchHit::getSourceAsMap).collect(Collectors.toList());
        } catch (IOException e) {
            log.error("es文本搜索失败:", e);
            return null;
        }

        CommonDO res = null;
        for (Map<String, Object> map : allResults) {
            try {
                CompleteText text = CompleteText.restoreFromEs(map);
                // Both conditions must hold.
                if (ruleTime.equals(TimeUtil.CONTENT_DF.format(text.getTime())) && host.equals(Tools.getHost(text.getUrl()))) {
                    res = text;
                    break;
                }
            } catch (Exception e) {
                // A broken candidate must not abort the scan; log it together with the failure.
                log.info("debug-esMap:{}", JSONObject.toJSONString(map), e);
            }
        }
        if (res == null) {
            log.info("文本匹配任未找到!title:{},source:{},time:{},host:{}", dw.getTitle(), dw.getSource(), ruleTime, host);
        }
        return res;
    }

    /**
     * Converts an uploaded record into an {@link UploadInfo} holding a {@link CompleteTextMark} and
     * the {@link CompleteText} derived from it.
     *
     * @param info uploaded record
     * @return the upload info
     * @throws FieldErrorException when the time field is not legal or the mark feature fields
     *                             cannot be obtained
     * @throws Exception           for any other conversion failure
     */
    @Override
    public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
        CompleteTextMark mark = JSONObject.parseObject(JSONObject.toJSONString(info), CompleteTextMark.class);
        if (null == mark.getC5()) {
            DataUploadUtil.defaultCTypeAll(mark, info);
        }
        if (!Tools.isLegalTime(mark.getTime())) {
            throw new FieldErrorException("time字段不符合规则");
        }
        // Reset userId from the uploaded uid.
        mark.setUserId(info.getUid());
        try {
            String[] mupdates = getDubboHandler().getMupdates(mark.filterInfo());
            // Mark feature fields.
            mark.setMupdate(mupdates[0]);
            if (mupdates.length == 2) {
                mark.setMupdateTwo(mupdates[1]);
            }
        } catch (Exception e) {
            log.error("parseMarkUploadInfo2UploadInfo-getMupdates",e);
            // Keep the original failure attached to the translated exception.
            FieldErrorException wrapped = new FieldErrorException(e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
        CompleteText dw = JSONObject.parseObject(mark.toJSON().toJSONString(), CompleteText.class);
        return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
    }

    /** Query matching the uploaded URL (and its variants) on the URL field. */
    @Override
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        return urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_URL);
    }

    /**
     * Query matching mark group and URL of the mark entity. Also stores
     * {@code mgroup + normalized URL} as the result key.
     */
    @Override
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        CompleteTextMark mark = (CompleteTextMark) result.getMark();
        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));
        bool.must(urlQuery(mark.getUrl(), GenericAttribute.ES_URL));
        result.setKey(mark.getMgroup() + Tools.urlReplace(mark.getUrl()));
        return bool;
    }

    /** Restores a main-store entity of this service's dw class from a search hit. */
    @Override
    public CommonDO getCommonDOBySearchHit(SearchHit hit) {
        return CommonDO.restoreFromEs(hit.getSourceAsMap(), getDwClazz());
    }

    /**
     * Builds the {@link MarkInfo} for a result after completing its mandatory fields.
     *
     * @param originMtag optional existing tag; only the first element is used
     */
    @Override
    public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
        String originTag = originMtag.length > 0 ? originMtag[0] : null;
        return new MarkInfo((CompleteTextMark) addDefault(result.getMark(), mperson, group,
                originTag, result.getOriginData().getMtag(), CompleteTextMark.class));
    }
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.IncompleteText;
import com.zhiwei.base.entity.subclass.mark.IncompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import com.zhiwei.middleware.automatic.server.util.WeiboMidUrlDealUtil;
import io.micrometer.core.instrument.util.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
import java.util.Objects;
/**
 * Upload service for {@code ClassB.TypeB.INCOMPLETE} records, backed by {@link IncompleteText}
 * and {@link IncompleteTextMark}.
 */
@Service
public class IncompleteTextServiceImpl extends DataUploadCommon implements BaseDataUploadService {

    private static final Logger log = LogManager.getLogger(IncompleteTextServiceImpl.class);

    // The handler is kept by DataUploadCommon and read through getDubboHandler().
    public IncompleteTextServiceImpl(DubboHandler dubboHandler) {
        super(ClassB.TypeB.INCOMPLETE, IncompleteText.class, IncompleteTextMark.class, dubboHandler);
    }

    /** Content-based lookup is not implemented for this type; always returns {@code null}. */
    @Override
    public CommonDO searchDwByContentNew(MarkUploadResult info) {
        return null;
    }

    /**
     * Converts an uploaded record into an {@link UploadInfo} holding an {@link IncompleteTextMark}
     * and the {@link IncompleteText} derived from it.
     *
     * @param info uploaded record
     * @return the upload info
     * @throws FieldErrorException when the time field is not legal, a Weibo mid cannot be derived
     *                             from the URL, or the mark feature fields cannot be obtained
     * @throws Exception           for any other conversion failure
     */
    @Override
    public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
        if (null == info.getMgroup()) {
            // TODO diagnostic output
            log.info("出现mgroup为空数据,data:{}", JSONObject.toJSONString(info));
        }
        IncompleteTextMark mark = JSONObject.parseObject(JSONObject.toJSONString(info),
                IncompleteTextMark.class);
        // Fill in c1-c5 when they are missing.
        if (null == mark.getC5()) {
            DataUploadUtil.defaultCTypeAll(mark, info);
        }
        if (!Tools.isLegalTime(mark.getTime())) {
            throw new FieldErrorException("time字段不符合规则");
        }
        // Empty content falls back to the title.
        if (StringUtils.isEmpty(mark.getContent())) {
            mark.setContent(info.getTitle());
        }
        // Follower count.
        if (null != info.getFans()) {
            mark.setFollowersNum(Integer.valueOf(info.getFans()));
        }
        // Verification type, only converted when supplied.
        String vtype = info.getAuthenticationType();
        if (null != vtype) {
            mark.setVtype(restoreVtype(vtype));
        }
        // Forward flag; an empty value counts as original.
        if (StringUtils.isEmpty(info.getPrimary())) {
            mark.setIsForward(false);
        } else {
            mark.setIsForward(!"原创".equals(info.getPrimary()));
        }
        // source doubles as screenName, rootSource as rootScreenName.
        mark.setScreenName(info.getSource());
        mark.setRootScreenName(info.getRootSource());
        if ("微博".equals(info.getPlatform())) {
            // The dedup information needs c4.
            mark.setC4(1020);
            if (null == mark.getMid()) {
                String mid = WeiboMidUrlDealUtil.urlToMid(mark.getUrl());
                if (null == mid) {
                    throw new FieldErrorException("转换mid出错");
                } else {
                    mark.setMid(mid);
                }
            }
        }
        try {
            String[] mupdates = getDubboHandler().getMupdates(mark.filterInfo());
            // Mark feature fields.
            mark.setMupdate(mupdates[0]);
            if (mupdates.length == 2) {
                mark.setMupdateTwo(mupdates[1]);
            }
        } catch (Exception e) {
            log.error("parseMarkUploadInfo2UploadInfo-getMupdates",e);
            // Keep the original failure attached to the translated exception.
            FieldErrorException wrapped = new FieldErrorException(e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
        IncompleteText dw = JSONObject.parseObject(mark.toJSON().toJSONString(), IncompleteText.class);
        return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
    }

    /** Query matching the uploaded mid (when present) or the uploaded URL. */
    @Override
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        if (Objects.nonNull(result.getOriginData().getMid())) {
            boolQueryBuilder.should(QueryBuilders.termQuery(GenericAttribute.ES_MID, result.getOriginData().getMid()));
        }
        return boolQueryBuilder.should(urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_URL));
    }

    /**
     * Query matching mark group, mid (when present) and URL of the mark entity. Also stores
     * {@code mgroup + normalized URL} as the result key.
     */
    @Override
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        IncompleteTextMark mark = (IncompleteTextMark) result.getMark();
        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));
        if (Objects.nonNull(mark.getMid())) {
            bool.must(QueryBuilders.termQuery(GenericAttribute.ES_MID, mark.getMid()));
        }
        bool.must(urlQuery(mark.getUrl(), GenericAttribute.ES_URL));
        result.setKey(mark.getMgroup() + Tools.urlReplace(mark.getUrl()));
        return bool;
    }

    /** Restores a main-store entity of this service's dw class from a search hit. */
    @Override
    public CommonDO getCommonDOBySearchHit(SearchHit hit) {
        return CommonDO.restoreFromEs(hit.getSourceAsMap(), getDwClazz());
    }

    /**
     * Builds the {@link MarkInfo} for a result after completing its mandatory fields.
     *
     * @param originMtag optional existing tag; only the first element is used
     */
    @Override
    public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
        String originTag = originMtag.length > 0 ? originMtag[0] : null;
        return new MarkInfo((IncompleteTextMark) addDefault(result.getMark(), mperson, group,
                originTag, result.getOriginData().getMtag(), IncompleteTextMark.class));
    }

    /**
     * Maps a verification-type label to its numeric code.
     *
     * @param vtypeStr verification-type label
     * @return the numeric code; {@code -2} for unrecognised labels
     * @throws IllegalArgumentException when {@code vtypeStr} is {@code null}
     */
    private int restoreVtype(String vtypeStr) {
        if (null == vtypeStr) {
            throw new IllegalArgumentException("微博必须要有vtype!!!");
        }
        switch (vtypeStr) {
            case "未知":
                return -2;
            case "普通用户":
                return -1;
            case "名人":
                return 0;
            case "政府":
                return 1;
            case "企业":
                return 2;
            case "媒体":
                return 3;
            case "校园":
                return 4;
            case "网站":
                return 5;
            case "应用":
                return 6;
            case "团体":
                return 7;
            case "微博女郎":
                return 10;
            default:
                // "达人" corresponds to 200 and 220 and has no single code, so it falls through
                // to the default of -2 (unknown).
                return -2;
        }
    }
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.QAText;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.base.entity.subclass.mark.QATextMark;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
/**
 * Upload service for {@code ClassB.TypeB.QA} records, backed by {@link QAText} and
 * {@link QATextMark}.
 */
@Service
public class QATextServiceImpl extends DataUploadCommon implements BaseDataUploadService {

    private static final Logger log = LogManager.getLogger(QATextServiceImpl.class);

    // The handler is kept by DataUploadCommon and read through getDubboHandler().
    public QATextServiceImpl(DubboHandler dubboHandler) {
        super(ClassB.TypeB.QA, QAText.class, QATextMark.class, dubboHandler);
    }

    /** Content-based lookup is not implemented for this type; always returns {@code null}. */
    @Override
    public CommonDO searchDwByContentNew(MarkUploadResult info) {
        return null;
    }

    /**
     * Converts an uploaded record into an {@link UploadInfo} holding a {@link QATextMark} and the
     * {@link QAText} derived from it. The uploaded title and URL always become the question title
     * and question URL. A {@code www.zhihu.com} URL without "answer" in it is treated as a
     * question, so time, source and content go to the question fields; everything else is treated
     * as an answer.
     *
     * @param info uploaded record
     * @return the upload info
     * @throws FieldErrorException when the time field is not legal or the mark feature fields
     *                             cannot be obtained
     * @throws Exception           for any other conversion failure, including an unparsable time
     */
    @Override
    public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
        JSONObject json = JSONObject.parseObject(JSONObject.toJSONString(info));
        String url = info.getUrl();
        String title = info.getTitle();
        String content = info.getContent();
        String source = info.getSource();
        Long time = TimeUtil.TIME_FORMAT.parse(info.getTime()).getTime();
        // Title and URL are always stored as question title / question URL.
        json.put("questionTitle", title);
        json.put("questionUrl", url);
        // Simple question/answer heuristic.
        if ("www.zhihu.com".equals(Tools.getHost(url)) && !url.contains("answer")) {
            json.put("questionTime", time);
            json.put("questionUsername", source);
            json.put("questionContent", content);
        } else {
            json.put("answerTime", time);
            json.put("answerUrl", url);
            json.put("answerUsername",source);
            json.put("answerContent", content);
        }
        QATextMark mark = JSONObject.parseObject(json.toJSONString(), QATextMark.class);
        // Fill in c1-c5 when they are missing.
        if (null == mark.getC5()) {
            DataUploadUtil.defaultCTypeAll(mark, info);
        }
        if (!Tools.isLegalTime(mark.getTime())) {
            throw new FieldErrorException("time字段不符合规则");
        }
        try {
            String[] mupdates = getDubboHandler().getMupdates(mark.filterInfo());
            // Mark feature fields.
            mark.setMupdate(mupdates[0]);
            if (mupdates.length == 2) {
                mark.setMupdateTwo(mupdates[1]);
            }
        } catch (Exception e) {
            log.error("parseMarkUploadInfo2UploadInfo-getMupdates",e);
            // Keep the original failure attached to the translated exception.
            FieldErrorException wrapped = new FieldErrorException(e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
        QAText dw = JSONObject.parseObject(mark.toJSON().toJSONString(), QAText.class);
        return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
    }

    /** Query requiring the uploaded source and the uploaded URL in the question or answer URL field. */
    @Override
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        BoolQueryBuilder should = QueryBuilders.boolQuery().should(urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_QA_QUESTION_URL))
                .should(urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_QA_ANSWER_URL));
        return QueryBuilders.boolQuery().must(QueryBuilders.termQuery(GenericAttribute.ES_SOURCE, result.getOriginData().getSource())).must(should);
    }

    /**
     * Query requiring the mark group and either (question URL matches and an answer URL exists) or
     * (answer URL matches the mark's question URL). Also stores {@code mgroup + normalized uploaded
     * URL} as the result key.
     */
    @Override
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        QATextMark mark = (QATextMark) result.getMark();
        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));
        BoolQueryBuilder urlQuery = QueryBuilders.boolQuery();
        BoolQueryBuilder qaUrl = QueryBuilders.boolQuery()
                .must(urlQuery(mark.getQuestionUrl(), GenericAttribute.ES_QA_QUESTION_URL))
                .must(QueryBuilders.existsQuery(GenericAttribute.ES_QA_ANSWER_URL));
        urlQuery.should(qaUrl);
        urlQuery.should(urlQuery(mark.getQuestionUrl(), GenericAttribute.ES_QA_ANSWER_URL));
        bool.must(urlQuery);
        result.setKey(mark.getMgroup() + Tools.urlReplace(result.getOriginData().getUrl()));
        return bool;
    }

    /** Restores a main-store entity of this service's dw class from a search hit. */
    @Override
    public CommonDO getCommonDOBySearchHit(SearchHit hit) {
        return CommonDO.restoreFromEs(hit.getSourceAsMap(), getDwClazz());
    }

    /**
     * Builds the {@link MarkInfo} for a result after completing its mandatory fields.
     *
     * @param originMtag optional existing tag; only the first element is used
     */
    @Override
    public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
        String originTag = originMtag.length > 0 ? originMtag[0] : null;
        return new MarkInfo((QATextMark) addDefault(result.getMark(), mperson, group,
                originTag, result.getOriginData().getMtag(), QATextMark.class));
    }
}
package com.zhiwei.middleware.automatic.server.base.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.Video;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.base.entity.subclass.mark.VideoMark;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.base.FieldErrorException;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.util.DataUploadUtil;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * {@link BaseDataUploadService} implementation for video data
 * ({@code ClassB.TypeB.VIDEO}), working on {@link Video} documents and
 * {@link VideoMark} marks.
 */
@Service
public class VideoServiceImpl extends DataUploadCommon implements BaseDataUploadService {

    private static final Logger log = LogManager.getLogger(VideoServiceImpl.class);

    private final DubboHandler dubboHandler;
    private final EsDao esDao;

    /**
     * @param dubboHandler handler passed to the parent and used to obtain mupdate values
     * @param esDao        DAO used for Elasticsearch searches
     */
    public VideoServiceImpl(DubboHandler dubboHandler, EsDao esDao) {
        super(ClassB.TypeB.VIDEO, Video.class, VideoMark.class, dubboHandler);
        this.dubboHandler = dubboHandler;
        this.esDao = esDao;
    }

    /**
     * Looks for an already stored video that duplicates the one carried by {@code info}.
     *
     * <p>Elasticsearch is queried by {@code source} only; the returned hits are then
     * filtered in memory, and the first hit whose formatted time
     * ({@code TimeUtil.CONTENT_DF}) AND URL host both equal those of the uploaded
     * video is returned.
     *
     * @param info upload result whose {@code getDw()} is a {@link Video}
     * @return the first matching stored video, or {@code null} when the search fails
     *         or nothing matches
     */
    @Override
    public CommonDO searchDwByContentNew(MarkUploadResult info) {
        // Restore the uploaded data.
        Video dw = (Video) info.getDw();
        // Search condition: same source.
        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_SOURCE, dw.getSource()));
        // Text de-duplication compares the formatted time and the URL host.
        String ruleTime = TimeUtil.CONTENT_DF.format(dw.getTime());
        String host = Tools.getHost(dw.getUrl());

        List<Map<String, Object>> allResults;
        try {
            allResults = Arrays.stream(esDao.search(TimeUtil.getAccurateIndex(dw.getTime(), getTypeB(), false), bool, null, null, 0, 1000, null).getHits())
                    .map(SearchHit::getSourceAsMap).collect(Collectors.toList());
        } catch (IOException e) {
            log.error("es文本搜索失败:", e);
            return null;
        }

        for (Map<String, Object> map : allResults) {
            Video text = Video.restoreFromEs(map);
            // A hit counts only when BOTH time and host match.
            boolean sameTime = ruleTime.equals(TimeUtil.CONTENT_DF.format(text.getTime()));
            // NOTE(review): Tools.getHost is not visible here; the null check keeps an
            // unparsable upload URL from raising an uncaught NullPointerException.
            if (sameTime && host != null && host.equals(Tools.getHost(text.getUrl()))) {
                return text;
            }
        }
        // Nothing matched by text comparison.
        log.info("文本匹配任未找到!title:{},source:{},time:{},host:{}", dw.getTitle(), dw.getSource(), ruleTime, host);
        return null;
    }

    /**
     * Converts raw upload info into an {@link UploadInfo} for videos.
     *
     * @param info raw mark upload info
     * @return upload info holding the data/mark pair and this service's type
     * @throws FieldErrorException when the time field is illegal or the mupdate
     *                             values cannot be obtained
     * @throws Exception           declared by the interface
     */
    @Override
    public UploadInfo parseMarkUploadInfo2UploadInfo(MarkUploadInfo info) throws Exception {
        VideoMark mark = JSONObject.parseObject(JSONObject.toJSONString(info), VideoMark.class);
        // Fill in the c1-c5 fields automatically when c5 is missing.
        if (null == mark.getC5()) {
            DataUploadUtil.defaultCTypeAll(mark, info);
        }
        if (!Tools.isLegalTime(mark.getTime())) {
            throw new FieldErrorException("time字段不符合规则");
        }
        try {
            String[] mupdates = dubboHandler.getMupdates(mark.filterInfo());
            // Fail with a readable message instead of a null/index-out-of-bounds one.
            if (mupdates == null || mupdates.length == 0) {
                throw new FieldErrorException("未获取到mupdate特征值");
            }
            // Set the mark feature fields.
            mark.setMupdate(mupdates[0]);
            if (mupdates.length == 2) {
                mark.setMupdateTwo(mupdates[1]);
            }
        } catch (Exception e) {
            log.error("parseMarkUploadInfo2UploadInfo-getMupdates", e);
            // NOTE(review): the cause is dropped here; attach it if FieldErrorException
            // offers a (message, cause) constructor.
            throw new FieldErrorException(e.getMessage());
        }
        VideoMark dw = JSONObject.parseObject(mark.toJSON().toJSONString(), VideoMark.class);
        return new UploadInfo(info, new UploadInfo.CompoundCommonDO(dw, mark), getTypeB());
    }

    /**
     * Builds the query that finds stored data by the URL of the origin data.
     *
     * @param result upload result providing the origin data
     * @return bool query requiring a URL match on {@code GenericAttribute.ES_URL}
     */
    @Override
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        return QueryBuilders.boolQuery().must(urlQuery(result.getOriginData().getUrl(), GenericAttribute.ES_URL));
    }

    /**
     * Builds the query that finds marks by group and URL, and stores
     * {@code mgroup + Tools.urlReplace(url)} as the key of {@code result}.
     *
     * @param result upload result whose mark is a {@link VideoMark}; its key is set here
     * @return bool query on mgroup and URL
     */
    @Override
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        // Restore the mark.
        VideoMark mark = (VideoMark) result.getMark();
        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        bool.must(QueryBuilders.termQuery(GenericAttribute.ES_M_GROUP, mark.getMgroup()));
        bool.must(urlQuery(mark.getUrl(), GenericAttribute.ES_URL));
        result.setKey(mark.getMgroup() + Tools.urlReplace(mark.getUrl()));
        return bool;
    }

    /**
     * Restores a {@link CommonDO} from the source map of a search hit.
     *
     * @param hit Elasticsearch hit
     * @return object produced by {@code CommonDO.restoreFromEs} for {@code getDwClazz()}
     */
    @Override
    public CommonDO getCommonDOBySearchHit(SearchHit hit) {
        return CommonDO.restoreFromEs(hit.getSourceAsMap(), this.getDwClazz());
    }

    /**
     * Wraps the mark of an upload result into a {@link MarkInfo} after passing it
     * through {@code addDefault}.
     *
     * @param result     upload result providing the mark and the origin data
     * @param mperson    forwarded to {@code addDefault}
     * @param group      forwarded to {@code addDefault}
     * @param originMtag optional values; only the first element (if any) is used
     * @return a new {@link MarkInfo} built from the completed {@link VideoMark}
     */
    @Override
    public MarkInfo toMarkInfoNew(MarkUploadResult result, String mperson, String group, String... originMtag) {
        String originTag = originMtag.length > 0 ? originMtag[0] : null;
        return new MarkInfo((VideoMark) addDefault(result.getMark(), mperson, group,
                originTag, result.getOriginData().getMtag(), VideoMark.class));
    }
}
package com.zhiwei.middleware.automatic.server.config;
/**
 * Shared constants of the automatic-mark server: key prefixes/suffixes, queue and
 * map keys, similarity thresholds, lock names, defaults, and the {@code ES_*}
 * field names passed to the Elasticsearch query builders
 * (e.g. {@code ES_SOURCE}, {@code ES_URL}, {@code ES_M_GROUP}).
 */
public class GenericAttribute {
public static final boolean IS_TEST = false;
public static final String UNIFIED_PREFIX = "dataUpload";
public static final String SOURCE_DATA = "sourceData";
public static final String FORMAT_ERROR_SUFFIX = "formatError";
public static final String FIELD_ERROR_SUFFIX = "fieldError";
// NOTE(review): same value as FIELD_ERROR_SUFFIX, which looks like a copy-paste slip
// ("systemError" expected?). Left unchanged because the value is a runtime key;
// confirm against the readers and writers of both suffixes before changing it.
public static final String SYSTEM_ERROR_SUFFIX = "fieldError";
public static final String SUCCESS_SUFFIX = "successed";
public static final String FAILED_SUFFIX = "failed";
public static final String STATUS_SUFFIX = "status";
public static final String REDIS_PREFIX = "DATA-COLLECTION";
public static final String SOURCE = "SOURCE";
public static final String STATUS = "STATUS";
public static final String NOISE = "NOISE";
public static final String MAP_SET = "|MAP_SET";
public static final String KEY_SET = "|KEY_SET";
public static final String NOISE_SET = "|NOISE_SET";
public static final String HIT_WORD_RATE = "hitWordAndRate";
public static final double SIMILAR_STANDARD_NOISE = 0.8;
public static final String KEY_INCREMENT = "increment";
public static final String REDIS_QUEUE_ONE_KEY = "autoDataOneQueue";
public static final String REDIS_QUEUE_MULTI_KEY = "autoDataMultiQueue";
public static final String REDIS_MAP_KEY = "autoDataMap";
public static final int REDIS_QUEUE_LIMIT = 1000;
public static final double SIMILAR_STANDARD = 0.7;
public static final String SON_ID = "sonId";
/**
 * Maximum number of records processed when modifying a template tag.
 */
public static final int POINT_SIZE = 100;
public static final String AUTO_PERSON = "自动化机器人";
// NOTE(review): same numeric value as ES_CID_DEFAULT below; presumably one of the
// two could reference the other — confirm intent.
public static final long AUTO_CID = 100040002;
public static final String LOCK_TEMPLATE_HOUR = "lock:template:hour";
public static final String LOCK_TEMPLATE_DAY = "lock:template:day";
public static final String LOCK_TEMPLATE_NUMBER = "lock:template:number";
public static final String ES_C_TIME = "ctime";
public static final String ES_M_TIME = "mtime";
public static final String ES_CID = "cid";
public static final long ES_CID_DEFAULT = 100040002L;
public static final String ES_C_NAME = "cname";
public static final String AUTO_CNAME = "上传标注补充采集";
public static final String ES_M_GROUP = "mgroup";
public static final String ES_M_PERSON = "mperson";
public static final String ES_M_TAG = "mtag";
public static final String ES_URL = "url";
public static final String ES_MID = "mid";
public static final String ES_QA_QUESTION_URL = "question_url";
public static final String ES_QA_ANSWER_URL = "answer_url";
public static final String ES_SOURCE = "source";
public static final String ES_TITLE = "title";
public static final String ES_CONTENT = "content";
}
...@@ -2,7 +2,6 @@ package com.zhiwei.middleware.automatic.server.config; ...@@ -2,7 +2,6 @@ package com.zhiwei.middleware.automatic.server.config;
import com.mongodb.ConnectionString; import com.mongodb.ConnectionString;
import com.mongodb.MongoClientSettings; import com.mongodb.MongoClientSettings;
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients; import com.mongodb.client.MongoClients;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Bean;
...@@ -20,15 +19,10 @@ import org.springframework.data.mongodb.core.mapping.MongoMappingContext; ...@@ -20,15 +19,10 @@ import org.springframework.data.mongodb.core.mapping.MongoMappingContext;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
/**
* @ClassName
* @Description TODO
* @Author ${"liu-yu"}
* @Date 2022/12/21 18:01
**/
@Configuration @Configuration
public class MongoConfig { public class MongoConfig {
@Value("${mongo.connectTimeout}") @Value("${mongo.connectTimeout}")
private int connectTimeout; private int connectTimeout;
@Value("${mongo.maxWaitTime}") @Value("${mongo.maxWaitTime}")
...@@ -91,4 +85,5 @@ public class MongoConfig { ...@@ -91,4 +85,5 @@ public class MongoConfig {
converter.setTypeMapper(new DefaultMongoTypeMapper(null)); converter.setTypeMapper(new DefaultMongoTypeMapper(null));
return new MongoTemplate(mongoDbHangZhouFactory(), converter); return new MongoTemplate(mongoDbHangZhouFactory(), converter);
} }
} }
...@@ -9,24 +9,6 @@ import java.util.concurrent.ThreadPoolExecutor; ...@@ -9,24 +9,6 @@ import java.util.concurrent.ThreadPoolExecutor;
@Component @Component
public class TaskPoolConfig { public class TaskPoolConfig {
/**
 * Creates the {@code autMarkExecutor} thread pool: 5 core threads, 10 max threads,
 * queue capacity 20, thread names prefixed with {@code autoMark-executor-}.
 *
 * @return the initialized executor
 */
@Bean("autMarkExecutor")
public ThreadPoolTaskExecutor autMarkExecutor() {
    final ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
    // Name prefix of the pool's threads.
    pool.setThreadNamePrefix("autoMark-executor-");
    // Core / maximum thread counts and queue capacity.
    pool.setCorePoolSize(5);
    pool.setMaxPoolSize(10);
    pool.setQueueCapacity(20);
    // Rejection policy CALLER_RUNS: the task is not run on a new thread but on the
    // thread of the caller that submitted it.
    pool.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
    // Perform initialization.
    pool.initialize();
    return pool;
}
@Bean("asyncExecutor") @Bean("asyncExecutor")
public ThreadPoolTaskExecutor asyncExecutor() { public ThreadPoolTaskExecutor asyncExecutor() {
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
...@@ -44,76 +26,4 @@ public class TaskPoolConfig { ...@@ -44,76 +26,4 @@ public class TaskPoolConfig {
executor.initialize(); executor.initialize();
return executor; return executor;
} }
/**
 * Creates the {@code aggreeNoiseExecutor} thread pool: 32 core threads, 64 max
 * threads, queue capacity 50, thread names prefixed with
 * {@code aggree-noise-executor-}.
 *
 * @return the initialized executor
 */
@Bean("aggreeNoiseExecutor")
public ThreadPoolTaskExecutor aggreeNoiseExecutor() {
    final ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
    // Name prefix of the pool's threads.
    pool.setThreadNamePrefix("aggree-noise-executor-");
    // Core / maximum thread counts and queue capacity.
    pool.setCorePoolSize(32);
    pool.setMaxPoolSize(64);
    pool.setQueueCapacity(50);
    // Rejection policy CALLER_RUNS: the task is not run on a new thread but on the
    // thread of the caller that submitted it.
    pool.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
    // Perform initialization.
    pool.initialize();
    return pool;
}
/**
 * Creates the {@code aggreeExecutor} thread pool: 5 core threads, 10 max threads,
 * queue capacity 50, thread names prefixed with {@code aggree-executor-}.
 *
 * @return the initialized executor
 */
@Bean("aggreeExecutor")
public ThreadPoolTaskExecutor aggreeExecutor() {
    final ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
    // Name prefix of the pool's threads.
    pool.setThreadNamePrefix("aggree-executor-");
    // Core / maximum thread counts and queue capacity.
    pool.setCorePoolSize(5);
    pool.setMaxPoolSize(10);
    pool.setQueueCapacity(50);
    // Rejection policy CALLER_RUNS: the task is not run on a new thread but on the
    // thread of the caller that submitted it.
    pool.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
    // Perform initialization.
    pool.initialize();
    return pool;
}
/**
 * Creates the {@code eventAggreeEasyExecutor} thread pool: 6 core threads, 8 max
 * threads, queue capacity 20, thread names prefixed with
 * {@code event-easy-aggree-executor-}.
 *
 * @return the initialized executor
 */
@Bean("eventAggreeEasyExecutor")
public ThreadPoolTaskExecutor eventAggreeEasyExecutor() {
    final ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
    // Name prefix of the pool's threads.
    pool.setThreadNamePrefix("event-easy-aggree-executor-");
    // Core / maximum thread counts and queue capacity.
    pool.setCorePoolSize(6);
    pool.setMaxPoolSize(8);
    pool.setQueueCapacity(20);
    // Rejection policy CALLER_RUNS: the task is not run on a new thread but on the
    // thread of the caller that submitted it.
    pool.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
    // Perform initialization.
    pool.initialize();
    return pool;
}
/**
 * Creates the {@code eventAggreeExecutor} thread pool: 60 core threads, 100 max
 * threads, queue capacity 50, thread names prefixed with
 * {@code event-aggree-executor-}.
 *
 * @return the initialized executor
 */
@Bean("eventAggreeExecutor")
public ThreadPoolTaskExecutor eventAggreeExecutor() {
    final ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
    // Name prefix of the pool's threads.
    pool.setThreadNamePrefix("event-aggree-executor-");
    // Core / maximum thread counts and queue capacity.
    pool.setCorePoolSize(60);
    pool.setMaxPoolSize(100);
    pool.setQueueCapacity(50);
    // Rejection policy CALLER_RUNS: the task is not run on a new thread but on the
    // thread of the caller that submitted it.
    pool.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
    // Perform initialization.
    pool.initialize();
    return pool;
}
} }
...@@ -14,12 +14,6 @@ public interface TemplateRecordDao { ...@@ -14,12 +14,6 @@ public interface TemplateRecordDao {
*/ */
List<TemplateRecord> findTemplateRecord (Query query); List<TemplateRecord> findTemplateRecord (Query query);
/**
* 新增模板记录
* @param templateRecord 模板记录
*/
void insertTemplateRecord (TemplateRecord templateRecord);
/** /**
* 查询模板记录数量 * 查询模板记录数量
...@@ -27,10 +21,4 @@ public interface TemplateRecordDao { ...@@ -27,10 +21,4 @@ public interface TemplateRecordDao {
* @return 声量 * @return 声量
*/ */
long count(Query query); long count(Query query);
/**
* 根据插件删除模板记录
* @param query 条件
*/
void removeTemplateRecord (Query query);
} }
package com.zhiwei.middleware.automatic.server.dao.impl; package com.zhiwei.middleware.automatic.server.dao.impl;
import com.zhiwei.middleware.automatic.server.dao.TemplateRecordDao; import com.zhiwei.middleware.automatic.server.dao.TemplateRecordDao;
import com.zhiwei.middleware.automatic.server.pojo.TemplateNum;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord; import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.mongodb.core.MongoTemplate; import org.springframework.data.mongodb.core.MongoTemplate;
...@@ -21,21 +20,11 @@ public class TemplateRecordDaoImpl implements TemplateRecordDao { ...@@ -21,21 +20,11 @@ public class TemplateRecordDaoImpl implements TemplateRecordDao {
@Override @Override
public List<TemplateRecord> findTemplateRecord(Query query) { public List<TemplateRecord> findTemplateRecord(Query query) {
return mongoTemplate.find(query, TemplateRecord.class); return mongoTemplate.find(query, TemplateRecord.class, "automaticmark_template_record");
}
@Override
public void insertTemplateRecord(TemplateRecord templateRecord) {
mongoTemplate.insert(templateRecord);
} }
@Override @Override
public long count(Query query) { public long count(Query query) {
return mongoTemplate.count(query, TemplateRecord.class); return mongoTemplate.count(query, TemplateRecord.class, "automaticmark_template_record");
}
@Override
public void removeTemplateRecord(Query query) {
mongoTemplate.remove(query, TemplateRecord.class);
} }
} }
...@@ -2,6 +2,7 @@ package com.zhiwei.middleware.automatic.server.dubbo.service; ...@@ -2,6 +2,7 @@ package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.entity.subclass.mark.MarkInfo; import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti; import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import com.zhiwei.middleware.automatic.server.pojo.TemplateTitleVo;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -13,6 +14,7 @@ public interface AutoMaticService { ...@@ -13,6 +14,7 @@ public interface AutoMaticService {
void autoMarkMulti(List<MarkInfoMulti> markInfoMultis); void autoMarkMulti(List<MarkInfoMulti> markInfoMultis);
/** /**
* 修正模板标题的markTag 如果不存在就会增加 * 修正模板标题的markTag 如果不存在就会增加
* *
...@@ -20,7 +22,22 @@ public interface AutoMaticService { ...@@ -20,7 +22,22 @@ public interface AutoMaticService {
* @param templateTitle 模板标题 * @param templateTitle 模板标题
* @param fixTag 正确的标签 * @param fixTag 正确的标签
*/ */
boolean modifyTemplateTitle(String group, String templateTitle, String fixTag); void modifyTemplateTitle(String group, String templateTitle, String fixTag);
/**
* 重置自动标注模板
* @param group 项目
* @param templateTitle 模板标题
*/
void resetTemplate (String group, String templateTitle);
/**
* 获取项目文本模板
* @param project 项目
* @return 模板集
*/
Map<String, TemplateTitleVo> getTemplateTitleByProject(String project);
/** /**
* 根据模板标题获取数据(仅最新100条) * 根据模板标题获取数据(仅最新100条)
...@@ -29,7 +46,7 @@ public interface AutoMaticService { ...@@ -29,7 +46,7 @@ public interface AutoMaticService {
* @param templateTitle 模板标题 * @param templateTitle 模板标题
* @return 特征值 * @return 特征值
*/ */
List<String> getMupdateByTemplateTitle(String group, String templateTitle); List<String> getMupdateByTemplateTitle(String group, String templateTitle);
/** /**
* 根据标题和特征值尝试搜索模板标题 * 根据标题和特征值尝试搜索模板标题
...@@ -39,7 +56,7 @@ public interface AutoMaticService { ...@@ -39,7 +56,7 @@ public interface AutoMaticService {
* @param mupdate 特征值 * @param mupdate 特征值
* @return 模板标题 * @return 模板标题
*/ */
String tryGetTemplateTitleByMupdate(String group, String title, String mupdate); String tryGetTemplateTitleByMupdate(String group, String title, String mupdate);
/** /**
* 根据项目组和标题在线匹配已有聚合标题 * 根据项目组和标题在线匹配已有聚合标题
...@@ -48,13 +65,6 @@ public interface AutoMaticService { ...@@ -48,13 +65,6 @@ public interface AutoMaticService {
* @param title 标题 * @param title 标题
* @return 返回值 * @return 返回值
*/ */
public Map<String, Object> compareWithTemplateTileOL(String project, String title); Map<String, Object> compareWithTemplateTileOL(String project, String title);
/**
* 重置自动标注模板
* @param group 项目
* @param templateTitle 模板标题
* @return 是否成功
*/
boolean resetTemplate (String group, String templateTitle);
} }
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import java.util.List;
/**
 * Contract of the common aggregation workflow: obtain a task id, append data to
 * the task, start the aggregation and read the (paged) result.
 */
public interface CommonService {
/**
 * Obtains a task id (new version).
 *
 * @return the task id
 */
String generateAggreeOrder();
/**
 * Appends data to the task with the given id (new version).
 *
 * @param id   task id
 * @param list data to append
 * @return flag reported by the implementation (presumably success — confirm)
 */
boolean appendAggreeOrder(String id, List<AggreeDTO> list);
/**
 * Aggregates the task's data with bisecting k-means.
 *
 * @param id task id
 * @return flag reported by the implementation (presumably success — confirm)
 */
boolean startAggree(String id);
/**
 * Aggregates the task's data with bisecting k-means.
 *
 * @param id    task id
 * @param limit limit passed to the aggregation (meaning not documented here —
 *              TODO confirm)
 * @return flag reported by the implementation (presumably success — confirm)
 */
boolean startAggree(String id, double limit);
/**
 * Gets the aggregation result (returns the first page by default).
 *
 * @param id task id
 * @return the aggregation result
 */
CommonAggreeResult getAggreeResult(String id);
/**
 * Gets the aggregation result (paged).
 *
 * @param id        task id
 * @param page      page number
 * @param pageLimit page size
 * @return the aggregation result for the requested page
 */
CommonAggreeResult getAggreeResult(String id, int page, int pageLimit);
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.zhiwei.base.category.ClassB;
import java.util.List;
import java.util.Map;
/**
 * Contract for data-collection aggregation: cache management, adding data,
 * starting aggregation, tagging father/son entries, noise-set handling, paged
 * queries, final insertion and status polling.
 */
public interface DataCollectionService {
/**
 * Clears all cache.
 *
 * @param group project
 * @param id id
 */
void cleanCache(String group, String id);
/**
 * Clears all cache but keeps the noise set.
 *
 * @param group project
 * @param id id
 */
void cleanCacheExceptNoise(String group, String id);
/**
 * Adds the base data set.
 *
 * @param group project
 * @param id id
 * @param compressedList data set
 */
void addDataCollection(String group, String id, List<String> compressedList);
/**
 * Starts the aggregation.
 *
 * @param group project
 * @param id id
 * @param highWords not documented in the original — TODO confirm meaning
 */
void startAggree(String group, String id, String highWords);
/**
 * Modifies the tag of several father templates in one call (the son tags that
 * belong to them are modified in batch as well).
 *
 * @param group project
 * @param id id
 * @param fatherIds father ids
 * @param mtag tag
 * @param mperson annotator
 * @param typeB typeB
 * @return whether the operation succeeded
 */
boolean batchModifyFatherTag(String group, String id, List<String> fatherIds, String mtag, String mperson,
ClassB.TypeB typeB);
/**
 * Modifies the tag of a father template (the son tags that belong to it are
 * modified in batch as well).
 *
 * @param group project
 * @param id id
 * @param fatherId father id
 * @param mtag tag
 * @param mperson annotator
 * @param typeB typeB
 * @return presumably whether the operation succeeded — confirm
 */
boolean modifyFatherTag(String group, String id, String fatherId, String mtag, String mperson, ClassB.TypeB typeB);
/**
 * Modifies the tag of a son entry.
 *
 * @param group project
 * @param id id
 * @param fatherId father id
 * @param sonId son id
 * @param mtag tag
 * @param mperson annotator
 * @param typeB typeB
 * @return presumably whether the operation succeeded — confirm
 */
boolean modifySonTag(String group, String id, String fatherId, String sonId, String mtag, String mperson,
ClassB.TypeB typeB);
/**
 * Moves a father entry into the noise set.
 *
 * @param group project
 * @param id id
 * @param fatherId father id
 * @param typeB typeB
 * @return presumably whether the operation succeeded — confirm
 */
boolean throwIntoNoise(String group, String id, String fatherId, ClassB.TypeB typeB);
/**
 * Moves several father entries into the noise set.
 *
 * @param group project
 * @param id id
 * @param fatherIds father ids
 * @param typeB typeB
 * @return presumably whether the operation succeeded — confirm
 */
boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, ClassB.TypeB typeB);
/**
 * Restores a father entry from the noise set.
 *
 * @param group project
 * @param id id
 * @param fatherId father id
 * @param typeB typeB
 * @return presumably whether the operation succeeded — confirm
 */
boolean restoreFromNoise(String group, String id, String fatherId, ClassB.TypeB typeB);
/**
 * Gets the collection of father title information, page by page.
 *
 * @param group project
 * @param id id
 * @param page page number
 * @param size page size
 * @param isAsc sort direction flag
 * @param keyword keyword
 * @param typeB typeB
 * @param isTitle not documented in the original — TODO confirm meaning
 * @param markFlag not documented in the original — TODO confirm meaning
 * @return result map (key layout not visible here)
 */
Map<String, Object> getFatherTitles(String group, String id, int page, int size, boolean isAsc,
String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag);
/**
 * Gets the collection of son information of a father entry, page by page.
 *
 * @param group project
 * @param id id
 * @param fatherId father id
 * @param page page number
 * @param size page size
 * @param isAsc sort direction flag
 * @param keyword keyword
 * @param typeB typeB
 * @return result map (key layout not visible here)
 */
Map<String, Object> getSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc,
String keyword, ClassB.TypeB typeB);
/**
 * Gets the noise collection of father title information, page by page.
 *
 * @param group project
 * @param id id
 * @param page page number
 * @param size page size
 * @param isAsc sort direction flag
 * @param keyword keyword
 * @param typeB typeB
 * @param isTitle not documented in the original — TODO confirm meaning
 * @param markFlag not documented in the original — TODO confirm meaning
 * @return result map (key layout not visible here)
 */
Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size, boolean isAsc,
String keyword, ClassB.TypeB typeB, boolean isTitle, int markFlag);
/**
 * Gets the noise collection of son information of a father entry, page by page.
 *
 * @param group project
 * @param id id
 * @param fatherId father id
 * @param page page number
 * @param size page size
 * @param isAsc sort direction flag
 * @param keyword keyword
 * @param typeB typeB
 * @return result map (key layout not visible here)
 */
Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId, int page, int size,
boolean isAsc, String keyword, ClassB.TypeB typeB);
/**
 * Inserts the data into storage once checking is finished.
 *
 * @param group project
 * @param id id
 */
void checkedThenInsert(String group, String id);
/**
 * Gets the current (temporary) aggregation status immediately.
 *
 * @param group project
 * @param id id
 * @return -2: error while getting the result; -1: not aggregated; 0: aggregating;
 *         1: aggregated
 */
int getAggreResultNow(String group, String id);
/**
 * Gets the current (temporary) insertion status immediately.
 *
 * @param group project
 * @param id id
 * @return -2: error while getting the result; -1: not inserted; 0: inserting;
 *         1: inserted
 */
int getInsertResultNow(String group, String id);
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import java.util.Map;
/**
 * Contract of the data-upload workflow: add source data, start the upload, poll
 * its status, page through the results and clean them up.
 */
public interface DataUploadService {
/**
 * Adds a source data set.
 *
 * @param group project
 * @param id task id
 * @param sourceStr source data as a string (format not visible here)
 */
void addUploadList(String group, String id, String sourceStr);
/**
 * Starts the upload.
 *
 * @param group project
 * @param id task id
 * @param mperson submitter
 * @param mtagType not documented in the original — TODO confirm meaning
 * @param filterType not documented in the original — TODO confirm meaning
 * @param projectId not documented in the original — TODO confirm meaning
 * @param insertType not documented in the original — TODO confirm meaning
 */
void startUpload(String group, String id, String mperson,
UploadInfo.MtagType mtagType, UploadInfo.FilterType filterType, String projectId, InsertType insertType);
/**
 * Gets the upload status (progress).
 *
 * @param group project
 * @param id task id
 * @return status map (key layout not visible here)
 */
Map<String, Object> getUploadStatus(String group, String id);
/**
 * Gets the data set of the given upload type, page by page.
 *
 * @param group project
 * @param id task id
 * @param page page number
 * @param size page size
 * @param isAsc sort order
 * @param searchField field to search in
 * @param keyword keyword
 * @param uploadType upload type
 * @return result map (key layout not visible here)
 */
Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc,
String searchField, String keyword, UploadInfo.UploadType uploadType);
/**
 * Gets the {@code DataType} of a record.
 *
 * @param json record as JSON
 * @param typeB typeB
 * @return the data type
 */
UploadInfo.DataType getDataType(JSONObject json, ClassB.TypeB typeB);
/**
 * Cleans up the data set.
 *
 * @param group project
 * @param id task id
 */
void cleanUploadResult(String group, String id);
}
package com.zhiwei.middleware.automatic.server.dubbo.service;
import java.util.Map;
/**
 * Contract for aggregating and marking event-collection data: feed source data,
 * run the aggregation, inspect and tag template titles, handle the noise set and
 * insert the marked data into storage.
 */
public interface EventCollectionMarkService {

    /**
     * Adds a source data set for the event-collection aggregation.
     *
     * @param group     project group
     * @param id        task id
     * @param sourceStr source data as a string (format not visible here)
     */
    void addEventCollectionAggreSourceList(String group, String id, String sourceStr);

    /**
     * Clears the event-collection aggregation result set.
     *
     * @param group project group
     * @param id    task id
     */
    void cleanEventCollectionAggreData(String group, String id);

    /**
     * Gets the event-collection aggregation result.
     *
     * @param group    project group
     * @param id       task id
     * @param page     page number
     * @param size     page size
     * @param isAsc    sort direction flag
     * @param markFlag not documented in the original — TODO confirm meaning
     * @param keyword  keyword
     * @return result map (key layout not visible here)
     */
    Map<String, Object> getEventCollectionAggreTemplate(String group, String id, int page, int size,
            boolean isAsc, int markFlag, String keyword);

    /**
     * Modifies the tag of a plugin aggregation template title.
     *
     * @param group         project group
     * @param id            task id
     * @param templateTitle template title
     * @param modifyTag     new tag
     * @return presumably whether the modification succeeded — confirm
     */
    boolean modifyEventCollectionAggreTitleMarkTag(String group, String id, String templateTitle,
            String modifyTag);

    /**
     * Gets the markTag of the father title identified by a template title.
     *
     * @param group         project group
     * @param id            task id
     * @param templateTitle template title
     * @return the markTag
     */
    String getEventCollectionMarkTagByTemplate(String group, String id, String templateTitle);

    /**
     * Gets the sub-title collection of a template title.
     *
     * @param group         project group
     * @param id            task id
     * @param templateTitle template title
     * @return result map (key layout not visible here)
     */
    Map<String, Object> getEventCollectionAggreSubTitle(String group, String id, String templateTitle);

    /**
     * Starts the aggregation.
     *
     * @param group project group
     * @param id    task id
     */
    void startAggre(String group, String id);

    /**
     * Inserts the marked event-collection data into storage.
     *
     * @param group   project group
     * @param id      task id
     * @param markSum not documented in the original — TODO confirm meaning
     * @return presumably whether the insertion succeeded — confirm
     */
    boolean eventCollectionMarkedInsert(String group, String id, int markSum);

    /**
     * Inserts the marked event-collection data into storage.
     *
     * @param group   project group
     * @param id      task id
     * @param markSum not documented in the original — TODO confirm meaning
     * @param mperson not documented in the original (presumably the annotator — confirm)
     * @return presumably whether the insertion succeeded — confirm
     */
    boolean eventCollectionMarkedInsert(String group, String id, int markSum, String mperson);

    /**
     * Clears all results (aggregation set + noise set).
     *
     * @param group project group
     * @param id    task id
     */
    void cleanEventCollectionAllData(String group, String id);

    /**
     * Gets the noise father-title set of the event collection.
     *
     * @param group   project group
     * @param id      task id
     * @param page    page number
     * @param size    page size
     * @param isAsc   sort direction flag
     * @param keyword keyword
     * @return result map (key layout not visible here)
     */
    Map<String, Object> getEventCollectionNoiseTitles(String group, String id, int page, int size,
            boolean isAsc, String keyword);

    /**
     * Gets the noise sub-set of the event collection for a template title.
     *
     * @param group         project group
     * @param id            task id
     * @param templateTitle template title
     * @return result map (key layout not visible here)
     */
    Map<String, Object> getEventCollectionNoiseSubTitle(String group, String id, String templateTitle);

    /**
     * Tells whether the marked part has already been inserted into storage.
     *
     * @param group project group
     * @param id    task id
     * @return {@code true} when the marked part has been inserted
     */
    boolean markedHasInserted(String group, String id);
}
...@@ -2,15 +2,20 @@ package com.zhiwei.middleware.automatic.server.dubbo.service.impl; ...@@ -2,15 +2,20 @@ package com.zhiwei.middleware.automatic.server.dubbo.service.impl;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo; import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute; import com.zhiwei.middleware.automatic.server.common.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService; import com.zhiwei.middleware.automatic.server.dubbo.service.AutoMaticService;
import com.zhiwei.middleware.automatic.server.pojo.AutoTask;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti; import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import com.zhiwei.middleware.automatic.server.pojo.TemplateTitleVo;
import com.zhiwei.middleware.automatic.server.pojo.enums.TaskType;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil; import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.TemplateTitleService; import com.zhiwei.middleware.automatic.server.service.TemplateTitleService;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.dubbo.config.annotation.Service; import org.apache.dubbo.config.annotation.Service;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@Service @Service
...@@ -27,17 +32,44 @@ public class AutoMaticServiceImpl implements AutoMaticService { ...@@ -27,17 +32,44 @@ public class AutoMaticServiceImpl implements AutoMaticService {
@Override @Override
public void autoMark(List<MarkInfo> markInfos) { public void autoMark(List<MarkInfo> markInfos) {
redissonUtil.putQueue(GenericAttribute.REDIS_QUEUE_ONE_KEY, markInfos.stream().map(JSONObject::toJSONString).collect(Collectors.toList())); AutoTask autoTask = new AutoTask(TaskType.COMMON_ONE.getType());
String sourceKey = Tools.assembleKey(GenericAttribute.REDIS_QUEUE_ONE_KEY, Tools.randomUUID());
redissonUtil.setList(sourceKey, markInfos.stream()
.filter(e -> Objects.nonNull(e) && Objects.nonNull(e.getSourceObj()))
.map(JSONObject::toJSONString).collect(Collectors.toList()));
autoTask.getParamSource().put(TaskType.COMMON_ONE.getCacheId(), sourceKey);
redissonUtil.putQueue(GenericAttribute.KEY, JSONObject.toJSONString(autoTask));
} }
@Override @Override
public void autoMarkMulti(List<MarkInfoMulti> markInfoMultis) { public void autoMarkMulti(List<MarkInfoMulti> markInfoMultis) {
redissonUtil.putQueue(GenericAttribute.REDIS_QUEUE_MULTI_KEY, markInfoMultis.stream().map(JSONObject::toJSONString).collect(Collectors.toList())); AutoTask autoTask = new AutoTask(TaskType.COMMON_TWO.getType());
String sourceKey = Tools.assembleKey(GenericAttribute.REDIS_QUEUE_MULTI_KEY, Tools.randomUUID());
redissonUtil.setList(sourceKey, markInfoMultis.stream().map(JSONObject::toJSONString).collect(Collectors.toList()));
autoTask.getParamSource().put(TaskType.COMMON_TWO.getCacheId(), sourceKey);
redissonUtil.putQueue(GenericAttribute.KEY, JSONObject.toJSONString(autoTask));
} }
@Override @Override
public boolean modifyTemplateTitle(String group, String templateTitle, String fixTag) { public void modifyTemplateTitle(String group, String templateTitle, String fixTag) {
return templateTitleService.modifyTemplateTitle(group, templateTitle, fixTag); AutoTask autoTask = new AutoTask(TaskType.TEMPLATE_MODIFY.getType());
autoTask.getParamSource().put(GenericAttribute.GROUP_PARAM, group);
autoTask.getParamSource().put(GenericAttribute.TEMPLATE_TITLE, templateTitle);
autoTask.getParamSource().put(GenericAttribute.FIX_TAG, fixTag);
redissonUtil.putQueue(GenericAttribute.KEY, JSONObject.toJSONString(autoTask));
}
@Override
public void resetTemplate(String group, String templateTitle) {
AutoTask autoTask = new AutoTask(TaskType.TEMPLATE_RESET.getType());
autoTask.getParamSource().put(GenericAttribute.GROUP_PARAM, group);
autoTask.getParamSource().put(GenericAttribute.TEMPLATE_TITLE, templateTitle);
redissonUtil.putQueue(GenericAttribute.KEY, JSONObject.toJSONString(autoTask));
}
@Override
public Map<String, TemplateTitleVo> getTemplateTitleByProject(String project) {
return templateTitleService.getTemplateTitleByProject(project);
} }
@Override @Override
...@@ -54,9 +86,4 @@ public class AutoMaticServiceImpl implements AutoMaticService { ...@@ -54,9 +86,4 @@ public class AutoMaticServiceImpl implements AutoMaticService {
public Map<String, Object> compareWithTemplateTileOL(String project, String title) { public Map<String, Object> compareWithTemplateTileOL(String project, String title) {
return templateTitleService.compareWithTemplateTileOL(project, title); return templateTitleService.compareWithTemplateTileOL(project, title);
} }
@Override
public boolean resetTemplate(String group, String templateTitle) {
return templateTitleService.resetTemplate(group, templateTitle);
}
} }
package com.zhiwei.middleware.automatic.server.dubbo.service.impl;
import com.zhiwei.middleware.automatic.server.dubbo.service.CommonService;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeCache;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult;
import com.zhiwei.middleware.automatic.server.pojo.PageData;
import com.zhiwei.middleware.automatic.server.pojo.Status;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import com.zhiwei.middleware.automatic.server.service.handler.TextHandlerService;
import org.springframework.stereotype.Service;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult.ResultInfo;
import java.util.List;
/**
 * {@link CommonService} implementation that forwards to {@link TextHandlerService}
 * and pages the cached aggregation result.
 */
@Service
public class CommonServiceImpl implements CommonService {

    /** Page size used when the caller does not specify one. */
    private static final int PAGE_SIZE = 5000;

    /** Number of the first page; the paged lookup rejects any page <= 0. */
    private static final int FIRST_PAGE = 1;

    private final TextHandlerService textHandler;

    /**
     * @param textHandler handler that performs the actual aggregation work
     */
    public CommonServiceImpl(TextHandlerService textHandler) {
        this.textHandler = textHandler;
    }

    /** {@inheritDoc} */
    @Override
    public String generateAggreeOrder() {
        return textHandler.generateAggreeOrder();
    }

    /** {@inheritDoc} */
    @Override
    public boolean appendAggreeOrder(String id, List<AggreeDTO> list) {
        return textHandler.appendAggreeOrderNew(id, list);
    }

    /** {@inheritDoc} */
    @Override
    public boolean startAggree(String id) {
        return textHandler.startAggree(id);
    }

    /** {@inheritDoc} */
    @Override
    public boolean startAggree(String id, double limit) {
        return textHandler.startAggree(id, limit);
    }

    /**
     * Returns the first page of the aggregation result, using {@link #PAGE_SIZE}.
     *
     * <p>Delegating with page 0 made the paged overload reject every call, so the
     * first page is requested explicitly.
     *
     * @param id task id
     * @return the first page, or a status-only result (see the paged overload)
     */
    @Override
    public CommonAggreeResult getAggreeResult(String id) {
        return getAggreeResult(id, FIRST_PAGE, PAGE_SIZE);
    }

    /**
     * Returns one page of the aggregation result.
     *
     * @param id        task id
     * @param page      1-based page number
     * @param pageLimit page size; must be positive
     * @return {@code Status.ERROR} for an invalid page/page size, an unknown task or
     *         a page that starts beyond the results; {@code Status.RUN} while the
     *         result list is still empty; otherwise {@code Status.END} with the page
     */
    @Override
    public CommonAggreeResult getAggreeResult(String id, int page, int pageLimit) {
        CommonAggreeCache cache = textHandler.getAggreeResult(id);
        // Error state: invalid page or page size, or no task found for the id.
        // A non-positive page size would otherwise divide by zero below.
        if (page <= 0 || pageLimit <= 0 || null == cache) {
            return new CommonAggreeResult(Status.ERROR);
        }
        // Still aggregating.
        List<ResultInfo> result = cache.getResults();
        if (result.isEmpty()) {
            return new CommonAggreeResult(Status.RUN);
        }
        int total = result.size();
        // long arithmetic: pageLimit * (page - 1) can overflow an int.
        long start = (long) pageLimit * (page - 1);
        if (start > total) {
            // Requested page starts beyond the total.
            return new CommonAggreeResult(Status.ERROR);
        }
        int end = (int) Math.min(start + pageLimit, total);
        int totalPage = (int) (((long) total + pageLimit - 1) / pageLimit);
        PageData<ResultInfo> results = new PageData<>(page, total, totalPage, pageLimit,
                result.subList((int) start, end));
        return new CommonAggreeResult(Status.END, results);
    }
}
package com.zhiwei.middleware.automatic.server.dubbo.service.impl;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataCollectionService;
import com.zhiwei.middleware.automatic.server.service.impl.DataCollection;
import org.apache.dubbo.config.annotation.Service;
import java.util.List;
import java.util.Map;
/**
 * Dubbo-exposed {@link DataCollectionService}; every method forwards its arguments unchanged
 * to the same-named method of {@link DataCollection}.
 */
@Service
public class DataCollectionServiceImpl implements DataCollectionService {

    private final DataCollection dataCollection;

    public DataCollectionServiceImpl(DataCollection dataCollection) {
        this.dataCollection = dataCollection;
    }

    /** Forwards to {@code dataCollection.cleanCache}. */
    @Override
    public void cleanCache(String group, String id) {
        dataCollection.cleanCache(group, id);
    }

    /** Forwards to {@code dataCollection.cleanCacheExceptNoise}. */
    @Override
    public void cleanCacheExceptNoise(String group, String id) {
        dataCollection.cleanCacheExceptNoise(group, id);
    }

    /** Forwards to {@code dataCollection.addDataCollection}. */
    @Override
    public void addDataCollection(String group, String id, List<String> compressedList) {
        dataCollection.addDataCollection(group, id, compressedList);
    }

    /** Forwards to {@code dataCollection.startAggree}. */
    @Override
    public void startAggree(String group, String id, String highWords) {
        dataCollection.startAggree(group, id, highWords);
    }

    /** Forwards to {@code dataCollection.batchModifyFatherTag}. */
    @Override
    public boolean batchModifyFatherTag(String group, String id, List<String> fatherIds,
                                        String mtag, String mperson, ClassB.TypeB typeB) {
        return dataCollection.batchModifyFatherTag(group, id, fatherIds, mtag, mperson, typeB);
    }

    /** Forwards to {@code dataCollection.modifyFatherTag}. */
    @Override
    public boolean modifyFatherTag(String group, String id, String fatherId,
                                   String mtag, String mperson, ClassB.TypeB typeB) {
        return dataCollection.modifyFatherTag(group, id, fatherId, mtag, mperson, typeB);
    }

    /** Forwards to {@code dataCollection.modifySonTag}. */
    @Override
    public boolean modifySonTag(String group, String id, String fatherId, String sonId,
                                String mtag, String mperson, ClassB.TypeB typeB) {
        return dataCollection.modifySonTag(group, id, fatherId, sonId, mtag, mperson, typeB);
    }

    /** Forwards to {@code dataCollection.throwIntoNoise}. */
    @Override
    public boolean throwIntoNoise(String group, String id, String fatherId, ClassB.TypeB typeB) {
        return dataCollection.throwIntoNoise(group, id, fatherId, typeB);
    }

    /** Forwards to {@code dataCollection.batchThrowIntoNoise}. */
    @Override
    public boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds,
                                       ClassB.TypeB typeB) {
        return dataCollection.batchThrowIntoNoise(group, id, fatherIds, typeB);
    }

    /** Forwards to {@code dataCollection.restoreFromNoise}. */
    @Override
    public boolean restoreFromNoise(String group, String id, String fatherId, ClassB.TypeB typeB) {
        return dataCollection.restoreFromNoise(group, id, fatherId, typeB);
    }

    /** Forwards to {@code dataCollection.getFatherTitles}. */
    @Override
    public Map<String, Object> getFatherTitles(String group, String id, int page, int size,
                                               boolean isAsc, String keyword, ClassB.TypeB typeB,
                                               boolean isTitle, int markFlag) {
        return dataCollection.getFatherTitles(group, id, page, size, isAsc, keyword, typeB, isTitle, markFlag);
    }

    /** Forwards to {@code dataCollection.getSonTitles}. */
    @Override
    public Map<String, Object> getSonTitles(String group, String id, String fatherId, int page,
                                            int size, boolean isAsc, String keyword,
                                            ClassB.TypeB typeB) {
        return dataCollection.getSonTitles(group, id, fatherId, page, size, isAsc, keyword, typeB);
    }

    /** Forwards to {@code dataCollection.getNoiseFatherTitles}. */
    @Override
    public Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size,
                                                    boolean isAsc, String keyword,
                                                    ClassB.TypeB typeB, boolean isTitle,
                                                    int markFlag) {
        return dataCollection.getNoiseFatherTitles(group, id, page, size, isAsc, keyword, typeB, isTitle, markFlag);
    }

    /** Forwards to {@code dataCollection.getNoiseSonTitles}. */
    @Override
    public Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId,
                                                 int page, int size, boolean isAsc,
                                                 String keyword, ClassB.TypeB typeB) {
        return dataCollection.getNoiseSonTitles(group, id, fatherId, page, size, isAsc, keyword, typeB);
    }

    /** Forwards to {@code dataCollection.checkedThenInsert}. */
    @Override
    public void checkedThenInsert(String group, String id) {
        dataCollection.checkedThenInsert(group, id);
    }

    /** Forwards to {@code dataCollection.getAggreResultNow}. */
    @Override
    public int getAggreResultNow(String group, String id) {
        return dataCollection.getAggreResultNow(group, id);
    }

    /** Forwards to {@code dataCollection.getInsertResultNow}. */
    @Override
    public int getInsertResultNow(String group, String id) {
        return dataCollection.getInsertResultNow(group, id);
    }
}
package com.zhiwei.middleware.automatic.server.dubbo.service.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.dubbo.service.DataUploadService;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import com.zhiwei.middleware.automatic.server.service.UploadService;
import org.apache.dubbo.config.annotation.Service;
import java.util.Map;
/**
 * Dubbo-exposed {@link DataUploadService} backed by {@link UploadService}.
 */
@Service
public class DataUploadServiceImpl implements DataUploadService {

    private final UploadService uploadService;

    public DataUploadServiceImpl(UploadService uploadService) {
        this.uploadService = uploadService;
    }

    /** Forwards to {@code uploadService.addUploadList}. */
    @Override
    public void addUploadList(String group, String id, String sourceStr) {
        uploadService.addUploadList(group, id, sourceStr);
    }

    /**
     * Packs the arguments into a {@link MarkUploadRule} and hands it to
     * {@code uploadService.startUpload}. Note the rule constructor takes {@code id} before
     * {@code group}.
     */
    @Override
    public void startUpload(String group, String id, String mperson,
                            UploadInfo.MtagType mtagType, UploadInfo.FilterType filterType,
                            String projectId, InsertType insertType) {
        MarkUploadRule rule =
                new MarkUploadRule(id, group, mperson, mtagType, filterType, projectId, insertType);
        uploadService.startUpload(rule);
    }

    /** Forwards to {@code uploadService.getUploadStatus}. */
    @Override
    public Map<String, Object> getUploadStatus(String group, String id) {
        return uploadService.getUploadStatus(group, id);
    }

    /** Forwards to {@code uploadService.getUploadInfoList}. */
    @Override
    public Map<String, Object> getUploadInfoList(String group, String id, int page, int size,
                                                 boolean isAsc, String searchField,
                                                 String keyword, UploadInfo.UploadType uploadType) {
        return uploadService.getUploadInfoList(group, id, page, size, isAsc, searchField, keyword, uploadType);
    }

    /** Forwards to {@code uploadService.getDataType}. */
    @Override
    public UploadInfo.DataType getDataType(JSONObject json, ClassB.TypeB typeB) {
        return uploadService.getDataType(json, typeB);
    }

    /** Forwards to {@code uploadService.cleanUploadResult}. */
    @Override
    public void cleanUploadResult(String group, String id) {
        uploadService.cleanUploadResult(group, id);
    }
}
package com.zhiwei.middleware.automatic.server.dubbo.service.impl;
import com.zhiwei.middleware.automatic.server.dubbo.service.EventCollectionMarkService;
import com.zhiwei.middleware.automatic.server.service.impl.EventCollectionMark;
import org.apache.dubbo.config.annotation.Service;
import java.util.Map;
/**
 * Dubbo-exposed {@link EventCollectionMarkService}; every method forwards its arguments
 * unchanged to {@link EventCollectionMark}. The interface spells several methods with
 * "Aggre" while the delegate uses "Aggree"; the mapping is one-to-one.
 */
@Service
public class EventCollectionMarkServiceImpl implements EventCollectionMarkService {

    private final EventCollectionMark eventCollectionMark;

    public EventCollectionMarkServiceImpl(EventCollectionMark eventCollectionMark) {
        this.eventCollectionMark = eventCollectionMark;
    }

    /** Forwards to {@code eventCollectionMark.addEventCollectionAggreeSourceList}. */
    @Override
    public void addEventCollectionAggreSourceList(String group, String id, String sourceStr) {
        eventCollectionMark.addEventCollectionAggreeSourceList(group, id, sourceStr);
    }

    /** Forwards to {@code eventCollectionMark.cleanEventCollectionAggreeData}. */
    @Override
    public void cleanEventCollectionAggreData(String group, String id) {
        eventCollectionMark.cleanEventCollectionAggreeData(group, id);
    }

    /** Forwards to {@code eventCollectionMark.getEventCollectionAggreeTemplate}. */
    @Override
    public Map<String, Object> getEventCollectionAggreTemplate(String group, String id, int page,
                                                               int size, boolean isAsc,
                                                               int markFlag, String keyword) {
        return eventCollectionMark.getEventCollectionAggreeTemplate(group, id, page, size, isAsc, markFlag, keyword);
    }

    /** Forwards to {@code eventCollectionMark.modifyEventCollectionAggreeTitleMarkTag}. */
    @Override
    public boolean modifyEventCollectionAggreTitleMarkTag(String group, String id,
                                                          String templateTitle, String modifyTag) {
        return eventCollectionMark.modifyEventCollectionAggreeTitleMarkTag(group, id, templateTitle, modifyTag);
    }

    /** Forwards to {@code eventCollectionMark.getEventCollectionMarkTagByTemplate}. */
    @Override
    public String getEventCollectionMarkTagByTemplate(String group, String id, String templateTitle) {
        return eventCollectionMark.getEventCollectionMarkTagByTemplate(group, id, templateTitle);
    }

    /** Forwards to {@code eventCollectionMark.getEventCollectionAggreeSubTitle}. */
    @Override
    public Map<String, Object> getEventCollectionAggreSubTitle(String group, String id,
                                                               String templateTitle) {
        return eventCollectionMark.getEventCollectionAggreeSubTitle(group, id, templateTitle);
    }

    /** Forwards to {@code eventCollectionMark.startAggree}. */
    @Override
    public void startAggre(String group, String id) {
        eventCollectionMark.startAggree(group, id);
    }

    /** Forwards to the three-argument {@code eventCollectionMark.eventCollectionMarkedInsert}. */
    @Override
    public boolean eventCollectionMarkedInsert(String group, String id, int markSum) {
        return eventCollectionMark.eventCollectionMarkedInsert(group, id, markSum);
    }

    /** Forwards to the four-argument {@code eventCollectionMark.eventCollectionMarkedInsert}. */
    @Override
    public boolean eventCollectionMarkedInsert(String group, String id, int markSum, String mperson) {
        return eventCollectionMark.eventCollectionMarkedInsert(group, id, markSum, mperson);
    }

    /** Forwards to {@code eventCollectionMark.cleanEventCollectionAllData}. */
    @Override
    public void cleanEventCollectionAllData(String group, String id) {
        eventCollectionMark.cleanEventCollectionAllData(group, id);
    }

    /** Forwards to {@code eventCollectionMark.getEventCollectionNoiseTitles}. */
    @Override
    public Map<String, Object> getEventCollectionNoiseTitles(String group, String id, int page,
                                                             int size, boolean isAsc,
                                                             String keyword) {
        return eventCollectionMark.getEventCollectionNoiseTitles(group, id, page, size, isAsc, keyword);
    }

    /** Forwards to {@code eventCollectionMark.getEventCollectionNoiseSubTitle}. */
    @Override
    public Map<String, Object> getEventCollectionNoiseSubTitle(String group, String id,
                                                               String templateTitle) {
        return eventCollectionMark.getEventCollectionNoiseSubTitle(group, id, templateTitle);
    }

    /** Forwards to {@code eventCollectionMark.markedHasInserted}. */
    @Override
    public boolean markedHasInserted(String group, String id) {
        return eventCollectionMark.markedHasInserted(group, id);
    }
}
package com.zhiwei.middleware.automatic.server.functional;
import com.zhiwei.base.category.ClassB;
/**
 * Strategy that derives a {@link ClassB.TypeB} from a value.
 *
 * @param <T> type of the value being classified
 */
@FunctionalInterface
public interface DataClassType<T> {

    /**
     * @param t value to classify
     * @return the class type for {@code t}
     */
    ClassB.TypeB getClassType(T t);
}
package com.zhiwei.middleware.automatic.server.functional;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import org.elasticsearch.search.SearchHit;
import java.util.List;
/**
 * Callback that merges Elasticsearch hits into a target object under an upload rule.
 *
 * @param <T> type of the target object
 */
@FunctionalInterface
public interface DataMerge<T> {

    /**
     * @param hit  search hits to merge
     * @param t    target object receiving the merged data
     * @param rule upload rule in effect
     */
    void dataMerge(List<SearchHit> hit, T t, MarkUploadRule rule);
}
package com.zhiwei.middleware.automatic.server.functional;
/**
 * Strategy that resolves an index name from a value.
 *
 * @param <T> type of the value the index is derived from
 */
@FunctionalInterface
public interface EsIndex<T> {

    /**
     * @param t value to resolve the index for
     * @return index name
     */
    String getIndex(T t);
}
package com.zhiwei.middleware.automatic.server.functional;
import org.elasticsearch.index.query.BoolQueryBuilder;
/**
 * Strategy that builds a {@link BoolQueryBuilder} for a single value.
 *
 * @param <T> type of the value the query is built from
 */
@FunctionalInterface
public interface EsRowQuery<T> {

    /**
     * @param t value to build the query for
     * @return the query for {@code t}
     */
    BoolQueryBuilder rowQuery(T t);
}
package com.zhiwei.middleware.automatic.server.functional;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import com.zhiwei.middleware.automatic.server.base.DataUploadCommon;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.listener.BaseServiceContext;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadResult;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import com.zhiwei.middleware.automatic.server.util.TimeUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.search.SearchHit;
import java.util.List;
import java.util.Objects;
/**
 * Method-reference targets used by the mark-upload pipeline: row keys, ES index names,
 * queries, and the merge callbacks that fold search hits into a {@link MarkUploadResult}.
 */
public class FunctionalImpl {

    private static final Logger log = LogManager.getLogger(FunctionalImpl.class);

    /**
     * Records a system error on the result, if there is one.
     *
     * @param result  upload DTO; ignored when {@code null}
     * @param stage   name of the stage that failed
     * @param message error detail
     */
    public void rowException(MarkUploadResult result, String stage, String message) {
        if (Objects.nonNull(result)) {
            result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, stage + ":" + message);
        }
    }

    /**
     * Mark upload: URL query condition.
     *
     * @param result upload DTO
     * @return BoolQueryBuilder
     */
    public BoolQueryBuilder urlSearchQuery(MarkUploadResult result) {
        return BaseServiceContext.getInstance().getDataUploadService(result.getTypeB()).urlSearchQuery(result);
    }

    /**
     * Mark upload: text query condition.
     *
     * @param result upload DTO
     * @return BoolQueryBuilder
     */
    public BoolQueryBuilder textSearchQuery(MarkUploadResult result) {
        return BaseServiceContext.getInstance().getDataUploadService(result.getTypeB()).textSearchQuery(result);
    }

    /**
     * Returns the dw ES index, based on the dw record's time when present and the original
     * upload's time otherwise.
     *
     * @param result upload DTO
     * @return es index
     */
    public String getDwIndex(MarkUploadResult result) {
        return Objects.nonNull(result.getDw())
                ? TimeUtil.getDwIndex(result.getDw().getTime())
                : TimeUtil.getDwIndex(result.getOriginData().getTime());
    }

    /**
     * Returns the mark ES index, based on the dw record's time when present and the original
     * upload's time otherwise.
     *
     * @param result upload DTO
     * @return es index
     */
    public String getMarkIndex(MarkUploadResult result) {
        return Objects.nonNull(result.getDw())
                ? TimeUtil.getMarkIndex(result.getDw().getTime())
                : TimeUtil.getMarkIndex(result.getOriginData().getTime());
    }

    /**
     * Returns the TypeB of the result.
     *
     * @param result upload DTO
     * @return TypeB
     */
    public ClassB.TypeB getTypeB(MarkUploadResult result) {
        return result.getTypeB();
    }

    /**
     * Returns the text-search key: the original upload URL passed through
     * {@code Tools.urlReplace}.
     *
     * @param result upload DTO
     * @return key
     */
    public String getTextSearchRowKey(MarkUploadResult result) {
        return Tools.urlReplace(result.getOriginData().getUrl());
    }

    /**
     * Returns the mark data-source key.
     *
     * @param result upload DTO
     * @return key
     */
    public String markHandleRowKey(MarkUploadResult result) {
        return result.getKey();
    }

    /**
     * URL search: merges the first hit into the result, or records an error when there is
     * no hit.
     *
     * @param hits   es data; {@code null} or empty means nothing was found
     * @param result upload DTO
     * @param rule   upload rule
     */
    public void searchHitMerge(List<SearchHit> hits, MarkUploadResult result, MarkUploadRule rule) {
        if (hasHits(hits)) {
            BaseDataUploadService dataUploadService = BaseServiceContext.getInstance().getDataUploadService(result.getTypeB());
            CommonDO commonDO = dataUploadService.getCommonDOBySearchHit(hits.get(0));
            DataUploadCommon dataUploadCommon = (DataUploadCommon) dataUploadService;
            result.setCommonDO(commonDO, dataUploadCommon.convert2Mark(commonDO, rule.getGroup()));
            result.setSearch(true);
        } else {
            result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, "格式转换失败且大库中不存在该数据");
        }
    }

    /**
     * dw data-source search: resolves a record (the already-merged mark, the first hit, or a
     * second content search when there is no hit) and builds the mark info from the result.
     *
     * @param hits   es data; {@code null} or empty triggers the content search
     * @param result upload DTO
     * @param rule   upload rule
     */
    public void dwSearchHitMerge(List<SearchHit> hits, MarkUploadResult result, MarkUploadRule rule) {
        BaseDataUploadService dataUploadService = BaseServiceContext.getInstance().getDataUploadService(result.getTypeB());
        CommonDO commonDO;
        if (result.isSearch()) {
            commonDO = result.getMark();
        } else if (hasHits(hits)) {
            commonDO = dataUploadService.getCommonDOBySearchHit(hits.get(0));
        } else {
            // No hit (null or empty list): fall back to a content-based search.
            commonDO = dataUploadService.searchDwByContentNew(result);
        }
        if (null == commonDO) {
            result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, "数据类型:【dw-content数据】;上传结果:【失败】,二次文本搜索任未搜索到数据");
            return;
        }
        result.setMarkInfo(dataUploadService.toMarkInfoNew(result, rule.getMperson(), rule.getGroup()));
    }

    /**
     * Mark data-source search: builds the mark info according to the rule's mtag type.
     * {@code INDEX} builds it from the result alone; {@code UPDATE} additionally passes the
     * {@code mtag} of the first hit.
     *
     * @param hits   es data; {@code null} or empty means nothing was found
     * @param result upload DTO
     * @param rule   upload rule
     */
    public void markSearchHitMerge(List<SearchHit> hits, MarkUploadResult result, MarkUploadRule rule) {
        if (hasHits(hits)) {
            try {
                BaseDataUploadService dataUploadService = BaseServiceContext.getInstance().getDataUploadService(result.getTypeB());
                CommonDO commonDO = dataUploadService.getCommonDOBySearchHit(hits.get(0));
                switch (rule.getMtagType()) {
                    case INDEX:
                        result.setMarkInfo(dataUploadService.toMarkInfoNew(result, rule.getMperson(), rule.getGroup()));
                        // Without this break INDEX fell through and was overwritten by UPDATE.
                        break;
                    case UPDATE:
                        result.setMarkInfo(dataUploadService.toMarkInfoNew(result, rule.getMperson(), rule.getGroup(), commonDO.toJSON().get("mtag") + ""));
                        break;
                    default:
                        // Other mtag types leave the result untouched, as before.
                        break;
                }
            } catch (Exception e) {
                log.error("UploadShell-标注库数据源处理失败:", e);
                result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, "markHandle处理异常");
            }
        } else {
            result.setInfo(GenericAttribute.SYSTEM_ERROR_SUFFIX, "数据类型:【标注数据】;上传结果:【失败】,原因:标注库未找到对应数据");
        }
    }

    /** Returns {@code true} when the list holds at least one hit, so {@code get(0)} is safe. */
    private static boolean hasHits(List<SearchHit> hits) {
        return hits != null && !hits.isEmpty();
    }
}
package com.zhiwei.middleware.automatic.server.functional;
/**
 * Strategy that derives a row key from a value.
 *
 * @param <T> type of the value the key is derived from
 */
@FunctionalInterface
public interface RowKey<T> {

    /**
     * @param t value to derive the key from
     * @return row key
     */
    String getRowKey(T t);
}
package com.zhiwei.middleware.automatic.server.functional;
/**
 * Callback invoked to report a failure for a single row.
 *
 * @param <T> type of the row object the failure is reported on
 */
@FunctionalInterface
public interface UploadRowException<T> {

    /**
     * @param t       row the failure belongs to
     * @param state   stage or state in which the failure happened
     * @param message failure detail
     */
    void rowException(T t, String state, String message);
}
package com.zhiwei.middleware.automatic.server.listener;
import com.zhiwei.base.category.ClassB.TypeB;
import com.zhiwei.middleware.automatic.server.base.BaseDataUploadService;
import org.springframework.context.ApplicationContext;
import java.util.HashMap;
import java.util.Map;
/**
 * Base service singleton: a registry of {@link BaseDataUploadService} beans keyed by the
 * {@link TypeB} each one reports.
 *
 * <p>NOTE(review): the constructor is public, so the singleton is a convention of
 * {@link #getInstance()} rather than something the class enforces.
 */
public class BaseServiceContext {

    private final Map<TypeB, BaseDataUploadService> handlerMap = new HashMap<>();

    /**
     * Collects every {@link BaseDataUploadService} bean from the application context held by
     * {@code ApplicationContextHolder}. If two beans report the same type, the later one wins.
     */
    public BaseServiceContext() {
        Map<String, BaseDataUploadService> services =
                ApplicationContextHolder.getInstance().getBeansOfType(BaseDataUploadService.class);
        for (BaseDataUploadService service : services.values()) {
            handlerMap.put(service.getTypeB(), service);
        }
    }

    /**
     * @param typeB data type to look up
     * @return the service registered for {@code typeB}, or {@code null} when none is registered
     */
    public BaseDataUploadService getDataUploadService(TypeB typeB) {
        return handlerMap.get(typeB);
    }

    /** Returns the shared instance, created when the holder class is first initialised. */
    public static BaseServiceContext getInstance() {
        return Holder.INSTANCE;
    }

    /** Holder whose static initialiser builds the shared instance. */
    private static class Holder {
        private static final BaseServiceContext INSTANCE = new BaseServiceContext();
    }
}
package com.zhiwei.middleware.automatic.server.mission; package com.zhiwei.middleware.automatic.server.mission;
import com.zhiwei.middleware.automatic.server.service.AutoService;
import com.zhiwei.qbjc.bean.pojo.common.Project; import com.zhiwei.qbjc.bean.pojo.common.Project;
import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.mongodb.core.MongoTemplate; import org.springframework.data.mongodb.core.MongoTemplate;
...@@ -14,19 +13,12 @@ public class AsyncTask { ...@@ -14,19 +13,12 @@ public class AsyncTask {
private final MongoTemplate hangZhouMongo; private final MongoTemplate hangZhouMongo;
private final AutoService autoService;
public AsyncTask(@Qualifier("hangzhouMongoTemplate") MongoTemplate hangZhouMongo, public AsyncTask(@Qualifier("hangzhouMongoTemplate") MongoTemplate hangZhouMongo) {
AutoService autoService) {
this.hangZhouMongo = hangZhouMongo; this.hangZhouMongo = hangZhouMongo;
this.autoService = autoService;
} }
public List<String> findAllGroup() { public List<String> findAllGroup() {
return hangZhouMongo.findAll(Project.class).stream().map(Project::getProjectName).collect(Collectors.toList()); return hangZhouMongo.findAll(Project.class).stream().map(Project::getProjectName).collect(Collectors.toList());
} }
public void queueDataPull() {
autoService.asyncAutoMark();
}
} }
package com.zhiwei.middleware.automatic.server.mission; package com.zhiwei.middleware.automatic.server.mission;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.middleware.automatic.server.common.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.AutoTask;
import com.zhiwei.middleware.automatic.server.pojo.enums.TaskType;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil; import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.TemplateTitleService;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.springframework.scheduling.annotation.Async; import org.springframework.scheduling.annotation.Async;
...@@ -11,8 +13,6 @@ import org.springframework.scheduling.annotation.Scheduled; ...@@ -11,8 +13,6 @@ import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.util.Calendar; import java.util.Calendar;
import java.util.Date;
import java.util.concurrent.TimeUnit;
@Component @Component
@EnableScheduling @EnableScheduling
...@@ -20,43 +20,28 @@ public class ScheduledMission { ...@@ -20,43 +20,28 @@ public class ScheduledMission {
private final Logger log = LogManager.getLogger(ScheduledMission.class); private final Logger log = LogManager.getLogger(ScheduledMission.class);
private final AsyncTask asyncTask;
private final RedissonUtil redissonUtil; private final RedissonUtil redissonUtil;
private final TemplateTitleService templateTitleService; private final AsyncTask asyncTask;
public ScheduledMission(AsyncTask asyncTask, RedissonUtil redissonUtil, public ScheduledMission(RedissonUtil redissonUtil, AsyncTask asyncTask) {
TemplateTitleService templateTitleService) {
this.asyncTask = asyncTask;
this.redissonUtil = redissonUtil; this.redissonUtil = redissonUtil;
this.templateTitleService = templateTitleService; this.asyncTask = asyncTask;
} }
// @Scheduled(cron = "10/10 * * * * ? ")
// @Async("asyncExecutor")
public void queueDataPull() {
try {
asyncTask.queueDataPull();
} catch (Exception e) {
log.error("定时拉取自动标注队列出错:", e);
}
}
// @Scheduled(cron = "0 0/5 * * * ?") // @Scheduled(cron = "0 0/5 * * * ?")
// @Async("asyncExecutor") // @Async("asyncExecutor")
public void templateHourSync() { public void templateHourSync() {
try { try {
if (redissonUtil.tryLock(GenericAttribute.LOCK_TEMPLATE_HOUR, 0, 1, TimeUnit.MINUTES)) { Calendar calendar = Calendar.getInstance();
Calendar calendar = Calendar.getInstance(); calendar.add(Calendar.HOUR_OF_DAY, -7);
calendar.add(Calendar.HOUR_OF_DAY, -7); long startTime = calendar.getTime().getTime();
long startTime = calendar.getTime().getTime(); Calendar calendarEndTime = Calendar.getInstance();
Calendar calendarEndTime = Calendar.getInstance(); calendarEndTime.add(Calendar.MINUTE, -5);
calendarEndTime.add(Calendar.MINUTE, -5); long endTime = calendarEndTime.getTime().getTime();
long endTime = calendarEndTime.getTime().getTime(); for (String project : asyncTask.findAllGroup()) {
templateTitleService.schedulerHourAggregation(asyncTask.findAllGroup(), startTime, endTime); putTask(project, startTime, endTime);
//释放锁
redissonUtil.unlock(GenericAttribute.LOCK_TEMPLATE_HOUR);
} }
} catch (Exception e) { } catch (Exception e) {
log.error("十分钟定时同步模板失败:", e); log.error("十分钟定时同步模板失败:", e);
...@@ -67,18 +52,25 @@ public class ScheduledMission { ...@@ -67,18 +52,25 @@ public class ScheduledMission {
// @Async("autMarkExecutor") // @Async("autMarkExecutor")
public void templateDaySync() { public void templateDaySync() {
try { try {
if (redissonUtil.tryLock(GenericAttribute.LOCK_TEMPLATE_DAY, 0, 1, TimeUnit.MINUTES)) { Calendar calendar = Calendar.getInstance();
Calendar calendar = Calendar.getInstance(); // 聚合1天,文章时间和标注时间都在1天内
// 聚合1天,文章时间和标注时间都在1天内 calendar.add(Calendar.DAY_OF_MONTH, -1);
calendar.add(Calendar.DAY_OF_MONTH, -1); long startTime = calendar.getTime().getTime();
long startTime = calendar.getTime().getTime(); long endTime = System.currentTimeMillis();
templateTitleService.schedulerHourAggregation(asyncTask.findAllGroup(), startTime, System.currentTimeMillis()); for (String project : asyncTask.findAllGroup()) {
//释放锁 putTask(project, startTime, endTime);
redissonUtil.unlock(GenericAttribute.LOCK_TEMPLATE_DAY);
} }
} catch (Exception e) { } catch (Exception e) {
log.error("每天定时同步模板失败:", e); log.error("每天定时同步模板失败:", e);
} }
} }
private void putTask(String group, long startTime, long endTime) {
AutoTask autoTask = new AutoTask(TaskType.TEMPLATE.getType());
autoTask.getParamSource().put(GenericAttribute.GROUP_PARAM, group);
autoTask.getParamSource().put(GenericAttribute.START_PARAM, startTime);
autoTask.getParamSource().put(GenericAttribute.END_PARAM, endTime);
redissonUtil.putQueue(GenericAttribute.KEY, JSONObject.toJSONString(autoTask));
}
} }
package com.zhiwei.middleware.automatic.server.pojo;
import java.io.Serializable;
/**
 * Summary of one aggregation run: completion flags plus record counters.
 *
 * <p>Property names (including the historical spelling {@code aggreFinshed}) are kept as-is
 * because accessors are part of the serialized form.
 */
public class AggreInfo implements Serializable {

    private static final long serialVersionUID = 4901060154053874112L;

    /** Whether aggregation has finished. */
    Boolean aggreFinshed;

    /** Total number of aggregated records. */
    int totalCount;

    /** Number of suspected-noise records. */
    int noiseCount;

    /** Number of template titles. */
    int titleFatherCount;

    /** Number of automatically marked records. */
    int automaticmarkCount;

    /** Whether the already-marked part has been inserted into storage. */
    Boolean inserted;

    /** Required for JSON deserialization; do not remove. */
    public AggreInfo() {
    }

    public AggreInfo(Boolean aggreFinshed, Boolean isInserted) {
        this.aggreFinshed = aggreFinshed;
        this.inserted = isInserted;
    }

    public Boolean isAggreFinshed() {
        return aggreFinshed;
    }

    public void setAggreFinshed(Boolean aggreFinshed) {
        this.aggreFinshed = aggreFinshed;
    }

    public void setTotalCount(int totalCount) {
        this.totalCount = totalCount;
    }

    public void setNoiseCount(int noiseCount) {
        this.noiseCount = noiseCount;
    }

    public void setTitleFatherCount(int titleFatherCount) {
        this.titleFatherCount = titleFatherCount;
    }

    public void setAutomaticmarkCount(int automaticmarkCount) {
        this.automaticmarkCount = automaticmarkCount;
    }

    /** Sets the completion flag and all counters; leaves {@code inserted} untouched. */
    public void setAll(boolean aggreFinshed, int totalCount, int noiseCount, int titleFatherCount,
                       int automaticmarkCount) {
        this.aggreFinshed = aggreFinshed;
        this.totalCount = totalCount;
        this.noiseCount = noiseCount;
        this.titleFatherCount = titleFatherCount;
        this.automaticmarkCount = automaticmarkCount;
    }

    /** Sets both flags and all counters. */
    public void setAll(boolean aggreFinshed, boolean isInserted, int totalCount, int noiseCount,
                       int titleFatherCount, int automaticmarkCount) {
        // Reuse the five-argument overload instead of repeating its assignments.
        setAll(aggreFinshed, totalCount, noiseCount, titleFatherCount, automaticmarkCount);
        this.inserted = isInserted;
    }

    public Boolean isInserted() {
        return inserted;
    }

    public void setInserted(Boolean inserted) {
        this.inserted = inserted;
    }

    public int getTotalCount() {
        return totalCount;
    }

    public int getNoiseCount() {
        return noiseCount;
    }

    public int getTitleFatherCount() {
        return titleFatherCount;
    }

    public int getAutomaticmarkCount() {
        return automaticmarkCount;
    }

    /**
     * Builds the human-readable summary line. The valid/noise breakdown in parentheses is
     * included only when {@code noiseCount} is non-zero.
     *
     * @return summary text
     */
    public String getPrintString() {
        // Method-local builder: StringBuilder avoids StringBuffer's needless synchronization.
        StringBuilder sb = new StringBuilder();
        sb.append("本次数据采集共计").append(totalCount).append("条");
        if (0 != noiseCount) {
            sb.append("(有效数据").append(totalCount - noiseCount)
                    .append("条,疑似噪音").append(noiseCount).append("条)");
        }
        sb.append(",聚合模板共计").append(titleFatherCount)
                .append("条,自动标注").append(automaticmarkCount).append("条");
        return sb.toString();
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult.ResultInfo;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * In-memory cache entry for one aggregation order: the submitted data and the results
 * produced so far.
 */
public class CommonAggreeCache {

    /** Order id. */
    String id;

    /** Update time; initialised to the creation time in epoch milliseconds. */
    Long updateTime;

    /** Data held for this order, keyed by string. */
    Map<String, AggreeDTO> data;

    /** Cached results. */
    List<ResultInfo> results;

    /**
     * Creates an empty entry stamped with the current time.
     *
     * @param id order id
     */
    public CommonAggreeCache(String id) {
        this.results = new ArrayList<>();
        this.data = new HashMap<>();
        this.updateTime = System.currentTimeMillis();
        this.id = id;
    }

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public Long getUpdateTime() {
        return this.updateTime;
    }

    public void setUpdateTime(Long updateTime) {
        this.updateTime = updateTime;
    }

    public Map<String, AggreeDTO> getData() {
        return this.data;
    }

    public void setData(Map<String, AggreeDTO> data) {
        this.data = data;
    }

    public List<ResultInfo> getResults() {
        return this.results;
    }

    public void setResults(List<ResultInfo> results) {
        this.results = results;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import lombok.Data;
import java.util.Objects;
/**
 * Per-row state of a mark upload: the original input, the records resolved for it, and the
 * outcome (success flag plus info type / message).
 */
@Data
public class MarkUploadResult {

    /** Whether conversion succeeded. */
    private boolean success;

    private boolean search;

    private String key;

    /** Type of the info description. */
    private String infoType;

    /** Upload message. */
    private String message;

    /** Data info. */
    private MarkInfo markInfo;

    /** Original uploaded data. */
    private MarkUploadInfo originData;

    /** dw data. */
    private CommonDO dw;

    /** Mark data. */
    private CommonDO mark;

    private ClassB.TypeB typeB;

    private UploadInfo.DataType dataType;

    /**
     * Starts out not successful, with the success info type and no message.
     *
     * @param markUploadInfo original uploaded data
     */
    public MarkUploadResult(MarkUploadInfo markUploadInfo) {
        this.success = false;
        this.infoType = GenericAttribute.SUCCESS_SUFFIX;
        this.originData = markUploadInfo;
    }

    /** Stores the mark info; the row counts as successful exactly when it is non-null. */
    public void setMarkInfo(MarkInfo markInfo) {
        this.markInfo = markInfo;
        this.success = markInfo != null;
    }

    /** Records an info type and message and marks the row as not successful. */
    public void setInfo(String infoType, String message) {
        this.success = false;
        this.infoType = infoType;
        this.message = message;
    }

    /**
     * Stores the resolved records and marks the row successful, resetting the info type to
     * success and clearing the message. Does nothing when {@code dw} is null.
     */
    public void setCommonDO(CommonDO dw, CommonDO mark) {
        if (dw == null) {
            return;
        }
        this.dw = dw;
        this.mark = mark;
        this.success = true;
        this.infoType = GenericAttribute.SUCCESS_SUFFIX;
        this.message = null;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import lombok.Data;
/**
 * A template title within a group, together with a count.
 */
@Data
public class TemplateNum {

    private String title;

    private String group;

    private Integer number;

    public TemplateNum() {
    }

    /**
     * Creates an entry whose count starts at 1.
     *
     * @param title template title
     * @param group group name
     */
    public TemplateNum(String title, String group) {
        this.number = 1;
        this.group = group;
        this.title = title;
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import lombok.Data;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Working state of a title aggregation: the template-to-records map, a father-id counter,
 * and the identifying parameters of the run.
 */
@Data
public class TitleAggreeResult {

    private Map<TemplateFatherVo, List<JSONObject>> templateFatherVoListMap;

    private AtomicInteger fatherId;

    private String keyword;

    private ClassB.TypeB typeB;

    private String group;

    private String id;

    /**
     * Creates an empty result whose father-id counter starts at 1.
     *
     * @param id      run id
     * @param group   group name
     * @param typeB   data type
     * @param keyword keyword
     */
    public TitleAggreeResult(String id, String group, ClassB.TypeB typeB, String keyword) {
        this.id = id;
        this.group = group;
        this.typeB = typeB;
        this.keyword = keyword;
        this.fatherId = new AtomicInteger(1);
        this.templateFatherVoListMap = new HashMap<>();
    }
}
package com.zhiwei.middleware.automatic.server.pojo;
import lombok.Data;
/**
 * Pairs a title with a {@code cosFreq} value.
 */
@Data
public class TitleCosFreq {

    private String title;

    private double cosFreq;

    public TitleCosFreq() {
    }

    /**
     * @param title   title text
     * @param cosFreq value associated with the title
     */
    public TitleCosFreq(String title, double cosFreq) {
        this.cosFreq = cosFreq;
        this.title = title;
    }
}
package com.zhiwei.middleware.automatic.server.pojo.enums;
/**
 * Kinds of aggregation task, each carrying a display label and a key prefix.
 */
public enum AggreeTaskType {

    DATA("普通任务", "DATA-COLLECTION:"),
    EVENT("事件任务", "event:"),
    COMMON("普通任务", "common:");

    /** Display label of the task type. */
    final String type;

    /** Key prefix associated with the task type. */
    final String keyPrefix;

    AggreeTaskType(String type, String keyPrefix) {
        this.keyPrefix = keyPrefix;
        this.type = type;
    }

    /** @return display label */
    public String getType() {
        return this.type;
    }

    /** @return key prefix */
    public String getKeyPrefix() {
        return this.keyPrefix;
    }
}
package com.zhiwei.middleware.automatic.server.pojo.enums;
import com.zhiwei.base.category.ClassB;
/**
 * Field names per data type. Every constant uses the default names except {@code QA}, which
 * overrides the title and content fields.
 */
public enum Fields {

    QA("question_title", "question_content"),
    VIDEO,
    COMPLETE,
    INCOMPLETE;

    public String title;
    public String content;
    public String mtag = "mtag";
    public String mtime = "mtime";
    public String mperson = "mperson";
    public String mgroup = "mgroup";

    /** Uses the default title and content field names. */
    Fields() {
        this("title", "content");
    }

    /**
     * @param title   name of the title field
     * @param content name of the content field
     */
    Fields(String title, String content) {
        this.title = title;
        this.content = content;
    }

    /**
     * Looks up the constant whose name equals {@code typeB.name()}.
     *
     * @param typeB data type
     * @return the matching constant
     * @throws IllegalArgumentException if no constant has that name (from {@code valueOf})
     */
    public static Fields getFields(ClassB.TypeB typeB) {
        String constantName = typeB.name();
        return valueOf(constantName);
    }
}
package com.zhiwei.middleware.automatic.server.pojo.vo;
import com.alibaba.fastjson.JSONObject;
import lombok.Data;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
/**
 * Template ("father") entry of an aggregation; used as a map key, so equality is defined on
 * {@code title} and {@code content} only.
 */
@Data
public class TemplateFatherVo implements Serializable {

    private static final long serialVersionUID = 4142532604627291041L;

    /** Field used for comparison. */
    private String title = "";

    /** Field used for searching (title / text). */
    private String content = "";

    /** Template id. */
    private String fatherId = "1";

    /** First record, which serves as the template. */
    private JSONObject example;

    /** Total number of children. */
    private Integer totalSon = 0;

    private boolean isForward;

    /** Highlighted hit keywords and their frequencies. */
    private List<Map<String, Integer>> hitWordAndRate;

    public TemplateFatherVo() {
    }

    /**
     * Uses {@code title} for both title and content; {@code null} becomes the empty string.
     */
    public TemplateFatherVo(String title) {
        this(title, title);
    }

    /**
     * @param title   comparison field; {@code null} becomes the empty string
     * @param content search field; {@code null} becomes the empty string
     */
    public TemplateFatherVo(String title, String content) {
        this.title = nullToEmpty(title);
        this.content = nullToEmpty(content);
    }

    /** Increments the child count by one. */
    public void reFreshTotalSon() {
        totalSon++;
    }

    /**
     * Two entries are equal when neither is flagged {@code isForward} and both title and
     * content match, with {@code null} treated as the empty string (the same normalisation
     * the constructors apply). An entry flagged {@code isForward} never equals anything,
     * including itself, as before.
     *
     * <p>Unlike the previous version this neither mutates {@code content} nor throws when the
     * other entry has a null title or content, and it is symmetric and consistent with
     * {@link #hashCode()}.
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof TemplateFatherVo)) {
            return false;
        }
        TemplateFatherVo vo = (TemplateFatherVo) o;
        if (this.isForward || vo.isForward) {
            return false;
        }
        return nullToEmpty(this.content).equals(nullToEmpty(vo.content))
                && nullToEmpty(this.title).equals(nullToEmpty(vo.title));
    }

    /**
     * Sum of the content and title hashes with {@code null} treated as the empty string
     * (whose hash is 0), so the values match the previous implementation without touching
     * the fields.
     */
    @Override
    public int hashCode() {
        return nullToEmpty(content).hashCode() + nullToEmpty(title).hashCode();
    }

    /** Returns the empty string for {@code null}, otherwise the value itself. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}
package com.zhiwei.middleware.automatic.server.queue;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.TemplateNum;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.TemplateTitleService;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
 * In-memory buffer of {@link TemplateNum} events that a background task periodically
 * aggregates and forwards to {@link TemplateTitleService#modifyTemplateNum}.
 *
 * <p>The worker is submitted to the given executor from the constructor and runs until
 * its thread is interrupted.
 */
@Component
public class TemplateNumQueue implements Runnable {
    private static final Logger log = LogManager.getLogger(TemplateNumQueue.class);
    /** Maximum number of queued events handled per cycle. */
    private static final int PULL_LIMIT = 1000;
    private final BlockingQueue<TemplateNum> queue;
    private final RedissonUtil redissonUtil;
    private final TemplateTitleService templateTitleService;

    public TemplateNumQueue(RedissonUtil redissonUtil, TemplateTitleService templateTitleService,
                            @Qualifier("asyncExecutor") ThreadPoolTaskExecutor executor) {
        this.queue = new LinkedBlockingQueue<>();
        this.redissonUtil = redissonUtil;
        this.templateTitleService = templateTitleService;
        executor.execute(this);
    }

    /**
     * Enqueues all given events.
     */
    public void put(List<TemplateNum> templateNum) {
        queue.addAll(templateNum);
    }

    /**
     * Enqueues a single event.
     */
    public void put(TemplateNum templateNum) {
        queue.add(templateNum);
    }

    /**
     * Worker loop: when events are pending and the distributed lock can be taken
     * without waiting, drains up to {@link #PULL_LIMIT} events and applies the
     * per-group / per-title counts; then sleeps 300 ms.
     */
    @Override
    public void run() {
        while (!Thread.interrupted()) {
            try {
                // Acquire the lock (no wait, 1 minute lease).
                if (!queue.isEmpty() && redissonUtil.tryLock(GenericAttribute.LOCK_TEMPLATE_NUMBER, 0, 1, TimeUnit.MINUTES)) {
                    try {
                        flushBatch();
                    } finally {
                        // Always release, even when the update fails, so other
                        // holders are not blocked until the lease expires.
                        redissonUtil.unlock(GenericAttribute.LOCK_TEMPLATE_NUMBER);
                    }
                }
                Tools.sleep(300L);
            } catch (InterruptedException e) {
                // Restore the flag and stop: the loop condition is meant to end on interrupt.
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                log.error("模板数值更新失败:", e);
            }
        }
    }

    /**
     * Drains at most {@link #PULL_LIMIT} events and pushes the counts grouped by
     * group and title to the template service. Events drained before a failure are
     * not re-queued (same as before).
     */
    private void flushBatch() {
        List<TemplateNum> batch = new ArrayList<>(Math.min(queue.size(), PULL_LIMIT));
        // Non-blocking; never waits for elements while the lock is held.
        queue.drainTo(batch, PULL_LIMIT);
        if (batch.isEmpty()) {
            return;
        }
        Map<String, List<TemplateNum>> byGroup =
                batch.stream().collect(Collectors.groupingBy(TemplateNum::getGroup, Collectors.toList()));
        for (Map.Entry<String, List<TemplateNum>> entry : byGroup.entrySet()) {
            Map<String, Long> countByTitle = entry.getValue().stream()
                    .collect(Collectors.groupingBy(TemplateNum::getTitle, Collectors.counting()));
            countByTitle.forEach((title, num) ->
                    templateTitleService.modifyTemplateNum(entry.getKey(), title, num));
        }
    }
}
package com.zhiwei.middleware.automatic.server.redis; package com.zhiwei.middleware.automatic.server.redis;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.redisson.api.*; import org.redisson.api.*;
import org.redisson.client.protocol.ScoredEntry;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.*; import java.util.*;
import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
@Component @Component
public class RedissonUtil { public class RedissonUtil {
private static final Logger log = LogManager.getLogger(RedissonUtil.class);
/**
* 定义开始时间戳:2022-01-01 00:00:00
*/
private static final long BEGIN_TIMESTAMP = 1640995200L;
/**
* 序列号的位数
*/
private static final int COUNT_BITS = 32;
private static final String MARK_KEY = "auto:mark:"; private static final String MARK_KEY = "auto:mark:";
private final RedissonClient redissonClient; private final RedissonClient redissonClient;
public static final DecimalFormat FORMAT = new DecimalFormat("0000000");
private RedissonUtil(RedissonClient redissonClient) { private RedissonUtil(RedissonClient redissonClient) {
this.redissonClient = redissonClient; this.redissonClient = redissonClient;
} }
/**
 * Generates an id string by combining a time component with a Redis-backed counter.
 *
 * <p>The counter is the atomic long stored under
 * {@code Tools.assembleKey(MARK_KEY, keyPrefix)}; the time component is the number of
 * seconds since {@code BEGIN_TIMESTAMP}, shifted left by {@code COUNT_BITS}.
 * NOTE(review): the current time is taken from {@code LocalDateTime.now()} and read
 * as if it were UTC — confirm this is intended on hosts not running in UTC.
 *
 * @param keyPrefix suffix identifying which counter to use
 * @return the combined value as a decimal string
 */
public String nextId(String keyPrefix) {
    // Sequence part: increment first, exactly as before.
    long sequence = redissonClient.getAtomicLong(Tools.assembleKey(MARK_KEY, keyPrefix)).incrementAndGet();
    // Time part: seconds elapsed since the configured start timestamp.
    long elapsedSeconds = LocalDateTime.now().toEpochSecond(ZoneOffset.UTC) - BEGIN_TIMESTAMP;
    // Combine both parts and return.
    long id = (elapsedSeconds << COUNT_BITS) | sequence;
    return String.valueOf(id);
}
/** /**
* redis队列添加数据 * redis队列添加数据
* @param key key * @param key key
* @param value value * @param value value
*/ */
public void putQueue(String key, List<String> value) { public void putQueue(String key, String value) {
RQueue<String> queue = redissonClient.getQueue(redisKey(key)); RQueue<String> queue = redissonClient.getQueue(redisKey(key));
queue.addAll(value); queue.add(value);
} }
/** public void setList(String key, List<String> value) {
* 拉取redis队列数据 RList<Object> list = redissonClient.getList(redisKey(key));
* @param key key list.addAll(value);
* @param limit 条数
* @return value
*/
public List<String> pullQueue(String key, int limit) {
RQueue<String> queue = redissonClient.getQueue(redisKey(key));
return queue.poll(limit);
} }
public Map<String, String> getMapValue(String key) { public Map<String, String> getMapValue(String key) {
return redissonClient.getMap(redisKey(key)); return redissonClient.getMap(redisKey(key));
} }
...@@ -93,442 +47,11 @@ public class RedissonUtil { ...@@ -93,442 +47,11 @@ public class RedissonUtil {
map.put(group, value); map.put(group, value);
} }
/**
 * Writes all entries of {@code data} into the Redis map stored under the namespaced
 * ({@code redisKey}) form of {@code key}.
 *
 * @param key  map key (without namespace prefix)
 * @param data entries to put
 */
public void setMapValue(String key, Map<String, String> data) {
    RMap<String, String> target = redissonClient.getMap(redisKey(key));
    target.putAll(data);
}
/**
 * Tries to acquire the distributed lock stored under the namespaced
 * ({@code redisKey}) form of {@code lockKey}.
 *
 * @param lockKey   lock key (without namespace prefix)
 * @param waitTime  maximum time to wait for the lock
 * @param leaseTime time after which the lock is released automatically
 * @param unit      unit of {@code waitTime} and {@code leaseTime}
 * @return {@code true} if the lock was acquired
 * @throws InterruptedException if the thread is interrupted while waiting
 */
public boolean tryLock(String lockKey, long waitTime, long leaseTime, TimeUnit unit) throws InterruptedException {
    return redissonClient.getLock(redisKey(lockKey)).tryLock(waitTime, leaseTime, unit);
}
/**
 * Releases the distributed lock stored under the namespaced ({@code redisKey})
 * form of {@code lockKey} by calling {@code forceUnlock()}.
 * NOTE(review): a forced unlock does not check who holds the lock — confirm
 * callers never reach this after their lease has expired.
 *
 * @param lockKey lock key (without namespace prefix)
 */
public void unlock(String lockKey) {
    redissonClient.getLock(redisKey(lockKey)).forceUnlock();
}
/**
 * Appends all given values to the Redis list stored under the namespaced
 * ({@code redisKey}) form of {@code assembleKey}.
 *
 * @param assembleKey            list key (without namespace prefix)
 * @param gzipWithUploadInfoList values to append
 */
public void setList(String assembleKey, List<String> gzipWithUploadInfoList) {
    String namespacedKey = redisKey(assembleKey);
    RList<Object> target = redissonClient.getList(namespacedKey);
    target.addAll(gzipWithUploadInfoList);
}
/**
 * Returns the {@code subList(start, end)} view of the Redis list stored under the
 * namespaced form of {@code redisKey}.
 *
 * @param redisKey list key (without namespace prefix)
 * @param start    start index passed to {@code subList}
 * @param end      end index passed to {@code subList}
 * @return the requested slice
 */
public List<String> getList(String redisKey, int start, int end) {
    RList<String> source = redissonClient.getList(redisKey(redisKey));
    List<String> slice = source.subList(start, end);
    return slice;
}
/**
 * Returns the Redis list stored under the namespaced form of {@code redisKey}.
 * The returned object is the Redisson list handle itself, not a copy.
 *
 * @param redisKey list key (without namespace prefix)
 * @return the list handle
 */
public List<String> getList(String redisKey) {
    String namespacedKey = redisKey(redisKey);
    return redissonClient.getList(namespacedKey);
}
/**
 * Deletes the Redis list stored under {@code key}.
 * NOTE(review): unlike {@code setList}/{@code getList}, the key is used as given
 * and is NOT passed through {@code redisKey}; callers must supply the full key
 * (use {@code deleteListByKey} for namespaced keys).
 *
 * @param key full Redis key of the list
 */
public void deleteList(String key) {
    redissonClient.getList(key).delete();
}
/**
 * Returns the size of the Redis list stored under the namespaced form of
 * {@code redisKey}.
 *
 * @param redisKey list key (without namespace prefix)
 * @return number of elements in the list
 */
public long getListSize(String redisKey) {
    String namespacedKey = redisKey(redisKey);
    RList<String> source = redissonClient.getList(namespacedKey);
    return source.size();
}
/**
 * Appends the given values, JSON-serialised and gzip-encoded via {@code Tools.gzip},
 * to the list multimap stored under {@code key + GenericAttribute.MAP_SET}.
 * Entries with a {@code null} map key are skipped.
 *
 * <p>The default expiry is applied after the data has been written: setting a TTL
 * on a key that does not exist yet has no effect, so doing it first would leave a
 * freshly created multimap without expiry.
 *
 * @param key     base key (used as given, no namespace prefix is added)
 * @param listMap values to append per field; {@code null} or empty is a no-op
 */
public <T> void addListMapWithGzip(String key, Map<String, List<T>> listMap) {
    if (null == listMap || listMap.isEmpty()) {
        return;
    }
    RListMultimap<String, String> rListMultimap = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    listMap.forEach((field, values) -> {
        if (null == field) {
            // Nothing would be stored for this entry; skip the serialisation work.
            return;
        }
        List<String> encoded = new ArrayList<>(values.size());
        for (T value : values) {
            encoded.add(Tools.gzip(JSONObject.toJSONString(value)));
        }
        rListMultimap.get(field).addAll(encoded);
    });
    expireDefault(rListMultimap);
}
/**
 * Looks up the first entry of the scored sorted set
 * {@code key + GenericAttribute.KEY_SET} whose score lies in
 * {@code [score, getPointNext(score))} and parses it as {@code clazz}.
 *
 * @param key   base key (used as given)
 * @param score score as a decimal string
 * @param clazz target type of the JSON value
 * @return the parsed entry, or {@code null} when no entry matches
 */
public <T> T getListMapKeySetByScore(String key, String score, Class<T> clazz) {
    RScoredSortedSet<String> keySet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    Double lowerBound = Double.valueOf(score);
    List<String> matches = new ArrayList<>(
            keySet.valueRange(lowerBound, true, getPointNext(lowerBound), false));
    return checkIsNull(matches) ? null : JSONObject.parseObject(matches.get(0), clazz);
}
/**
 * Builds the sorted-set score {@code <size>.<fatherId as 7 digits>} for a template
 * and writes it back into the template.
 *
 * <p>Side effects on {@code fatherVo}: {@code fatherId} is replaced by the score
 * string and {@code totalSon} is set to {@code size - 1}.
 *
 * @param fatherVo template whose current {@code fatherId} must be numeric
 * @param size     number of values belonging to the template
 * @return the score as a {@code Double}
 */
public Double generateScore(TemplateFatherVo fatherVo, int size) {
    // DecimalFormat is not synchronized; use a private instance (same pattern as
    // FORMAT) instead of the shared static one, since this bean may be called
    // from several threads at once.
    DecimalFormat idFormat = new DecimalFormat("0000000");
    String result = size + "." + idFormat.format(Double.valueOf(fatherVo.getFatherId()));
    fatherVo.setFatherId(result);
    fatherVo.setTotalSon(size - 1);
    return Double.valueOf(result);
}
/**
 * Moves the key-set entries with the given score from {@code oldKey} to
 * {@code newKey}; both keys get the {@code GenericAttribute.KEY_SET} suffix
 * before delegating to {@code transferScoredSortedListFromOld2New}.
 *
 * @param oldKey base key of the source
 * @param newKey base key of the destination
 * @param score  score as a decimal string
 */
public void transferListMapKeySetFromOld2New(String oldKey, String newKey, String score) {
    String sourceKey = oldKey + GenericAttribute.KEY_SET;
    String targetKey = newKey + GenericAttribute.KEY_SET;
    transferScoredSortedListFromOld2New(sourceKey, targetKey, score);
}
/**
 * Copies every value of the scored sorted set {@code oldKey} whose score lies in
 * {@code [score, getPointNext(score))} into {@code newKey} (stored with exactly
 * {@code score}), then removes that score range from {@code oldKey}.
 *
 * @param oldKey full key of the source sorted set
 * @param newKey full key of the destination sorted set
 * @param score  score as a decimal string
 */
public void transferScoredSortedListFromOld2New(String oldKey, String newKey, String score) {
    // Cached key sets.
    RScoredSortedSet<String> source = redissonClient.getScoredSortedSet(oldKey);
    RScoredSortedSet<String> target = redissonClient.getScoredSortedSet(newKey);
    Double lowerBound = Double.valueOf(score);
    Collection<String> moved = source.valueRange(lowerBound, true, getPointNext(lowerBound), false);
    for (String value : moved) {
        target.add(lowerBound, value);
    }
    source.removeRangeByScore(lowerBound, true, getPointNext(lowerBound), false);
}
/**
 * Replaces the entry (or entries) of the scored sorted set
 * {@code key + GenericAttribute.KEY_SET} whose score lies in
 * {@code [score, getPointNext(score))} with the JSON form of {@code t}, stored
 * under exactly {@code score}.
 *
 * <p>The default expiry is applied after the write so that it also takes effect
 * when this call creates the sorted set (a TTL cannot be set on a missing key).
 *
 * @param key   base key (used as given)
 * @param score score as a decimal string
 * @param t     replacement value
 */
public <T> void replaceListMapKeySetByScore(String key, String score, T t) {
    Double scoreValue = Double.valueOf(score);
    RScoredSortedSet<String> scoredSortedSet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    scoredSortedSet.removeRangeByScore(scoreValue, true, getPointNext(scoreValue), false);
    scoredSortedSet.add(scoreValue, JSONObject.toJSONString(t));
    expireDefault(scoredSortedSet);
}
/**
 * Reads the string stored in the bucket under the namespaced ({@code redisKey})
 * form of {@code assembleKey}.
 *
 * @param assembleKey bucket key (without namespace prefix)
 * @return the stored value cast to {@code String}
 */
public String getBucket(String assembleKey) {
    Object stored = redissonClient.getBucket(redisKey(assembleKey)).get();
    return (String) stored;
}
/**
 * Stores {@code str} in the bucket under the namespaced ({@code redisKey}) form of
 * {@code assembleKey}.
 *
 * @param assembleKey bucket key (without namespace prefix)
 * @param str         value to store
 */
public void setBucket(String assembleKey, String str) {
    String namespacedKey = redisKey(assembleKey);
    RBucket<Object> target = redissonClient.getBucket(namespacedKey);
    target.set(str);
}
/**
 * Deletes the bucket stored under the namespaced ({@code redisKey}) form of
 * {@code assembleKey}.
 *
 * @param assembleKey bucket key (without namespace prefix)
 */
public void deleteBucket(String assembleKey) {
    redissonClient.getBucket(redisKey(assembleKey)).delete();
}
/**
 * Deletes the Redis list stored under the namespaced ({@code redisKey}) form of
 * {@code assembleKey}.
 *
 * @param assembleKey list key (without namespace prefix)
 */
public void deleteListByKey(String assembleKey) {
    String namespacedKey = redisKey(assembleKey);
    redissonClient.getList(namespacedKey).delete();
}
/**
 * Deletes the three structures derived from {@code key}: the scored sorted set
 * ({@code KEY_SET} suffix), the list multimap ({@code MAP_SET} suffix) and the
 * object stored under the {@code NOISE_SET} suffix.
 *
 * @param key base key (used as given)
 */
public void deleteListMap(String key) {
    String keySetName = key + GenericAttribute.KEY_SET;
    String mapSetName = key + GenericAttribute.MAP_SET;
    String noiseSetName = key + GenericAttribute.NOISE_SET;
    redissonClient.getScoredSortedSet(keySetName).delete();
    redissonClient.getListMultimap(mapSetName).delete();
    redissonClient.getList(noiseSetName).delete();
}
/**
 * Deletes the list multimap {@code key + MAP_SET}. Before that, when the key set
 * {@code key + KEY_SET} holds fewer than 2000 entries, each of its entries is
 * added asynchronously to the sorted set {@code key + NOISE_SET}.
 *
 * @param key base key (used as given)
 */
public void deleteListMapRetainNoiseRule(String key) {
    RScoredSortedSet<String> keySet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    boolean smallEnoughToRetain = keySet.size() < 2000;
    if (smallEnoughToRetain) {
        String noiseSetName = key + GenericAttribute.NOISE_SET;
        for (String entry : keySet) {
            redissonClient.getSortedSet(noiseSetName).addAsync(entry);
        }
    }
    redissonClient.getListMultimap(key + GenericAttribute.MAP_SET).delete();
}
/**
 * Asynchronously deletes the bucket stored under {@code key} (used as given, no
 * namespace prefix is added).
 *
 * @param key full Redis key of the bucket
 * @return future of the delete result
 */
public RFuture<Boolean> deleteByData(String key) {
    RFuture<Boolean> pending = redissonClient.getBucket(key).deleteAsync();
    return pending;
}
/**
 * Asynchronously deletes the list stored under {@code key} (used as given, no
 * namespace prefix is added).
 *
 * @param key full Redis key of the list
 * @return future of the delete result
 */
public RFuture<Boolean> deleteListByData(String key) {
    RFuture<Boolean> pending = redissonClient.getList(key).deleteAsync();
    return pending;
}
/**
 * Starts asynchronous deletion of the three structures derived from {@code key}
 * ({@code KEY_SET}, {@code MAP_SET} and {@code NOISE_SET} suffixes).
 *
 * @param key base key (used as given)
 * @return the three delete futures, in the order key set, multimap, noise set
 */
public List<RFuture<Boolean>> deleteListMapByType(String key) {
    RFuture<Boolean> keySetDeletion = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET).deleteAsync();
    RFuture<Boolean> mapSetDeletion = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET).deleteAsync();
    RFuture<Boolean> noiseSetDeletion = redissonClient.getSortedSet(key + GenericAttribute.NOISE_SET).deleteAsync();
    List<RFuture<Boolean>> pending = new ArrayList<>();
    pending.add(keySetDeletion);
    pending.add(mapSetDeletion);
    pending.add(noiseSetDeletion);
    return pending;
}
/**
 * Asynchronous variant of {@code deleteListMapRetainNoiseRule}: when the key set
 * {@code key + KEY_SET} holds fewer than 2000 entries they are added
 * asynchronously to {@code key + NOISE_SET}; the multimap {@code key + MAP_SET}
 * is then deleted asynchronously.
 *
 * @param key base key (used as given)
 * @return future of the multimap deletion
 */
public RFuture<Boolean> deleteListMapRetainNoiseRuleByData(String key) {
    RScoredSortedSet<String> keySet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    boolean smallEnoughToRetain = keySet.size() < 2000;
    if (smallEnoughToRetain) {
        String noiseSetName = key + GenericAttribute.NOISE_SET;
        for (String entry : keySet) {
            redissonClient.getSortedSet(noiseSetName).addAsync(entry);
        }
    }
    String mapSetName = key + GenericAttribute.MAP_SET;
    return redissonClient.getListMultimap(mapSetName).deleteAsync();
}
/**
 * Appends {@code list} to the Redis list stored under {@code key} (used as given)
 * and applies the default expiry.
 *
 * <p>The expiry is set after the append: a TTL cannot be set on a key that does
 * not exist yet, so setting it first would leave a newly created list without one.
 *
 * @param key  full Redis key of the list
 * @param list values to append; {@code null} or empty is a no-op
 */
public void addListByData(String key, List<String> list) {
    if (checkIsNull(list)) {
        return;
    }
    RList<String> rList = redissonClient.getList(key);
    rList.addAll(list);
    expireDefault(rList);
}
/**
 * Reads the JSON string stored in the bucket {@code key} (used as given) and
 * parses it as {@code clazz}.
 *
 * @param key   full Redis key of the bucket
 * @param clazz target type
 * @return the parsed object, or {@code null} when the bucket is empty
 */
public <T> T getStrByData(String key, Class<T> clazz) {
    RBucket<String> source = redissonClient.getBucket(key);
    String json = source.get();
    return StringUtils.isEmpty(json) ? null : JSONObject.parseObject(json, clazz);
}
/**
 * Stores the JSON form of {@code t} in the bucket {@code key} (used as given) and
 * applies the default expiry.
 *
 * <p>The expiry must be applied after {@code set}: writing a plain value replaces
 * the key and discards any TTL that was set before, so the previous order left
 * the value without an expiry.
 *
 * @param key full Redis key of the bucket
 * @param t   value to serialise and store
 */
public <T> void setStrByData(String key, T t) {
    RBucket<String> rBucket = redissonClient.getBucket(key);
    rBucket.set(JSONObject.toJSONString(t));
    expireDefault(rBucket);
}
/**
 * Reads the whole Redis list {@code key} (used as given) and decodes every
 * element through {@code gunzipFormat}.
 *
 * @param key   full Redis key of the list
 * @param clazz element type
 * @return decoded elements; an empty list when nothing is stored
 */
public <T> List<T> getListAllWithGunZipByData(String key, Class<T> clazz) {
    RList<String> compressed = redissonClient.getList(key);
    return checkIsNull(compressed) ? new ArrayList<>() : gunzipFormat(compressed, clazz);
}
/**
 * Encodes every value list with {@code gzipFormat} and hands the result to
 * {@code setListMapAllCustom}.
 *
 * @param key     base key
 * @param listMap values per template; {@code null} or empty is a no-op
 */
public <T> void setListMapAllWithGzipCustom(String key, Map<TemplateFatherVo, List<T>> listMap) {
    if (checkIsNull(listMap)) {
        return;
    }
    Map<TemplateFatherVo, List<String>> encodedByTemplate = new HashMap<>(listMap.size());
    for (Map.Entry<TemplateFatherVo, List<T>> entry : listMap.entrySet()) {
        encodedByTemplate.put(entry.getKey(), gzipFormat(entry.getValue()));
    }
    setListMapAllCustom(key, encodedByTemplate);
}
/**
 * Adds all value/score pairs of {@code map} to the scored sorted set {@code key}
 * (used as given) and applies the default expiry.
 *
 * <p>The expiry is set after the write so that it also takes effect when this
 * call creates the sorted set (a TTL cannot be set on a missing key).
 *
 * @param key full Redis key of the sorted set
 * @param map value-to-score pairs; {@code null} or empty is a no-op
 */
public void addScoredSortedList(String key, Map<String, Double> map) {
    if (checkIsNull(map)) {
        return;
    }
    RScoredSortedSet<String> scoredSortedSet = redissonClient.getScoredSortedSet(key);
    scoredSortedSet.addAll(map);
    expireDefault(scoredSortedSet);
}
/**
 * Stores templates and their values in two structures derived from {@code key}:
 * the templates (as JSON, scored by {@code generateScore}) in the sorted set
 * {@code key + KEY_SET}, and the values in the list multimap {@code key + MAP_SET}
 * under each template's {@code fatherId}.
 *
 * <p>{@code generateScore} rewrites each template's {@code fatherId} before it is
 * used as the multimap field, so the order of the two passes matters. Value lists
 * longer than 1000 elements are written in batches of 1000.
 *
 * @param key     base key (used as given)
 * @param listMap values per template; {@code null} or empty is a no-op
 */
public void setListMapAllCustom(String key, Map<TemplateFatherVo, List<String>> listMap) {
    if (null == listMap || listMap.isEmpty()) {
        return;
    }
    Map<String, Double> scoreByTemplateJson = new HashMap<>(listMap.size());
    for (Map.Entry<TemplateFatherVo, List<String>> entry : listMap.entrySet()) {
        TemplateFatherVo fatherVo = entry.getKey();
        Double score = generateScore(fatherVo, entry.getValue().size());
        scoreByTemplateJson.put(JSONObject.toJSONString(fatherVo), score);
    }
    // Cache the key set.
    addScoredSortedList(key + GenericAttribute.KEY_SET, scoreByTemplateJson);
    // Cache the value set.
    RListMultimap<String, String> valueMap = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    final int batchSize = 1000;
    for (Map.Entry<TemplateFatherVo, List<String>> entry : listMap.entrySet()) {
        TemplateFatherVo fatherVo = entry.getKey();
        List<String> values = entry.getValue();
        int total = values.size();
        if (total <= batchSize) {
            valueMap.putAll(fatherVo.getFatherId(), values);
            continue;
        }
        // Split the list and cache it batch by batch.
        for (int start = 0; start < total; start += batchSize) {
            int end = Math.min(start + batchSize, total);
            valueMap.putAll(fatherVo.getFatherId(), values.subList(start, end));
        }
    }
}
/**
 * Reads all values stored for {@code fieldKey} in the list multimap
 * {@code key + MAP_SET} and decodes them through {@code gunzipFormat}.
 *
 * @param key      base key (used as given)
 * @param fieldKey multimap field
 * @param clazz    element type
 * @return decoded values
 */
public <T> List<T> getListMapValueByFieldWithGunZipByData(String key, String fieldKey, Class<T> clazz) {
    RListMultimap<String, String> valueMap = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    List<String> compressed = valueMap.getAll(fieldKey);
    return gunzipFormat(compressed, clazz);
}
/**
 * Adds the JSON form of {@code obj} with the given score to the scored sorted set
 * {@code key + KEY_SET}, then applies the default expiry to that set.
 *
 * @param key   base key (used as given)
 * @param score score as a decimal string
 * @param obj   value to serialise and add
 */
public void addListMapKeyByData(String key, String score, Object obj) {
    String keySetName = key + GenericAttribute.KEY_SET;
    Double scoreValue = Double.valueOf(score);
    String json = JSONObject.toJSONString(obj);
    redissonClient.getScoredSortedSet(keySetName).add(scoreValue, json);
    expireDefault(redissonClient.getScoredSortedSet(keySetName));
}
/**
 * Appends one value (JSON-serialised and encoded via {@code Tools.gzip}) to the
 * field {@code fieldKey} of the list multimap {@code key + MAP_SET} and applies
 * the default expiry.
 *
 * <p>The expiry is set after the write so that it also takes effect when this
 * call creates the multimap (a TTL cannot be set on a missing key).
 *
 * @param key      base key (used as given)
 * @param fieldKey multimap field
 * @param json     value to store
 */
public void addListMapValueOneByData(String key, String fieldKey, JSONObject json) {
    // Cache the value set.
    RListMultimap<String, String> rMultimap = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    rMultimap.put(fieldKey, Tools.gzip(JSONObject.toJSONString(json)));
    expireDefault(rMultimap);
}
/**
 * Decrements {@code totalSon} of the template stored with the given score in the
 * key set {@code key + KEY_SET}: the entry is removed via
 * {@code removeListMapKeySetByScore}, updated and added back under the same score.
 *
 * <p>When no entry matches the score nothing is written (previously this ended in
 * a {@code NullPointerException}). The default expiry is applied after the write.
 *
 * @param key   base key (used as given)
 * @param score score as a decimal string
 */
public void reduceListMapKeyByScoreCustomByData(String key, String score) {
    RScoredSortedSet<String> rScoredSortedSet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    TemplateFatherVo fatherVo = removeListMapKeySetByScore(key, score, TemplateFatherVo.class);
    if (null == fatherVo) {
        log.warn("reduceListMapKeyByScoreCustomByData-no entry found,key:{},score:{}", key, score);
        return;
    }
    fatherVo.setTotalSon(fatherVo.getTotalSon() - 1);
    rScoredSortedSet.add(Double.valueOf(score), JSONObject.toJSONString(fatherVo));
    expireDefault(rScoredSortedSet);
}
/**
 * Returns a rank range of the key set {@code key + KEY_SET}, parsed as
 * {@code clazz}; delegates to {@code getScoredSortedList}.
 *
 * @param key       base key (used as given)
 * @param fromIndex start rank
 * @param toIndex   end rank
 * @param clazz     element type
 * @return parsed entries
 */
public <T> List<T> getListMapKeySet(String key, int fromIndex, int toIndex, Class<T> clazz) {
    String keySetName = key + GenericAttribute.KEY_SET;
    return getScoredSortedList(keySetName, fromIndex, toIndex, clazz);
}
/**
 * Reads every entry of the sorted set {@code key + NOISE_SET} and parses it as
 * {@code clazz}.
 *
 * @param key   base key (used as given)
 * @param clazz element type
 * @return parsed entries, or {@code null} if the client returned no set handle
 */
public <T> List<T> getListMapNoiseRule(String key, Class<T> clazz) {
    RSortedSet<String> noiseSet = redissonClient.getSortedSet(key + GenericAttribute.NOISE_SET);
    if (noiseSet == null) {
        return null;
    }
    List<T> parsed = new ArrayList<>(noiseSet.size());
    Iterator<String> entries = noiseSet.iterator();
    while (entries.hasNext()) {
        parsed.add(JSONObject.parseObject(entries.next(), clazz));
    }
    return parsed;
}
/**
 * Returns the number of entries in the scored sorted set {@code key} (used as
 * given).
 *
 * @param key full Redis key of the sorted set
 * @return entry count
 */
public int getScoredSortedListSize(String key) {
    int entryCount = redissonClient.getScoredSortedSet(key).size();
    return entryCount;
}
/**
 * Returns the number of values stored for {@code fieldKey} in the list multimap
 * {@code key + MAP_SET}.
 *
 * @param key      base key (used as given)
 * @param fieldKey multimap field
 * @return number of values for the field
 */
public int getListMapValueByFieldSize(String key, String fieldKey) {
    String mapSetName = key + GenericAttribute.MAP_SET;
    int valueCount = redissonClient.getListMultimap(mapSetName).get(fieldKey).size();
    return valueCount;
}
/**
 * Reads {@code range(fromIndex, toIndex)} of the values stored for
 * {@code fieldKey} in the list multimap {@code key + MAP_SET} and decodes them
 * through {@code gunzipFormat}.
 *
 * @param key       base key (used as given)
 * @param fieldKey  multimap field
 * @param fromIndex start index passed to {@code range}
 * @param toIndex   end index passed to {@code range}
 * @param clazz     element type
 * @return decoded values
 */
public <T> List<T> getRangeListMapValueByFieldWithGunZip(String key, String fieldKey, int fromIndex, int toIndex,
        Class<T> clazz) {
    RListMultimap<String, String> valueMap = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    List<String> compressed = valueMap.get(fieldKey).range(fromIndex, toIndex);
    return gunzipFormat(compressed, clazz);
}
/**
 * Returns the number of entries in the key set {@code key + KEY_SET}.
 *
 * @param key base key (used as given)
 * @return entry count
 */
public int getListMapKeySetSize(String key) {
    String keySetName = key + GenericAttribute.KEY_SET;
    return getScoredSortedListSize(keySetName);
}
/**
 * Replaces the values stored for {@code fieldKey} in the list multimap
 * {@code key + MAP_SET} with the {@code gzipFormat}-encoded form of {@code list},
 * then applies the default expiry.
 *
 * <p>The expiry is set after the write so that it also takes effect when this
 * call creates the multimap (a TTL cannot be set on a missing key).
 *
 * @param key      base key (used as given)
 * @param fieldKey multimap field
 * @param list     new values
 */
public <T> void setListMapValueByFieldWithGZipByData(String key, String fieldKey, List<T> list) {
    RListMultimap<String, String> rListMultimap = redissonClient.getListMultimap(key + GenericAttribute.MAP_SET);
    rListMultimap.replaceValues(fieldKey, gzipFormat(list));
    expireDefault(rListMultimap);
}
/**
 * Sets an expiry of {@code time} minutes on the Redis list stored under the
 * namespaced ({@code redisKey}) form of {@code key}.
 *
 * @param key  list key (without namespace prefix)
 * @param time time to live, in minutes
 */
public void listExpirable(String key, long time) {
    String namespacedKey = redisKey(key);
    redissonClient.getList(namespacedKey).expire(time, TimeUnit.MINUTES);
}
/**
 * Serialises every element to JSON and encodes it with {@code Tools.gzip}.
 *
 * @param list elements to encode
 * @return encoded strings in input order; an empty list for {@code null} or empty input
 */
private <T> List<String> gzipFormat(List<T> list) {
    if (checkIsNull(list)) {
        return new ArrayList<>();
    }
    List<String> encoded = new ArrayList<>(list.size());
    for (T element : list) {
        encoded.add(Tools.gzip(JSONObject.toJSONString(element)));
    }
    return encoded;
}
/**
 * Reads every entry of the scored sorted set {@code key} (used as given) and
 * parses it as {@code clazz}. Entries that fail to parse are logged and skipped.
 *
 * @param key   full Redis key of the sorted set
 * @param clazz element type
 * @return successfully parsed entries
 */
public <T> List<T> getScoredSortedList(String key, Class<T> clazz) {
    RScoredSortedSet<String> source = redissonClient.getScoredSortedSet(key);
    List<T> parsed = new ArrayList<>(source.size());
    Iterator<String> entries = source.iterator();
    while (entries.hasNext()) {
        String json = entries.next();
        try {
            parsed.add(JSONObject.parseObject(json, clazz));
        } catch (Exception e) {
            log.error("parseObject-", e);
        }
    }
    return parsed;
}
/**
 * Reads the entries of the scored sorted set {@code key} (used as given) returned
 * by {@code entryRangeReversed(fromIndex, toIndex)} and parses each value as
 * {@code clazz}. Values that fail to parse are logged and skipped.
 *
 * @param key       full Redis key of the sorted set
 * @param fromIndex start rank
 * @param toIndex   end rank
 * @param clazz     element type
 * @return successfully parsed entries
 */
public <T> List<T> getScoredSortedList(String key, int fromIndex, int toIndex, Class<T> clazz) {
    RScoredSortedSet<String> source = redissonClient.getScoredSortedSet(key);
    List<T> parsed = new ArrayList<>(source.size());
    for (ScoredEntry<String> scored : source.entryRangeReversed(fromIndex, toIndex)) {
        try {
            T element = JSONObject.parseObject(scored.getValue(), clazz);
            parsed.add(element);
        } catch (Exception e) {
            log.error("getScoredSortedList-parseJSONObject出错,value:{}", scored.getValue(),e);
        }
    }
    return parsed;
}
/**
 * Removes the entries of the key set {@code key + KEY_SET} whose score lies in
 * {@code [score, getPointNext(score))} and returns the first of them parsed as
 * {@code clazz}.
 *
 * <p>The key set is the scored sorted set stored under the {@code KEY_SET} suffix
 * (as in every other key-set method of this class); the {@code MAP_SET} suffix
 * used before addresses the list multimap, not a sorted set.
 *
 * @param key   base key (used as given)
 * @param score score as a decimal string
 * @param clazz target type
 * @return the first removed entry, or {@code null} when nothing matched
 */
private <T> T removeListMapKeySetByScore(String key, String score, Class<T> clazz) {
    RScoredSortedSet<String> scoredSortedSet = redissonClient.getScoredSortedSet(key + GenericAttribute.KEY_SET);
    Double scoreValue = Double.valueOf(score);
    Double upperBound = getPointNext(scoreValue);
    // Copy the matches before removing them from Redis.
    List<String> list = new ArrayList<>(
            scoredSortedSet.valueRange(scoreValue, true, upperBound, false));
    scoredSortedSet.removeRangeByScore(scoreValue, true, upperBound, false);
    if (checkIsNull(list)) {
        return null;
    }
    return JSONObject.parseObject(list.get(0), clazz);
}
/**
 * Decodes every element with {@code Tools.gunzip} and parses the result as
 * {@code clazz}.
 *
 * @param list  encoded strings
 * @param clazz element type
 * @return decoded elements in input order; an empty list for {@code null} or empty input
 */
private <T> List<T> gunzipFormat(List<String> list, Class<T> clazz) {
    if (checkIsNull(list)) {
        return new ArrayList<>();
    }
    List<T> decoded = new ArrayList<>(list.size());
    for (String compressedStr : list) {
        String json = Tools.gunzip(compressedStr);
        decoded.add(JSONObject.parseObject(json, clazz));
    }
    return decoded;
}
/**
 * Returns the exclusive upper bound used for "same score" range queries: the
 * score truncated to 7 decimal places plus {@code 0.0000001}.
 *
 * <p>Computed with decimal arithmetic instead of string splitting and the shared
 * {@code FORMAT} instance, which makes it safe for concurrent callers
 * ({@code DecimalFormat} is not synchronized), carries correctly when the
 * fraction is {@code 9999999}, and handles scores with more than 7 fraction digits.
 * For scores with at most 7 fraction digits and no carry the result is the same
 * as before.
 *
 * @param scoreValue score in the form {@code <size>.<7-digit id>}
 * @return the next representable score at 7-decimal resolution
 */
private Double getPointNext(Double scoreValue) {
    // Fully qualified names: java.math is not among the file's imports.
    java.math.BigDecimal truncated = java.math.BigDecimal.valueOf(scoreValue)
            .setScale(7, java.math.RoundingMode.DOWN);
    // BigDecimal.valueOf(1, 7) is exactly 0.0000001.
    java.math.BigDecimal next = truncated.add(java.math.BigDecimal.valueOf(1, 7));
    return Double.valueOf(next.toPlainString());
}
private String redisKey(String key) { private String redisKey(String key) {
return MARK_KEY + key; return MARK_KEY + key;
} }
/* public Long getCount(String key) {
* 默认超时时间8天 return redissonClient.getAtomicLong(redisKey(key)).get();
*/
/**
 * Applies the default time to live of 8 days to the given Redisson object.
 *
 * @param rExpirable object whose expiry is set
 */
private void expireDefault(RExpirable rExpirable) {
    final long defaultTtlDays = 8;
    rExpirable.expire(defaultTtlDays, TimeUnit.DAYS);
}
/**
 * Tells whether the list is {@code null} or has no elements.
 *
 * @param list list to check
 * @return {@code true} for {@code null} or an empty list
 */
private <T> boolean checkIsNull(List<T> list) {
    if (list == null) {
        return true;
    }
    return list.isEmpty();
}
private <K, T> boolean checkIsNull(Map<K, T> map) {
return null == map || map.isEmpty();
} }
} }
package com.zhiwei.middleware.automatic.server.service; package com.zhiwei.middleware.automatic.server.service;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
public interface AutoService { public interface AutoService {
/**
 * Runs the auto-mark job. NOTE(review): the original comment was empty; the name
 * suggests the work is asynchronous — confirm against the implementation.
 */
void asyncAutoMark();
/**
 * Asynchronous automatic marking across multiple projects.
 */
void autoMarkMulti();
/**
 * Automatic marking based on aggregated noise templates.
 * @param templateFatherVos noise templates
 * @param group project
 * @param field field
 * @return number of marked items
 */
int noiseAutoMark(Set<TemplateFatherVo> templateFatherVos, String group, String field);
/**
 * Automatic marking of event data.
 * @param group project
 * @param data data set
 */
void autMarkByEvent(String group, List<MarkInfo> data);
} }
package com.zhiwei.middleware.automatic.server.service; package com.zhiwei.middleware.automatic.server.service;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord; import com.zhiwei.middleware.automatic.server.pojo.TemplateTitleVo;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateTitleVo;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -9,12 +8,6 @@ import java.util.Map; ...@@ -9,12 +8,6 @@ import java.util.Map;
public interface TemplateTitleService { public interface TemplateTitleService {
/** /**
* 自动聚合模板
* @param groups 项目集
*/
void schedulerHourAggregation(List<String> groups, Long startTime, Long endTime);
/**
* 获取项目文本模板 * 获取项目文本模板
* @param project 项目 * @param project 项目
* @return 模板集 * @return 模板集
...@@ -22,29 +15,6 @@ public interface TemplateTitleService { ...@@ -22,29 +15,6 @@ public interface TemplateTitleService {
Map<String, TemplateTitleVo> getTemplateTitleByProject(String project); Map<String, TemplateTitleVo> getTemplateTitleByProject(String project);
/** /**
* 添加项目文本模板
* @param project 项目
* @param vos 模板集
* @return 模板集数量
*/
void setTemplateTitleByProject(String project, Map<String, TemplateTitleVo> vos);
/**
* 修正模板标题的markTag 如果不存在就会增加
*
* @param group 项目组
* @param templateTitle 模板标题
* @param fixTag 正确的标签
*/
boolean modifyTemplateTitle(String group, String templateTitle, String fixTag);
/**
* 修改模板计数
* @param group 项目
*/
void modifyTemplateNum(String group, String title, Long num);
/**
* 根据模板标题获取数据(仅最新100条) * 根据模板标题获取数据(仅最新100条)
* *
* @param group 项目 * @param group 项目
...@@ -54,12 +24,6 @@ public interface TemplateTitleService { ...@@ -54,12 +24,6 @@ public interface TemplateTitleService {
List<String> getMupdateByTemplateTitle(String group, String templateTitle); List<String> getMupdateByTemplateTitle(String group, String templateTitle);
/** /**
* 新增模板记录
* @param templateRecord 模板记录
*/
void insertTemplateRecord (TemplateRecord templateRecord);
/**
* 根据标题和特征值尝试搜索模板标题 * 根据标题和特征值尝试搜索模板标题
* *
* @param group 项目 * @param group 项目
...@@ -77,13 +41,4 @@ public interface TemplateTitleService { ...@@ -77,13 +41,4 @@ public interface TemplateTitleService {
* @return 返回值 * @return 返回值
*/ */
Map<String, Object> compareWithTemplateTileOL(String project, String title); Map<String, Object> compareWithTemplateTileOL(String project, String title);
/**
* 重置自动标注模板
* @param group 项目
* @param templateTitle 模板标题
* @return 是否成功
*/
boolean resetTemplate (String group, String templateTitle);
} }
package com.zhiwei.middleware.automatic.server.service;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadRule;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import java.util.Map;
/**
 * Operations of the mark-upload workflow: feeding source data, starting an upload,
 * querying its progress and results, and cleaning up afterwards.
 */
public interface UploadService {
/**
* Adds source data for an upload task.
*
* @param group project
* @param id task id
* @param sourceStr source data as a string (NOTE(review): format not visible here — confirm with the implementation)
*/
void addUploadList(String group, String id, String sourceStr);
/**
* Starts an upload.
*
* @param markUploadRule rule describing the upload to start
*/
void startUpload(MarkUploadRule markUploadRule);
/**
* Returns the upload status (progress).
*
* @param group project
* @param id task id
*
* @return Map<String,Object>
*/
Map<String, Object> getUploadStatus(String group, String id);
/**
* Returns the data set for a given UploadType.
*
* @param group project
* @param id task id
* @param page page
* @param size size
* @param isAsc sort order
* @param searchField field to search in
* @param keyword keyword
* @param uploadType upload type
*
* @return Map<String,Object>
*/
Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc,
String searchField, String keyword, UploadInfo.UploadType uploadType);
/**
* Returns the DataType.
*
* @param json record to classify
* @param typeB category the DataType is derived from (presumably — confirm with the implementation)
*
* @return DataType
*/
UploadInfo.DataType getDataType(JSONObject json, ClassB.TypeB typeB);
/**
* Cleans up the data set of a task.
*
* @param group project
* @param id task id
*/
void cleanUploadResult(String group, String id);
}
package com.zhiwei.middleware.automatic.server.service.handler;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.middleware.automatic.server.pojo.AggreInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.AggreeTaskType;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import java.util.List;
/**
 * Base class for aggregation task handlers: stores a task descriptor and its source
 * data in Redis under keys built from the task type's prefix, the project and the
 * task id.
 */
public class BaseTaskHandler {
    private static final String SOURCE_KEY = "source";
    private static final String TASK_KEY = "task";
    private final RedissonUtil redissonUtil;
    private final AggreeTaskType aggreeTaskType;

    public BaseTaskHandler(RedissonUtil redissonUtil, AggreeTaskType aggreeTaskType) {
        this.redissonUtil = redissonUtil;
        this.aggreeTaskType = aggreeTaskType;
    }

    /**
     * @return the key prefix of this handler's task type
     */
    public String getKeyPrefix() {
        return aggreeTaskType.getKeyPrefix();
    }

    /**
     * Returns the aggregation task.
     * @param group project
     * @param id id
     * @return the stored task, or {@code null} when none is stored
     */
    public AggreInfo getAggreeTask(String group, String id) {
        String cached = redissonUtil.getBucket(getTaskKey(group, id));
        if (null == cached) {
            return null;
        }
        // parseObject yields null for empty/"null" text; guard before converting.
        JSONObject json = JSONObject.parseObject(cached);
        return null == json ? null : json.toJavaObject(AggreInfo.class);
    }

    /**
     * Stores the aggregation task.
     * @param group project
     * @param id id
     * @param aggreInfo aggregation task
     */
    public void addAggreeTask(String group, String id, AggreInfo aggreInfo) {
        redissonUtil.setBucket(getTaskKey(group, id), JSONObject.toJSONString(aggreInfo));
    }

    /**
     * Appends to the data set.
     * @param group project
     * @param id id
     * @param dataSource data set
     */
    public void addDataSource(String group, String id, List<String> dataSource) {
        redissonUtil.setList(getSourceKey(group, id), dataSource);
    }

    /**
     * Returns the data set.
     * @param group project
     * @param id id
     * @return data set
     */
    public List<String> getDataSource(String group, String id) {
        return redissonUtil.getList(getSourceKey(group, id));
    }

    /**
     * @return number of entries in the data set
     */
    public long getDataSourceSize(String group, String id) {
        return redissonUtil.getListSize(getSourceKey(group, id));
    }

    /**
     * Lets the data set expire after 30 minutes.
     */
    public void dataSourceExpirable(String group, String id) {
        redissonUtil.listExpirable(getSourceKey(group, id), 30);
    }

    /**
     * Deletes everything stored for the task.
     * @param group project
     * @param id id
     */
    public void removerInfo(String group, String id) {
        // Data set. deleteListByKey applies the same key namespace as
        // setList/getList; deleteList uses the key as given and therefore
        // addressed a different Redis key than the one that was written.
        redissonUtil.deleteListByKey(getSourceKey(group, id));
        redissonUtil.deleteBucket(getTaskKey(group, id));
    }

    private String getSourceKey(String group, String id) {
        return Tools.assembleKey(aggreeTaskType.getKeyPrefix(), SOURCE_KEY, group, id);
    }

    private String getTaskKey(String group, String id) {
        return Tools.assembleKey(aggreeTaskType.getKeyPrefix(), TASK_KEY, group, id);
    }
}
package com.zhiwei.middleware.automatic.server.service.handler;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Sends records to Kafka.
 *
 * @ClassName: KafkaSendHandler
 * @Description: Kafka message sender
 * @author shenjunjie
 * @date 2019-08-29 23:09:30
 */
@Component
@EnableKafka
public class KafkaSendHandler {
    private static final Logger logger = LogManager.getLogger(KafkaSendHandler.class);
    /** Number of send attempts per message before it is given up. */
    private static final int MAX_SEND_ATTEMPTS = 3;
    @Autowired
    private KafkaTemplate<String, Object> kafkaTemplate;
    @Value("${crawler.topic}")
    private String topic;

    /**
     * Sends the source object of every given {@link MarkInfo} and logs the list size.
     *
     * @param list items whose {@code getSourceObj()} is sent
     */
    public void insertDataByMarkInfo(List<MarkInfo> list) {
        list.forEach(markInfo -> {
            insertData(markInfo.getSourceObj());
        });
        logger.info("Kafka发送消息{}条", list.size());
    }

    /**
     * Sends one record to the configured topic, keyed by its {@code cname} field.
     *
     * <p>The send is retried up to {@link #MAX_SEND_ATTEMPTS} times. The retry is
     * bounded so that a message that can never be delivered does not block the
     * caller forever; retrying also stops when the thread is interrupted.
     *
     * @param json record to send
     */
    public void insertData(JSONObject json) {
        String cname = json.getString("cname");
        for (int attempt = 1; attempt <= MAX_SEND_ATTEMPTS; attempt++) {
            if (syncSendKafkaMsg(topic, cname, json)) {
                return;
            }
            if (Thread.currentThread().isInterrupted()) {
                break;
            }
            logger.error("Kafka消息发送{}失败,立即重试...", cname);
        }
        logger.error("Kafka消息发送{}失败,已放弃发送", cname);
    }

    /**
     * Sends a message to Kafka and waits for the broker's answer.
     *
     * @param topic
     *            Kafka topic
     * @param msgType
     *            message type, used as the record key
     * @param json
     *            record to send (serialised with {@code toJSONString()})
     * @return {@code true} when the send completed successfully
     */
    private boolean syncSendKafkaMsg(String topic, String msgType, JSONObject json) {
        try {
            // 2019/7/11 11:13 value changed from list to string.
            // Block on the returned future: reading a flag set by an async
            // callback right after send() does not reflect the real outcome.
            kafkaTemplate.send(topic, msgType, json.toJSONString()).get();
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("KafkaSendFailure", e);
            return false;
        } catch (Exception e) {
            logger.error("KafkaSendFailure", e);
            return false;
        }
    }
}
package com.zhiwei.middleware.automatic.server.service.handler;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeCache;
import com.zhiwei.middleware.automatic.server.pojo.dto.AggreeDTO;
import com.zhiwei.middleware.automatic.server.pojo.enums.AggreeTaskType;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import com.zhiwei.nlp.AggreeBootStarter;
import com.zhiwei.nlp.utils.BasicUtil;
import com.zhiwei.nlp.vo.KResult;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import com.zhiwei.middleware.automatic.server.pojo.CommonAggreeResult.ResultInfo;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@Service
public class TextHandlerService extends BaseTaskHandler {
private static final Logger log = LogManager.getLogger(TextHandlerService.class);
/** 默认规格标准0.1 **/
private static final double DEFAULT_LIMIT = 0.1;
/** 单个聚合任务不得超出15W条 **/
private static final int AGGREE_COUNT_LIMIT = 150000;
private static final String TEXT_RES = "textRes";
/* 启动聚合处理线程池 */
private final ThreadPoolTaskExecutor START_SERVICE;
private final RedissonUtil redissonUtil;
public TextHandlerService(RedissonUtil redissonUtil,
@Qualifier("aggreeExecutor") ThreadPoolTaskExecutor aggreeExecutor) {
super(redissonUtil, AggreeTaskType.COMMON);
this.redissonUtil = redissonUtil;
this.START_SERVICE = aggreeExecutor;
}
/**
* 获取订单id
*
* @return String
*/
public String generateAggreeOrder() {
// 生成聚合任务订单
return redissonUtil.nextId(GenericAttribute.KEY_INCREMENT);
}
public boolean appendAggreeOrderNew(String id, List<AggreeDTO> list) {
long listSize = getDataSourceSize(null, id);
if (!checkLimit((int) listSize)) {
log.info("id:{},聚合任务超出上限:{},预期值:{}", id, AGGREE_COUNT_LIMIT, list.size() + listSize);
return false;
}
addDataSource(null, id, list.stream().map(JSONObject::toJSONString).collect(Collectors.toList()));
log.info("id:{},聚合任务添加{}条", id, list.size());
return true;
}
public boolean startAggree(String id) {
return startAggree(id, DEFAULT_LIMIT);
}
/**
 * Starts aggregation of a task on the aggregation executor.
 * <p>
 * The job parses the stored records, clusters their texts, sorts the clusters by size
 * (largest first), caches the result list and sets expiry on both the data source and
 * the result. Any failure inside the job is logged instead of being lost on the worker thread.
 *
 * @param id    task id
 * @param limit value forwarded to {@code AggreeBootStarter.getKResult}
 *
 * @return {@code false} when the task has no data source or the executor reports exactly
 *         10 active threads; {@code true} once the job has been submitted
 */
public boolean startAggree(String id, double limit) {
    List<String> source = getDataSource(null, id);
    if (null == source) {
        return false;
    }
    // NOTE(review): the literal 10 presumably mirrors the pool size of "aggreeExecutor" - confirm.
    if (START_SERVICE.getActiveCount() == 10) {
        return false;
    }
    START_SERVICE.execute(() -> {
        try {
            log.info("id:{},开始聚合任务", id);
            Map<String, AggreeDTO> dataGroup = source.stream()
                    .map(e -> JSONObject.parseObject(e).toJavaObject(AggreeDTO.class))
                    .collect(Collectors.toMap(AggreeDTO::getId, dto -> dto));
            // Cluster the texts (id -> text).
            List<KResult<String>> kResultList = AggreeBootStarter.getKResult(
                    dataGroup.values().stream().collect(Collectors.toMap(AggreeDTO::getId, AggreeDTO::getText)), limit);
            // Package every cluster as a ResultInfo.
            List<ResultInfo> res = new ArrayList<>();
            for (KResult<String> result : kResultList) {
                res.add(packageResultInfo(result, dataGroup));
            }
            // Sort by cluster size, descending.
            res.sort((a, b) -> Integer.compare(b.getSize(), a.getSize()));
            // Cache the result.
            String resKey = Tools.assembleKey(TEXT_RES, id);
            redissonUtil.setList(resKey, res.stream().map(JSONObject::toJSONString).collect(Collectors.toList()));
            dataSourceExpirable(null, id);
            redissonUtil.listExpirable(resKey, 30);
            log.info("id:{},聚合任务结束,缓存已生成", id);
        } catch (Exception e) {
            // Without this the failure would vanish on the worker thread and the cache would simply never appear.
            log.error("id:{},聚合任务异常", id, e);
        }
    });
    return true;
}
/**
 * Loads the cached aggregation result of a task.
 *
 * @param id task id
 *
 * @return a {@code CommonAggreeCache} holding the cached {@code ResultInfo} list and the
 *         task's source records keyed by record id; the record map is empty when the
 *         task has no data source
 */
public CommonAggreeCache getAggreeResult(String id) {
    CommonAggreeCache cache = new CommonAggreeCache(id);
    cache.setResults(redissonUtil.getList(Tools.assembleKey(TEXT_RES, id))
            .stream()
            .map(e -> JSONObject.parseObject(e).toJavaObject(ResultInfo.class))
            .collect(Collectors.toList()));
    // getDataSource may yield null (startAggree guards against it too); treat that as "no records".
    List<String> source = getDataSource(null, id);
    if (null == source) {
        source = new ArrayList<>();
    }
    cache.setData(source.stream()
            .map(e -> JSONObject.parseObject(e).toJavaObject(AggreeDTO.class))
            .collect(Collectors.toMap(AggreeDTO::getId, dto -> dto)));
    return cache;
}
/**
 * Builds the {@code ResultInfo} of one cluster.
 * <p>
 * The template record is the first cluster member whose text passes
 * {@code BasicUtil.textComparisonByAggreeConfig} against the cluster name; when none
 * passes, the first member of the cluster is used.
 *
 * @param result    cluster produced by the aggregation
 * @param sourceMap all source records keyed by record id
 * @return the packaged cluster (name, member count, member records, template record)
 */
private ResultInfo packageResultInfo(KResult<String> result, Map<String, AggreeDTO> sourceMap) {
    List<String> indexes = result.getDataPoints();
    String clusterName = result.getClusterName();
    // Stop at the first hit instead of collecting every hit.
    boolean hit = false;
    String templateId = null;
    for (String index : indexes) {
        if (BasicUtil.textComparisonByAggreeConfig(clusterName, sourceMap.get(index).getText())) {
            templateId = index;
            hit = true;
            break;
        }
    }
    if (!hit) {
        // No member matched: fall back to the first record of the cluster.
        log.info("未命中重要渠道选择第一条数据");
        templateId = indexes.get(0);
    }
    AggreeDTO templateData = sourceMap.get(templateId);
    // Hash lookup instead of List.contains, which made this scan quadratic for large clusters.
    // Fully qualified because this file's import list is not visible from here.
    java.util.Set<String> memberIds = new java.util.HashSet<>(indexes);
    List<AggreeDTO> members = sourceMap.values().stream()
            .filter(dto -> memberIds.contains(dto.getId()))
            .collect(Collectors.toList());
    return new ResultInfo(clusterName, indexes.size(), members, templateData);
}
/**
 * Tells whether a task of the given size is still below the per-task limit.
 *
 * @param size number of records
 * @return {@code true} when {@code size} is strictly less than {@code AGGREE_COUNT_LIMIT}
 */
private boolean checkLimit(int size) {
    return size < AGGREE_COUNT_LIMIT;
}
}
package com.zhiwei.middleware.automatic.server.service.impl; package com.zhiwei.middleware.automatic.server.service.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.TemplateRecordDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import com.zhiwei.middleware.automatic.server.pojo.TemplateNum;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import com.zhiwei.middleware.automatic.server.pojo.enums.TemplateStatus;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateTitleVo;
import com.zhiwei.middleware.automatic.server.queue.TemplateNumQueue;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.AutoService; import com.zhiwei.middleware.automatic.server.service.AutoService;
import com.zhiwei.middleware.automatic.server.service.TemplateTitleService;
import com.zhiwei.middleware.automatic.server.util.CosineSimilarity;
import com.zhiwei.middleware.automatic.server.util.MarkInfoUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
@Service @Service
public class AutoServiceImpl implements AutoService { public class AutoServiceImpl implements AutoService {
private static final Logger log = LogManager.getLogger(AutoServiceImpl.class); private static final Logger log = LogManager.getLogger(AutoServiceImpl.class);
// Redis helper; used here to pull queued mark data.
private final RedissonUtil redissonUtil;
// Dubbo facade; used here to fetch the "mupdates" values of a marked record.
private final DubboHandler dubboHandler;
// Supplies the template titles of a project and stores template records.
private final TemplateTitleService templateTitleService;
// Executor on which record batches are marked in parallel.
private final ThreadPoolTaskExecutor autoMarkExecutor;
// Queue that receives a TemplateNum for every auto-marked record.
private final TemplateNumQueue templateNumQueue;
/**
 * Wires the collaborators of the auto-mark service.
 *
 * @param redissonUtil         Redis helper
 * @param dubboHandler         Dubbo facade
 * @param templateTitleService template title service
 * @param templateNumQueue     queue of template counters
 * @param autoMarkExecutor     executor bean "autMarkExecutor" used for parallel marking
 */
public AutoServiceImpl(RedissonUtil redissonUtil,
        DubboHandler dubboHandler, TemplateTitleService templateTitleService,
        TemplateNumQueue templateNumQueue,
        @Qualifier("autMarkExecutor") ThreadPoolTaskExecutor autoMarkExecutor) {
    this.autoMarkExecutor = autoMarkExecutor;
    this.templateNumQueue = templateNumQueue;
    this.templateTitleService = templateTitleService;
    this.dubboHandler = dubboHandler;
    this.redissonUtil = redissonUtil;
}
/**
 * Pulls a batch of single-project mark data from the Redis queue, groups it by the
 * "mgroup" field of each record and auto-marks every group.
 * <p>
 * Records that cannot be parsed are logged and dropped. Records whose "mgroup" is
 * missing or unreadable are grouped under the empty string.
 */
@Override
public void asyncAutoMark() {
    List<String> infos = redissonUtil.pullQueue(GenericAttribute.REDIS_QUEUE_ONE_KEY, GenericAttribute.REDIS_QUEUE_LIMIT);
    if (Tools.isEmpty(infos)) {
        return;
    }
    List<MarkInfo> data = infos.stream()
            .map(e -> {
                try {
                    return JSONObject.parseObject(e).toJavaObject(MarkInfo.class);
                } catch (Exception exception) {
                    // Pass the exception so the stack trace is logged, as autoMarkMulti does.
                    log.error("单项目自动标注失败,json转换异常,原数据:{}", e, exception);
                    return null;
                }
            })
            .filter(Objects::nonNull)
            .collect(Collectors.toList());
    if (data.isEmpty()) {
        log.info("异步单项目自动标注,本次拉取数据为0");
        return;
    }
    // Group by project before marking.
    Map<String, List<MarkInfo>> groupMap = data.stream()
            .collect(Collectors.groupingBy(markInfo -> {
                try {
                    String mgroup = markInfo.getSourceObj().getString("mgroup");
                    // groupingBy rejects null keys with an NPE; use the same fallback as the catch branch.
                    return null == mgroup ? "" : mgroup;
                } catch (Exception e) {
                    return "";
                }
            }));
    projectAutoMark(groupMap);
}
/**
 * Pulls a batch of multi-project mark data from the Redis queue and auto-marks it once
 * per target project.
 * <p>
 * For every project listed on a record, the record's source object gets its "mgroup"
 * set to that project and is converted with {@code MarkInfoUtil.transformToMarkInfo}.
 */
@Override
public void autoMarkMulti() {
    List<String> infos = redissonUtil.pullQueue(GenericAttribute.REDIS_QUEUE_MULTI_KEY, GenericAttribute.REDIS_QUEUE_LIMIT);
    // Same guard as asyncAutoMark: nothing pulled means nothing to do.
    if (Tools.isEmpty(infos)) {
        log.info("异步多项目自动标注,本次拉取数据为0");
        return;
    }
    List<MarkInfoMulti> data = infos.stream()
            .map(e -> {
                try {
                    return JSONObject.parseObject(e).toJavaObject(MarkInfoMulti.class);
                } catch (Exception exception) {
                    log.error("多项目自动标注失败,json转换异常,原数据:{}", e, exception);
                }
                return null;
            })
            .filter(Objects::nonNull)
            .collect(Collectors.toList());
    if (data.isEmpty()) {
        log.info("异步多项目自动标注,本次拉取数据为0");
        return;
    }
    Map<String, List<MarkInfo>> groupMap = new HashMap<>();
    data.forEach(markInfoMulti -> {
        // The incoming mark info does not carry a correct mgroup; it is set per project below.
        JSONObject example = markInfoMulti.getMarkInfo().getSourceObj();
        // NOTE(review): the same JSONObject is mutated for every project. If transformToMarkInfo
        // keeps a reference instead of copying, all results share the last mgroup - confirm.
        markInfoMulti.getProjects().forEach(project -> {
            example.put("mgroup", project);
            groupMap.computeIfAbsent(project, k -> new ArrayList<>())
                    .add(MarkInfoUtil.transformToMarkInfo(example));
        });
    });
    projectAutoMark(groupMap);
}
/**
 * Auto-marks father templates against the template titles of a project.
 * <p>
 * For every template whose example value in {@code field} matches a project template,
 * the example receives the auto-mark person/time and the matched template's tag.
 *
 * @param templateFatherVos templates to check
 * @param group             project whose template titles are used
 * @param field             name of the example field holding the text to compare
 * @return number of templates that were marked
 */
@Override
public int noiseAutoMark(Set<TemplateFatherVo> templateFatherVos, String group, String field) {
    Map<String, TemplateTitleVo> template = templateTitleService.getTemplateTitleByProject(group);
    int marked = 0;
    for (TemplateFatherVo fatherVo : templateFatherVos) {
        JSONObject example = fatherVo.getExample();
        Map<String, Object> best = similarMapInfo(template, example.getString(field), group);
        if (!Tools.isEmpty(best)) {
            autoInfo(example);
            // Copy the tag of the best matching template onto the example.
            String matchedTitle = String.valueOf(best.get("aggreeTitle"));
            TemplateTitleVo matched = template.get(matchedTitle);
            example.put(GenericAttribute.ES_M_TAG, matched.getMtag());
            marked++;
        }
    }
    return marked;
}
/**
 * Auto-marks the given records against the template titles of one project.
 * Failures are logged and not propagated.
 *
 * @param group project name
 * @param data  records to mark
 */
@Override
public void autMarkByEvent(String group, List<MarkInfo> data) {
    try {
        Map<String, TemplateTitleVo> templates = templateTitleService.getTemplateTitleByProject(group);
        asyncTitleMark(group, data, templates);
    } catch (Exception e) {
        log.error("事件自动标注出错group:{},:", group, e);
    }
}
/**
 * Auto-marks every project group; a failure in one project is logged and does not
 * stop the others.
 *
 * @param groupMap records grouped by project name
 */
private void projectAutoMark(Map<String, List<MarkInfo>> groupMap) {
    groupMap.forEach((project, markInfos) -> {
        try {
            Map<String, TemplateTitleVo> template = templateTitleService.getTemplateTitleByProject(project);
            asyncTitleMark(project, markInfos, template);
        } catch (Exception e) {
            log.error("自动标注处理失败,项目:{}", project, e);
        }
    });
}
/**
 * Splits the records into batches of 1000, marks each batch on the auto-mark executor
 * and blocks until every batch has finished.
 *
 * @param group      project name
 * @param markInfos  records to mark
 * @param titleVoMap template titles of the project; their counters are reset first
 * @throws Exception when waiting is interrupted or a batch failed
 */
private void asyncTitleMark(String group, List<MarkInfo> markInfos, Map<String, TemplateTitleVo> titleVoMap) throws Exception {
    List<List<MarkInfo>> splitList = Tools.spilt(markInfos, 1000);
    emptyTemplate(titleVoMap);
    CompletableFuture<?>[] futures = splitList.stream()
            .map(batch -> CompletableFuture.runAsync(() -> oneTitleMark(group, batch, titleVoMap), autoMarkExecutor))
            .toArray(CompletableFuture[]::new);
    // allOf(...).get() already waits for every batch and surfaces the first failure; the former
    // thenApply stage only built a lazy stream that was never consumed.
    CompletableFuture.allOf(futures).get();
}
/**
 * Auto-marks one batch of records.
 * <p>
 * Records without a title, or whose title has 6 characters or fewer, are skipped. For
 * the rest, the most similar template is looked up; on a match the record's source
 * object receives the template tag, the auto-mark person and the mark time, and the
 * template statistics are refreshed.
 *
 * @param group      project name
 * @param markInfos  records of the batch
 * @param titleVoMap template titles of the project
 */
private void oneTitleMark(String group, List<MarkInfo> markInfos, Map<String, TemplateTitleVo> titleVoMap) {
    for (MarkInfo markInfo : markInfos) {
        JSONObject sourceObj = markInfo.getSourceObj();
        String rawTitle = sourceObj.getString(GenericAttribute.ES_TITLE);
        if (null == rawTitle || rawTitle.length() <= 6) {
            continue;
        }
        String title = Tools.filterSymbol(rawTitle);
        Map<String, Object> similarMap = similarMapInfo(titleVoMap, title, group);
        if (similarMap.isEmpty()) {
            continue;
        }
        // similarMapInfo stores the matched title under "aggreeTitle"; reading "aggreTitle"
        // always produced the string "null" and an NPE on the template lookup below.
        String aggreTitle = String.valueOf(similarMap.get("aggreeTitle"));
        TemplateTitleVo templateTitleVo = titleVoMap.get(aggreTitle);
        if (null == templateTitleVo) {
            log.warn("auto mark skipped, template not found for title:{}", aggreTitle);
            continue;
        }
        // Fill in the mark data.
        String aggreTag = templateTitleVo.getMtag();
        sourceObj.put(GenericAttribute.ES_M_TAG, aggreTag);
        sourceObj.put(GenericAttribute.ES_M_PERSON, "自动化机器人");
        sourceObj.put(GenericAttribute.ES_M_TIME, new Date().getTime());
        log.info("模板标题:{} MarkSum:{} Tag:{}被标注标题:{}相似度:{}", aggreTitle, templateTitleVo.getMarkSum(), aggreTag,
                title, similarMap.get("similar"));
        // Refresh the template's mark count and time; failures here are logged only.
        try {
            String[] updates = dubboHandler.getMupdates(markInfo.filterInfo());
            templateTitleVo.refreshMark();
            templateTitleService.insertTemplateRecord(new TemplateRecord(templateTitleVo.getId(), updates[0]));
            templateNumQueue.put(new TemplateNum(templateTitleVo.getTemplateTitle(), group));
        } catch (Exception e) {
            log.error("记录事件采集-标注数据特征值失败", e);
        }
    }
}
/**
 * Finds the template whose title is most similar to {@code title}.
 * <p>
 * Templates without an id get {@code group} assigned as id. Templates whose status is
 * {@code TemplateStatus.已重置} or whose tag is empty are ignored. A template only
 * qualifies when its similarity reaches {@code GenericAttribute.SIMILAR_STANDARD}.
 *
 * @param titleVoMap template titles to compare against
 * @param title      text to match
 * @param group      project name, used as fallback template id
 * @return a map with keys "similar" and "aggreeTitle" for the best match, or an empty
 *         map when nothing qualifies
 */
private Map<String, Object> similarMapInfo(Map<String, TemplateTitleVo> titleVoMap, String title, String group) {
    boolean matched = false;
    double bestSimilar = 0.0;
    String bestTitle = null;
    for (TemplateTitleVo candidate : titleVoMap.values()) {
        if (Objects.isNull(candidate.getId())) {
            candidate.setId(group);
        }
        // Only templates that are not reset and carry a tag take part.
        boolean usable = candidate.getStatus() != TemplateStatus.已重置 && !Tools.isEmpty(candidate.getMtag());
        if (!usable) {
            continue;
        }
        String candidateTitle = candidate.getTemplateTitle();
        double similar = CosineSimilarity.calculateTextSimWithBrand(candidateTitle, title);
        // Keep the highest similarity seen so far.
        if (similar >= GenericAttribute.SIMILAR_STANDARD && similar > bestSimilar) {
            matched = true;
            bestSimilar = similar;
            bestTitle = candidateTitle;
        }
    }
    Map<String, Object> similarMap = new HashMap<>();
    if (matched) {
        similarMap.put("similar", bestSimilar);
        similarMap.put("aggreeTitle", bestTitle);
    }
    return similarMap;
}
/**
 * Stamps a record with the auto-mark person and the current time in milliseconds.
 *
 * @param json record to stamp
 */
private void autoInfo(JSONObject json) {
    json.put(GenericAttribute.ES_M_PERSON, "自动化机器人");
    final long markedAt = System.currentTimeMillis();
    json.put(GenericAttribute.ES_M_TIME, markedAt);
}
/**
 * Calls {@code emptyNum()} on every template of the map.
 *
 * @param titleVoMap template titles
 */
private void emptyTemplate(Map<String, TemplateTitleVo> titleVoMap) {
    titleVoMap.values().forEach(TemplateTitleVo::emptyNum);
}
} }
package com.zhiwei.middleware.automatic.server.service.impl;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB.TypeB;
import com.zhiwei.base.filter.FilterInfo;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.AggreInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.AggreeTaskType;
import com.zhiwei.middleware.automatic.server.pojo.enums.Fields;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.AutoService;
import com.zhiwei.middleware.automatic.server.service.handler.BaseTaskHandler;
import com.zhiwei.middleware.automatic.server.util.CosineSimilarity;
import com.zhiwei.middleware.automatic.server.util.DataCollectionUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import com.zhiwei.middleware.mark.vo.QueryResult;
import com.zhiwei.nlp.AggreeBootStarter;
import com.zhiwei.nlp.vo.KResult;
import io.micrometer.core.instrument.util.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.redisson.api.RFuture;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Component;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import static com.zhiwei.middleware.automatic.server.config.GenericAttribute.SON_ID;
import static com.zhiwei.middleware.automatic.server.pojo.enums.Fields.getFields;
import static com.zhiwei.middleware.automatic.server.util.Tools.assembleKey;
import static com.zhiwei.middleware.automatic.server.util.Tools.cutKeyword;
/**
 * @ClassName: DataCollection
 * @Description: Data collection
 * @author SJJ
 * @date 2020-04-08 18:33:15
 */
@Component
public class DataCollection extends BaseTaskHandler {
private static final Logger logger = LogManager.getLogger(DataCollection.class);
// Last key segment of the valid (non-noise) aggregation set.
private static final String VAILD = "VAILD";
// Last key segment of the noise set.
private static final String NOISE = "NOISE";
// Suffix appended after "|" to build the statistics key of a set.
private static final String STATUS = "STATUS";
// Similarity threshold handed to reduceByNoiseTitles when removing noise.
private static final double SIMILAR_STANDARD = 0.8;
/* Thread pool on which aggregation, insert and batch-noise jobs run. */
private final ThreadPoolTaskExecutor START_SERVICE;
private final AutoService autoService;
private final DubboHandler dubboHandler;
private final RedissonUtil redissonUtil;
// Content types iterated when cleaning caches and when inserting.
private final TypeB[] typeBs = new TypeB[] { TypeB.COMPLETE, TypeB.INCOMPLETE, TypeB.QA, TypeB.VIDEO };
/**
 * Creates the handler for {@code AggreeTaskType.DATA} tasks.
 *
 * @param autoService         auto-mark service
 * @param dubboHandler        Dubbo facade
 * @param redissonUtil        Redis helper, also handed to the base class
 * @param aggreeNoiseExecutor executor bean "aggreeNoiseExecutor" used for background jobs
 */
public DataCollection(AutoService autoService, DubboHandler dubboHandler, RedissonUtil redissonUtil,
        @Qualifier("aggreeNoiseExecutor") ThreadPoolTaskExecutor aggreeNoiseExecutor) {
    super(redissonUtil, AggreeTaskType.DATA);
    this.START_SERVICE = aggreeNoiseExecutor;
    this.redissonUtil = redissonUtil;
    this.dubboHandler = dubboHandler;
    this.autoService = autoService;
}
/**
 * Removes the task info and, for every content type, the valid set, its status entry
 * and the noise set, then waits for all deletions to finish.
 * Failures are logged and not propagated.
 *
 * @param group project name
 * @param id    task id
 */
public void cleanCache(String group, String id) {
    try {
        long s = System.currentTimeMillis();
        List<RFuture<Boolean>> futureList = new ArrayList<>();
        removerInfo(group, id);
        for (TypeB typeB : typeBs) {
            futureList.addAll(redissonUtil.deleteListMapByType(vaildKey(group, id, typeB)));
            futureList.add(redissonUtil.deleteByData(vaildStatusKey(group, id, typeB)));
            futureList.addAll(redissonUtil.deleteListMapByType(noiseKey(group, id, typeB)));
        }
        // Convert through the CompletionStage API instead of relying on the RFuture
        // implementation happening to be a CompletableFuture (ArrayStoreException otherwise).
        CompletableFuture<?>[] pending = futureList.stream()
                .map(RFuture::toCompletableFuture)
                .toArray(CompletableFuture[]::new);
        CompletableFuture.allOf(pending).get();
        logger.info("id:{}-清理缓存完毕!耗时:{}ms", id, System.currentTimeMillis() - s);
    } catch (InterruptedException e) {
        // Keep the interrupt visible to the caller's thread.
        Thread.currentThread().interrupt();
        logger.error("id:{}-清理缓存出错,e", id, e);
    } catch (Exception e) {
        logger.error("id:{}-清理缓存出错,e", id, e);
    }
}
/**
 * Removes the task info and, for every content type, the valid set and its status
 * entry, while the noise set is cleaned through
 * {@code deleteListMapRetainNoiseRuleByData}; then waits for all deletions to finish.
 * Failures are logged and not propagated.
 *
 * @param group project name
 * @param id    task id
 */
public void cleanCacheExceptNoise(String group, String id) {
    try {
        long s = System.currentTimeMillis();
        List<RFuture<Boolean>> futureList = new ArrayList<>();
        removerInfo(group, id);
        for (TypeB typeB : typeBs) {
            futureList.addAll(redissonUtil.deleteListMapByType(vaildKey(group, id, typeB)));
            futureList.add(redissonUtil.deleteByData(vaildStatusKey(group, id, typeB)));
            futureList.add(redissonUtil.deleteListMapRetainNoiseRuleByData(noiseKey(group, id, typeB)));
        }
        // Convert through the CompletionStage API instead of relying on the RFuture
        // implementation happening to be a CompletableFuture (ArrayStoreException otherwise).
        CompletableFuture<?>[] pending = futureList.stream()
                .map(RFuture::toCompletableFuture)
                .toArray(CompletableFuture[]::new);
        CompletableFuture.allOf(pending).get();
        logger.info("id:{}-清理缓存(保留噪音部分)完毕!耗时:{}ms", id, System.currentTimeMillis() - s);
    } catch (InterruptedException e) {
        // Keep the interrupt visible to the caller's thread.
        Thread.currentThread().interrupt();
        logger.error("id:{}-清理缓存(保留噪音部分)完毕出错,e", id, e);
    } catch (Exception e) {
        logger.error("id:{}-清理缓存(保留噪音部分)完毕出错,e", id, e);
    }
}
/**
 * Adds compressed source records to the data source of a task.
 * A {@code null} or empty list is logged and ignored; failures are logged and not propagated.
 *
 * @param group          project name
 * @param id             task id
 * @param compressedlist records to add
 */
public void addDataCollection(String group, String id, List<String> compressedlist) {
    try {
        boolean hasData = compressedlist != null && !compressedlist.isEmpty();
        if (hasData) {
            addDataSource(group, id, compressedlist);
        } else {
            logger.info("id:{}-传入数据源为null或empty!", id);
        }
    } catch (Exception e) {
        logger.error("addDataCollection-", e);
    }
}
/**
 * Submits the aggregation job of a task to the background executor.
 * <p>
 * The job returns early when a task record already exists for (group, id) or when no
 * source data remains after filtering. Otherwise it stores an "unfinished" task record,
 * then per content type removes noise, aggregates, checks existing marks, auto-marks,
 * caches the aggregation, and finally stores the totals. On any exception the task
 * record is stored with a {@code null} "finished" flag.
 *
 * @param group     project name
 * @param id        task id
 * @param highWords keywords handed to the per-type {@code AggreeHandler}
 */
public void startAggree(String group, String id, String highWords) {
    logger.info("DEBUG-group:{},id:{}", group, id);
    START_SERVICE.execute(() -> {
        AggreInfo aggreInfo = new AggreInfo(false, false);
        try {
            if (null != getAggreeTask(group, id)) {
                logger.info("id:{}-正在聚合请等待...", id);
                return;
            }
            // Load all source records and drop those of the excluded c5 value.
            // equals() on the boxed value keeps records without "c5" instead of failing
            // the whole job with an unboxing NullPointerException.
            final Long excludedC5 = 143657937L;
            List<JSONObject> sourceList = getDataSource(group, id).stream()
                    .map(e -> JSONObject.parseObject(Tools.gunzip(e)))
                    .filter(json -> !excludedC5.equals(json.getLong("c5")))
                    .collect(Collectors.toList());
            if (sourceList.isEmpty()) {
                logger.info("id:{}-缓存数据源未准备!", id);
                return;
            }
            addAggreeTask(group, id, aggreInfo);
            logger.info("id:{}启动聚合...", id);
            AtomicInteger noiseTotal = new AtomicInteger();
            AtomicInteger markedTotal = new AtomicInteger();
            AtomicInteger titleFatherTotal = new AtomicInteger();
            // Split by content type and process each type on its own.
            divid(sourceList).forEach((typeB, data) -> {
                AggreeHandler handler = new AggreeHandler(data, id, group, highWords);
                // Remove records matched by the noise rules.
                int noiseCount = handler.reduceByNoise();
                // Aggregate.
                Map<TemplateFatherVo, List<JSONObject>> aggreMap = handler.aggree();
                // Check whether the aggregated sets already carry a mark tag (via the mark interface).
                handler.checkHasMarked(aggreMap);
                // Auto-mark against the existing auto-mark template set.
                int markCount = handler.autoMarkByCurrentCollection(aggreMap.keySet());
                // Statistics.
                handler.statisticsCache(markCount, aggreMap.size());
                noiseTotal.addAndGet(noiseCount);
                markedTotal.addAndGet(markCount);
                titleFatherTotal.addAndGet(aggreMap.size());
                // Cache the aggregation.
                redissonUtil.setListMapAllWithGzipCustom(vaildKey(group, id, typeB), aggreMap);
            });
            // Update the task record and sync it to Redis.
            aggreInfo.setAll(true, false, sourceList.size(), noiseTotal.get(), titleFatherTotal.get(),
                    markedTotal.get());
            addAggreeTask(group, id, aggreInfo);
            logger.info("id:{}-数据采集聚合完成 ", id);
        } catch (Exception e) {
            // A null "finished" flag marks the task as failed (see getAggreResultNow).
            aggreInfo.setAggreFinshed(null);
            addAggreeTask(group, id, aggreInfo);
            logger.error("id:{}-数据采集聚合异常", id, e);
        }
    });
}
/**
 * Applies {@link #modifyFatherTag} to every given father template.
 * Every id is attempted even when an earlier one failed.
 *
 * @param group     project name
 * @param id        task id
 * @param fatherIds ids of the father templates to change
 * @param mtag      new tag
 * @param mperson   person recorded as marker
 * @param typeB     content type
 * @return {@code false} when {@code fatherIds} is null/empty or any modification failed
 */
public boolean batchModifyFatherTag(String group, String id, List<String> fatherIds, String mtag, String mperson,
        TypeB typeB) {
    if (null == fatherIds || fatherIds.isEmpty()) {
        return false;
    }
    boolean res = true;
    for (String fatherId : fatherIds) {
        // Call first: "res && modify(...)" skipped all remaining ids after one failure.
        res = modifyFatherTag(group, id, fatherId, mtag, mperson, typeB) && res;
    }
    return res;
}
/**
 * Changes the tag, mark time and marker of one father template and writes it back.
 * A missing template is logged and counted as success.
 *
 * @param group    project name
 * @param id       task id
 * @param fatherId id of the father template
 * @param mtag     new tag
 * @param mperson  person recorded as marker
 * @param typeB    content type
 * @return {@code false} only when an exception occurred
 */
public boolean modifyFatherTag(String group, String id, String fatherId, String mtag, String mperson, TypeB typeB) {
    try {
        String key = vaildKey(group, id, typeB);
        TemplateFatherVo father = getTemplateFatherVo(key, fatherId);
        if (father != null) {
            Fields fields = getFields(typeB);
            JSONObject example = father.getExample();
            // Read the old values before they are overwritten; they are only used for the log.
            String previousTag = example.getString(fields.mtag);
            String title = example.getString(fields.title);
            example.put(fields.mtag, mtag);
            example.put(fields.mtime, System.currentTimeMillis());
            example.put(fields.mperson, mperson);
            redissonUtil.replaceListMapKeySetByScore(key, fatherId, father);
            logger.info("id:{}-修改父模板标题{}条,oldMtag:{},newMtag:{},模板标题:{},标注人:{}", id, father.getTotalSon() + 1,
                    previousTag, mtag, title, mperson);
        } else {
            logger.info("id:{},fatherId:{}-父聚模板不存在!", id, fatherId);
        }
        return true;
    } catch (Exception e) {
        logger.error("modifyFatherTag-", e);
        return false;
    }
}
/**
 * Gives one son record a tag that differs from its father template by moving the
 * record out of the father's set into a newly created father template.
 * <p>
 * Returns {@code true} without changes when the father template or its records are
 * missing, when the new tag equals the father's tag, or when the son is not found.
 * Returns {@code false} only when an exception occurred.
 *
 * @param group    project name
 * @param id       task id
 * @param fatherId id of the father template that currently holds the son
 * @param sonId    value of the record's SON_ID field
 * @param mtag     new tag
 * @param mperson  person recorded as marker
 * @param typeB    content type
 * @return see above
 */
public boolean modifySonTag(String group, String id, String fatherId, String sonId, String mtag, String mperson,
TypeB typeB) {
try {
String vaildKey = vaildKey(group, id, typeB);
TemplateFatherVo templateFatherVo = getTemplateFatherVo(vaildKey, fatherId);
List<JSONObject> list = redissonUtil.getListMapValueByFieldWithGunZipByData(vaildKey, fatherId, JSONObject.class);
if (null == templateFatherVo || list.isEmpty()) {
logger.info("id:{},fatherId:{}-父聚合集不存在!", id, fatherId);
return true;
}
Fields fields = getFields(typeB);
// Tag of the father template, used as the reference for comparison.
JSONObject templateVo = templateFatherVo.getExample();
String standardMtag = templateVo.getString(fields.mtag);
if (mtag.equals(standardMtag)) {
logger.info("id:{}-待修改子标签与父标签一致!mtag:{}", id, mtag);
return true;
}
JSONObject hitJSON = null;
for (int i = 0; i < list.size(); i++) {
JSONObject json = list.get(i);
// Locate the record by its sonId.
if (sonId.equals(json.getString(SON_ID))) {
json.put(fields.mtag, mtag);
json.put(fields.mtime, System.currentTimeMillis());
json.put(fields.mperson, mperson);
hitJSON = json;
// Removing inside the index loop is safe only because the loop ends right after.
list.remove(i);
break;
}
}
if (null != hitJSON) {
// NOTE(review): getStatus may return null and the count may be absent (statusIncrease
// tolerates both); either would raise an NPE here that the catch turns into "false" - confirm.
JSONObject status = getStatus(vaildStatusKey(group, id, typeB));
Integer titleFatherCount = status.getInteger(AggreeHandler.TITLE_FATHER_COUNT);
// Create the new father template; its id is the current father count plus one.
TemplateFatherVo fatherVo = getNewInstance(hitJSON, fields, 1, String.valueOf(titleFatherCount + 1));
// Add the key mapping.
redissonUtil.addListMapKeyByData(vaildKey, fatherVo.getFatherId(), fatherVo);
// Add the value mapping.
redissonUtil.addListMapValueOneByData(vaildKey, fatherVo.getFatherId(), fatherVo.getExample());
// Update the status: one more father template, record total unchanged.
statusIncrease(vaildStatusKey(group, id, typeB), status, 0);
// Update the old father's key entry.
redissonUtil.reduceListMapKeyByScoreCustomByData(vaildKey, fatherId);
// Write back the old father's record list without the moved son.
redissonUtil.setListMapValueByFieldWithGZipByData(vaildKey, fatherId, list);
logger.info("id:{}-修改子模板标题并新建父集:{},oldMtag:{},newMtag:{}", id, hitJSON.getString(fields.title),
standardMtag, mtag);
} else {
logger.info("id:{}-未找到子模板标题,fatherId:{},sonId:{}", id, fatherId, sonId);
}
} catch (Exception e) {
logger.error("modifySonTag-", e);
return false;
}
return true;
}
/**
 * Moves several father templates into the noise set, one background job per template,
 * and waits for all of them.
 *
 * @param group     project name
 * @param id        task id
 * @param fatherIds ids of the father templates to move
 * @param typeB     content type
 * @return {@code true} only when every {@link #throwIntoNoise} call reported success
 */
public boolean batchThrowIntoNoise(String group, String id, List<String> fatherIds, TypeB typeB) {
    try {
        // NOTE(review): the jobs run in parallel and each one reads and rewrites the same
        // status keys (statusReduce/statusIncrease); updates may get lost - confirm.
        List<CompletableFuture<Boolean>> results = fatherIds.stream()
                .map(fatherId -> CompletableFuture.supplyAsync(() -> throwIntoNoise(group, id, fatherId, typeB), START_SERVICE))
                .collect(Collectors.toList());
        CompletableFuture.allOf(results.toArray(new CompletableFuture[0])).join();
        logger.info("id:{}-fatherIds:{},typeB:{}-丢入噪音集", id, fatherIds.toString(), typeB);
        // The per-item results used to be dropped, so failures were reported as success.
        boolean allSucceeded = true;
        for (CompletableFuture<Boolean> result : results) {
            allSucceeded = result.join() && allSucceeded;
        }
        return allSucceeded;
    } catch (Exception e) {
        logger.error("batchThrowIntoNoise-", e);
        return false;
    }
}
/**
 * Moves one father template from the valid set into the noise set.
 *
 * @param group    project name
 * @param id       task id
 * @param fatherId id of the father template
 * @param typeB    content type
 * @return {@code false} only when an exception occurred
 */
public boolean throwIntoNoise(String group, String id, String fatherId, TypeB typeB) {
    try {
        final String validKey = vaildKey(group, id, typeB);
        // Source is the valid set, target is the noise set.
        final boolean moved = vaildNoiseChange(fatherId, validKey, validKey,
                noiseKey(group, id, typeB),
                vaildStatusKey(group, id, typeB),
                noiseStatusKey(group, id, typeB));
        if (moved) {
            logger.info("id:{}-fatherId:{},typeB:{}-丢入噪音集", id, fatherId, typeB);
        }
        return true;
    } catch (Exception e) {
        logger.error("throwIntoNoise-", e);
        return false;
    }
}
/**
 * Moves one father template from the noise set back into the valid set.
 *
 * @param group    project name
 * @param id       task id
 * @param fatherId id of the father template
 * @param typeB    content type
 * @return {@code false} only when an exception occurred
 */
public boolean restoreFromNoise(String group, String id, String fatherId, TypeB typeB) {
    try {
        final String validKey = vaildKey(group, id, typeB);
        // Source is the noise set, target is the valid set.
        final boolean moved = vaildNoiseChange(fatherId, validKey,
                noiseKey(group, id, typeB), validKey,
                noiseStatusKey(group, id, typeB),
                vaildStatusKey(group, id, typeB));
        if (moved) {
            logger.info("id:{}-fatherId:{},typeB:{}-从噪音集移除", id, fatherId, typeB);
        }
        return true;
    } catch (Exception e) {
        logger.error("restoreFromNoise-", e);
        return false;
    }
}
/**
 * Returns one page of father templates of the valid set together with its statistics.
 *
 * @return result map built by the key-based {@code getFatherTitles} overload
 */
public Map<String, Object> getFatherTitles(String group, String id, int page, int size, boolean isAsc,
        String keyword, TypeB typeB, boolean isTitle, int markFlag) {
    final String dataKey = vaildKey(group, id, typeB);
    final String statusKey = vaildStatusKey(group, id, typeB);
    return getFatherTitles(dataKey, statusKey, page, size, isAsc, keyword, isTitle, markFlag);
}
/**
 * Returns one page of son records of a father template, read from the valid set.
 *
 * @return result map built by the key-based {@code getSonTitles} overload
 */
public Map<String, Object> getSonTitles(String group, String id, String fatherId, int page, int size, boolean isAsc,
        String keyword, TypeB typeB) {
    final String dataKey = vaildKey(group, id, typeB);
    return getSonTitles(dataKey, fatherId, page, size, isAsc, keyword, typeB);
}
/**
 * Returns one page of father templates of the noise set together with its statistics.
 *
 * @return result map built by the key-based {@code getFatherTitles} overload
 */
public Map<String, Object> getNoiseFatherTitles(String group, String id, int page, int size, boolean isAsc,
        String keyword, TypeB typeB, boolean isTitle, int markFlag) {
    final String dataKey = noiseKey(group, id, typeB);
    final String statusKey = noiseStatusKey(group, id, typeB);
    return getFatherTitles(dataKey, statusKey, page, size, isAsc, keyword, isTitle, markFlag);
}
/**
 * Returns one page of son records of a noise father template.
 * Delegates unchanged to {@code getSonTitles(group, id, fatherId, ...)}.
 *
 * @return result map of the delegate
 */
public Map<String, Object> getNoiseSonTitles(String group, String id, String fatherId, int page, int size,
        boolean isAsc, String keyword, TypeB typeB) {
    final Map<String, Object> sons = getSonTitles(group, id, fatherId, page, size, isAsc, keyword, typeB);
    return sons;
}
/**
 * Submits the job that writes the aggregated, tagged data of a task to the mark store.
 * <p>
 * Without a task record the job stores a record with both flags {@code null} and stops.
 * When the record is already flagged as inserted the job stops. Otherwise every father
 * template of every content type is upserted together with its sons when it has a tag,
 * and the record is flagged as inserted.
 *
 * @param group project name
 * @param id    task id
 */
public void checkedThenInsert(String group, String id) {
    START_SERVICE.execute(() -> {
        try {
            AggreInfo aggreInfo = getAggreeTask(group, id);
            if (null == aggreInfo) {
                logger.info("id:{}-尚未聚合,无法入库!", id);
                // Store a record with null flags so that status queries report the error state.
                aggreInfo = new AggreInfo(null, null);
                addAggreeTask(group, id, aggreInfo);
                return;
            }
            if (Boolean.TRUE.equals(aggreInfo.isInserted())) {
                logger.info("id:{}-已经入库完毕!", id);
                return;
            }
            AtomicInteger markedTotal = new AtomicInteger();
            for (TypeB typeB : typeBs) {
                String vaildKey = vaildKey(group, id, typeB);
                Fields fields = getFields(typeB);
                List<TemplateFatherVo> fatherTitles = redissonUtil.getScoredSortedList(vaildKey,
                        TemplateFatherVo.class);
                for (TemplateFatherVo fatherVo : fatherTitles) {
                    JSONObject example = fatherVo.getExample();
                    List<JSONObject> sonList = redissonUtil.getListMapValueByFieldWithGunZipByData(vaildKey,
                            fatherVo.getFatherId(), JSONObject.class);
                    // The father's own example is inserted together with its sons.
                    sonList.add(example);
                    if (DataCollectionUtil.hasTag(fatherVo, fields)) {
                        int upserted = dubboHandler.dataCollectionUpsert(sonList, group,
                                example.getString(fields.mtag), example.getString(fields.mperson));
                        markedTotal.addAndGet(upserted);
                    }
                    if (StringUtils.isEmpty(example.getString(fields.mperson))) {
                        logger.info("id:{}-父标题:{}缺失mperson", id, fatherVo.getTitle());
                    }
                }
            }
            // Flag the task as inserted.
            aggreInfo.setInserted(true);
            addAggreeTask(group, id, aggreInfo);
            logger.info("id:{}-入库完毕,共入标注库{}条", id, markedTotal.get());
        } catch (Exception e) {
            logger.error("id:{}-入库异常", id, e);
        }
    });
}
/**
 * Reports the aggregation state of a task.
 *
 * @param group project name
 * @param id    task id
 * @return -1 when no task record exists, -2 when the "finished" flag is {@code null} or
 *         an exception occurred, 0 when not finished, 1 when finished
 */
public int getAggreResultNow(String group, String id) {
    try {
        AggreInfo aggreInfo = getAggreeTask(group, id);
        if (null == aggreInfo) {
            return -1;
        }
        Boolean finished = aggreInfo.isAggreFinshed();
        if (null == finished) {
            logger.info("id:{},聚合结果返回错误状态-2,info:{}", id, JSON.toJSONString(aggreInfo));
            return -2;
        }
        return finished ? 1 : 0;
    } catch (Exception e) {
        logger.error("id:{},getAggreResultNow-", id, e);
        return -2;
    }
}
/**
 * Reports the insert state of a task.
 *
 * @param group project name
 * @param id    task id
 * @return -1 when no task record exists, -2 when the "inserted" flag is {@code null} or
 *         an exception occurred, 0 when not inserted, 1 when inserted
 */
public int getInsertResultNow(String group, String id) {
    try {
        AggreInfo aggreInfo = getAggreeTask(group, id);
        if (null == aggreInfo) {
            return -1;
        }
        Boolean inserted = aggreInfo.isInserted();
        if (null == inserted) {
            logger.info("id:{},入库结果返回错误状态-2,info:{}", id, JSON.toJSONString(aggreInfo));
            return -2;
        }
        return inserted ? 1 : 0;
    } catch (Exception e) {
        logger.error("id:{},getInsertResultNow-", id, e);
        return -2;
    }
}
/**
 * Loads one father template from the key set stored under {@code key}.
 *
 * @param key      key of the set
 * @param fatherId id of the father template
 * @return whatever {@code redissonUtil.getListMapKeySetByScore} yields for that id
 */
private TemplateFatherVo getTemplateFatherVo(String key, String fatherId) {
    final TemplateFatherVo father = redissonUtil.getListMapKeySetByScore(key, fatherId, TemplateFatherVo.class);
    return father;
}
/**
 * Loads the statistics JSON stored under a status key.
 *
 * @param statuskey status key
 * @return the stored JSON as returned by {@code redissonUtil.getStrByData}
 */
private JSONObject getStatus(String statuskey) {
    final JSONObject status = redissonUtil.getStrByData(statuskey, JSONObject.class);
    return status;
}
/**
 * Builds a father template from a single record.
 * <p>
 * The record becomes the template's example; its {@code AggreeHandler.HITWORD_RATE}
 * entry is removed from the record and stored on the template instead.
 *
 * @param json     record used as example
 * @param fields   field names of the record's content type
 * @param size     value handed to {@code redissonUtil.generateScore}
 * @param fatherId id of the new template
 * @return the new template with zero sons
 */
@SuppressWarnings("unchecked")
private TemplateFatherVo getNewInstance(JSONObject json, Fields fields, Integer size, String fatherId) {
    String title = json.getString(fields.title);
    String content = json.getString(fields.content);
    TemplateFatherVo created = new TemplateFatherVo(title, content);
    created.setExample(json);
    // Assign the id before the score is generated.
    created.setFatherId(fatherId);
    redissonUtil.generateScore(created, size);
    created.setTotalSon(0);
    List<Map<String, Integer>> hitWordAndRate = (List<Map<String, Integer>>) json.remove(AggreeHandler.HITWORD_RATE);
    created.setHitWordAndRate(hitWordAndRate);
    return created;
}
/**
 * Moves a father template between the valid and the noise set and adjusts both
 * statistics entries by the number of records found for the template.
 *
 * @param fatherId     id of the father template
 * @param vaildkey     key under which the template's records are looked up
 * @param oldkey       key of the set the template leaves
 * @param newKey       key of the set the template joins
 * @param oldStatusKey statistics key of the set the template leaves
 * @param newStatusKey statistics key of the set the template joins
 * @return {@code false} when no records were found and nothing was moved
 * @throws Exception propagated from the Redis helper
 */
private boolean vaildNoiseChange(String fatherId, String vaildkey, String oldkey, String newKey,
        String oldStatusKey, String newStatusKey) throws Exception {
    List<JSONObject> vaildList = redissonUtil.getListMapValueByFieldWithGunZipByData(vaildkey, fatherId,
            JSONObject.class);
    final boolean found = !vaildList.isEmpty();
    if (found) {
        // Move the template, then shift its record count between the two statistics.
        redissonUtil.transferListMapKeySetFromOld2New(oldkey, newKey, fatherId);
        statusReduce(oldStatusKey, vaildList.size());
        statusIncrease(newStatusKey, vaildList.size());
    } else {
        logger.info("噪音集移动-搜索数据为空,fatherId:{},key:{}", fatherId, vaildkey);
    }
    return found;
}
/**
 * Reads one page of father templates from the set stored under {@code key}.
 * <p>
 * Without keyword and with {@code markFlag == 0} the page is read directly by index
 * range; otherwise the whole set is loaded, filtered with
 * {@code DataCollectionUtil.fuzzyMatch} and paged in memory.
 *
 * @param key       key of the set
 * @param statusKey key of the set's statistics
 * @param page      1-based page number
 * @param size      page size
 * @param isAsc     not used by this method
 * @param keyword   filter keyword, may be empty
 * @param isTitle   forwarded to the fuzzy match
 * @param markFlag  forwarded to the fuzzy match; 0 means no mark filter
 * @return map with "status" and, on success, "totalSize", "data" and "statistics";
 *         "status" stays {@code false} for a non-positive page/size or on an exception
 *         raised before it is set
 */
private Map<String, Object> getFatherTitles(String key, String statusKey, int page, int size, boolean isAsc,
        String keyword, boolean isTitle, int markFlag) {
    Map<String, Object> res = new HashMap<>(3);
    res.put("status", false);
    if (page <= 0 || size <= 0) {
        return res;
    }
    try {
        int total = redissonUtil.getListMapKeySetSize(key);
        List<TemplateFatherVo> pageData;
        if (!StringUtils.isEmpty(keyword) || markFlag != 0) {
            // Filtered: match against the full set, then cut the page out of the matches.
            List<TemplateFatherVo> matches = DataCollectionUtil.fuzzyMatch(
                    redissonUtil.getScoredSortedList(key, TemplateFatherVo.class), keyword, isTitle, markFlag);
            total = matches.size();
            pageData = DataCollectionUtil.getList(matches, page, size);
        } else {
            // Unfiltered: read the index range of the page directly.
            int fromIndex = (page - 1) * size;
            int toIndex = page * size - 1;
            pageData = redissonUtil.getListMapKeySet(key, fromIndex, toIndex, TemplateFatherVo.class);
        }
        res.put("totalSize", total);
        res.put("status", true);
        res.put("data", Tools.bean2JSON(pageData));
        res.put("statistics", redissonUtil.getStrByData(statusKey, JSONObject.class));
    } catch (Exception e) {
        logger.error("getFatherTitles-", e);
    }
    return res;
}
/**
 * Reads one page of son records of a father template.
 * <p>
 * The first stored record serves as the template and is skipped, so the page range is
 * shifted by one and the reported total is reduced by one. When the father template
 * exists, its tag and marker are returned and handed to
 * {@code DataCollectionUtil.supplementForInsert} together with its group.
 *
 * @param key      key of the set
 * @param fieldKey id of the father template
 * @param page     1-based page number
 * @param size     page size
 * @param isAsc    not used by this method
 * @param keyword  not used by this method
 * @param typeB    content type, selects the field names
 * @return map with "status" and, on success, "totalSize", "data" and optionally
 *         "mtag"/"mperson"
 */
private Map<String, Object> getSonTitles(String key, String fieldKey, int page, int size, boolean isAsc,
        String keyword, TypeB typeB) {
    Map<String, Object> res = new HashMap<>(3);
    res.put("status", false);
    if (page <= 0 || size <= 0) {
        return res;
    }
    try {
        int total = redissonUtil.getListMapValueByFieldSize(key, fieldKey);
        // Skip index 0, which holds the template record.
        int fromIndex = (page - 1) * size + 1;
        int toIndex = page * size;
        List<JSONObject> data = redissonUtil.getRangeListMapValueByFieldWithGunZip(key, fieldKey, fromIndex,
                toIndex, JSONObject.class);
        TemplateFatherVo fatherVo = getTemplateFatherVo(key, fieldKey);
        if (fatherVo != null) {
            Fields fields = getFields(typeB);
            JSONObject example = fatherVo.getExample();
            String mtag = example.getString(fields.mtag);
            String mperson = example.getString(fields.mperson);
            String mgroup = example.getString(fields.mgroup);
            res.put("mtag", mtag);
            res.put("mperson", mperson);
            DataCollectionUtil.supplementForInsert(data, mgroup, mtag, mperson);
        }
        int sonTotal = total > 0 ? total - 1 : 0;
        res.put("totalSize", sonTotal);
        res.put("status", true);
        res.put("data", data);
    } catch (Exception e) {
        logger.error("getSonTitles-", e);
    }
    return res;
}
/**
 * Builds the key of the valid set: prefix, group, id, type name and {@code VAILD}.
 */
private String vaildKey(String group, String id, TypeB typeB) {
    final String prefix = getKeyPrefix();
    final String typeName = typeB.name();
    return assembleKey(prefix, group, id, typeName, VAILD);
}
/**
 * Builds the statistics key of the valid set: the valid-set key followed by "|" and {@code STATUS}.
 */
private String vaildStatusKey(String group, String id, TypeB typeB) {
    final String prefix = getKeyPrefix();
    final String typeName = typeB.name();
    final String setKey = assembleKey(prefix, group, id, typeName, VAILD);
    return setKey + "|" + STATUS;
}
/**
 * Builds the key of the noise set: prefix, group, id, type name and {@code NOISE}.
 */
private String noiseKey(String group, String id, TypeB typeB) {
    final String prefix = getKeyPrefix();
    final String typeName = typeB.name();
    return assembleKey(prefix, group, id, typeName, NOISE);
}
/**
 * Builds the statistics key of the noise set: the noise-set key followed by "|" and {@code STATUS}.
 */
private String noiseStatusKey(String group, String id, TypeB typeB) {
    final String prefix = getKeyPrefix();
    final String typeName = typeB.name();
    final String setKey = assembleKey(prefix, group, id, typeName, NOISE);
    return setKey + "|" + STATUS;
}
/**
 * Groups records by the content type that {@code Tools.getTypeB} reports for them.
 *
 * @param sourceList records to group
 * @return records per content type, in encounter order within each type
 */
private static Map<TypeB, List<JSONObject>> divid(List<JSONObject> sourceList) {
    Map<TypeB, List<JSONObject>> grouped = new HashMap<>(4);
    for (JSONObject json : sourceList) {
        grouped.computeIfAbsent(Tools.getTypeB(json), ignored -> new ArrayList<>()).add(json);
    }
    return grouped;
}
/**
 * Lowers the statistics of a set by one father template and {@code listSize} records.
 * A missing statistics entry is treated as all zeros.
 *
 * @param statusKey statistics key
 * @param listSize  number of records leaving the set
 */
private void statusReduce(String statusKey, int listSize) {
    JSONObject stored = redissonUtil.getStrByData(statusKey, JSONObject.class);
    JSONObject status = (null == stored) ? new JSONObject() : stored;
    int fatherCount = status.getIntValue(AggreeHandler.TITLE_FATHER_COUNT);
    status.put(AggreeHandler.TITLE_FATHER_COUNT, fatherCount - 1);
    int totalCount = status.getIntValue(AggreeHandler.TOTAL_COUNT);
    status.put(AggreeHandler.TOTAL_COUNT, totalCount - listSize);
    redissonUtil.setStrByData(statusKey, status);
}
/**
 * Loads the statistics stored under {@code statusKey} and raises them by one father
 * template and {@code listSize} records.
 *
 * @param statusKey statistics key
 * @param listSize  number of records joining the set
 */
private void statusIncrease(String statusKey, int listSize) {
    final JSONObject stored = redissonUtil.getStrByData(statusKey, JSONObject.class);
    statusIncrease(statusKey, stored, listSize);
}
/**
 * Raises the given statistics by one father template and {@code listSize} records and
 * stores them under {@code statusKey}. A {@code null} JSON is treated as all zeros.
 *
 * @param statusKey statistics key
 * @param json      current statistics, may be {@code null}
 * @param listSize  number of records joining the set
 */
private void statusIncrease(String statusKey, JSONObject json, int listSize) {
    JSONObject status = (null == json) ? new JSONObject() : json;
    int fatherCount = status.getIntValue(AggreeHandler.TITLE_FATHER_COUNT);
    status.put(AggreeHandler.TITLE_FATHER_COUNT, fatherCount + 1);
    int totalCount = status.getIntValue(AggreeHandler.TOTAL_COUNT);
    status.put(AggreeHandler.TOTAL_COUNT, totalCount + listSize);
    redissonUtil.setStrByData(statusKey, status);
}
class AggreeHandler {
// JSON key under which a record's hit-word/frequency list is stored.
public static final String HITWORD_RATE = "hitWordAndRate";
// Keys of the counters kept in the cached status JSON.
public static final String TOTAL_COUNT = "totalCount";
public static final String TITLE_FATHER_COUNT = "titleFatherCount";
public static final String MARK_COUNT = "markCount";
// Type of the batch, taken from the first record passed to the constructor.
private final TypeB typeB;
// Field-name mapping obtained from getFields(typeB); supplies title/content/mtag/mperson names.
private final Fields fields;
private final String id;
private final String group;
// Result of cutKeyword(highWords): each inner list is a word combination whose words must all hit.
private final List<List<String>> highWordList;
// Records being processed; replaced by reduceByNoise() with the surviving records.
private List<JSONObject> data;
public AggreeHandler(List<JSONObject> data, String id, String group, String highWords) {
if (null == data || data.isEmpty()) {
throw new IllegalArgumentException("data can not be null or empty!");
}
this.data = data;
this.id = id;
this.group = group;
this.highWordList = cutKeyword(highWords);
this.typeB = Tools.getTypeB(data.get(0));
this.fields = getFields(typeB);
}
/**
 * Removes records that match the stored noise rules.
 * <p>
 * Batches of type {@code INCOMPLETE} are left untouched.
 *
 * @return number of records removed from {@code data}
 */
public int reduceByNoise() {
    if (TypeB.INCOMPLETE == typeB) {
        logger.info("id:{}-【INCOMPLETE】-不做去噪处理,剩余聚合数据{}条", id, data.size());
        return 0;
    }
    final int sizeBefore = data.size();
    List<TemplateFatherVo> noiseRules =
            redissonUtil.getListMapNoiseRule(noiseKey(group, id, typeB), TemplateFatherVo.class);
    data = reduceByNoiseTitles(noiseRules, data, SIMILAR_STANDARD, noiseKey(group, id, typeB),
            redissonUtil, fields);
    final int removed = sizeBefore - data.size();
    logger.info("id:{}-【{}】-根据噪音规则移除源数据{}条,剩余聚合数据{}条", id, typeB.name(), removed, data.size());
    return removed;
}
/**
 * Runs auto-marking against the existing auto-mark collection via
 * {@code autoService.noiseAutoMark}.
 * <p>
 * Batches of type {@code INCOMPLETE} or {@code QA} are skipped.
 *
 * @param set father templates to auto-mark
 * @return number of hits reported by the auto-mark service, or 0 when skipped
 */
public int autoMarkByCurrentCollection(Set<TemplateFatherVo> set) {
    boolean skipped = TypeB.INCOMPLETE == typeB || TypeB.QA == typeB;
    if (skipped) {
        logger.info("id:{}-【{}】-不做自动标注", id, typeB);
        return 0;
    }
    final int hits = autoService.noiseAutoMark(set, group, fields.title);
    logger.info("id:{}-【{}】-根据现有自动标注聚合集命中{}条", id, typeB.name(), hits);
    return hits;
}
/**
 * Short-text aggregation: clusters records whose content strings are identical.
 *
 * @param res      receives one entry per cluster (father template to member records)
 * @param fatherId counter that supplies the father id of each new cluster
 */
private void incompleteAggree(Map<TemplateFatherVo, List<JSONObject>> res, AtomicInteger fatherId) {
    List<String> contents = new ArrayList<>(data.size());
    for (JSONObject record : data) {
        contents.add(record.getString(fields.content));
    }
    Map<String, List<Integer>> clusters = incompleteTextAggree(contents);
    for (Map.Entry<String, List<Integer>> cluster : clusters.entrySet()) {
        TemplateFatherVo fatherVo = new TemplateFatherVo(null, cluster.getKey());
        List<JSONObject> members = new ArrayList<>();
        for (Integer index : cluster.getValue()) {
            JSONObject record = data.get(index);
            // Treated as original unless the record explicitly says it is a forward.
            fatherVo.setForward(Boolean.TRUE.equals(record.getBoolean("is_forward")));
            if (members.isEmpty()) {
                // The first member also initialises the father template.
                record.put(AggreeHandler.HITWORD_RATE, fatherVoInit(fatherVo, record, fatherId));
            } else {
                // Remaining members only get the word-frequency match.
                record.put(AggreeHandler.HITWORD_RATE, getHitWordAndRate(record));
            }
            members.add(record);
        }
        res.put(fatherVo, members);
    }
}
/**
 * Groups the positions of identical strings.
 *
 * @param list texts to group; may be null or empty
 * @return map from each distinct text to the ascending list of indexes where it occurs;
 *         an empty map when {@code list} is null or empty
 */
private Map<String, List<Integer>> incompleteTextAggree(List<String> list) {
    if (list == null || list.isEmpty()) {
        return Collections.emptyMap();
    }
    Map<String, List<Integer>> positions = new HashMap<>();
    int index = 0;
    for (String text : list) {
        positions.computeIfAbsent(text, key -> new ArrayList<>()).add(index);
        index++;
    }
    return positions;
}
/**
 * Splits {@code sourceList} into records that match a noise title and records that do not.
 * <p>
 * A record matches the first noise title whose similarity to the record's symbol-filtered
 * title is at least {@code cosFreq}. Matching records are appended to the noise collection
 * stored under {@code noiseKey}.
 *
 * @param noiseTitles  noise rules; when null or empty {@code sourceList} is returned unchanged
 * @param sourceList   records to filter
 * @param cosFreq      similarity threshold (inclusive)
 * @param noiseKey     cache key of the noise collection to update
 * @param redissonUtil cache accessor used for the update
 * @param fields       field-name mapping used to read the record title
 * @return the records that matched no noise title
 */
private List<JSONObject> reduceByNoiseTitles(List<TemplateFatherVo> noiseTitles, List<JSONObject> sourceList,
        double cosFreq, String noiseKey, RedissonUtil redissonUtil, Fields fields) {
    // No noise rules yet: nothing to filter.
    if (noiseTitles == null || noiseTitles.isEmpty()) {
        return sourceList;
    }
    List<JSONObject> kept = new ArrayList<>();
    Map<String, List<JSONObject>> noiseHits = new HashMap<>();
    for (JSONObject record : sourceList) {
        String recordTitle = Tools.filterSymbol(record.getString(fields.title));
        String matchedNoise = null;
        for (TemplateFatherVo rule : noiseTitles) {
            String candidate = rule.getTitle();
            if (null == candidate) {
                continue;
            }
            // Reaching the similarity threshold classifies the record as noise.
            if (CosineSimilarity.calculateTextSimWithBrand(recordTitle,
                    Tools.filterSymbol(candidate)) >= cosFreq) {
                matchedNoise = candidate;
                break;
            }
        }
        if (null != matchedNoise) {
            noiseHits.computeIfAbsent(matchedNoise, key -> new ArrayList<>()).add(record);
        } else {
            kept.add(record);
        }
    }
    // Push the newly found noise records into the stored noise collection.
    redissonUtil.addListMapWithGzip(noiseKey, noiseHits);
    return kept;
}
/**
 * Long-text aggregation: clusters records by their symbol-filtered titles using
 * {@code AggreeBootStarter.getKResult}.
 *
 * @param res      receives one entry per cluster (father template to member records)
 * @param fatherId counter that supplies the father id of each new cluster
 */
private void completeAggree(Map<TemplateFatherVo, List<JSONObject>> res, AtomicInteger fatherId) {
    List<String> titles = new ArrayList<>(data.size());
    for (JSONObject record : data) {
        titles.add(Tools.filterSymbol(record.getString(fields.title)));
    }
    List<KResult<Integer>> clusters = AggreeBootStarter.getKResult(titles, 0.1);
    for (KResult<Integer> cluster : clusters) {
        TemplateFatherVo fatherVo = new TemplateFatherVo(cluster.getClusterName());
        List<JSONObject> members = new ArrayList<>();
        for (Integer index : cluster.getDataPoints()) {
            JSONObject record = data.get(index);
            List<Map<String, Integer>> hitWords = members.isEmpty()
                    // The first member also initialises the father template.
                    ? fatherVoInit(fatherVo, record, fatherId)
                    // Remaining members only get the word-frequency match.
                    : getHitWordAndRate(record);
            record.put(AggreeHandler.HITWORD_RATE, hitWords);
            members.add(record);
        }
        res.put(fatherVo, members);
    }
}
/**
 * Aggregates the current records into clusters.
 * <p>
 * {@code INCOMPLETE} batches are clustered by content, all other types by title. Each
 * cluster is then sorted and given its example record.
 *
 * @return map from father template to the records of its cluster
 */
public Map<TemplateFatherVo, List<JSONObject>> aggree() {
    Map<TemplateFatherVo, List<JSONObject>> clusters = new HashMap<>(data.size());
    final long startedAt = System.currentTimeMillis();
    AtomicInteger fatherId = new AtomicInteger(1);
    boolean untitled = TypeB.INCOMPLETE == typeB;
    if (untitled) {
        // Records without a title.
        incompleteAggree(clusters, fatherId);
    } else {
        // Records with a title.
        completeAggree(clusters, fatherId);
    }
    final long finishedAt = System.currentTimeMillis();
    sortByTimeAndStatistics(clusters);
    logger.info("id:{}-【{}】-聚合前{}条,聚合组后{}条,耗时:{}ms", id, typeB.name(), data.size(), clusters.size(),
            finishedAt - startedAt);
    return clusters;
}
/**
 * Stores the batch counters (total records, marked count, father-title count) as a JSON
 * object under the status key of this group/id/type.
 *
 * @param markCount        value written under {@code MARK_COUNT}
 * @param titleFatherCount value written under {@code TITLE_FATHER_COUNT}
 */
public void statisticsCache(int markCount, int titleFatherCount) {
    JSONObject status = new JSONObject(3);
    status.put(TOTAL_COUNT, data.size());
    status.put(MARK_COUNT, markCount);
    status.put(TITLE_FATHER_COUNT, titleFatherCount);
    final String statusKey = vaildStatusKey(group, id, typeB);
    redissonUtil.setStrByData(statusKey, status);
}
/**
 * Looks up an already existing mark for every cluster and, when one is found, copies the
 * tag and the marking person onto the cluster's example record.
 *
 * @param aggreMap clusters to check (father template to member records)
 */
public void checkHasMarked(Map<TemplateFatherVo, List<JSONObject>> aggreMap) {
    final long startedAt = System.currentTimeMillis();
    int found = 0;
    for (Map.Entry<TemplateFatherVo, List<JSONObject>> cluster : aggreMap.entrySet()) {
        String[] markInfo = checkHasMarked(cluster.getValue());
        if (null == markInfo) {
            continue;
        }
        TemplateFatherVo fatherVo = cluster.getKey();
        fatherVo.getExample().put(fields.mtag, markInfo[0]);
        fatherVo.getExample().put(fields.mperson, markInfo[1]);
        found++;
    }
    logger.info("id:{}-【{}】-校验已存在标注标签{}条,耗时:{}ms", id, typeB.name(), found, (System.currentTimeMillis() - startedAt));
}
/**
 * Queries the remote matcher for an existing mark on any of the given records.
 * <p>
 * The records are sent in batches of 100. The first result carrying a non-empty tag wins.
 * Results whose tag is null or empty are skipped, so an empty tag can no longer end the
 * scan of a batch before later results with a real tag are examined.
 *
 * @param list records of one cluster
 * @return {@code {mtag, mperson}} of the first non-empty match, or {@code null} when none
 *         of the records has an existing mark
 */
public String[] checkHasMarked(List<JSONObject> list) {
    final int cutLimit = 100;
    List<FilterInfo> filterList = DataCollectionUtil.changeJSONList2FilterInfoList(list, group, typeB);
    final int total = filterList.size();
    // Call the remote interface batch by batch.
    for (int start = 0; start < total; start += cutLimit) {
        int end = Math.min(start + cutLimit, total);
        Map<String, QueryResult> mutiMap = dubboHandler.matchQueryResult(filterList.subList(start, end));
        if (null == mutiMap) {
            logger.info("匹配标签异常,group:{},id:{}", group, id);
            continue;
        }
        for (QueryResult qResult : mutiMap.values()) {
            if (null == qResult) {
                continue;
            }
            String mtag = qResult.getMtag();
            // Only a non-empty tag counts as a match.
            if (null != mtag && !"".equals(mtag)) {
                logger.info("命中匹配标签,group:{},QueryResult:{}", group, JSON.toJSONString(qResult));
                return new String[] { mtag, qResult.getMperson() };
            }
        }
    }
    return null;
}
/**
 * Sorts every cluster by publish time (ascending), numbers its records with {@code SON_ID}
 * and uses the earliest record as the father template's example and content.
 * <p>
 * Records without a {@code "time"} value are ordered after all timed records instead of
 * causing a {@code NullPointerException} during comparison. Empty clusters are left
 * without an example instead of failing on {@code list.get(0)}.
 *
 * @param aggreMap clusters to post-process (father template to member records)
 */
private void sortByTimeAndStatistics(Map<TemplateFatherVo, List<JSONObject>> aggreMap) {
    aggreMap.forEach((fatherVo, list) -> {
        // Publish time ascending; missing times go last.
        list.sort((o1, o2) -> {
            Long t1 = o1.getLong("time");
            Long t2 = o2.getLong("time");
            if (null == t1 || null == t2) {
                return null == t1 ? (null == t2 ? 0 : 1) : -1;
            }
            return Long.compare(t1, t2);
        });
        // Assign the position inside the cluster as sonId.
        for (int i = 0; i < list.size(); i++) {
            list.get(i).put(SON_ID, i);
        }
    });
    // Set the template example.
    aggreMap.forEach((fatherVo, list) -> {
        if (list.isEmpty()) {
            return;
        }
        JSONObject example = list.get(0);
        fatherVo.setContent(example.getString(fields.content));
        fatherVo.setExample(example);
    });
}
/**
 * Initialises a father template from its first record: assigns the next father id and the
 * record's hit-word/frequency list.
 *
 * @return a deep copy of the hit-word list, so the record and the template do not share
 *         one list instance (avoids circular references)
 */
private List<Map<String, Integer>> fatherVoInit(TemplateFatherVo fatherVo, JSONObject json, AtomicInteger fatherId) {
    final List<Map<String, Integer>> hitWords = getHitWordAndRate(json);
    final int nextId = fatherId.getAndIncrement();
    fatherVo.setFatherId(String.valueOf(nextId));
    fatherVo.setHitWordAndRate(hitWords);
    return Tools.deepCopyByJson(hitWords, List.class);
}
/**
 * Returns the hit-word/frequency list of a record.
 * <p>
 * When the record already carries a list under {@code HITWORD_RATE}, that entry is removed
 * from the record and returned. Otherwise (no entry, a null entry, or an entry that is not
 * a list) the list is computed from the record's title and content. A missing title or
 * content is treated as an empty string, so the literal text {@code "null"} never takes
 * part in keyword matching.
 *
 * @param json record to inspect; its {@code HITWORD_RATE} entry is removed if present
 * @return the cached or freshly computed hit-word list
 */
@SuppressWarnings("unchecked")
private List<Map<String, Integer>> getHitWordAndRate(JSONObject json) {
    if (json.containsKey(AggreeHandler.HITWORD_RATE)) {
        Object cached = json.remove(AggreeHandler.HITWORD_RATE);
        // An explicit type check replaces the former empty catch of ClassCastException.
        if (cached instanceof List) {
            return (List<Map<String, Integer>>) cached;
        }
    }
    String title = json.getString(fields.title);
    String content = json.getString(fields.content);
    return highWordMatch((null == title ? "" : title) + (null == content ? "" : content));
}
/**
 * Matches every keyword combination against {@code text}.
 * <p>
 * A combination hits only when each of its words has a non-zero rate; its rate is the
 * lowest rate among its words. Each hit becomes a single-entry map (words joined by a
 * space mapped to the rate), and the result is sorted by rate, highest first.
 *
 * @param text text to search
 * @return hits ordered by descending rate; empty when nothing matches
 */
private List<Map<String, Integer>> highWordMatch(String text) {
    List<Map<String, Integer>> hits = new ArrayList<>();
    for (List<String> combination : highWordList) {
        int combinedRate = 0;
        for (String word : combination) {
            int wordRate = DataCollectionUtil.calculateRate(word, text);
            if (wordRate == 0) {
                // One word missed: the whole combination misses.
                combinedRate = 0;
                break;
            }
            // Keep the smallest rate seen so far.
            if (combinedRate == 0 || wordRate < combinedRate) {
                combinedRate = wordRate;
            }
        }
        if (combinedRate <= 0) {
            continue;
        }
        Map<String, Integer> hit = new HashMap<>(1);
        hit.put(String.join(" ", combination), combinedRate);
        hits.add(hit);
    }
    // Every map holds exactly one entry, so its first value is its rate.
    hits.sort((left, right) -> Integer.compare(right.values().iterator().next(),
            left.values().iterator().next()));
    return hits;
}
}
}
package com.zhiwei.middleware.automatic.server.service.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.AggreInfo;
import com.zhiwei.middleware.automatic.server.pojo.enums.AggreeTaskType;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.AutoService;
import com.zhiwei.middleware.automatic.server.service.handler.BaseTaskHandler;
import com.zhiwei.middleware.automatic.server.service.handler.KafkaSendHandler;
import com.zhiwei.middleware.automatic.server.util.EventCollectionUtil;
import com.zhiwei.middleware.automatic.server.util.MarkInfoUtil;
import com.zhiwei.middleware.automatic.server.util.Tools;
import com.zhiwei.nlp.AggreeBootStarter;
import com.zhiwei.nlp.vo.KResult;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.stream.Collectors;
@Service
public class EventCollectionMark extends BaseTaskHandler {
private static final Logger logger = LogManager.getLogger(EventCollectionMark.class);
// Key segments that distinguish the aggregation result map from the noise map in the cache.
private static final String AGGREGATION_SUFFIX = "aggregation";
private static final String NOISE_SUFFIX = "noise";
// Executor used by eventCollectionMarkedInsert for the asynchronous insert task.
private final ThreadPoolTaskExecutor eventAggreeEasyExecutor;
// Executor used by startAggree for the asynchronous aggregation task.
private final ThreadPoolTaskExecutor eventAggreeExecutor;
private final RedissonUtil redissonUtil;
private final KafkaSendHandler kafkaSendHandler;
private final DubboHandler dubboHandler;
private final AutoService autoService;
/**
 * Wires the collaborators of the event-collection mark service and registers the handler
 * with task type {@code AggreeTaskType.EVENT}.
 */
public EventCollectionMark(@Qualifier("eventAggreeEasyExecutor") ThreadPoolTaskExecutor eventAggreeEasyExecutor,
        @Qualifier("eventAggreeExecutor") ThreadPoolTaskExecutor eventAggreeExecutor,
        RedissonUtil redissonUtil, KafkaSendHandler kafkaSendHandler,
        DubboHandler dubboHandler, AutoService autoService) {
    super(redissonUtil, AggreeTaskType.EVENT);
    // Infrastructure.
    this.redissonUtil = redissonUtil;
    this.eventAggreeExecutor = eventAggreeExecutor;
    this.eventAggreeEasyExecutor = eventAggreeEasyExecutor;
    // Remote/service collaborators.
    this.autoService = autoService;
    this.dubboHandler = dubboHandler;
    this.kafkaSendHandler = kafkaSendHandler;
}
/**
 * Adds records to the aggregation source set of one group/id.
 * <p>
 * The payload is gunzipped and parsed into mark infos; entries rejected by
 * {@code MarkInfoUtil.filterTitleNon} are dropped, required fields are supplemented, and the
 * remaining entries are stored as JSON strings. Any failure is logged and swallowed.
 *
 * @param group     mark group
 * @param id        event id
 * @param sourceStr gzip-compressed JSON array of cached mark infos
 */
public void addEventCollectionAggreeSourceList(String group, String id, String sourceStr) {
    try {
        String index = group + "-" + id;
        // Decompress and parse the payload.
        String rawJson = Tools.gunzip(sourceStr);
        List<String> cachedMarkInfos = JSONObject.parseArray(rawJson, String.class);
        List<MarkInfo> parsed = Tools.getMarkInfos(cachedMarkInfos);
        final int originSize = parsed.size();
        // Drop malformed entries and entries with an empty title.
        List<MarkInfo> sourceList = MarkInfoUtil.filterTitleNon(parsed);
        // Fill in the required fields.
        EventCollectionUtil.supplementForMarkInfoList(sourceList, group, "自动化机器人");
        final int newSize = sourceList.size();
        if (newSize != originSize) {
            logger.warn("原添加数据集{}条,移除标题为空的数据后{}条", originSize, newSize);
        }
        List<String> serialized = sourceList.stream().map(JSONObject::toJSONString).collect(Collectors.toList());
        addDataSource(group, id, serialized);
        logger.info("{}添加聚合集{}条", index, sourceList.size());
    } catch (Exception e) {
        logger.error("addEventCollectionAggreSourceList", e);
    }
}
/**
 * Returns one page of aggregated template titles for a group/id.
 * <p>
 * While aggregation is not finished the result has {@code status=false}, {@code data=null}
 * and {@code totalSize=0}. Otherwise titles are filtered by {@code keyword}, ordered by
 * cluster size, filtered by {@code markFlag} ({@code -1} all, {@code 1} tagged only,
 * {@code 0} untagged only; any other value selects nothing) and paged. {@code data} is the
 * gzip-compressed JSON of the page.
 *
 * @return the result map, or {@code null} when an exception occurred
 */
public Map<String, Object> getEventCollectionAggreeTemplate(String group, String id, int page, int size,
        boolean isAsc, int markFlag, String keyword) {
    try {
        Map<String, Object> returnMap = new HashMap<>();
        List<JSONObject> dataList = new ArrayList<>();
        AggreInfo aggreInfo = getAggreeTask(group, id);
        if (null == aggreInfo || !aggreInfo.isAggreFinshed()) {
            logger.info("正在聚合请等待....");
            returnMap.put("status", false);
            returnMap.put("data", null);
            returnMap.put("totalSize", 0);
            return returnMap;
        }
        String aggregationKey = Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id);
        Map<String, List<JSONObject>> aggreeMap = getAggreeMap(aggregationKey);
        // Fuzzy match on the keyword.
        Map<String, List<JSONObject>> markAggreeMap = EventCollectionUtil.fuzzyMatch(aggreeMap, keyword);
        if (null != markAggreeMap) {
            Comparator<Object> bySize = Comparator.comparing(key -> markAggreeMap.get(key).size());
            if (!isAsc) {
                bySize = bySize.reversed();
            }
            List<String> sortList = new ArrayList<>(markAggreeMap.keySet());
            sortList.sort(bySize);
            // Build one entry per template title.
            for (String key : sortList) {
                JSONObject titleFather = new JSONObject();
                List<JSONObject> list = markAggreeMap.get(key);
                List<MarkInfo> resList = MarkInfoUtil.transformToMarkInfo(list);
                String mtag = list.get(0).getString("mtag");
                titleFather.put("title", key);
                titleFather.put("mtag", mtag);
                titleFather.put("sonList", resList);
                boolean tagged = null != mtag && !"".equals(mtag);
                boolean selected = (-1 == markFlag)      // no filtering on the mark state
                        || (1 == markFlag && tagged)     // tagged only
                        || (0 == markFlag && !tagged);   // untagged only
                if (selected) {
                    dataList.add(titleFather);
                }
            }
            if (!dataList.isEmpty()) {
                // Regular, non-empty response.
                int totalSize = dataList.size();
                dataList = Tools.listPagedQuery(dataList, page, size);
                returnMap.put("status", true);
                returnMap.put("data", Tools.gzip(JSONObject.toJSONString(dataList)));
                returnMap.put("totalSize", totalSize);
                returnMap.put("detail", aggreInfo.getPrintString());
                return returnMap;
            }
        }
        // Aggregation set is empty, or nothing is left after filtering.
        returnMap.put("status", true);
        returnMap.put("data", Tools.gzip(JSONObject.toJSONString(dataList)));
        returnMap.put("totalSize", 0);
        return returnMap;
    } catch (Exception e) {
        logger.error("{}初次获取事件采集聚合模板标题结果失败", group);
        logger.error("getEventCollectionAggreTemplate", e);
        return null;
    }
}
/**
 * Returns the mark tag of a template title, read from the first record of its cluster.
 *
 * @param group         mark group
 * @param id            event id
 * @param templateTitle template title
 * @return the tag, {@code ""} when the record has none, or {@code null} when the lookup
 *         failed
 */
public String getEventCollectionMarkTagByTemplate(String group, String id, String templateTitle) {
    try {
        List<JSONObject> sons = getAggreeMapByField(group, id, templateTitle);
        String mtag = sons.get(0).getString("mtag");
        if (null == mtag) {
            return "";
        }
        return mtag;
    } catch (Exception e) {
        logger.error("getEventCollectionMarkTagByTemplate", e);
        return null;
    }
}
/**
 * Returns the records aggregated under one template title.
 * <p>
 * {@code status} tells whether aggregation has finished. A missing task state is treated
 * as "not finished", matching {@code getEventCollectionAggreeTemplate}. {@code data} holds
 * the cluster's records converted to mark infos.
 *
 * @param group         mark group
 * @param id            event id
 * @param templateTitle template title whose cluster is requested
 * @return map with {@code status} and {@code data}, or {@code null} when an exception
 *         occurred (the exception is logged with its stack trace)
 */
public Map<String, Object> getEventCollectionAggreeSubTitle(String group, String id, String templateTitle) {
    Map<String, Object> resMap = new HashMap<>();
    // Names the stage that failed in the error log.
    String error = "isEventCollectionAggreeOkMap";
    try {
        AggreInfo aggreInfo = getAggreeTask(group, id);
        if (null == aggreInfo || !aggreInfo.isAggreFinshed()) {
            logger.info("正在聚合请等待....");
            resMap.put("status", false);
        } else {
            resMap.put("status", true);
        }
        error = "eventCollectionAggreeMap";
        resMap.put("data", MarkInfoUtil.transformToMarkInfo(getAggreeMapByField(group, id, templateTitle)));
        return resMap;
    } catch (Exception e) {
        // The trailing throwable makes the logger print the stack trace.
        logger.error("{}根据模板标题获取其子集标注聚合结果{}", group, error, e);
        return null;
    }
}
/**
 * Sets the mark tag of every record under one template title and writes the aggregation
 * map back to the cache. Records without an {@code mtime} also get the default marking
 * person and the current time.
 *
 * @param group         mark group
 * @param id            event id
 * @param templateTitle template title whose records are re-tagged
 * @param modifyTag     new tag
 * @return {@code true} when the title exists and was updated; {@code false} when the
 *         aggregation map or the title is missing, or an exception occurred
 */
public boolean modifyEventCollectionAggreeTitleMarkTag(String group, String id, String templateTitle,
        String modifyTag) {
    try {
        String aggregationKey = Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id);
        Map<String, List<JSONObject>> markAggreeMap = getAggreeMap(aggregationKey);
        if (null == markAggreeMap) {
            return false;
        }
        List<JSONObject> markList = markAggreeMap.get(templateTitle);
        if (null == markList) {
            return false;
        }
        String oldTag = markList.get(0).getString("mtag");
        for (JSONObject record : markList) {
            record.put("mtag", modifyTag);
            // Fill in the marking time and person when they are missing.
            if (null == record.get("mtime")) {
                record.put("mperson", "自动化机器人");
                record.put("mtime", new Date().getTime());
            }
        }
        upsetAggreeResult(group, id, markAggreeMap, null, null);
        logger.info("修改模板标题:{}-tag成功,oldTag:{} modifyTag:{}", templateTitle, oldTag, modifyTag);
        return true;
    } catch (Exception e) {
        logger.error("modifyEventCollectionAggreTitleMarkTag", e);
        return false;
    }
}
/**
 * Starts the aggregation of one group/id unless it has already finished.
 * <p>
 * The work runs asynchronously on {@code eventAggreeExecutor}: all source records are sent
 * to Kafka, records matching the noise set are removed, existing auto-mark collections are
 * applied, the remaining titles are clustered, and the result is written to the cache
 * together with the updated task state.
 * <p>
 * Failures inside the asynchronous task are caught and logged there, because the
 * surrounding try/catch only covers task submission. A missing aggregation map is replaced
 * by an empty one so the first run does not fail on a null map.
 * <p>
 * NOTE(review): {@code synchronized} only serialises the submission; the finished flag is
 * set at the end of the task, so two calls made while a task is running can both submit.
 * Confirm whether callers guard against that.
 *
 * @param group mark group
 * @param id    event id
 */
public synchronized void startAggree(String group, String id) {
    try {
        String index = group + "-" + id;
        AggreInfo tempVo = getAggreeTask(group, id);
        AggreInfo aggreInfo = null != tempVo ? tempVo : new AggreInfo(false, false);
        if (!aggreInfo.isAggreFinshed()) {
            // Grouped automatic marking.
            logger.info("启动事件采集聚合 id:{}", id);
            eventAggreeExecutor.execute(() -> {
                try {
                    List<MarkInfo> originSourceList = getDataSource(group, id)
                            .stream().map(e -> JSONObject.parseObject(e).toJavaObject(MarkInfo.class)).collect(Collectors.toList());
                    // 1. Send all records to the cleaning Kafka topic.
                    kafkaSendHandler.insertDataByMarkInfo(originSourceList);
                    // 2. Shrink the source set using the noise set.
                    List<MarkInfo> sourceList = EventCollectionUtil.reduceByNoise(getAggreeMap(Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id)),
                            originSourceList, GenericAttribute.SIMILAR_STANDARD);
                    int noiseCount = originSourceList.size() - sourceList.size();
                    logger.info("index:{} 根据噪音集移除源数据{}条", index, noiseCount);
                    // 3. Aggregate and auto-mark against the existing auto-mark collections.
                    autoService.autMarkByEvent(group,
                            sourceList);
                    // 4. Cluster the titles of the remaining records.
                    List<String> sourceTitles = sourceList.stream()
                            .map(info -> Tools.filterSymbol(info.getSourceObj().getString("title")))
                            .collect(Collectors.toList());
                    List<KResult<Integer>> kResult = AggreeBootStarter.getKResult(sourceTitles, 0.1);
                    // 5. Restore the entity records into the aggregation map.
                    Map<String, List<JSONObject>> cachedMap = getAggreeMap(Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id));
                    Map<String, List<JSONObject>> aggreeMap = null != cachedMap ? cachedMap : new HashMap<>();
                    int autoMaticMarkCount = EventCollectionUtil.restoreForAggreeTitleMap(kResult, aggreeMap,
                            sourceList);
                    aggreInfo.setAll(true, originSourceList.size(), noiseCount, aggreeMap.keySet().size(),
                            autoMaticMarkCount);
                    upsetAggreeResult(group, id, aggreeMap, aggreInfo, null);
                    logger.info("事件采集聚合完成 index:{}", index);
                } catch (Exception e) {
                    // Without this the failure would only surface on the worker thread.
                    logger.error("startAggree异常", e);
                }
            });
        }
    } catch (Exception e) {
        logger.error("startAggree异常", e);
    }
}
/**
 * Inserts the already marked part of an event collection, using the default marking
 * person.
 *
 * @return the result of the four-argument overload
 */
public boolean eventCollectionMarkedInsert(String group, String id, int markSum) {
    final String defaultPerson = "自动化机器人";
    return eventCollectionMarkedInsert(group, id, markSum, defaultPerson);
}
/**
 * Inserts the already marked part of an event collection and moves large untagged
 * clusters into the noise set.
 * <p>
 * The work runs asynchronously on {@code eventAggreeEasyExecutor}. Tagged clusters are
 * upserted through {@code dubboHandler}. Untagged clusters with at least {@code markSum}
 * records are added to, or merged into, the noise map. After all clusters are processed
 * the task state is flagged as inserted and the noise map is written back once. Previously
 * this ran inside the loop, once per cluster, and was skipped for clusters below
 * {@code markSum}.
 * <p>
 * Failures inside the asynchronous task are logged there. The completion message is logged
 * when the task actually finishes.
 *
 * @param group   mark group
 * @param id      event id
 * @param markSum minimum cluster size for an untagged cluster to become noise
 * @param mperson marking person recorded on inserted records
 * @return {@code true} when the task was submitted; {@code false} when aggregation has not
 *         finished or submission failed
 */
public boolean eventCollectionMarkedInsert(String group, String id, int markSum, String mperson) {
    AggreInfo aggreInfo = getAggreeTask(group, id);
    try {
        if (null == aggreInfo || !aggreInfo.isAggreFinshed()) {
            logger.info("正在聚合请等待....");
            return false;
        }
        eventAggreeEasyExecutor.execute(() -> {
            try {
                Map<String, List<JSONObject>> aggreeMap = getAggreeMap(Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id));
                Map<String, List<JSONObject>> noiseMap = getAggreeMap(Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id));
                if (null != aggreeMap) {
                    for (Map.Entry<String, List<JSONObject>> entry : aggreeMap.entrySet()) {
                        String title = entry.getKey();
                        List<JSONObject> consumers = entry.getValue();
                        // Tagged clusters go to storage.
                        if (EventCollectionUtil.hasTag(consumers)) {
                            dubboHandler.eventCollectionUpsertWithSupplement(consumers, group, mperson);
                            logger.info("titile:{}已经入库{}条", title, consumers.size());
                            continue;
                        }
                        // Start an empty noise set when none exists yet.
                        if (null == noiseMap) {
                            noiseMap = new HashMap<>();
                        }
                        // Skip clusters that are not hot enough.
                        if (consumers.size() < markSum) {
                            continue;
                        }
                        if (!noiseMap.containsKey(title)) {
                            // New noise title.
                            noiseMap.put(title, consumers);
                            logger.info("title:{}新建噪音集{}条", title, consumers.size());
                        } else {
                            // Merge into the existing noise list of the same title.
                            List<JSONObject> originList = noiseMap.get(title);
                            List<JSONObject> newList = EventCollectionUtil.mergeNoiseList(originList, consumers);
                            noiseMap.put(title, newList);
                            logger.info("title:{}新增噪音{}条,并入噪音集{}条", title, consumers.size(),
                                    newList.size() - originList.size());
                        }
                    }
                }
                // All clusters handled: flag the task and flush the noise set once.
                aggreInfo.setInserted(true);
                upsetAggreeResult(group, id, null, aggreInfo, noiseMap);
                logger.info("id:{}已经入库完毕", id);
            } catch (Exception e) {
                // Without this the failure would only surface on the worker thread.
                logger.error("eventCollectionMarkedInsert", e);
            }
        });
        return true;
    } catch (Exception e) {
        logger.error("eventCollectionMarkedInsert", e);
        return false;
    }
}
/**
 * Removes everything cached for a group/id: the aggregation result (via
 * {@code cleanEventCollectionAggreeData}) and the noise set.
 */
public void cleanEventCollectionAllData(String group, String id) {
    cleanEventCollectionAggreeData(group, id);
    final String noiseBucket = Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id);
    redissonUtil.deleteBucket(noiseBucket);
    logger.info("删除事件采集全部结果集,id:{}", id);
}
/**
 * Removes the cached aggregation result of a group/id and its task info.
 */
public void cleanEventCollectionAggreeData(String group, String id) {
    final String aggregationBucket = Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id);
    redissonUtil.deleteBucket(aggregationBucket);
    removerInfo(group, id);
    logger.info("删除事件采集聚合结果集,id:{}", id);
}
/**
 * Returns one page of noise father titles for a group/id.
 * <p>
 * Titles are filtered by {@code keyword} and ordered by the size of their noise list.
 * When the fuzzy match yields no map the result has {@code status=false},
 * {@code noiseList=null} and {@code totalSize=0}.
 *
 * @return map with {@code noiseList}, {@code status} and {@code totalSize}
 */
public Map<String, Object> getEventCollectionNoiseTitles(String group, String id, int page, int size, boolean isAsc,
        String keyword) {
    Map<String, Object> resMap = new HashMap<>();
    List<JSONObject> noiseList = new ArrayList<>();
    String noiseKey = Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id);
    // Fuzzy match on the keyword.
    Map<String, List<JSONObject>> noiseMap = EventCollectionUtil.fuzzyMatch(getAggreeMap(noiseKey), keyword);
    if (null == noiseMap) {
        resMap.put("noiseList", null);
        resMap.put("status", false);
        resMap.put("totalSize", 0);
        return resMap;
    }
    Comparator<Object> bySize = Comparator.comparing(key -> noiseMap.get(key).size());
    List<String> noiseTitleList = new ArrayList<>(noiseMap.keySet());
    noiseTitleList.sort(isAsc ? bySize : bySize.reversed());
    // Build one entry per noise title.
    for (String title : noiseTitleList) {
        List<JSONObject> list = noiseMap.get(title);
        List<MarkInfo> resList = MarkInfoUtil.transformToMarkInfo(list);
        String mtag = list.get(0).getString("mtag");
        JSONObject titleFather = new JSONObject();
        titleFather.put("title", title);
        titleFather.put("mtag", mtag);
        titleFather.put("sonList", resList);
        noiseList.add(titleFather);
    }
    final int total = noiseList.size();
    resMap.put("noiseList", Tools.listPagedQuery(noiseList, page, size));
    resMap.put("status", true);
    resMap.put("totalSize", total);
    return resMap;
}
/**
 * Returns the noise records stored under one template title.
 *
 * @return map whose {@code data} entry holds the records (possibly {@code null} when the
 *         title is unknown), or {@code null} when the lookup failed
 */
public Map<String, Object> getEventCollectionNoiseSubTitle(String group, String id, String templateTitle) {
    Map<String, Object> resMap = new HashMap<>();
    try {
        String noiseKey = Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id);
        Map<String, List<JSONObject>> noiseMap = getAggreeMap(noiseKey);
        resMap.put("data", noiseMap.get(templateTitle));
        return resMap;
    } catch (Exception e) {
        logger.error("{}根据模板标题获取其子集标注聚合结果", group, e);
        return null;
    }
}
/**
 * Tells whether the marked part of a group/id has already been inserted.
 *
 * @param group mark group
 * @param id    event id
 * @return the inserted flag of the task state; {@code false} when the state cannot be read
 */
public boolean markedHasInserted(String group, String id) {
    try {
        AggreInfo taskInfo = getAggreeTask(group, id);
        return taskInfo.isInserted();
    } catch (Exception ignored) {
        // A missing or unreadable task state counts as "not inserted".
        return false;
    }
}
/**
 * Writes the given parts of the aggregation state to the cache. Each argument that is
 * {@code null} is left untouched. The writes happen under a class-wide lock.
 *
 * @param group     mark group
 * @param id        event id
 * @param aggreMap  aggregation result to store, or {@code null}
 * @param aggreInfo task state to store, or {@code null}
 * @param noiseMap  noise set to store, or {@code null}
 */
private void upsetAggreeResult(String group, String id, Map<String, List<JSONObject>> aggreMap, AggreInfo aggreInfo,
        Map<String, List<JSONObject>> noiseMap) {
    // Keys are assembled and values converted to the gzip hash format before writing.
    synchronized (EventCollectionMark.class) {
        if (aggreMap != null) {
            // Aggregation result set.
            String aggregationKey = Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id);
            redissonUtil.setMapValue(aggregationKey, Tools.redisHmFormatWithGzip(aggreMap));
        }
        if (aggreInfo != null) {
            // Aggregation task state.
            addAggreeTask(group, id, aggreInfo);
        }
        if (noiseMap != null) {
            // Noise set.
            String noiseKey = Tools.assembleKey(getKeyPrefix(), NOISE_SUFFIX, group, id);
            redissonUtil.setMapValue(noiseKey, Tools.redisHmFormatWithGzip(noiseMap));
        }
    }
}
/**
 * Reads one field of the cached aggregation map, gunzips it and parses it as a JSON array
 * of records.
 *
 * @param mapKey field (template title) to read
 */
private List<JSONObject> getAggreeMapByField(String group, String id, String mapKey) {
    final String aggregationKey = Tools.assembleKey(getKeyPrefix(), AGGREGATION_SUFFIX, group, id);
    final String compressed = redissonUtil.getMapKeyValue(aggregationKey, mapKey);
    final String json = Tools.gunzip(compressed);
    return JSONObject.parseArray(json, JSONObject.class);
}
/**
 * Loads the cached map stored under {@code key} and decodes its gzip-compressed values.
 *
 * @return the decoded map, or {@code null} when the cache returns no map
 */
private Map<String, List<JSONObject>> getAggreeMap(String key) {
    Map<String, String> raw = redissonUtil.getMapValue(key);
    return null == raw ? null : EventCollectionUtil.parseFromRedisHmStrWithGunZip(raw);
}
}
package com.zhiwei.middleware.automatic.server.service.impl; package com.zhiwei.middleware.automatic.server.service.impl;
import com.alibaba.fastjson.JSONObject; import com.zhiwei.middleware.automatic.server.common.GenericAttribute;
import com.zhiwei.base.entity.subclass.mark.CompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.es.index.Index;
import com.zhiwei.es.util.IndexUtil;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dao.EsDao;
import com.zhiwei.middleware.automatic.server.dao.TemplateRecordDao; import com.zhiwei.middleware.automatic.server.dao.TemplateRecordDao;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler; import com.zhiwei.middleware.automatic.server.service.TemplateTitleService;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord; import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import com.zhiwei.middleware.automatic.server.pojo.TemplateTitleVo;
import com.zhiwei.middleware.automatic.server.pojo.enums.TemplateStatus; import com.zhiwei.middleware.automatic.server.pojo.enums.TemplateStatus;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateTitleVo;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil; import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.TemplateTitleService;
import com.zhiwei.middleware.automatic.server.util.CosineSimilarity; import com.zhiwei.middleware.automatic.server.util.CosineSimilarity;
import com.zhiwei.middleware.automatic.server.util.MarkInfoUtil;
import com.zhiwei.middleware.automatic.server.util.Tools; import com.zhiwei.middleware.automatic.server.util.Tools;
import com.zhiwei.nlp.AggreeBootStarter;
import com.zhiwei.nlp.vo.KResult;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.mongodb.core.query.Criteria; import org.springframework.data.mongodb.core.query.Criteria;
import org.springframework.data.mongodb.core.query.Query; import org.springframework.data.mongodb.core.query.Query;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.*; import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@Service @Service
public class TemplateTitleServiceImpl implements TemplateTitleService { public class TemplateTitleServiceImpl implements TemplateTitleService {
private final Logger log = LogManager.getLogger(TemplateTitleServiceImpl.class); private static final Logger log = LogManager.getLogger(TemplateTitleServiceImpl.class);
private final RedissonUtil redissonUtil;
private final EsDao esDao; private final static String COUNT_KEY = "count";
private final IndexUtil.ESIndexes esIndexes; private final RedissonUtil redissonUtil;
private final TemplateRecordDao templateRecordDao; private final TemplateRecordDao templateRecordDao;
private final DubboHandler dubboHandler; public TemplateTitleServiceImpl(RedissonUtil redissonUtil, TemplateRecordDao templateRecordDao) {
private final ThreadPoolTaskExecutor executor;
/* 一天的秒数(为保留前一天文件) */
private static final int ONE_DAY = 60 * 60 * 24;
public TemplateTitleServiceImpl(RedissonUtil redissonUtil, EsDao esDao,
IndexUtil.ESIndexes esIndexes, TemplateRecordDao templateRecordDao,
DubboHandler dubboHandler,
@Qualifier("asyncExecutor") ThreadPoolTaskExecutor executor) {
this.redissonUtil = redissonUtil; this.redissonUtil = redissonUtil;
this.esDao = esDao;
this.esIndexes = esIndexes;
this.templateRecordDao = templateRecordDao; this.templateRecordDao = templateRecordDao;
this.dubboHandler = dubboHandler;
this.executor = executor;
}
@Override
public void schedulerHourAggregation(List<String> groups, Long startTime, Long endTime) {
groups.forEach(group -> {
try {
//源数据
List<Map<String, Object>> sourceList = findRecentTimeData(group, startTime, endTime);
if (sourceList.isEmpty()) {
return;
}
log.info("发现{}组数据{}条,聚合中...", group, sourceList.size());
projectDataTemplate(group, sourceList);
} catch (Exception e) {
log.error("自动聚合模板更新失败,项目:{}", group, e);
}
});
} }
@Override @Override
public Map<String, TemplateTitleVo> getTemplateTitleByProject(String project) { public Map<String, TemplateTitleVo> getTemplateTitleByProject(String project) {
Map<String, String> mapValue = redissonUtil.getMapValue(GenericAttribute.REDIS_MAP_KEY + project); Map<String, String> mapValue = redissonUtil.getMapValue(Tools.assembleKey(GenericAttribute.REDIS_MAP_KEY, project));
if (Tools.isEmpty(mapValue)) { if (Tools.isEmpty(mapValue)) {
return new HashMap<>(); return new HashMap<>();
} }
return Tools.restoreTMap(mapValue, TemplateTitleVo.class); Map<String, TemplateTitleVo> res = Tools.restoreTMap(mapValue, TemplateTitleVo.class);
} for (Map.Entry<String, TemplateTitleVo> entry : res.entrySet()) {
setMarkCount(entry.getKey(), entry.getValue());
@Override
public void setTemplateTitleByProject(String project, Map<String, TemplateTitleVo> vos) {
String key = GenericAttribute.REDIS_MAP_KEY + project;
for (Map.Entry<String, TemplateTitleVo> entry : vos.entrySet()) {
redissonUtil.setMapValue(key, entry.getKey(), JSONObject.toJSONString(entry.getValue()));
} }
} return res;
@Override
public boolean modifyTemplateTitle(String group, String templateTitle, String fixTag) {
try {
// 防止增加模板标题有带符号的问题
templateTitle = Tools.filterSymbol(templateTitle);
String key = GenericAttribute.REDIS_MAP_KEY + group;
String mapKeyValue = redissonUtil.getMapKeyValue(key, templateTitle);
if (Tools.isEmpty(mapKeyValue)) {
return false;
}
TemplateTitleVo titleVo = JSONObject.parseObject(mapKeyValue, TemplateTitleVo.class);
String oldTag = titleVo.getMtag();
titleVo.setMtag(fixTag);
redissonUtil.setMapValue(key, templateTitle, JSONObject.toJSONString(titleVo));
log.info("修改模板标签成功: group:{} templateTitle:{} oldTag:{} fixTag:{}", group, templateTitle, oldTag,
fixTag);
return true;
} catch (Exception e) {
e.printStackTrace();
}
return false;
}
@Override
public void modifyTemplateNum(String group, String title, Long num) {
String mapKeyValue = redissonUtil.getMapKeyValue(GenericAttribute.REDIS_MAP_KEY + group, title);
if (Tools.isEmpty(mapKeyValue)) {
return;
}
TemplateTitleVo titleVo = JSONObject.parseObject(mapKeyValue, TemplateTitleVo.class);
titleVo.getMarkSum().addAndGet(num);
redissonUtil.setMapValue(mapKeyValue, title, JSONObject.toJSONString(titleVo));
} }
@Override @Override
...@@ -158,11 +64,6 @@ public class TemplateTitleServiceImpl implements TemplateTitleService { ...@@ -158,11 +64,6 @@ public class TemplateTitleServiceImpl implements TemplateTitleService {
} }
@Override @Override
public void insertTemplateRecord(TemplateRecord templateRecord) {
// Pure delegation: the record is handed to the DAO unchanged.
templateRecordDao.insertTemplateRecord(templateRecord);
}
@Override
public String tryGetTemplateTitleByMupdate(String group, String title, String mupdate) { public String tryGetTemplateTitleByMupdate(String group, String title, String mupdate) {
Map<String, TemplateTitleVo> templateTitleVos = getTemplateTitleByProject(group); Map<String, TemplateTitleVo> templateTitleVos = getTemplateTitleByProject(group);
List<Map<String, Object>> filterTitles = new ArrayList<>(); List<Map<String, Object>> filterTitles = new ArrayList<>();
...@@ -207,18 +108,21 @@ public class TemplateTitleServiceImpl implements TemplateTitleService { ...@@ -207,18 +108,21 @@ public class TemplateTitleServiceImpl implements TemplateTitleService {
} }
TemplateTitleVo titleVo = null; TemplateTitleVo titleVo = null;
double similarity = 0.0; double similarity = 0.0;
for (TemplateTitleVo templateTitleVo : templateTitleVoMap.values()) { String group = null;
if (TemplateStatus.已重置 == templateTitleVo.getStatus()) { for (Map.Entry<String, TemplateTitleVo> entry : templateTitleVoMap.entrySet()) {
if (TemplateStatus.已重置 == entry.getValue().getStatus()) {
continue; continue;
} }
double currentSimilarity = CosineSimilarity.calculateTextSimWithBrand(templateTitleVo.getTemplateTitle(), double currentSimilarity = CosineSimilarity.calculateTextSimWithBrand(entry.getValue().getTemplateTitle(),
title); title);
if (currentSimilarity > GenericAttribute.SIMILAR_STANDARD && currentSimilarity > similarity) { if (currentSimilarity > GenericAttribute.SIMILAR_STANDARD && currentSimilarity > similarity) {
similarity = currentSimilarity; similarity = currentSimilarity;
titleVo = templateTitleVo; titleVo = entry.getValue();
group = entry.getKey();
} }
} }
if (Objects.nonNull(titleVo)) { if (Objects.nonNull(titleVo)) {
setMarkCount(group, titleVo);
res.put("isMatched", true); res.put("isMatched", true);
Map<String, Object> dataMap = new HashMap<>(); Map<String, Object> dataMap = new HashMap<>();
dataMap.put("title", titleVo.getTemplateTitle()); dataMap.put("title", titleVo.getTemplateTitle());
...@@ -232,209 +136,7 @@ public class TemplateTitleServiceImpl implements TemplateTitleService { ...@@ -232,209 +136,7 @@ public class TemplateTitleServiceImpl implements TemplateTitleService {
return res; return res;
} }
@Override private void setMarkCount(String group, TemplateTitleVo vo) {
public boolean resetTemplate(String group, String templateTitle) { vo.setMarkSum(redissonUtil.getCount(Tools.assembleKey(COUNT_KEY, group, vo.getId())));
// 防止增加模板标题有带符号的问题
templateTitle = Tools.filterSymbol(templateTitle);
Map<String, TemplateTitleVo> map = getTemplateTitleByProject(group);
if (Objects.isNull(map.get(templateTitle))) {
return false;
}
TemplateTitleVo templateTitleVo = map.get(templateTitle);
// 只要在运行中的模板 才能重置
if (Objects.isNull(templateTitleVo.getStatus()) || templateTitleVo.getStatus().name().equals(TemplateStatus.运行中.name())) {
templateTitleVo.setStatus(TemplateStatus.重置中);
setTemplateTitleByProject(group, map);
// 修改模板的标注信息
executor.execute(() -> {
try {
modifyTemplateMarkerInfo(templateTitleVo, group);
templateTitleVo.setStatus(TemplateStatus.已重置);
templateRecordDao.removeTemplateRecord(new Query(Criteria.where("templateId").is(templateTitleVo.getId())));
} catch (Exception e) {
templateTitleVo.setStatus(TemplateStatus.重置失败);
log.error("重置模板:修改聚和集错误,title:{},以加入重试队列", templateTitleVo.getTemplateTitle());
} finally {
setTemplateTitleByProject(group, map);
}
});
return true;
}
return false;
}
/**
 * Re-sends the data that a template auto-marked, one page of template records at a time.
 *
 * <p>For each page the records' feature values are looked up in ES, the hits are converted to
 * {@code MarkInfo} (hits that convert to {@code null} are dropped) and the batch is handed to
 * the marking middleware.
 *
 * @param templateTitleVo template whose records are processed
 * @param project         project name, used only in the summary log line
 * @return always {@code true}; failures surface as exceptions
 * @throws Exception whatever the ES lookup or the middleware call throws
 */
private boolean modifyTemplateMarkerInfo (TemplateTitleVo templateTitleVo, String project) throws Exception {
    long startedAt = System.currentTimeMillis();
    long total = templateRecordDao.count(new Query(Criteria.where("templateId").is(templateTitleVo.getId())));
    if (total == 0) {
        return true;
    }
    // Ceiling division: number of pages of POINT_SIZE records.
    int pageCount = (int) (total + GenericAttribute.POINT_SIZE - 1) / GenericAttribute.POINT_SIZE;
    for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
        Query pageQuery = new Query(Criteria.where("templateId").is(templateTitleVo.getId()));
        pageQuery.skip(pageIndex * GenericAttribute.POINT_SIZE)
                .limit(GenericAttribute.POINT_SIZE);
        // Mongo: one page of template records.
        List<TemplateRecord> records = templateRecordDao.findTemplateRecord(pageQuery);
        List<String> mupdates = new ArrayList<>();
        for (TemplateRecord record : records) {
            mupdates.add(record.getMupdate());
        }
        // ES: documents carrying those feature values.
        SearchHits hits = findByMupdateInfos(mupdates);
        List<MarkInfo> markInfos = new ArrayList<>();
        for (SearchHit hit : hits.getHits()) {
            MarkInfo markInfo = MarkInfoUtil.distinguishMarkInfo(hit, null);
            if (Objects.nonNull(markInfo)) {
                markInfos.add(markInfo);
            }
        }
        // Hand the batch to the marking middleware.
        dubboHandler.markUpsert(markInfos);
    }
    log.info("自动标注模板:模板已改动,项目:{},模板标题:{},特征值数量:{}, 耗时:{}"
            , project, templateTitleVo.getTemplateTitle(), total, System.currentTimeMillis() - startedAt);
    return true;
}
/**
 * Searches the {@code mark2} indexes for auto-marked documents with any of the given feature values.
 *
 * <p>The query requires {@code mperson} to equal {@code GenericAttribute.AUTO_PERSON} and at
 * least one {@code mupdate} term to match; the search is issued with offset 0 and size 1000.
 *
 * @param mupdate feature values to look up
 * @return the ES search hits
 * @throws IOException if the search fails
 */
private SearchHits findByMupdateInfos(List<String> mupdate) throws IOException {
    String[] indexes = esIndexes.getMarkIndexes(Index.mark2.name()).toArray(new String[0]);
    // OR-clause over the feature values.
    BoolQueryBuilder anyMupdate = QueryBuilders.boolQuery();
    for (String value : mupdate) {
        anyMupdate.should(QueryBuilders.termQuery("mupdate", value));
    }
    BoolQueryBuilder query = QueryBuilders.boolQuery()
            .must(QueryBuilders.termQuery("mperson", GenericAttribute.AUTO_PERSON))
            .must(anyMupdate);
    return esDao.search(indexes, null, query, null, 0, 1000, null);
}
/**
 * Rebuilds a project's templates from fresh data and stores the merged result.
 *
 * <p>Cached templates are kept only when they were updated within the last seven days and are
 * not in the reset state; a cached template without an id gets the project name as its id.
 *
 * @param group      project
 * @param sourceList raw documents the new templates are aggregated from
 */
private void projectDataTemplate(String group, List<Map<String, Object>> sourceList) {
    // Templates aggregated from the new data.
    Map<String, TemplateTitleVo> aggregated = aggregation(transferMark(sourceList));
    // Previously cached templates that are still valid.
    Map<String, TemplateTitleVo> retained = new HashMap<>();
    for (Map.Entry<String, TemplateTitleVo> cached : getTemplateTitleByProject(group).entrySet()) {
        String title = cached.getKey();
        TemplateTitleVo vo = cached.getValue();
        if (Objects.isNull(vo.getId())) {
            vo.setId(group);
        }
        long updateTime = vo.getUpdateTime().getTime();
        // Drop templates outside the seven-day validity window.
        if (System.currentTimeMillis() - updateTime > ONE_DAY * 7 * 1000) {
            log.info("{}-移除过期模板标题:{},最后更新时间:{}", group, title, updateTime);
            continue;
        }
        if (vo.getStatus() == TemplateStatus.已重置) {
            log.info("已重置的模板从内存中删除,模板title:{}", title);
            continue;
        }
        retained.put(title, vo);
    }
    // Merge both sets and persist the result.
    setTemplateTitleByProject(group, mergeTemplate(aggregated, retained));
}
/**
 * Merges {@code newTemplate} into {@code oldTemplate} and returns {@code oldTemplate}.
 *
 * <p>An entry of {@code newTemplate} whose title has no counterpart in {@code oldTemplate}
 * (similarity below 0.96 against every key) is added as-is; otherwise its tag is copied onto
 * every similar entry already present.
 *
 * <p>NOTE(review): the visible caller, projectDataTemplate, passes the freshly aggregated map as
 * {@code oldTemplate} and the cached map as {@code newTemplate} - confirm the parameter names
 * reflect the intended direction.
 *
 * @param oldTemplate map that is modified in place and returned
 * @param newTemplate entries merged into {@code oldTemplate}
 * @return {@code oldTemplate}
 */
private Map<String, TemplateTitleVo> mergeTemplate(Map<String, TemplateTitleVo> oldTemplate, Map<String, TemplateTitleVo> newTemplate) {
    for (Map.Entry<String, TemplateTitleVo> incoming : newTemplate.entrySet()) {
        // Collect the matches before touching the map so it is never modified mid-iteration.
        List<TemplateTitleVo> similar = new ArrayList<>();
        for (Map.Entry<String, TemplateTitleVo> existing : oldTemplate.entrySet()) {
            if (CosineSimilarity.calculateTextSimWithBrand(incoming.getKey(), existing.getKey()) >= 0.96) {
                similar.add(existing.getValue());
            }
        }
        if (similar.isEmpty()) {
            // No counterpart: add the template.
            oldTemplate.put(incoming.getKey(), incoming.getValue());
            continue;
        }
        // Counterparts exist: update their tag.
        for (TemplateTitleVo match : similar) {
            match.setMtag(incoming.getValue().getMtag());
        }
    }
    return oldTemplate;
}
/**
 * Clusters the documents by title and turns each sufficiently large cluster into a template.
 *
 * <p>Clusters with fewer than three data points are ignored. A template is keyed by the
 * symbol-stripped cluster name, tagged with the most frequent tag among the cluster's documents,
 * and carries the URL of the cluster's first document.
 *
 * @param sourceList documents to aggregate; data points are used as indexes into this list
 * @return templates keyed by their title
 */
private Map<String, TemplateTitleVo> aggregation(List<CompleteTextMark> sourceList) {
Map<String, TemplateTitleVo> aggregationTitleTagMap = new ConcurrentHashMap<>();
List<String> titles = sourceList.stream().map(CompleteTextMark::getTitle).collect(Collectors.toList());
// Compute the clusters.
List<KResult<Integer>> kResult = AggreeBootStarter.getKResult(titles, 0.1);
for (KResult<Integer> result : kResult) {
if (result.getDataPoints().size() < 3) {
continue;
}
// Count how often each tag occurs in the cluster.
Map<String, Long> tagGroup = result.getDataPoints().stream().map(e -> sourceList.get(e).getMtag())
.collect(Collectors.groupingBy(mtag -> mtag, Collectors.counting()));
// Pick the most frequent tag; the cluster has at least three points, so a maximum exists.
String tag = tagGroup.entrySet().stream().max(Map.Entry.comparingByValue()).map(Map.Entry::getKey).get();
// Build the template.
String title = Tools.filterSymbol(result.getClusterName());
aggregationTitleTagMap.put(title, new TemplateTitleVo(title, tag, sourceList.get(result.getDataPoints().get(0)).getUrl()));
}
return aggregationTitleTagMap;
}
/**
 * Loads a project's manually marked documents for a marking-time window.
 *
 * <p>Filters applied: {@code mtime} within {@code [startTime, endTime]}, {@code time} between one
 * calendar day ago and {@code endTime}, {@code mgroup} phrase-matching the project, marks made by
 * the auto-marking robot excluded, and two {@code c2} codes excluded.
 *
 * @param mgroup    project
 * @param startTime lower bound of the marking time
 * @param endTime   upper bound of both the marking time and the article time
 * @return the {@code _source} maps of the matching documents
 * @throws IOException if the search fails
 */
private List<Map<String, Object>> findRecentTimeData(String mgroup, Long startTime, Long endTime) throws IOException {
    Calendar oneDayAgo = Calendar.getInstance();
    oneDayAgo.add(Calendar.DAY_OF_MONTH, -1);

    BoolQueryBuilder filter = QueryBuilders.boolQuery()
            // Article time within the last day.
            .must(QueryBuilders.rangeQuery("time").from(oneDayAgo.getTime().getTime()).to(endTime))
            // Marking time within the requested window.
            .must(QueryBuilders.rangeQuery("mtime").from(startTime).to(endTime))
            .must(QueryBuilders.matchPhraseQuery("mgroup", mgroup))
            // Exclude data marked by the automation robot.
            .mustNot(autoRobotQueryBuilder())
            .mustNot(QueryBuilders.termQuery("c2", 25165824))
            .mustNot(QueryBuilders.termQuery("c2", 16777216));

    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    sourceBuilder.query(filter).size(10000)
            .fetchSource(new String[] { "ind_full_text", "mtime", "mtag", "mperson", "url","id"}, null);

    String[] indexes = esIndexes.getIndexes(Index.mark.name()).toArray(new String[]{});
    return esDao.afterSearch(indexes, sourceBuilder, 1000).stream()
            .map(SearchHit::getSourceAsMap)
            .collect(Collectors.toList());
}
/**
 * Converts raw ES source maps into {@code CompleteTextMark} objects, preserving their order.
 *
 * @param sourceMap raw documents
 * @return one {@code CompleteTextMark} per input map
 */
private List<CompleteTextMark> transferMark(List<Map<String, Object>> sourceMap) {
    List<CompleteTextMark> marks = new ArrayList<>();
    for (Map<String, Object> source : sourceMap) {
        marks.add(CompleteTextMark.restoreFromEs(source));
    }
    return marks;
}
/**
* 查询条件
* @return 标注人为自动标注机器人
*/
private QueryBuilder autoRobotQueryBuilder() {
return QueryBuilders.termQuery("mperson", GenericAttribute.AUTO_PERSON);
} }
} }
package com.zhiwei.middleware.automatic.server.service.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.CompleteText;
import com.zhiwei.base.entity.subclass.IncompleteText;
import com.zhiwei.base.entity.subclass.QAText;
import com.zhiwei.base.entity.subclass.Video;
import com.zhiwei.base.entity.subclass.mark.*;
import com.zhiwei.middleware.automatic.server.base.BulkTemplate;
import com.zhiwei.middleware.automatic.server.base.MarkCommonTemplate;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.dubbo.handle.DubboHandler;
import com.zhiwei.middleware.automatic.server.pojo.*;
import com.zhiwei.middleware.automatic.server.pojo.enums.InsertType;
import com.zhiwei.middleware.automatic.server.redis.RedissonUtil;
import com.zhiwei.middleware.automatic.server.service.UploadService;
import com.zhiwei.middleware.automatic.server.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.concurrent.*;
import java.util.stream.Collectors;
import static com.alibaba.fastjson.JSON.*;
@Service
public class UploadServiceImpl implements UploadService {
private static final Logger log = LogManager.getLogger(UploadServiceImpl.class);
/*
 * Source data sets of the upload module (key: group-id, value: data awaiting processing).
 * Within this class the map is only ever removed from (cleanUploadResult); nothing here adds entries.
 */
private static final Map<String, List<MarkUploadInfo>> downLoadDataSourceMap = new ConcurrentHashMap<>();
// Fixed pool of 16 threads; used both for the upload task itself (startUpload) and for its per-chunk work (uploadRes).
private static final ExecutorService UPLOAD_SERVICE = Executors.newFixedThreadPool(16);
private final RedissonUtil redissonUtil;
private final MarkCommonTemplate markCommonTemplate;
private final DubboHandler dubboHandler;
/**
 * Creates the service with its three collaborators.
 *
 * @param redissonUtil       Redis access used for source lists, result lists and the status bucket
 * @param markCommonTemplate conversion and search steps applied to each chunk of upload rows
 * @param dubboHandler       remote handler used for the existence checks in getDataType
 */
public UploadServiceImpl(RedissonUtil redissonUtil, MarkCommonTemplate markCommonTemplate,
DubboHandler dubboHandler) {
this.redissonUtil = redissonUtil;
this.markCommonTemplate = markCommonTemplate;
this.dubboHandler = dubboHandler;
}
/**
 * Stages the rows of an upload task in Redis.
 *
 * <p>{@code sourceStr} is gunzipped, parsed as a JSON array of strings and stored as a list under
 * {@code Tools.assembleKey(SOURCE_DATA, group-id)}. Any failure is logged and swallowed.
 *
 * @param group     project
 * @param id        task id
 * @param sourceStr gzip-compressed JSON array of row strings
 */
@Override
public void addUploadList(String group, String id, String sourceStr) {
    try {
        // Decompress and decode the payload.
        List<String> rows = parseArray(Tools.gunzip(sourceStr), String.class);
        String redisKey = Tools.assembleKey(GenericAttribute.SOURCE_DATA, group + "-" + id);
        redissonUtil.setList(redisKey, rows);
    } catch (Exception e) {
        log.error("addUploadList", e);
    }
}
/**
 * Starts an upload task asynchronously.
 *
 * <p>A fresh status object is published first; the conversion and caching then run on
 * {@code UPLOAD_SERVICE}. If that work throws, the status is set to {@code -1} and published again.
 *
 * @param rule upload rule carrying the project, task id and insert type
 */
@Override
public void startUpload(MarkUploadRule rule) {
    UploadStatus uploadStatus = new UploadStatus();
    // Publish the initial status before the background work starts.
    upsetUploadResult(rule.getGroup(), rule.getId(), uploadStatus);
    UPLOAD_SERVICE.submit(() -> runUpload(rule, uploadStatus));
}

/** Background part of startUpload: convert, group, cache, and record a failure status on error. */
private void runUpload(MarkUploadRule rule, UploadStatus uploadStatus) {
    try {
        // Convert the rows and group the results by their result type.
        Map<String, List<MarkUploadResult>> grouped = uploadRes(rule);
        int total = 0;
        for (List<MarkUploadResult> bucket : grouped.values()) {
            total += bucket.size();
        }
        uploadStatus.setTotalCount(total);
        // Conversion finished: cache the result sets and the final status.
        successUploadCache(rule.getGroup(), rule.getId(), uploadStatus, grouped, rule.getInsertType());
        log.info("上传任务项目:{},已完成", rule.getGroup());
    } catch (Exception e) {
        // Mark the task as failed.
        uploadStatus.setStatus(-1);
        upsetUploadResult(rule.getGroup(), rule.getId(), uploadStatus);
        log.error("startUpload-error", e);
    }
}
/**
 * Reads the status of an upload task.
 *
 * @param group project
 * @param id    task id
 * @return the status bucket's JSON content parsed into a map
 */
@Override
public Map<String, Object> getUploadStatus(String group, String id) {
    String statusKey = assembleKey(group, id, GenericAttribute.STATUS_SUFFIX);
    String statusJson = redissonUtil.getBucket(statusKey);
    return parseObject(statusJson);
}
/**
 * Returns one page of a task's cached result list.
 *
 * <p>The result map always has the keys {@code status}, {@code data} and {@code totalSize}. They
 * stay at {@code false}, {@code null} and {@code 0} when the list cannot be read. Note that
 * {@code isAsc}, {@code searchField} and {@code keyword} are not used by this implementation.
 *
 * @param group      project
 * @param id         task id
 * @param page       1-based page number
 * @param size       page size
 * @param uploadType which result list to read
 * @return map with the page serialised as JSON under {@code data}
 * @throws IllegalArgumentException if {@code uploadType} has no matching suffix
 */
@Override
public Map<String, Object> getUploadInfoList(String group, String id, int page, int size, boolean isAsc, String searchField, String keyword, UploadInfo.UploadType uploadType) {
    String suffix = matchSuffix(uploadType);
    Map<String, Object> response = new HashMap<>();
    response.put("status", false);
    response.put("data", null);
    response.put("totalSize", 0);
    try {
        String redisKey = assembleKey(group, id, suffix);
        // Inclusive index range of the requested page.
        int firstIndex = (page - 1) * size;
        int lastIndex = page * size - 1;
        List<String> pageItems = redissonUtil.getList(redisKey, firstIndex, lastIndex);
        long totalSize = redissonUtil.getListSize(redisKey);
        if (pageItems != null) {
            response.put("status", true);
            response.put("data", toJSONString(pageItems));
            response.put("totalSize", totalSize);
        }
    } catch (Exception e) {
        log.error("getUploadInfoList", e);
    }
    return response;
}
/**
 * Classifies a document by where it already exists.
 *
 * <p>The document is checked against the mark store first, then the public-opinion store, using
 * the entity classes that correspond to {@code typeB}.
 *
 * @param json  the document
 * @param typeB content type selecting the entity classes
 * @return {@code MARK} if found in the mark store, else {@code DW} if found in the
 *         public-opinion store, else {@code EXTERNAL}
 * @throws IllegalArgumentException if {@code typeB} is not one of the four supported types
 */
@Override
public UploadInfo.DataType getDataType(JSONObject json, ClassB.TypeB typeB) {
    Class<? extends CommonDO> markClazz;
    Class<? extends CommonDO> yuqingClazz;
    switch (typeB) {
        case COMPLETE:
            markClazz = CompleteTextMark.class;
            yuqingClazz = CompleteText.class;
            break;
        case INCOMPLETE:
            markClazz = IncompleteTextMark.class;
            yuqingClazz = IncompleteText.class;
            break;
        case VIDEO:
            markClazz = VideoMark.class;
            yuqingClazz = Video.class;
            break;
        case QA:
            markClazz = QATextMark.class;
            yuqingClazz = QAText.class;
            break;
        default:
            throw new IllegalArgumentException("TypeB-传参异常");
    }
    String raw = json.toJSONString();
    // Present in the mark store?
    boolean inMarkStore = dubboHandler.contains(parseObject(raw, markClazz).filterInfo());
    if (inMarkStore) {
        return UploadInfo.DataType.MARK;
    }
    // Present in the public-opinion store?
    boolean inYuqingStore = dubboHandler.contains(parseObject(raw, yuqingClazz).filterInfo());
    return inYuqingStore ? UploadInfo.DataType.DW : UploadInfo.DataType.EXTERNAL;
}
/**
 * Removes everything cached for an upload task: the in-memory source entry and the Redis keys
 * for the format-error, field-error, failed, success and status suffixes.
 *
 * @param group project
 * @param id    task id
 */
@Override
public void cleanUploadResult(String group, String id) {
    downLoadDataSourceMap.remove(group + "-" + id);
    String[] suffixes = {
            GenericAttribute.FORMAT_ERROR_SUFFIX,
            GenericAttribute.FIELD_ERROR_SUFFIX,
            GenericAttribute.FAILED_SUFFIX,
            GenericAttribute.SUCCESS_SUFFIX,
            GenericAttribute.STATUS_SUFFIX
    };
    for (String suffix : suffixes) {
        redissonUtil.deleteListByKey(assembleKey(group, id, suffix));
    }
    log.info("id:{}清理缓存完毕", id);
}
/**
 * Converts the staged rows of an upload task and groups the results by result type.
 *
 * <p>The rows are read from the list that addUploadList wrote, split into chunks of 100 and
 * processed by {@code asyncPoint} on {@code UPLOAD_SERVICE}; this method blocks until every
 * chunk has finished.
 *
 * <p>NOTE(review): startUpload already runs this method on {@code UPLOAD_SERVICE}, and the chunk
 * tasks are queued on that same fixed pool while this thread waits - confirm that enough
 * concurrent uploads cannot occupy every worker.
 *
 * @param markUploadRule upload rule
 * @return converted results keyed by {@code MarkUploadResult#getInfoType}
 * @throws IllegalStateException if no rows were staged for the task
 * @throws ExecutionException    if a chunk task failed
 * @throws InterruptedException  if the wait was interrupted
 */
private Map<String, List<MarkUploadResult>> uploadRes(MarkUploadRule markUploadRule) throws ExecutionException, InterruptedException {
    // Must be the exact key addUploadList stores under; reading the bare "group-id" key
    // would never find the staged rows.
    String key = Tools.assembleKey(GenericAttribute.SOURCE_DATA,
            listKey(markUploadRule.getGroup(), markUploadRule.getId()));
    List<MarkUploadInfo> sourceList = redissonUtil.getList(key)
            .stream().map(e -> JSONObject.parseObject(e).toJavaObject(MarkUploadInfo.class)).collect(Collectors.toList());
    if (sourceList.isEmpty()) {
        throw new IllegalStateException("please do this after [addUploadList] method!");
    }
    log.info("index:{},启动上传任务-{}条", key, sourceList.size());
    // Split into chunks and process them concurrently.
    List<List<MarkUploadInfo>> listSplit = Tools.spilt(sourceList, 100);
    List<CompletableFuture<List<MarkUploadResult>>> futures = listSplit.stream()
            .map(e -> CompletableFuture.supplyAsync(() -> asyncPoint(e, markUploadRule), UPLOAD_SERVICE))
            .collect(Collectors.toList());
    // Wait for every chunk; a failed chunk surfaces here as ExecutionException.
    CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).get();
    return futures.stream()
            .map(CompletableFuture::join)
            .flatMap(Collection::stream)
            .collect(Collectors.groupingBy(MarkUploadResult::getInfoType, Collectors.toList()));
}
/**
 * Processes one chunk of upload rows.
 *
 * <p>The steps run in a fixed order and share the same {@code results} objects: conversion,
 * text search for rows that failed conversion, data-source classification, then a secondary
 * search over the rows that are successful at that point.
 *
 * @param infos rows of this chunk
 * @param rule  upload rule passed to the search steps
 * @return one result per row, including unsuccessful ones
 */
private List<MarkUploadResult> asyncPoint(List<MarkUploadInfo> infos, MarkUploadRule rule) {
// Convert the raw rows into results.
BulkTemplate<MarkUploadResult> bulkTemplate = new BulkTemplate<>(markCommonTemplate.dataTransform(infos), "初始化");
List<MarkUploadResult> results = new ArrayList<>(bulkTemplate.getSource());
// Reset the bulk template to the rows that failed conversion.
bulkTemplate.clean(results.stream().filter(e -> !e.isSuccess()).collect(Collectors.toList()), "url查询");
// Run a text search for the rows that failed conversion.
markCommonTemplate.textSearch(bulkTemplate, rule);
// Set the data-source information on every result.
markCommonTemplate.uploadType(results);
// Only successfully converted rows go on to the next step.
List<MarkUploadResult> completeData = results.stream().filter(MarkUploadResult::isSuccess).collect(Collectors.toList());
// Secondary search, depending on the data source.
markCommonTemplate.secondarySearch(completeData, bulkTemplate, rule);
return results;
// return infos.stream().map(uploadInfo -> markCommonTemplate.uploadResult(uploadInfo, rule)).collect(Collectors.toList());
}
/**
 * Caches the converted results of a finished upload and publishes the final status.
 *
 * <p>Each recognised result type (format error, field error, success, failed) is stored as its
 * own Redis list and counted on {@code uploadStatus}; successful results are additionally passed
 * to {@code sendMarker}. Unrecognised result types are ignored. Finally the status is set to
 * {@code 1} and written to the status bucket.
 *
 * @param group        project
 * @param id           task id
 * @param uploadStatus status object updated in place
 * @param data         converted results keyed by result type
 * @param insertType   insert type forwarded to {@code sendMarker}
 */
private void successUploadCache(String group, String id, UploadStatus uploadStatus,
                                Map<String, List<MarkUploadResult>> data, InsertType insertType) {
    for (Map.Entry<String, List<MarkUploadResult>> entry : data.entrySet()) {
        String resultType = entry.getKey();
        List<MarkUploadResult> results = entry.getValue();
        if (GenericAttribute.FORMAT_ERROR_SUFFIX.equals(resultType)) {
            // Format errors.
            cacheResults(group, id, GenericAttribute.FORMAT_ERROR_SUFFIX, results);
            uploadStatus.setFormatErrorCount(results.size());
        } else if (GenericAttribute.FIELD_ERROR_SUFFIX.equals(resultType)) {
            // Field errors.
            cacheResults(group, id, GenericAttribute.FIELD_ERROR_SUFFIX, results);
            uploadStatus.setFieldErrorCount(results.size());
        } else if (GenericAttribute.SUCCESS_SUFFIX.equals(resultType)) {
            // Successful uploads: cache them, then pass their mark infos on.
            cacheResults(group, id, GenericAttribute.SUCCESS_SUFFIX, results);
            uploadStatus.setSuccessCount(results.size());
            List<MarkInfo> markInfos = results.stream().map(MarkUploadResult::getMarkInfo).collect(Collectors.toList());
            sendMarker(markInfos, insertType);
        } else if (GenericAttribute.FAILED_SUFFIX.equals(resultType)) {
            // Failed uploads.
            cacheResults(group, id, GenericAttribute.FAILED_SUFFIX, results);
            uploadStatus.setFailedCount(results.size());
        }
    }
    // Publish the final status.
    uploadStatus.setStatus(1);
    redissonUtil.setBucket(assembleKey(group, id, GenericAttribute.STATUS_SUFFIX), toJSONString(uploadStatus));
}

/** Stores one result set, converted to upload infos and gzip-encoded, under the task's key for {@code suffix}. */
private void cacheResults(String group, String id, String suffix, List<MarkUploadResult> results) {
    String redisKey = assembleKey(group, id, suffix);
    List<UploadInfo> uploadInfos = results.stream().map(this::setUploadInfo).collect(Collectors.toList());
    redissonUtil.setList(redisKey, Tools.gzipWithUploadInfoList(uploadInfos));
}
/**
 * Intended to send mark infos to the marking middleware in batches of 1000.
 *
 * <p>NOTE(review): both dispatch calls inside the loop are commented out, so this method
 * currently splits the list and sends nothing - confirm whether that is deliberate.
 *
 * @param markInfos  mark infos to send
 * @param insertType insert type; only referenced by the disabled code
 */
private void sendMarker(List<MarkInfo> markInfos, InsertType insertType) {
List<List<MarkInfo>> lists = Tools.spilt(markInfos, 1000);
for (List<MarkInfo> infos : lists) {
try {
// if (InsertType.MARK.equals(insertType)) {
// dubboHandler.markUpsert(infos);
// } else {
// dubboHandler.pluginUpsertBack(infos);
// }
} catch (Exception e) {
// Logs the URLs of the affected batch together with the failure.
log.error("标注上传-数据url:{},发送到标注中间件错误:", infos.stream().map(json -> json.getSourceObj().getString("url")).collect(Collectors.joining("|")), e);
}
}
}
/**
 * Writes the upload status of a task to its Redis status bucket.
 *
 * <p>Does nothing when {@code uploadStatus} is {@code null}.
 *
 * @param group        project
 * @param id           task id
 * @param uploadStatus status to publish, serialised as JSON
 */
private void upsetUploadResult(String group, String id,
                               UploadStatus uploadStatus) {
    if (uploadStatus == null) {
        return;
    }
    String statusKey = assembleKey(group, id, GenericAttribute.STATUS_SUFFIX);
    redissonUtil.setBucket(statusKey, toJSONString(uploadStatus));
}
/**
 * Converts a conversion result into the {@code UploadInfo} that is cached for the client.
 *
 * @param result conversion result
 * @return upload info carrying the result's data type, content type, origin data, result type
 *         (as {@code error}), the dw/mark pair and the message
 */
private UploadInfo setUploadInfo(MarkUploadResult result) {
    UploadInfo info = new UploadInfo();
    info.setDataType(result.getDataType());
    info.setTypeB(result.getTypeB());
    info.setOriginData(result.getOriginData());
    info.setError(result.getInfoType());
    UploadInfo.CompoundCommonDO compound = new UploadInfo.CompoundCommonDO(result.getDw(), result.getMark());
    info.setCompound(compound);
    info.setErrorMsg(result.getMessage());
    return info;
}
/**
 * Builds a Redis key: {@code GenericAttribute.UNIFIED_PREFIX} followed by the parts joined with {@code ":"}.
 *
 * @param keys key parts, in order
 * @return the assembled key
 */
private String assembleKey(String... keys) {
    return new StringBuilder(GenericAttribute.UNIFIED_PREFIX)
            .append(String.join(":", keys))
            .toString();
}
/** Returns the task index {@code group-id}, i.e. the project and task id joined with a hyphen. */
private String listKey(String group, String id) {
return group + "-" + id;
}
/**
 * Maps an upload result type to the Redis key suffix of its result list.
 *
 * @param uploadType result type
 * @return the matching {@code GenericAttribute} suffix
 * @throws IllegalArgumentException if the type has no suffix
 */
private String matchSuffix(UploadInfo.UploadType uploadType) {
    String suffix;
    switch (uploadType) {
        case FORMAR_ERROR:
            suffix = GenericAttribute.FORMAT_ERROR_SUFFIX;
            break;
        case FIELD_ERROR:
            suffix = GenericAttribute.FIELD_ERROR_SUFFIX;
            break;
        case FAILED:
            suffix = GenericAttribute.FAILED_SUFFIX;
            break;
        case SUCCESS:
            suffix = GenericAttribute.SUCCESS_SUFFIX;
            break;
        default:
            throw new IllegalArgumentException("UploadType匹配异常");
    }
    return suffix;
}
}
package com.zhiwei.middleware.automatic.server.util; package com.zhiwei.middleware.automatic.server.util;
import com.zhiwei.middleware.automatic.server.config.GlobalPojo; import com.alibaba.fastjson.JSONObject;
import java.io.UnsupportedEncodingException; import java.io.*;
import java.util.*; import java.util.*;
public class CosineSimilarity { public class CosineSimilarity {
private static final List<String> BRAND_WORDS = new ArrayList<>();
private static final String path = "classpath:static/brandWords.json";
static {
try {
InputStream inputStream = new FileInputStream(path);
String jsonStr = readJsonFile(inputStream);
if (null != jsonStr) {
List<List> array = JSONObject.parseArray(jsonStr, List.class);
for (List str : array) {
BRAND_WORDS.addAll(str);
}
}
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
public static double calculateTextSimWithBrand(String doc1, String doc2) { public static double calculateTextSimWithBrand(String doc1, String doc2) {
if (handleByLength(doc1, doc2) && compareWithBrand(doc1, doc2)) { if (handleByLength(doc1, doc2) && compareWithBrand(doc1, doc2)) {
return calculateSimilar(doc1, doc2); return calculateSimilar(doc1, doc2);
...@@ -121,12 +140,12 @@ public class CosineSimilarity { ...@@ -121,12 +140,12 @@ public class CosineSimilarity {
* @return 关键字数量是否一致 * @return 关键字数量是否一致
*/ */
private static boolean compareWithBrand(String doc1, String doc2) { private static boolean compareWithBrand(String doc1, String doc2) {
if (null == GlobalPojo.BRAND_WORDS || GlobalPojo.BRAND_WORDS.isEmpty()) { if (null == BRAND_WORDS || BRAND_WORDS.isEmpty()) {
return true; return true;
} }
Set<String> set1 = new HashSet<>(); Set<String> set1 = new HashSet<>();
Set<String> set2 = new HashSet<>(); Set<String> set2 = new HashSet<>();
for (String brand : GlobalPojo.BRAND_WORDS) { for (String brand : BRAND_WORDS) {
if (doc1.contains(brand)) { if (doc1.contains(brand)) {
set1.add(brand); set1.add(brand);
} }
...@@ -149,4 +168,25 @@ public class CosineSimilarity { ...@@ -149,4 +168,25 @@ public class CosineSimilarity {
public static boolean isHanZi(char ch) { public static boolean isHanZi(char ch) {
return (ch >= 0x4E00 && ch <= 0x9FA5); return (ch >= 0x4E00 && ch <= 0x9FA5);
} }
/**
 * Reads a stream fully as UTF-8 text and closes it.
 *
 * @param fileInputStream stream to read; may be {@code null}
 * @return the decoded content, or {@code null} when the stream is {@code null} or reading fails
 */
public static String readJsonFile(InputStream fileInputStream) {
    if (fileInputStream == null) {
        return null;
    }
    // try-with-resources closes the reader (and the wrapped stream) even when a read fails.
    try (Reader reader = new InputStreamReader(fileInputStream, java.nio.charset.StandardCharsets.UTF_8)) {
        StringBuilder sb = new StringBuilder();
        char[] buffer = new char[4096];
        int read;
        while ((read = reader.read(buffer)) != -1) {
            sb.append(buffer, 0, read);
        }
        return sb.toString();
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
} }
package com.zhiwei.middleware.automatic.server.util;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.CompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.IncompleteTextMark;
import com.zhiwei.base.entity.subclass.mark.QATextMark;
import com.zhiwei.base.entity.subclass.mark.VideoMark;
import com.zhiwei.base.filter.FilterInfo;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.enums.Fields;
import com.zhiwei.middleware.automatic.server.pojo.vo.TemplateFatherVo;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import static com.zhiwei.middleware.automatic.server.config.GenericAttribute.SON_ID;
public class DataCollectionUtil {
/**
 * Counts how often {@code word} occurs in {@code text}; overlapping occurrences are counted.
 *
 * @param word word to look for
 * @param text text to search
 * @return number of occurrences; {@code 0} when {@code word} is null or empty or {@code text} is null
 */
public static int calculateRate(String word, String text) {
    // A null text used to throw NullPointerException; treat it as "no occurrences".
    if (word == null || word.isEmpty() || text == null) {
        return 0;
    }
    int rate = 0;
    // Resume one character after each hit so overlapping matches are still found.
    for (int hit = text.indexOf(word); hit >= 0; hit = text.indexOf(word, hit + 1)) {
        rate++;
    }
    return rate;
}
/**
 * Converts JSON documents into filter infos of the mark entity matching {@code typeB}.
 *
 * <p>Each input object is modified: its {@code mgroup} field is set to {@code group} before it
 * is re-parsed into the entity class.
 *
 * @param list  documents to convert
 * @param group project written into every document
 * @param typeB content type selecting the entity class
 * @return one filter info per document, in input order
 * @throws IllegalArgumentException if {@code typeB} is not one of the four supported types
 */
public static List<FilterInfo> changeJSONList2FilterInfoList(List<JSONObject> list, String group, ClassB.TypeB typeB) {
    Class<? extends CommonDO> entityClass;
    switch (typeB) {
        case INCOMPLETE:
            entityClass = IncompleteTextMark.class;
            break;
        case COMPLETE:
            entityClass = CompleteTextMark.class;
            break;
        case QA:
            entityClass = QATextMark.class;
            break;
        case VIDEO:
            entityClass = VideoMark.class;
            break;
        default:
            throw new IllegalArgumentException("未能解析到的typeB类型:" + typeB);
    }
    List<FilterInfo> filterInfos = new ArrayList<>();
    for (JSONObject document : list) {
        document.put("mgroup", group);
        filterInfos.add(JSONObject.parseObject(document.toJSONString(), entityClass).filterInfo());
    }
    return filterInfos;
}
/**
 * Prepares documents for insertion, modifying them in place.
 *
 * <p>Removes the {@code SON_ID} field and sets {@code mgroup}, {@code mtag}, {@code mperson}
 * (falling back to {@code GenericAttribute.AUTO_PERSON} when {@code mperson} is empty) and the
 * fixed {@code cid}/{@code cname} values.
 *
 * @param list    documents to complete
 * @param group   project
 * @param mtag    tag
 * @param mperson marking person; may be null or empty
 */
public static void supplementForInsert(List<JSONObject> list, String group, String mtag, String mperson) {
    for (JSONObject document : list) {
        document.remove(SON_ID);
        document.put("mgroup", group);
        document.put("mtag", mtag);
        document.put("mperson", StringUtils.isEmpty(mperson) ? GenericAttribute.AUTO_PERSON : mperson);
        // Fixed fields.
        document.put("cid", GenericAttribute.AUTO_CID);
        document.put("cname", GenericAttribute.AUTO_CNAME);
    }
}
/**
 * Tells whether a collected cluster carries a tag, judged by its example document.
 *
 * @param fatherVo cluster to inspect; may be {@code null}
 * @param fields   field-name set whose {@code mtag} name is looked up in the example
 * @return {@code true} when {@code fatherVo} is non-null and its example has a non-null tag value
 */
public static boolean hasTag(TemplateFatherVo fatherVo, Fields fields) {
    return null != fatherVo && null != fatherVo.getExample().getString(fields.mtag);
}
/**
 * Filters clusters by keyword expression and marked state.
 *
 * <p>{@code markFlag} 0 keeps both marked and unmarked clusters, 1 keeps only marked ones, any
 * other value keeps only unmarked ones. A cluster counts as marked when its example has a
 * non-empty value under {@code Fields.COMPLETE.mtag}. An empty {@code character} disables the
 * keyword filter.
 *
 * @param list      clusters to filter; {@code null} yields an empty list
 * @param character keyword expression understood by {@code cutKeyword}
 * @param isTitle   {@code true} to match the title only, {@code false} to match title plus content
 * @param markFlag  marked-state filter as described above
 * @return the matching clusters, in input order
 */
public static List<TemplateFatherVo> fuzzyMatch(List<TemplateFatherVo> list, String character, boolean isTitle,
                                                int markFlag) {
    if (null == list) {
        return Collections.emptyList();
    }
    // null means "do not filter on the marked state".
    Boolean wantMarked;
    if (markFlag == 0) {
        wantMarked = null;
    } else {
        wantMarked = markFlag == 1;
    }
    // Split the keyword expression; stays null when no keyword was given.
    List<List<String>> keywordGroups = StringUtils.isEmpty(character) ? null : cutKeyword(character);

    List<TemplateFatherVo> matches = new ArrayList<>();
    for (TemplateFatherVo candidate : list) {
        // Marked state of the existing cluster.
        boolean marked = !StringUtils.isEmpty(candidate.getExample().getString(Fields.COMPLETE.mtag));
        String haystack = isTitle
                ? candidate.getTitle()
                : candidate.getTitle() + candidate.getContent();
        if (null != wantMarked && wantMarked != marked) {
            continue;
        }
        if (isHit(keywordGroups, haystack)) {
            matches.add(candidate);
        }
    }
    return matches;
}
/**
 * Parses a keyword expression into OR-groups of AND-terms.
 *
 * <p>{@code "|"} separates alternatives and a space separates terms that must all match, so
 * {@code "a b|c"} becomes {@code [[a, b], [c]]}. Empty terms and empty alternatives are dropped:
 * an empty term or group is satisfied by every title in {@code isHit}, so input such as
 * {@code "|a"} or {@code "a| |b"} used to match everything.
 *
 * @param keyword keyword expression; may be null or blank
 * @return the parsed groups; empty when {@code keyword} is null or blank
 */
public static List<List<String>> cutKeyword(String keyword) {
    List<List<String>> fuzzyList = new ArrayList<>();
    if (keyword == null || keyword.chars().allMatch(Character::isWhitespace)) {
        return fuzzyList;
    }
    // "|" separates the OR alternatives.
    for (String any : keyword.split("\\|")) {
        List<String> terms = new ArrayList<>();
        // A space separates the AND terms inside one alternative.
        for (String and : any.split(" ")) {
            if (!and.isEmpty()) {
                terms.add(and);
            }
        }
        if (!terms.isEmpty()) {
            fuzzyList.add(terms);
        }
    }
    return fuzzyList;
}
/**
 * Checks a title against parsed keyword groups.
 *
 * <p>The title is a hit when at least one group has all of its terms contained in the title.
 *
 * @param fuzzyList OR-groups of AND-terms; {@code null} means "no keyword filter"
 * @param title     text to test
 * @return {@code true} when {@code fuzzyList} is null; otherwise {@code false} when {@code title}
 *         is null, else whether any group matches completely
 */
public static boolean isHit(List<List<String>> fuzzyList, String title) {
    if (fuzzyList == null) {
        return true;
    }
    if (title == null) {
        return false;
    }
    // Any alternative whose terms are all present makes the title a hit.
    return fuzzyList.stream()
            .anyMatch(terms -> terms.stream().allMatch(title::contains));
}
/**
 * Sorts the clusters by numeric father id, largest first, and returns one page of them.
 *
 * <p>The passed list itself is sorted in place.
 *
 * @param list clusters to sort and page
 * @param page page number forwarded to {@code Tools.listPagedQuery}
 * @param size page size forwarded to {@code Tools.listPagedQuery}
 * @return the requested page
 */
public static List<TemplateFatherVo> getList(List<TemplateFatherVo> list, int page, int size) {
    list.sort((left, right) -> {
        Double rightId = Double.valueOf(right.getFatherId());
        Double leftId = Double.valueOf(left.getFatherId());
        // Arguments are swapped to get descending order.
        return Double.compare(rightId, leftId);
    });
    return Tools.listPagedQuery(list, page, size);
}
}
package com.zhiwei.middleware.automatic.server.util;
import com.zhiwei.base.category.*;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.CompleteText;
import com.zhiwei.base.entity.subclass.IncompleteText;
import com.zhiwei.base.entity.subclass.QAText;
import com.zhiwei.base.entity.subclass.Video;
import com.zhiwei.base.utils.MessageTypeUtils;
import com.zhiwei.middleware.automatic.server.pojo.MarkUploadInfo;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
public class DataUploadUtil {
/**
 * Fills the C1-C5 classification fields using the default platform/source mapping.
 *
 * <p>The classification is computed on a re-parsed copy; its C1-C5 values are copied onto
 * {@code commonDO}, {@code commonDO.foreign} is set to 1 for the platform "外媒" and 0 otherwise,
 * and {@code commonDO.realSource} is taken from the upload info when the copy has none.
 *
 * <p>NOTE(review): the method returns the re-parsed copy, not {@code commonDO}; the foreign flag
 * and the real-source fallback are applied to {@code commonDO} only - confirm which object
 * callers are expected to use.
 *
 * @param commonDO       document to classify; modified in place
 * @param markUploadInfo upload row providing platform, source and real source
 * @return the re-parsed, classified copy
 */
public static CommonDO defaultCTypeAll(CommonDO commonDO, MarkUploadInfo markUploadInfo) {
    String platform = markUploadInfo.getPlatform();
    CommonDO classified = selfAdaptionCTypeAll(commonDO, platform, markUploadInfo.getSource());
    commonDO.setC1(classified.getC1());
    commonDO.setC2(classified.getC2());
    commonDO.setC3(classified.getC3());
    commonDO.setC4(classified.getC4());
    commonDO.setC5(classified.getC5());
    int foreignFlag = "外媒".equals(platform) ? 1 : 0;
    commonDO.setForeign(foreignFlag);
    if (StringUtils.isEmpty(classified.getRealSource())) {
        commonDO.setRealSource(markUploadInfo.getRealSource());
    }
    return classified;
}
/**
 * Derives the C1-C5 classification of a document.
 *
 * <p>The platform/source pair selects a default class whose {@code typeB} decides which entity
 * the document is re-parsed into. URLs containing {@code maimai.cn} (complete text) or
 * {@code weishi.qq.com} (video) get a hard-coded class and real source; every other document is
 * classified from its URL by {@code MessageTypeUtils}. When that leaves C2 unset, the default
 * class is applied.
 *
 * @param commonDO document to classify; only read here
 * @param platform platform name
 * @param source   source name
 * @return the re-parsed document with C1-C5 set
 * @throws IllegalStateException if the default class has an unsupported {@code typeB}
 */
private static CommonDO selfAdaptionCTypeAll(CommonDO commonDO, String platform, String source) {
    ClassD defaultClass = ClassCodec.decodeClassD(getEndoceByPlatformAndSource(platform, source));
    CommonDO classified;
    switch (defaultClass.typeB()) {
        case COMPLETE:
            CompleteText completeText = CompleteText.restoreFromEs(commonDO.toJSON());
            // Maimai needs special handling.
            if (completeText.getUrl().contains("maimai.cn")) {
                ClassD maimai = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.COMPLETE).selectC(ClassC.TypeC.UGC)
                        .selectD(ClassD.TypeD.脉脉行业头条);
                completeText.setRealSource("脉脉");
                completeText.setForeign(0);
                copyClassCodes(completeText, maimai);
                classified = completeText;
            } else {
                classified = MessageTypeUtils.setCompleteRealSourceByUrl(CompleteText.restoreFromEs(commonDO.toJSON()));
            }
            break;
        case INCOMPLETE:
            classified = MessageTypeUtils.setIncompleteRealSourceByUrl(IncompleteText.restoreFromEs(commonDO.toJSON()));
            break;
        case QA:
            classified = MessageTypeUtils.setQATextRealSourceByUrl(QAText.restoreFromEs(commonDO.toJSON()));
            break;
        case VIDEO:
            Video video = Video.restoreFromEs(commonDO.toJSON());
            if (video.getUrl().contains("weishi.qq.com")) {
                ClassD weishi = ClassA.selectA(ClassA.TypeA.VIDEO).selectB(ClassB.TypeB.VIDEO).selectC(ClassC.TypeC.UGC)
                        .selectD(ClassD.TypeD.短视频);
                video.setRealSource("微视");
                video.setForeign(0);
                copyClassCodes(video, weishi);
                classified = video;
            } else {
                classified = MessageTypeUtils.setVideoRealSourceByUrl(Video.restoreFromEs(commonDO.toJSON()));
            }
            break;
        default:
            throw new IllegalStateException("selfAdaptionCTypeAll-无法识别的c2类型");
    }
    if (null == classified.getC2()) {
        // The URL did not identify the platform: fall back to the default class.
        copyClassCodes(classified, defaultClass);
    }
    return classified;
}

/** Writes the five encoded levels of {@code codes} into C1-C5 of {@code target}. */
private static void copyClassCodes(CommonDO target, ClassD codes) {
    target.setC1(codes.encodeA());
    target.setC2(codes.encodeB());
    target.setC3(codes.encodeC());
    target.setC4(codes.encodeD());
    target.setC5(codes.combineEncode());
}
/**
 * Maps a platform name (refined by the source name for some platforms) to the combined
 * classification code returned by {@code combineEncode()}.
 * <p>
 * Unknown platforms, a {@code null} platform, and the platforms "网媒", "平媒", "自媒体" and
 * "外媒" all yield the default code (TEXT / COMPLETE / UGC / 网媒). A {@code null} source is
 * treated as "no refinement" instead of raising a {@link NullPointerException}.
 *
 * @param platform platform name; may be {@code null}
 * @param source source name, only consulted for "今日头条" and "知乎"; may be {@code null}
 * @return the combined classification code
 */
public static int getEndoceByPlatformAndSource(String platform, String source) {
    // Default: web media
    Integer encode = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.COMPLETE).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.网媒)
            .combineEncode();
    if (null == platform) {
        // switch on a null String would throw; an unknown platform already maps to the default
        return encode;
    }
    switch (platform) {
    case "微博":
        encode = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.INCOMPLETE).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.新浪微博)
                .combineEncode();
        break;
    case "微信":
        encode = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.COMPLETE).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.微信公众号)
                .combineEncode();
        break;
    case "今日头条":
        if ("微头条".equals(source)) {
            encode = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.INCOMPLETE).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.微头条)
                    .combineEncode();
        } else if (null != source && source.contains("问答")) {
            encode = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.QA).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.悟空问答)
                    .combineEncode();
        } else if (null != source && source.contains("视频")) {
            encode = ClassA.selectA(ClassA.TypeA.VIDEO).selectB(ClassB.TypeB.VIDEO).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.西瓜视频)
                    .combineEncode();
        }
        break;
    case "知乎":
        if ("知乎专栏".equals(source)) {
            encode = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.COMPLETE).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.知乎专栏)
                    .combineEncode();
        } else {
            encode = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.QA).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.知乎)
                    .combineEncode();
        }
        break;
    case "问答":
        encode = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.QA).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.问答).combineEncode();
        break;
    case "贴吧论坛":
        encode = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.QA).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.论坛).combineEncode();
        break;
    case "视频":
        encode = ClassA.selectA(ClassA.TypeA.VIDEO).selectB(ClassB.TypeB.VIDEO).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.网媒)
                .combineEncode();
        break;
    case "抖音":
        encode = ClassA.selectA(ClassA.TypeA.VIDEO).selectB(ClassB.TypeB.VIDEO).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.抖音)
                .combineEncode();
        break;
    case "Twitter":
        encode = ClassA.selectA(ClassA.TypeA.TEXT).selectB(ClassB.TypeB.INCOMPLETE).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.TWITTER)
                .combineEncode();
        break;
    case "短视频":
        encode = ClassA.selectA(ClassA.TypeA.VIDEO).selectB(ClassB.TypeB.VIDEO).selectC(ClassC.TypeC.UGC).selectD(ClassD.TypeD.短视频)
                .combineEncode();
        break;
    case "网媒":
    case "平媒":
    case "自媒体":
    case "外媒":
    default:
        // These platforms share the default code assigned above
        break;
    }
    return encode;
}
}
package com.zhiwei.middleware.automatic.server.util;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.nlp.vo.KResult;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
/**
 * Static helpers for event collection: stamping fixed collection fields onto records, merging
 * and applying noise sets, keyword matching on titles, and propagating tags inside clusters.
 */
public class EventCollectionUtil {

    /** Fixed collection id written to the {@code cid} field of supplemented records. */
    private static final int SUPPLEMENT_CID = 100040002;

    /** Fixed collection name written to the {@code cname} field of supplemented records. */
    private static final String SUPPLEMENT_CNAME = "上传标注补充采集";

    /**
     * Writes marker, group and the fixed collection fields into the source object of every item.
     *
     * @param list items whose {@code getSourceObj()} is updated in place
     * @param group value for the {@code mgroup} field
     * @param mperson value for the {@code mperson} field
     */
    public static void supplementForMarkInfoList(List<MarkInfo> list, String group, String mperson) {
        for (MarkInfo info : list) {
            supplement(info.getSourceObj(), group, mperson);
        }
    }

    /**
     * Writes marker, group and the fixed collection fields into every JSON object.
     *
     * @param list objects updated in place
     * @param group value for the {@code mgroup} field
     * @param mperson value for the {@code mperson} field
     */
    public static void supplementForInsert(List<JSONObject> list, String group, String mperson) {
        for (JSONObject obj : list) {
            supplement(obj, group, mperson);
        }
    }

    /** Sets {@code mperson}, {@code mgroup}, {@code cid} and {@code cname} on one object. */
    private static void supplement(JSONObject obj, String group, String mperson) {
        obj.put("mperson", mperson);
        obj.put("mgroup", group);
        // Fixed fields
        obj.put("cid", SUPPLEMENT_CID);
        obj.put("cname", SUPPLEMENT_CNAME);
    }

    /**
     * Merges an old and a new noise set. The longer list is copied first; entries of the shorter
     * list are appended only when their {@code title} does not occur in the longer list.
     *
     * @param list1 first noise list; {@code null} is treated as empty
     * @param list2 second noise list; {@code null} is treated as empty
     * @return a new list holding the merged entries
     */
    public static List<JSONObject> mergeNoiseList(List<JSONObject> list1, List<JSONObject> list2) {
        List<JSONObject> first = null == list1 ? Collections.<JSONObject>emptyList() : list1;
        List<JSONObject> second = null == list2 ? Collections.<JSONObject>emptyList() : list2;
        List<JSONObject> resList = new ArrayList<>();
        if (first.size() >= second.size()) {
            addMergeNoiseList(resList, first, second);
        } else {
            addMergeNoiseList(resList, second, first);
        }
        return resList;
    }

    /** Adds all of {@code longList}, then the {@code shortList} entries with an unseen title. */
    private static void addMergeNoiseList(List<JSONObject> resList, List<JSONObject> longList,
            List<JSONObject> shortList) {
        Set<String> knownTitles = longList.stream().map(json -> json.getString("title")).collect(Collectors.toSet());
        resList.addAll(longList);
        for (JSONObject json : shortList) {
            // getString on both sides, so a non-String title value is compared the same way it was stored
            if (!knownTitles.contains(json.getString("title"))) {
                resList.add(json);
            }
        }
    }

    /**
     * Tells whether this part of an event collection is tagged, judged by its first element only.
     *
     * @param list records to inspect; may be {@code null}
     * @return {@code true} when the first record has a non-empty {@code mtag}
     */
    public static boolean hasTag(List<JSONObject> list) {
        return null != list && !list.isEmpty() && !StringUtils.isEmpty(list.get(0).getString("mtag"));
    }

    /**
     * Filters a title-keyed map by a keyword expression: "|" separates alternatives and a single
     * space separates terms that must all occur in the title.
     *
     * @param map title-keyed map; returned unchanged when {@code null}
     * @param character keyword expression; the map is returned unchanged when it is empty
     * @return a new map with the entries whose title satisfies at least one alternative
     */
    public static Map<String, List<JSONObject>> fuzzyMatch(Map<String, List<JSONObject>> map, String character) {
        if (StringUtils.isEmpty(character) || null == map) {
            return map;
        }
        // Split on "|" (or), then on " " (and)
        List<List<String>> fuzzyList = new ArrayList<>();
        for (String any : character.split("\\|")) {
            fuzzyList.add(Arrays.asList(any.split(" ")));
        }
        Map<String, List<JSONObject>> res = new HashMap<>();
        for (Map.Entry<String, List<JSONObject>> entry : map.entrySet()) {
            if (matchesAnyGroup(entry.getKey(), fuzzyList)) {
                res.put(entry.getKey(), entry.getValue());
            }
        }
        return res;
    }

    /** Returns {@code true} when the title contains every term of at least one group. */
    private static boolean matchesAnyGroup(String title, List<List<String>> groups) {
        for (List<String> ands : groups) {
            boolean allPresent = true;
            for (String and : ands) {
                if (!title.contains(and)) {
                    allPresent = false;
                    break;
                }
            }
            if (allPresent) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decodes a Redis hash whose values were written as gzip-compressed JSON arrays of JSON strings.
     *
     * @param map field-to-compressed-value map; {@code null} yields an empty result
     * @return field-to-records map; a value that cannot be decompressed yields an empty list
     */
    public static Map<String, List<JSONObject>> parseFromRedisHmStrWithGunZip(Map<String, String> map) {
        Map<String, List<JSONObject>> resMap = new HashMap<>();
        if (null == map) {
            return resMap;
        }
        for (Map.Entry<String, String> entry : map.entrySet()) {
            List<JSONObject> innerList = new ArrayList<>();
            // gunzip returns null for null/corrupt input, and parseArray then returns null as well
            List<String> list = JSONArray.parseArray(Tools.gunzip(entry.getValue()), String.class);
            if (null != list) {
                for (String str : list) {
                    innerList.add(JSONObject.parseObject(str));
                }
            }
            resMap.put(entry.getKey(), innerList);
        }
        return resMap;
    }

    /**
     * Drops source records whose title is too similar to any title in the noise set.
     *
     * @param noiseMap noise records grouped by key; when {@code null} or empty nothing is removed
     * @param sourceList records to filter
     * @param cosFreq similarity threshold; a score at or above it marks a record as noise
     * @return {@code sourceList} itself when there is no noise, otherwise a new filtered list
     */
    public static List<MarkInfo> reduceByNoise(Map<String, List<JSONObject>> noiseMap, List<MarkInfo> sourceList,
            double cosFreq) {
        if (null == noiseMap || noiseMap.isEmpty()) {
            return sourceList;
        }
        Set<String> noiseTitles = new HashSet<>();
        for (List<JSONObject> list : noiseMap.values()) {
            for (JSONObject json : list) {
                noiseTitles.add(Tools.filterSymbol(json.getString("title")));
            }
        }
        List<MarkInfo> resList = new ArrayList<>();
        for (MarkInfo info : sourceList) {
            String title = Tools.filterSymbol(info.getSourceObj().getString("title"));
            if (!isNoise(title, noiseTitles, cosFreq)) {
                resList.add(info);
            }
        }
        return resList;
    }

    /** Returns {@code true} when the title reaches the threshold against any noise title. */
    private static boolean isNoise(String title, Set<String> noiseTitles, double cosFreq) {
        for (String noiseTitle : noiseTitles) {
            if (CosineSimilarity.calculateTextSimWithBrand(title, noiseTitle) >= cosFreq) {
                return true;
            }
        }
        return false;
    }

    /**
     * Registers every cluster name in {@code aggreeMap} with an empty list and, for clusters that
     * contain at least one tagged record, copies that tag to all records of the cluster.
     *
     * @param kResult clusters; their data points are indexes into {@code sourceList}
     * @param aggreeMap map receiving one empty list per cluster name
     * @param sourceList records addressed by the cluster data points
     * @return the number of records that belong to a cluster which had a tag
     */
    public static int restoreForAggreeTitleMap(List<KResult<Integer>> kResult, Map<String, List<JSONObject>> aggreeMap, List<MarkInfo> sourceList) {
        int tagged = 0;
        for (KResult<Integer> result : kResult) {
            aggreeMap.put(result.getClusterName(), new ArrayList<>());
            List<Integer> points = result.getDataPoints();
            String mtag = getMtag(points, sourceList);
            if (Objects.nonNull(mtag)) {
                for (Integer index : points) {
                    sourceList.get(index).getSourceObj().put("mtag", mtag);
                }
                tagged += points.size();
            }
        }
        return tagged;
    }

    /** Returns the first non-empty {@code mtag} among the indexed records, or {@code null}. */
    private static String getMtag(List<Integer> indexes, List<MarkInfo> sourceList) {
        for (Integer index : indexes) {
            MarkInfo markInfo = sourceList.get(index);
            if (Objects.isNull(markInfo)) {
                continue;
            }
            String mtag = markInfo.getSourceObj().getString("mtag");
            if (!Tools.isEmpty(mtag)) {
                return mtag;
            }
        }
        return null;
    }
}
package com.zhiwei.middleware.automatic.server.util; package com.zhiwei.middleware.automatic.server.util;
import com.zhiwei.base.category.ClassB; import com.zhiwei.base.category.ClassB;
import com.zhiwei.middleware.automatic.server.config.GenericAttribute; import com.zhiwei.middleware.automatic.server.common.GenericAttribute;
import org.apache.commons.lang3.time.FastDateFormat; import org.apache.commons.lang3.time.FastDateFormat;
import java.text.ParseException; import java.text.ParseException;
......
package com.zhiwei.middleware.automatic.server.util; package com.zhiwei.middleware.automatic.server.util;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.CommonDO;
import com.zhiwei.base.entity.subclass.mark.*;
import com.zhiwei.base.filter.FilterInfo;
import com.zhiwei.middleware.automatic.server.pojo.UploadInfo;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MapUtils; import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*; import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
public class Tools { public class Tools {
private static final Pattern SYMBOL_PATTERN = Pattern
.compile("[\\p{P}+~$`^=丨|<>~`$^+=|<>¥×\\s\u200B\u200C\u200D\u00A0\u0020\u3000]");
public static <T> List<JSONObject> bean2JSON(List<T> list) {
if (null == list) {
return Collections.emptyList();
}
List<JSONObject> res = new ArrayList<>(list.size());
list.forEach(t -> {
res.add(JSONObject.parseObject(JSONObject.toJSONString(t)));
});
return res;
}
public static <T> T deepCopyByJson(T obj, Class<T> t) {
String json = JSON.toJSONString(obj);
return JSON.parseObject(json, t);
}
/**
*
* 转换成redisHm格式并压缩
*
* @param Map<String,T>
* @return
*
* @return Map<String,String>
*/
public static <T> Map<String, String> redisHmFormatWithGzip(Map<String, T> map) {
if (null == map) {
return null;
}
Map<String, String> resMap = new HashMap<>();
map.forEach((k, v) -> {
resMap.put(k, Tools.gzip(JSONObject.toJSONString(v)));
});
return resMap;
}
/**
 * Returns one page of a list.
 *
 * @param list source list; {@code null} yields {@code null}
 * @param page 1-based page number; values below 1 yield an empty list
 * @param size page size; values below 1 yield an empty list
 * @param <T> element type
 * @return a new mutable list with the elements of the requested page, empty when the page
 *         lies beyond the end of the list
 */
public static <T> List<T> listPagedQuery(List<T> list, int page, int size) {
    if (null == list) {
        return null;
    }
    if (page <= 0 || size <= 0) {
        return new ArrayList<>();
    }
    // long arithmetic: (page - 1) * size overflows int for large pages and used to produce
    // a negative start index
    long start = (page - 1L) * size;
    if (start >= list.size()) {
        return new ArrayList<>();
    }
    int end = (int) Math.min(start + size, (long) list.size());
    return new ArrayList<>(list.subList((int) start, end));
}
/**
* 还原成实体类map
*
* @param strMap map
* @param clazz 目标对象
*/
public static <T> Map<String, T> restoreTMap(Map<String, String> strMap, Class<T> clazz) {
Map<String, T> resMap = new HashMap<>();
if (null == strMap) {
return resMap;
}
for (String key : strMap.keySet()) {
resMap.put(key, JSON.parseObject(strMap.get(key), clazz));
}
return resMap;
}
/**
 * Cuts a list into consecutive chunks of at most {@code limit} elements.
 *
 * @param list source list
 * @param limit maximum chunk size
 * @param <T> element type
 * @return the chunks in order; each chunk is a {@code subList} view of {@code list}
 */
public static<T> List<List<T>> spilt(List<T> list, int limit) {
    final int total = list.size();
    final int chunkCount = (total + limit - 1) / limit;
    List<List<T>> chunks = new ArrayList<>(chunkCount);
    int from = 0;
    for (int n = 1; n <= chunkCount; n++) {
        // The last chunk simply runs to the end of the list
        int to = (n == chunkCount) ? total : from + limit;
        chunks.add(list.subList(from, to));
        from += limit;
    }
    return chunks;
}
/**
 * Removes the text "https" from a URL when it occurs anywhere in it, otherwise removes "http".
 * The "://" separator is left in place.
 *
 * NOTE(review): every occurrence is removed, not only the leading scheme - confirm this is intended.
 *
 * @param url URL to process
 * @return the URL with the protocol text removed
 */
public static String urlReplace(String url) {
    if (url.contains("https")) {
        return url.replace("https", "");
    }
    return url.replace("http", "");
}
public static String randomUUID() { public static String randomUUID() {
return UUID.randomUUID().toString().replace("-", ""); return UUID.randomUUID().toString().replace("-", "");
} }
...@@ -156,121 +31,6 @@ public class Tools { ...@@ -156,121 +31,6 @@ public class Tools {
return sb.toString(); return sb.toString();
} }
/**
 * Scores a text against a keyword expression ("|" separates alternatives, a space separates
 * terms that must all occur). An alternative is hit when every one of its terms occurs in the
 * text; its score is the lowest occurrence count among its terms.
 *
 * @param text text to search
 * @param highWords keyword expression, parsed by {@code cutKeyword}
 * @return one single-entry map (alternative joined by spaces -> score) per hit alternative,
 *         ordered by descending score
 */
public static List<Map<String, Integer>> highWordMatch(String text, String highWords) {
    List<Map<String, Integer>> hits = new ArrayList<>();
    for (List<String> terms : cutKeyword(highWords)) {
        int lowest = 0;
        boolean everyTermFound = true;
        for (String term : terms) {
            int count = calculateRate(term, text);
            if (count == 0) {
                // One missing term disqualifies the whole alternative
                everyTermFound = false;
                break;
            }
            lowest = (lowest == 0) ? count : Math.min(lowest, count);
        }
        if (everyTermFound && lowest > 0) {
            Map<String, Integer> entry = new HashMap<>(1);
            entry.put(String.join(" ", terms), lowest);
            hits.add(entry);
        }
    }
    // Each map holds exactly one value: compare those, highest first
    hits.sort((left, right) -> Integer.compare(right.values().iterator().next(),
            left.values().iterator().next()));
    return hits;
}
public static List<FilterInfo> changeJSONList2FilterInfoList(List<JSONObject> list, String group, ClassB.TypeB typeB) {
Class<? extends CommonDO> clazz;
switch (typeB) {
case INCOMPLETE:
clazz = IncompleteTextMark.class;
break;
case COMPLETE:
clazz = CompleteTextMark.class;
break;
case QA:
clazz = QATextMark.class;
break;
case VIDEO:
clazz = VideoMark.class;
break;
default:
throw new IllegalArgumentException("未能解析到的typeB类型:" + typeB);
}
return list.stream().map(json -> {
json.put("mgroup", group);
return JSONObject.parseObject(json.toJSONString(), clazz).filterInfo();
}).collect(Collectors.toList());
}
/**
 * Counts how often a word occurs in a text. Overlapping occurrences are counted, because the
 * search resumes one character after the start of the previous match.
 *
 * @param word word to look for; {@code null} or empty yields 0
 * @param text text to search; {@code null} yields 0
 * @return number of occurrences
 */
public static int calculateRate(String word, String text) {
    if (null == word || word.isEmpty() || null == text) {
        return 0;
    }
    int rate = 0;
    int at = text.indexOf(word);
    while (at >= 0) {
        rate++;
        at = text.indexOf(word, at + 1);
    }
    return rate;
}
/**
 * Parses a keyword expression into groups: "|" separates alternatives (or), a single space
 * separates the terms inside an alternative (and).
 *
 * @param keyword keyword expression; blank input yields an empty list
 * @return one mutable list of terms per alternative
 */
public static List<List<String>> cutKeyword(String keyword) {
    List<List<String>> groups = new ArrayList<>();
    if (StringUtils.isBlank(keyword)) {
        return groups;
    }
    for (String alternative : keyword.split("\\|")) {
        groups.add(new ArrayList<>(Arrays.asList(alternative.split(" "))));
    }
    return groups;
}
public static ClassB.TypeB getTypeB(JSONObject json) {
return ClassB.TypeB.fromEncode(json.getInteger("c2"));
}
/**
* 过滤掉标题里面的标点符号
*
* @param title 标题
* @return 去除特殊符号后的标题
*/
public static String filterSymbol(String title) {
if (null != title) {
return SYMBOL_PATTERN.matcher(title).replaceAll("");
}
return null;
}
/** /**
* 是否为空,数据为空 * 是否为空,数据为空
* *
...@@ -294,242 +54,19 @@ public class Tools { ...@@ -294,242 +54,19 @@ public class Tools {
} }
/** /**
* 休眠x毫秒 * 还原成实体类map
*
* @param millis void
*/
public static void sleep(long millis) {
try {
Thread.sleep(millis);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
/**
* 重叠标签修正(保留彼此未重复属性标签)
*
* @param originTag
* @param latestTag
*
* @return void
*/
public static String partialUpdateTag(String originTag, String latestTag) {
Map<String, String> originMap = change2MapTag(originTag);
Map<String, String> latestMap = change2MapTag(latestTag);
if (null == latestMap) {
return "";
}
if (null == originMap) {
return latestTag;
}
originMap.putAll(latestMap);
return mapTag2String(originMap);
}
/**
 * Collects every match of a regular expression in a line.
 *
 * @param line text to scan
 * @param pattern regular expression
 * @return the matched substrings in order of occurrence; empty when nothing matches
 */
public static List<String> patternMatchFind(String line, String pattern) {
    List<String> matches = new ArrayList<>();
    Matcher matcher = Pattern.compile(pattern).matcher(line);
    for (boolean found = matcher.find(); found; found = matcher.find()) {
        matches.add(matcher.group());
    }
    return matches;
}
/**
* 获取主机名
* *
* @param url * @param strMap map
* @return * @param clazz 目标对象
*/
public static String getHost(String url) {
    // Returns the host of the URL after hostUnified has mapped known aliases to one name.
    // Throws RuntimeException when the URL cannot be parsed.
    try {
        return hostUnified(new URL(url).getHost());
    } catch (MalformedURLException e) {
        // Chain the parse failure as the cause instead of printing it to stderr and losing it
        throw new RuntimeException("url不合法获取域名出错!!!" + url, e);
    }
}
/**
* 时间校验
* @param time
* @return
*/ */
public static boolean isLegalTime(Long time) { public static <T> Map<String, T> restoreTMap(Map<String, String> strMap, Class<T> clazz) {
if (null == time || time.toString().length() != 13) { Map<String, T> resMap = new HashMap<>();
return false; if (null == strMap) {
} return resMap;
boolean flag = true;
try {
long startLimit = -639129600000L;
// 合法结束选取为后一年内
long endLimit = System.currentTimeMillis() + 365 * 24 * 60 * 60 * 1000L;
// 政府文件发布前和后一年内
if (time < startLimit || time > endLimit) {
flag = false;
}
} catch (Exception e) {
return false;
}
return flag;
}
/**
 * Gzip-compresses a string and returns the compressed bytes as Base64 text.
 * The string is encoded as UTF-8 so the result does not depend on the platform default
 * charset; {@link #gunzip(String)} decodes with the same charset.
 *
 * @param primStr text to compress; {@code null} or empty input is returned unchanged
 * @return Base64 text of the gzip stream
 */
public static String gzip(String primStr) {
    if (primStr == null || primStr.length() == 0) {
        return primStr;
    }
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (GZIPOutputStream gzip = new GZIPOutputStream(out)) {
        gzip.write(primStr.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    } catch (IOException e) {
        // Best effort, as before: report and encode whatever was written
        e.printStackTrace();
    }
    return new String(new org.apache.commons.codec.binary.Base64().encode(out.toByteArray()),
            java.nio.charset.StandardCharsets.UTF_8);
}

/**
 * Reverses {@link #gzip(String)}: Base64-decodes the text, gunzips it and decodes the bytes
 * as UTF-8.
 *
 * @param compressedStr Base64 text of a gzip stream; {@code null} yields {@code null}
 * @return the original text, or {@code null} when the data cannot be decompressed
 */
public static String gunzip(String compressedStr) {
    if (compressedStr == null) {
        return null;
    }
    byte[] compressed = new Base64().decode(compressedStr);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (GZIPInputStream ginzip = new GZIPInputStream(new ByteArrayInputStream(compressed))) {
        byte[] buffer = new byte[8192];
        int read;
        while ((read = ginzip.read(buffer)) != -1) {
            out.write(buffer, 0, read);
        }
        return new String(out.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
public static List<String> gzipWithUploadInfoList(List<UploadInfo> list) {
if (null == list || list.isEmpty()) {
return null;
}
List<String> resList = new ArrayList<>();
for (UploadInfo info : list) {
String jsonStr = JSONObject.toJSONString(info);
resList.add(gzip(jsonStr));
}
return resList;
}
/**
 * Restores cached mark records. Each cached string is gunzipped and parsed as a JSON object
 * with a {@code typeB} and a {@code sourceObj} field; {@code sourceObj} is parsed into the
 * mark class that matches {@code typeB}.
 * <p>
 * Only "COMPLETE" and "VIDEO" are supported. Entries with any other type are skipped; the
 * previous implementation re-added the element restored in the preceding iteration (or
 * {@code null} for the first entry) in that case.
 *
 * @param cachedMarkInfos compressed cache entries
 * @return the restored records, in cache order
 */
public static List<MarkInfo> getMarkInfos(List<String> cachedMarkInfos) {
    List<MarkInfo> list = new ArrayList<>();
    for (String s : cachedMarkInfos) {
        JSONObject jsonObject = JSONObject.parseObject(Tools.gunzip(s));
        String typeB = jsonObject.get("typeB").toString();
        String sourceObj = jsonObject.get("sourceObj").toString();
        switch (typeB) {
        case "COMPLETE":
            list.add(new MarkInfo(JSON.parseObject(sourceObj, CompleteTextMark.class)));
            break;
        case "VIDEO":
            list.add(new MarkInfo(JSON.parseObject(sourceObj, VideoMark.class)));
            break;
        default:
            // Unsupported type: nothing to restore for this entry
            break;
        }
    }
    return list;
}
private static String hostUnified(String host) {
// 微信链接统一
if ("weixin.sogou.com".equals(host)) {
host = "mp.weixin.qq.com";
}
// 抖音链接统一
if ("www.douyin.com".equals(host)) {
host = "www.iesdouyin.com";
}
return host;
}
private static Map<String, String> change2MapTag(String mtag) {
if (null == mtag || "".equals(mtag)) {
return null;
}
// 标签形式如:",1=10"
String[] origins = mtag.split(",");
Map<String, String> originMap = new HashMap<>();
for (String origin : origins) {
if (!"".equals(origin)) {
String[] fianls = origin.split("=");
originMap.put(fianls[0], fianls.length == 2 ? fianls[1] : "");
}
}
return originMap;
}
private static String mapTag2String(Map<String, String> tagMap) {
if (null == tagMap || tagMap.isEmpty()) {
return null;
} }
StringBuilder sb = new StringBuilder(); for (String key : strMap.keySet()) {
for (String key : tagMap.keySet()) { resMap.put(key, JSON.parseObject(strMap.get(key), clazz));
sb.append(",").append(key).append("=").append(tagMap.get(key));
} }
return sb.toString(); return resMap;
} }
} }
package com.zhiwei.middleware.automatic.server.util;
import com.zhiwei.wechat.search.WechatReal;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.Proxy.Type;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
/**
 * Resolves WeChat real links through a fixed pool of HTTP proxies. A proxy whose request
 * fails is put on a shield list and skipped until the list is cleared, which happens every
 * hour and whenever all proxies are shielded.
 */
public class WechatUtil {
    private static final Logger logger = LogManager.getLogger(WechatUtil.class);
    private static final ScheduledExecutorService SCHEDULE = Executors.newScheduledThreadPool(1);

    private WechatUtil() {
    }

    private static final List<Proxy> SCOKET_ADDRESS_CACHE = new ArrayList<>(5);
    private static final Map<Proxy, WechatReal> PROXY_CACHE = new HashMap<>(5);
    /**
     * Indexes (into SCOKET_ADDRESS_CACHE) of proxies that are currently shielded.
     * NOTE(review): read and replaced from both caller threads and the scheduler thread
     * without synchronisation - confirm whether that is acceptable.
     */
    private static List<Integer> shieldList = new ArrayList<>(5);

    /*
     * Huawei Cloud high-anonymity proxies
     */
    static {
        SCOKET_ADDRESS_CACHE.add(new Proxy(Type.HTTP, new InetSocketAddress("122.112.137.194", 31128)));
        SCOKET_ADDRESS_CACHE.add(new Proxy(Type.HTTP, new InetSocketAddress("122.112.163.207", 31128)));
        SCOKET_ADDRESS_CACHE.add(new Proxy(Type.HTTP, new InetSocketAddress("119.3.86.205", 31128)));
        SCOKET_ADDRESS_CACHE.add(new Proxy(Type.HTTP, new InetSocketAddress("119.3.38.9", 31128)));
        SCOKET_ADDRESS_CACHE.add(new Proxy(Type.HTTP, new InetSocketAddress("121.36.135.139", 31128)));
        SCOKET_ADDRESS_CACHE.forEach(proxy -> PROXY_CACHE.put(proxy, new WechatReal()));
        SCHEDULE.scheduleAtFixedRate(() -> cleanShield(SCOKET_ADDRESS_CACHE.size()), 1, 1, TimeUnit.HOURS);
    }

    /**
     * Resolves the real link of a WeChat URL through one of the pooled proxies.
     *
     * @param url URL to resolve
     * @return the resolved link, or {@code null} when the request failed (the proxy used is
     *         then shielded)
     */
    public static String wechatRealLinkget(String url) {
        int index = selectProxy();
        Proxy proxy = SCOKET_ADDRESS_CACHE.get(index);
        try {
            return PROXY_CACHE.get(proxy).getRealLink(url, proxy);
        } catch (Exception e) {
            // Pass the exception along so the failure reason reaches the log
            logger.error("代理ip:{}或失效,暂时停用!", proxy.address(), e);
            if (!shieldList.contains(index)) {
                shieldList.add(index);
            }
        }
        return null;
    }

    /**
     * Picks a proxy index: a random one when it is not shielded, otherwise the first
     * unshielded one. When every proxy is shielded the shield list is cleared and the random
     * pick is used.
     */
    private static int selectProxy() {
        final int limit = SCOKET_ADDRESS_CACHE.size();
        // The cast must cover the product: "(int) Math.random() * limit" is always 0
        int index = (int) (Math.random() * limit);
        if (!shieldList.contains(index)) {
            return index;
        }
        for (int i = 0; i < limit; i++) {
            if (!shieldList.contains(i)) {
                return i;
            }
        }
        cleanShield(limit);
        return index;
    }

    /**
     * Releases every shielded proxy.
     *
     * @param limit initial capacity of the fresh shield list
     */
    private static void cleanShield(int limit) {
        shieldList = new ArrayList<>(limit);
    }
    // TODO caller-supplied proxies are not supported yet
    // public String wechatRealLinkget(String url, Proxy proxy) throws Exception {
    // return WechatReal.getRealLink(url, proxy);
    // }
}
package com.zhiwei.middleware.automatic.server.util;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * Converts between Weibo numeric mids and the base-62 suffix used in Weibo URLs.
 * A mid is processed from the right in groups of 7 decimal digits, each of which corresponds
 * to a group of 4 base-62 characters in the URL suffix; only the leftmost group may be shorter.
 *
 * @author 0xff
 */
public class WeiboMidUrlDealUtil {
    private static final Logger logger = LogManager.getLogger(WeiboMidUrlDealUtil.class);

    /** Base-62 digits in value order: 0-9, a-z, A-Z. */
    private static final String[] str62keys = { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e",
            "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
            "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
            "V", "W", "X", "Y", "Z" };

    /** Number of decimal digits in one mid group. */
    private static final int MID_GROUP_LENGTH = 7;

    /** Number of base-62 characters in one URL-suffix group. */
    private static final int UID_GROUP_LENGTH = 4;

    private WeiboMidUrlDealUtil() {
    }

    /**
     * Converts a non-negative integer to base 62.
     *
     * @param int10 value to convert
     * @return the base-62 text without leading zeros; the empty string for 0
     */
    private static String intToEnode62(Integer int10) {
        StringBuilder s62 = new StringBuilder();
        int rest = int10;
        while (rest != 0) {
            s62.insert(0, str62keys[rest % 62]);
            rest = rest / 62;
        }
        return s62.toString();
    }

    /**
     * Converts base-62 text to its decimal representation.
     *
     * @param str62 base-62 text
     * @return the value as decimal text
     */
    private static String str62toInt(String str62) {
        long i64 = 0;
        for (int i = 0; i < str62.length(); i++) {
            i64 = i64 * 62 + findindex(str62.substring(i, i + 1));
        }
        return Long.toString(i64);
    }

    /**
     * Looks up the value of one base-62 digit.
     *
     * @param t single-character string
     * @return the digit value, or 0 when the character is not a base-62 digit
     */
    private static int findindex(String t) {
        for (int i = 0; i < str62keys.length; i++) {
            if (str62keys[i].equals(t)) {
                return i;
            }
        }
        return 0;
    }

    /**
     * Converts a URL suffix to a mid.
     *
     * @param mid the base-62 URL suffix (despite the parameter name)
     * @return the numeric mid as text
     */
    public static String uid2Mid(String mid) {
        StringBuilder id = new StringBuilder();
        // Read the suffix from the end in groups of 4 characters
        for (int i = mid.length() - UID_GROUP_LENGTH; i > -UID_GROUP_LENGTH; i = i - UID_GROUP_LENGTH) {
            int offset1 = i < 0 ? 0 : i;
            int len = i < 0 ? mid.length() % UID_GROUP_LENGTH : UID_GROUP_LENGTH;
            StringBuilder group = new StringBuilder(str62toInt(mid.substring(offset1, offset1 + len)));
            if (offset1 > 0) {
                // Every group except the leftmost is left-padded with zeros to 7 digits
                while (group.length() < MID_GROUP_LENGTH) {
                    group.insert(0, "0");
                }
            }
            id.insert(0, group);
        }
        return id.toString();
    }

    /**
     * Converts a mid to its URL suffix.
     *
     * @param str10 numeric mid as text
     * @return the base-62 URL suffix
     */
    public static String mid2Uid(String str10) {
        StringBuilder mid = new StringBuilder();
        // Read the mid from the end in groups of 7 digits
        for (int i = str10.length() - MID_GROUP_LENGTH; i > -MID_GROUP_LENGTH; i = i - MID_GROUP_LENGTH) {
            int offset = i < 0 ? 0 : i;
            int len = i < 0 ? str10.length() % MID_GROUP_LENGTH : MID_GROUP_LENGTH;
            StringBuilder url = new StringBuilder(intToEnode62(Integer.valueOf(str10.substring(offset, offset + len))));
            if (i > 0) {
                // Every group except the leftmost is left-padded with zeros to 4 characters.
                // The bound is checked against the growing length, so short groups are fully padded.
                while (url.length() < UID_GROUP_LENGTH) {
                    url.insert(0, "0");
                }
            }
            mid.insert(0, url);
        }
        return mid.toString();
    }

    /**
     * Extracts the mid from a Weibo URL.
     * <ul>
     * <li>"weibo.cn/status" URLs: the path segment after "status/", without the query string.</li>
     * <li>"weibo.com" URLs: the last path segment (query and fragment removed), cut to 9
     * characters and converted with {@link #uid2Mid(String)}; segments shorter than 9
     * characters leave the trimmed URL as the result.</li>
     * <li>Anything else: the trimmed input.</li>
     * </ul>
     *
     * @param url URL to convert; may be {@code null}
     * @return the mid, or {@code null} when the URL is {@code null} or cannot be converted
     */
    public static String urlToMid(String url) {
        if (null == url) {
            return null;
        }
        try {
            String trimmed = url.trim();
            if (trimmed.contains("weibo.cn/status")) {
                return trimmed.split("status/")[1].split("\\?")[0];
            }
            String mid = trimmed;
            if (trimmed.contains("weibo.com")) {
                String suffix = trimmed.replaceAll("\\?.*|#.*|.*/", "");
                if (suffix.length() >= 9) {
                    mid = uid2Mid(suffix.substring(0, 9));
                }
            }
            return mid;
        } catch (Exception e) {
            logger.debug("错误链接{}链接转换mid出错{}", url, e.getMessage());
            return null;
        }
    }

    /**
     * Converts a batch of URLs to mids.
     *
     * @param urlList URLs to convert; {@code null} yields an empty list
     * @return the mids of the URLs that could be converted, in input order
     */
    public static List<String> weiboToMidToList(List<String> urlList) {
        if (null == urlList) {
            return Collections.emptyList();
        }
        List<String> midList = new ArrayList<>();
        for (String url : urlList) {
            String mid = urlToMid(url);
            if (Objects.nonNull(mid) && !mid.isEmpty()) {
                midList.add(mid);
            }
        }
        return midList;
    }

    /**
     * Converts a batch of URLs to a mid-to-URL map.
     *
     * @param urlList URLs to convert; {@code null} yields an empty map
     * @return map from mid to the URL it was derived from; URLs that cannot be converted are omitted
     */
    public static Map<String, String> weiboToMidToMap(List<String> urlList) {
        if (null == urlList) {
            return Collections.emptyMap();
        }
        Map<String, String> rMap = new HashMap<>();
        for (String url : urlList) {
            String mid = urlToMid(url);
            if (Objects.nonNull(mid) && !mid.isEmpty()) {
                rMap.put(mid, url);
            }
        }
        return rMap;
    }
}
...@@ -19,35 +19,16 @@ dubbo.application.shutwait=30s ...@@ -19,35 +19,16 @@ dubbo.application.shutwait=30s
#mongo #mongo
#primary.uri.marker=mongodb://qbjc:asSADf5ffs@115.236.59.88:30001/qbjc?authSource=admin #primary.uri.marker=mongodb://qbjc:asSADf5ffs@115.236.59.88:30001/qbjc?authSource=admin
primary.uri.marker=mongodb://localhost:27017/localhost primary.uri.marker=mongodb://localhost:27017/localhost
primary.uri.hangzhou=mongodb://qbjc:asSADf5ffs@115.236.59.88:30001/qbjc?authSource=admin #primary.uri.hangzhou=mongodb://qbjc:asSADf5ffs@115.236.59.88:30001/qbjc?authSource=admin
#primary.uri=mongodb://qbjc:asSADf5ffs@202.107.192.94:17150/qbjc?authSource=admin primary.uri.hangzhou=mongodb://qbjc:asSADf5ffs@202.107.192.94:17150/qbjc?authSource=admin
mongo.connectTimeout=30000 mongo.connectTimeout=30000
mongo.maxWaitTime=50000 mongo.maxWaitTime=50000
mongo.dataBaseMarker=marker mongo.dataBaseMarker=marker
mongo.hangzhouMarker=qbjc mongo.hangzhouMarker=qbjc
# es
es.esClientAddresses=202.107.192.94:1443:qbjc-back:yuqing.zhiweidata.com,202.107.192.94:29400:elastic:qWxZRW42OHkuOhmF5AXX
es.clusterNodes=202.107.192.94:1443
es.clusterName=zhiweidata-new-es
es.httpClusterNodes=202.107.192.94:1443:middleware-automaticmark:auto.zhiweidata.com
#es.username=middleware-automaticmark
#es.password=auto.zhiweidata.com
es.username=joker
es.password=jokerdevops
middleware.zookeeperAddress=zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181 middleware.zookeeperAddress=zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181
middleware.appName=automatic-provider middleware.appName=automatic-provider
middleware.markGroup=zhiwei-mark-local-liuyu middleware.markGroup=zhiwei-mark-local-liuyu
middleware.filterGroup=local-filter middleware.filterGroup=local-filter
#kafka topic
crawler.topic = crawler-test_1
#kafka配置
# kafka服务器地址(可以多个)
spring.kafka.bootstrap-servers=192.168.0.11:9092,192.168.0.30:9092,192.168.0.35:9092
#生产者
spring.kafka.producer.key-serializer=org.apache.kafka.common.serialization.StringSerializer
spring.kafka.producer.value-serializer=org.apache.kafka.common.serialization.StringSerializer
spring.kafka.producer.compression-type=snappy
\ No newline at end of file
server.port=7778
#path to redisson.yaml or redisson.json
spring.redis.redisson.file=classpath:redisson.yaml
dubbo.application.name=automatic-provider
dubbo.application.qos.enable=false
dubbo.registry.address=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182
#dubbo.config-center.timeout=60000
dubbo.protocol.name=dubbo
dubbo.protocol.port=7779
dubbo.provider.timeout=60000
dubbo.registry.timeout=60000
dubbo.registry.version=*
dubbo.provider.group=zhiwei-automatic
dubbo.scan.basePackages=com.zhiwei.middleware.automatic.server.dubbo.service.impl
dubbo.monitor.protocol=registry
dubbo.application.shutwait=30s
#mongo
primary.uri.marker=mongodb://markeruser:marker1q2w3e4r@192.168.0.150:27017,192.168.0.151:27017,192.168.0.152:27017/marker?authSource=admin
primary.uri.hangzhou=mongodb://automarker:HtSIcjzZ@192.168.0.150:27017,192.168.0.151:27017,192.168.0.152:27017/qbjc?authSource=admin
mongo.connectTimeout=30000
mongo.maxWaitTime=50000
mongo.dataBaseMarker=marker
mongo.hangzhouMarker=qbjc
prod.robot.push.address=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=e2218c6e-af6a-4296-9d75-7178b941a3b5 prod.robot.push.address=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=e2218c6e-af6a-4296-9d75-7178b941a3b5
prod.robot.push.enable=false prod.robot.push.enable=true
prod.robot.push.filterclass=org.apache.dubbo.common.Version,com.alibaba.dubbo.common.Version,org.apache.dubbo.monitor.dubbo.DubboMonitor,com.alibaba.dubbo.monitor.dubbo.DubboMonitor prod.robot.push.filterclass=org.apache.dubbo.common.Version,com.alibaba.dubbo.common.Version,org.apache.dubbo.monitor.dubbo.DubboMonitor,com.alibaba.dubbo.monitor.dubbo.DubboMonitor
prod.robot.push.level=error prod.robot.push.level=error
prod.robot.push.app.name=automatic-server-prod prod.robot.push.app.name=automatic-server-prod
......
---
singleServerConfig:
idleConnectionTimeout: 60000
connectTimeout: 10000
timeout: 3000
retryAttempts: 3
retryInterval: 1500
subscriptionsPerConnection: 5
address: "redis://192.168.0.39:7386"
subscriptionConnectionMinimumIdleSize: 1
subscriptionConnectionPoolSize: 50
connectionMinimumIdleSize: 128
connectionPoolSize: 256
database: 3
dnsMonitoringInterval: 5000
threads: 0
nettyThreads: 0
codec: !<org.redisson.codec.JsonJacksonCodec> {}
transportMode: "NIO"
\ No newline at end of file
[["尊嘉证券","字节跳动","众邦银行","中商惠民","中软","中融金","中金所","中关村","智联","智慧星光","智纯","志程","知微数据","知微事见","知微传播分析","支付宝","证监会","臻稚","榛果","浙商银行","找钢网","掌鱼生鲜","掌合天下","掌柜宝","债券通","早早孕","云天励飞","云锋金融","云从科技","阅文集团","猿题库","猿辅导","毓婷","榆钱","余额宝","有象科技","优酷","应用宝","鹰眼监控","英雄联盟","易利贷","易酒批","易久批","易会满","易果生鲜","易贷","艺妙神州","蚁坊","依图","伊利","药神保","药明康德","亚洲一号","亚马逊","雅士利","雅培","雪球","学儿乐","旭航网贷","熊猫直播","熊猫债","熊猫tv","星选外卖","星星守护","星图","星阶优护","新氧","新通路","新能源","新美大","新浪微博","新京报","新华社","新高桥","心美力","携程","校园贷","校园白条","小猪短租","小猿搜题","小猿口算","小象生鲜","小米","小美果园","小红书","小程序","小白用车","小白信用","相互保","现金贷","现代快报","闲鱼","虾米音乐","喜康素","喜康力","喜康宝","喜翻","喜宝","西瓜视频","悟空问答","物联网","我买网","唯品金融","唯品会","唯品国际","微众银行","微云","微舆情","微医保","微信","微头条","微视","微牛证券","微民保险","微瓴","微粒贷","微博","网易","万斯","万事达","万科","推特","途众","途牛","途家","同仁堂","同盾","同程","通付盾","天天快报","天猫","腾讯","淘小铺","淘鲜达","淘点点","淘宝","速卖通","苏宁","顺丰","水钢医院","舒心美","首中投资","首长四方","首长国际","首长宝佳","首颐医疗","首师大","首钢","首都机场","圣元","商汤","软银","融创","荣耀","雀巢","趣头条","去哪儿","钱袋宝","前程无忧","千树资本","企鹅","奇虎","苹果","品骏","拼多多","皮皮虾","鹏源","鹏灏","鹏沨","鹏渤","欧冶云商","诺优能","宁波有象","宁波第三检测站","南苑机场","陌陌","摩拜","秒拍","苗建","喵鲜生","镁信","美赞成","美团","美素","美食林","美强生","美拍","美美证券","美啦","美菜","绿地","罗汉堂","龙湖","领英","猎聘","联想","利洁时","理想汽车","理财通","理财金","礼橙专车","老虎證券","老虎证券","蓝鲸征信","蓝鲸TMT","莱西","拉勾","旷视","快手","酷狗音乐","口碑网","可丽蓝","可丽兰","看准网","看准app","聚划算","聚财猫","巨量引擎","酒仙网","京东云","京东","金秀儿","火山直播","火山小视频","欢聚时代","华为","虎牙","红麦","黑珍珠","盒马","和易贷","合众","合生元","禾连健康","海囤全球","狗东","格灵深瞳","高德","富途","富民宝","斐讯","飞鹤","恩美力","恩加健","多闪","杜蕾斯","斗鱼","抖音","滴滴","大卫","创业黑马","车英慧","超级物种","菜鸟","北京字节跳动科技有限公司","斑马英语","百融","百度","安智宝","阿里","YY直播","youtube","UC","SenseTime","QQ","JD","Boss职聘","Boss直聘","A站"],["字节","主播","中金","中交路建","支付宝","政协","阅文","猿辅导","元素禾喜","元気森林","优酷","英特尔","银监会","一起学","央行","星图","新氧","小游戏","小象","小鹏","小桔","小红书","小程序","闲鱼","喜马拉雅","西瓜","沃尔玛","未成年","唯品会","微信","微视","微软","微博","微保","威马","网约车","网银","网易","王者荣耀","万科","土豆","途众","图虫","头条","统计局","同盾","同程","天眼查","天猫","腾讯","特斯拉","淘票票","淘宝","苏宁","尚德","商汤","山姆","三一五","三星","融创","荣耀","人人贷","人人车","人民日报","趣头条","区块链","清华",
"清北","轻聊","青桔","企鹅","麒麟","骑手","期权","期货","平安","拼多多","皮皮虾","朋友圈","拍拍乐","某音","某讯","某文","某手","某平台","某聘","某品会","某盾","某抖","某东","某大厂","秒针","秒杀","秒拍","米其林","孟晚舟","美赞臣","美团","蚂蚁","马云","马化腾","绿地","鲁班","龙湖","两会","联想","联合国","荔枝","快应用","快手","科慕","科技部","康希诺","凯迪拉克","绝地求生","巨量引擎","京东","金融","今日头条","建设银行","建行","机器人","火山","华为","花小猪","红包","恒大","嗨学","黑珍珠","黑马","盒马","好未来","哈啰","国双","国美","故宫","谷歌","狗东","公众号","工信部","工商局","跟谁学","高通","高榕","高德","富途","肺炎","飞猪","飞悦","飞书","飞慧","飞鹤","放心购","泛生子","多闪","短视频","斗鱼","抖音","懂车帝","定安","钉钉","滴滴","嘀嘀","大众点评","创业家","创业黑马","创新工场","穿山甲","超级物种","菜鸟","财新","比亚迪","比特币","贝因美","北银","北青网","北汽新能源","北排","邦盛","斑马","百融","百度","白条","安卓","爱奇艺","阿里","阿尔迪","阿迪达斯","youtube","wx","wechat","Walmart","UC","twitter","Trump","Tongdun","Tencent","sensetime","QQ","p2p","O2O","J东","JDD","JD+","JD","i黑马","IPO","Intel","huawei","Google","Futu","DOU+","dnf","COO","CEO","B站","BUG","Boss职聘","Boss直聘","bilibili","A站","Auchan","ATM","AMS","amazon","AI学","AI生态","AI启杭","AI财经社","AI","99公益","95公益","7鲜","7Fresh","714高炮","701计划","7 
Fresh","5G","58同城","58集团","58二手车","58到家","51VR,","51job","36氪","1药网","1号会员店","11_11","12315","2022","2021","2020","996","315","11.11","3.15"],["北京","天津","石家庄","唐山","秦皇岛","邯郸","邢台","保定","张家口","承德","沧州","廊坊","衡水","太原","大同","阳泉","长治","晋城","朔州","晋中","运城","忻州","临汾","吕梁","呼和浩特","包头","乌海","赤峰","通辽","鄂尔多斯","呼伦贝尔","巴彦淖尔","乌兰察布","兴安","锡林郭勒","阿拉善","沈阳","大连","鞍山","抚顺","本溪","丹东","锦州","营口","阜新","辽阳","盘锦","铁岭","朝阳","葫芦岛","长春","吉林","四平","辽源","通化","白山","松原","白城","延边","哈尔滨","齐齐哈尔","鸡西","鹤岗","双鸭山","大庆","伊春","佳木斯","七台河","牡丹江","黑河","绥化","大兴安岭","上海","南京","无锡","徐州","常州","苏州","南通","连云港","淮安","盐城","扬州","镇江","泰州","宿迁","杭州","宁波","温州","嘉兴","湖州","绍兴","金华","衢州","舟山","台州","丽水","合肥","芜湖","蚌埠","淮南","马鞍山","淮北","铜陵","安庆","黄山","滁州","阜阳","宿州","六安","亳州","池州","宣城","福州","厦门","莆田","三明","泉州","漳州","南平","龙岩","宁德","南昌","景德镇","萍乡","九江","新余","鹰潭","赣州","吉安","宜春","抚州","上饶","济南","青岛","淄博","枣庄","东营","烟台","潍坊","济宁","泰安","威海","日照","临沂","德州","聊城","滨州","菏泽","郑州","开封","洛阳","平顶山","安阳","鹤壁","新乡","焦作","濮阳","许昌","漯河","三门峡","南阳","商丘","信阳","周口","驻马店","武汉","黄石","十堰","宜昌","襄阳","鄂州","荆门","孝感","荆州","黄冈","咸宁","随州","恩施","长沙","株洲","湘潭","衡阳","邵阳","岳阳","常德","张家界","益阳","郴州","永州","怀化","娄底","湘西","广州","韶关","深圳","珠海","汕头","佛山","江门","湛江","茂名","肇庆","惠州","梅州","汕尾","河源","阳江","清远","东莞","中山","潮州","揭阳","云浮","南宁","柳州","桂林","梧州","北海","防城港","钦州","贵港","玉林","百色","贺州","河池","来宾","崇左","海口","三亚","三沙","儋州","重庆","成都","自贡","攀枝花","泸州","德阳","绵阳","广元","遂宁","内江","乐山","南充","眉山","宜宾","广安","达州","雅安","巴中","资阳","阿坝","甘孜","凉山","贵阳","六盘水","遵义","安顺","毕节","铜仁","黔西南","黔东南","黔南","昆明","曲靖","玉溪","保山","昭通","丽江","普洱","临沧","楚雄","红河","文山","西双版纳","大理","德宏","怒江","迪庆","拉萨","日喀则","昌都","林芝","山南","那曲","阿里","西安","铜川","宝鸡","咸阳","渭南","延安","汉中","榆林","安康","商洛","兰州","嘉峪关","金昌","白银","天水","武威","张掖","平凉","酒泉","庆阳","定西","陇南","临夏","甘南","西宁","海东","海北","黄南","海南","果洛","玉树","海西","银川","石嘴山","吴忠","固原","中卫","乌鲁木齐","克拉玛依","吐鲁番","哈密","昌吉","博州","巴州","阿克苏","克州","喀什","和田","伊犁","塔城","阿勒泰","台北","高雄","桃园","台中","台南","新北","基隆","新竹","嘉义","香港","澳门","华北","华中","华东","华南","西北","西南","东北","河北","山西","辽宁","黑龙江","江苏","浙江","
安徽","福建","江西","山东","河南","湖北","湖南","广东","四川","贵州","云南","陕西","甘肃","青海","台湾","内蒙古","广西壮族","西藏","宁夏回族","新疆维吾尔"],["东城","西城","朝阳","丰台","石景山","海淀","门头沟","房山","通州","顺义","昌平","大兴","怀柔","平谷","密云","延庆","和平","河东","河西","南开","河北","红桥","东丽","西青","津南","北辰","武清","宝坻","滨海","宁河","静海","蓟州","长安","桥西","新华","井陉矿区","裕华","藁城","鹿泉","栾城","井陉","正定","行唐","灵寿","高邑","深泽","赞皇","无极","平山","元氏","赵县","辛集","晋州","新乐","路南","路北","古冶","开平","丰南","丰润","曹妃甸","滦南","乐亭","迁西","玉田","遵化","迁安","滦州","海港","山海关","北戴河","抚宁","青龙","昌黎","卢龙","邯山","丛台","复兴","峰峰矿区","肥乡","永年","临漳","成安","大名","涉县","磁县","邱县","鸡泽","广平","馆陶","魏县","曲周","武安","桥东","桥西","邢台","临城","内丘","柏乡","隆尧","任县","南和","宁晋","巨鹿","新河","广宗","平乡","威县","清河","临西","南宫","沙河","竞秀","莲池","满城","清苑","徐水","涞水","阜平","定兴","唐县","高阳","容城","涞源","望都","安新","易县","曲阳","蠡县","顺平","博野","雄县","涿州","定州","安国","高碑店","桥东","桥西","宣化","下花园","万全","崇礼","张北","康保","沽源","尚义","蔚县","阳原","怀安","怀来","涿鹿","赤城","双桥","双滦","鹰手营子矿区","承德","兴隆","滦平","隆化","丰宁","宽城","围场","平泉","新华","运河","沧县","青县","东光","海兴","盐山","肃宁","南皮","吴桥","献县","孟村","泊头","任丘","黄骅","河间","安次","广阳","固安","永清","香河","大城","文安","大厂","霸州","三河","桃城","冀州","枣强","武邑","武强","饶阳","安平","故城","景县","阜城","深州","小店","迎泽","杏花岭","尖草坪","万柏林","晋源","清徐","阳曲","娄烦","古交","新荣","平城","云冈","云州","阳高","天镇","广灵","灵丘","浑源","左云","城区","矿区","郊区","平定","盂县","潞州","上党","屯留","潞城","襄垣","平顺","黎城","壶关","长子","武乡","沁县","沁源","城区","沁水","阳城","陵川","泽州","高平","朔城","平鲁","山阴","应县","右玉","怀仁","榆次","榆社","左权","和顺","昔阳","寿阳","太谷","祁县","平遥","灵石","介休","盐湖","临猗","万荣","闻喜","稷山","新绛","绛县","垣曲","夏县","平陆","芮城","永济","河津","忻府","定襄","五台","代县","繁峙","宁武","静乐","神池","五寨","岢岚","河曲","保德","偏关","原平","尧都","曲沃","翼城","襄汾","洪洞","古县","安泽","浮山","吉县","乡宁","大宁","隰县","永和","蒲县","汾西","侯马","霍州","离石","文水","交城","兴县","临县","柳林","石楼","岚县","方山","中阳","交口","孝义","汾阳","新城","回民区","玉泉","赛罕","土默特左旗","托克托","和林格尔","清水河","武川","东河","昆都仑","青山","石拐","白云鄂博矿","九原","土默特右旗","固阳","达尔罕茂明安联合旗","海勃湾","海南","乌达","红山","元宝山","松山","阿鲁科尔沁旗","巴林左旗","巴林右旗","林西","克什克腾旗","翁牛特旗","喀喇沁旗","宁城","敖汉旗","科尔沁","科尔沁左翼中旗","科尔沁左翼后旗","开鲁","库伦旗","奈曼旗","扎鲁特旗","霍林郭勒","东胜","康巴什","达拉特旗","准格尔旗","
鄂托克前旗","鄂托克旗","杭锦旗","乌审旗","伊金霍洛旗","海拉尔","扎赉诺尔","阿荣旗","莫力达瓦达斡尔族自治旗","鄂伦春自治旗","鄂温克族自治旗","陈巴尔虎旗","新巴尔虎左旗","新巴尔虎右旗","满洲里","牙克石","扎兰屯","额尔古纳","根河","临河","五原","磴口","乌拉特前旗","乌拉特中旗","乌拉特后旗","杭锦后旗","集宁","卓资","化德","商都","兴和","凉城","察哈尔右翼前旗","察哈尔右翼中旗","察哈尔右翼后旗","四子王旗","丰镇","乌兰浩特","阿尔山","科尔沁右翼前旗","科尔沁右翼中旗","扎赉特旗","突泉","二连浩特","锡林浩特","阿巴嘎旗","苏尼特左旗","苏尼特右旗","东乌珠穆沁旗","西乌珠穆沁旗","太仆寺旗","镶黄旗","正镶白旗","正蓝旗","多伦","阿拉善左旗","阿拉善右旗","额济纳旗","和平","沈河","大东","皇姑","铁西","苏家屯","浑南","沈北新","于洪","辽中","康平","法库","新民","中山","西岗","沙河口","甘井子","旅顺口","金州","普兰店","长海","瓦房店","庄河","铁东","铁西","立山","千山","台安","岫岩","海城","新抚","东洲","望花","顺城","抚顺","新宾","清原","平山","溪湖","明山","南芬","本溪","桓仁","元宝","振兴","振安","宽甸","东港","凤城","古塔","凌河","太和","黑山","义县","凌海","北镇","站前","西市","鲅鱼圈","老边","盖州","大石桥","海州","新邱","太平","清河门","细河","阜新","彰武","白塔","文圣","宏伟","弓长岭","太子河","辽阳","灯塔","双台子","兴隆台","大洼","盘山","银州","清河","铁岭","西丰","昌图","调兵山","开原","双塔","龙城","朝阳","建平","喀喇沁左翼","北票","凌源","连山","龙港","南票","绥中","建昌","兴城","南关","宽城","朝阳","二道","绿园","双阳","九台","农安","榆树","德惠","昌邑","龙潭","船营","丰满","永吉","蛟河","桦甸","舒兰","磐石","铁西","铁东","梨树","伊通","公主岭","双辽","龙山","西安","东丰","东辽","东昌","二道江","通化","辉南","柳河","梅河口","集安","浑江","江源","抚松","靖宇","长白","临江","宁江","前郭尔罗斯","长岭","乾安","扶余","洮北","镇赉","通榆","洮南","大安","延吉","图们","敦化","珲春","龙井","和龙","汪清","安图","道里","南岗","道外","平房","松北","香坊","呼兰","阿城","双城","依兰","方正","宾县","巴彦","木兰","通河","延寿","尚志","五常","龙沙","建华","铁锋","昂昂溪","富拉尔基","碾子山","梅里斯","龙江","依安","泰来","甘南","富裕","克山","克东","拜泉","讷河","鸡冠","恒山","滴道","梨树","城子河","麻山","鸡东","虎林","密山","向阳","工农","南山","兴安","东山","兴山","萝北","绥滨","尖山","岭东","四方台","宝山","集贤","友谊","宝清","饶河","萨尔图","龙凤","让胡路","红岗","大同","肇州","肇源","林甸","杜尔伯特","伊美","乌翠","友好","嘉荫","汤旺","丰林","大箐山","南岔","金林","铁力","向阳","前进","东风","郊区","桦南","桦川","汤原","同江","富锦","抚远","新兴","桃山","茄子河","勃利","东安","阳明","爱民","西安","林口","绥芬河","海林","宁安","穆棱","东宁","爱辉","嫩江","逊克","孙吴","北安","五大连池","北林","望奎","兰西","青冈","庆安","明水","绥棱","安达","肇东","海伦","漠河","呼玛","塔河","黄浦","徐汇","长宁","静安","普陀","虹口","杨浦","闵行","宝山","嘉定","浦东","金山","松江","青浦","奉贤","崇明","玄武","秦淮","建邺","鼓楼","浦口","栖霞","雨花台","江宁","六合","溧水","高淳","锡山",
"惠山","滨湖","梁溪","新吴","江阴","宜兴","鼓楼","云龙","贾汪","泉山","铜山","丰县","沛县","睢宁","新沂","邳州","天宁","钟楼","新北","武进","金坛","溧阳","虎丘","吴中","相城","姑苏","吴江","常熟","张家港","昆山","太仓","崇川","港闸","通州","如东","启东","如皋","海门","海安","连云","海州","赣榆","东海","灌云","灌南","淮安","淮阴","清江浦","洪泽","涟水","盱眙","金湖","亭湖","盐都","大丰","响水","滨海","阜宁","射阳","建湖","东台","广陵","邗江","江都","宝应","仪征","高邮","京口","润州","丹徒","丹阳","扬中","句容","海陵","高港","姜堰","兴化","靖江","泰兴","宿城","宿豫","沭阳","泗阳","泗洪","上城","下城","江干","拱墅","西湖","滨江","萧山","余杭","富阳","临安","桐庐","淳安","建德","海曙","江北","北仑","镇海","鄞州","奉化","象山","宁海","余姚","慈溪","鹿城","龙湾","瓯海","洞头","永嘉","平阳","苍南","文成","泰顺","瑞安","乐清","南湖","秀洲","嘉善","海盐","海宁","平湖","桐乡","吴兴","南浔","德清","长兴","安吉","越城","柯桥","上虞","新昌","诸暨","嵊州","婺城","金东","武义","浦江","磐安","兰溪","义乌","东阳","永康","柯城","衢江","常山","开化","龙游","江山","定海","普陀","岱山","嵊泗","椒江","黄岩","路桥","三门","天台","仙居","温岭","临海","玉环","莲都","青田","缙云","遂昌","松阳","云和","庆元","景宁","龙泉","瑶海","庐阳","蜀山","包河","长丰","肥东","肥西","庐江","巢湖","镜湖","弋江","鸠江","三山","芜湖","繁昌","南陵","无为","龙子湖","蚌山","禹会","淮上","怀远","五河","固镇","大通","田家庵","谢家集","八公山","潘集","凤台","寿县","花山","雨山","博望","当涂","含山","和县","杜集","相山","烈山","濉溪","铜官","义安","郊区","枞阳","迎江","大观","宜秀","怀宁","太湖","宿松","望江","岳西","桐城","潜山","屯溪","黄山","徽州","歙县","休宁","黟县","祁门","琅琊","南谯","来安","全椒","定远","凤阳","天长","明光","颍州","颍东","颍泉","临泉","太和","阜南","颍上","界首","埇桥","砀山","萧县","灵璧","泗县","金安","裕安","叶集","霍邱","舒城","金寨","霍山","谯城","涡阳","蒙城","利辛","贵池","东至","石台","青阳","宣州","郎溪","广德","泾县","绩溪","旌德","宁国","鼓楼","台江","仓山","马尾","晋安","长乐","闽侯","连江","罗源","闽清","永泰","平潭","福清","思明","海沧","湖里","集美","同安","翔安","城厢","涵江","荔城","秀屿","仙游","梅列","三元","明溪","清流","宁化","大田","尤溪","沙县","将乐","泰宁","建宁","永安","鲤城","丰泽","洛江","泉港","惠安","安溪","永春","德化","金门","石狮","晋江","南安","芗城","龙文","云霄","漳浦","诏安","长泰","东山","南靖","平和","华安","龙海","延平","建阳","顺昌","浦城","光泽","松溪","政和","邵武","武夷山","建瓯","新罗","永定","长汀","上杭","武平","连城","漳平","蕉城","霞浦","古田","屏南","寿宁","周宁","柘荣","福安","福鼎","东湖","西湖","青云谱","湾里","青山湖","新建","南昌","安义","进贤","昌江","珠山","浮梁","乐平","安源","湘东","莲花","上栗","芦溪","濂溪","浔阳","柴桑","武宁","修水","永修","德安","都昌","湖口","彭泽","瑞昌","共青城","庐山","渝水","分宜","月湖","余江",
"贵溪","章贡","南康","赣县","信丰","大余","上犹","崇义","安远","龙南","定南","全南","宁都","于都","兴国","会昌","寻乌","石城","瑞金","吉州","青原","吉安","吉水","峡江","新干","永丰","泰和","遂川","万安","安福","永新","井冈山","袁州","奉新","万载","上高","宜丰","靖安","铜鼓","丰城","樟树","高安","临川","东乡","南城","黎川","南丰","崇仁","乐安","宜黄","金溪","资溪","广昌","信州","广丰","上饶","玉山","铅山","横峰","弋阳","余干","鄱阳","万年","婺源","德兴","历下","市中","槐荫","天桥","历城","长清","章丘","济阳","莱芜","钢城","平阴","商河","市南","市北","黄岛","崂山","李沧","城阳","即墨","胶州","平度","莱西","淄川","张店","博山","临淄","周村","桓台","高青","沂源","市中","薛城","峄城","台儿庄","山亭","滕州","东营","河口","垦利","利津","广饶","芝罘","福山","牟平","莱山","长岛","龙口","莱阳","莱州","蓬莱","招远","栖霞","海阳","潍城","寒亭","坊子","奎文","临朐","昌乐","青州","诸城","寿光","安丘","高密","昌邑","任城","兖州","微山","鱼台","金乡","嘉祥","汶上","泗水","梁山","曲阜","邹城","泰山","岱岳","宁阳","东平","新泰","肥城","环翠","文登","荣成","乳山","东港","岚山","五莲","莒县","兰山","罗庄","河东","沂南","郯城","沂水","兰陵","费县","平邑","莒南","蒙阴","临沭","德城","陵城","宁津","庆云","临邑","齐河","平原","夏津","武城","乐陵","禹城","东昌府","阳谷","莘县","茌平","东阿","冠县","高唐","临清","滨城","沾化","惠民","阳信","无棣","博兴","邹平","牡丹","定陶","曹县","单县","成武","巨野","郓城","鄄城","东明","中原","二七","管城","金水","上街","惠济","中牟","巩义","荥阳","新密","新郑","登封","龙亭","顺河","鼓楼","禹王台","祥符","杞县","通许","尉氏","兰考","老城","西工","瀍河","涧西","吉利","洛龙","孟津","新安","栾川","嵩县","汝阳","宜阳","洛宁","伊川","偃师","新华","卫东","石龙","湛河","宝丰","叶县","鲁山","郏县","舞钢","汝州","文峰","北关","殷都","龙安","安阳","汤阴","滑县","内黄","林州","鹤山","山城","淇滨","浚县","淇县","红旗","卫滨","凤泉","牧野","新乡","获嘉","原阳","延津","封丘","长垣","卫辉","辉县","解放","中站","马村","山阳","修武","博爱","武陟","温县","沁阳","孟州","华龙","清丰","南乐","范县","台前","濮阳","魏都","建安","鄢陵","襄城","禹州","长葛","源汇","郾城","召陵","舞阳","临颍","湖滨","陕州","渑池","卢氏","义马","灵宝","宛城","卧龙","南召","方城","西峡","镇平","内乡","淅川","社旗","唐河","新野","桐柏","邓州","梁园","睢阳","民权","睢县","宁陵","柘城","虞城","夏邑","永城","浉河","平桥","罗山","光山","新县","商城","固始","潢川","淮滨","息县","川汇","扶沟","西华","商水","沈丘","郸城","淮阳","太康","鹿邑","项城","驿城","西平","上蔡","平舆","正阳","确山","泌阳","汝南","遂平","新蔡","江岸","江汉","硚口","汉阳","武昌","青山","洪山","东西湖","汉南","蔡甸","江夏","黄陂","新洲","黄石港","西塞山","下陆","铁山","阳新","大冶","茅箭","张湾","郧阳","郧西","竹山","竹溪","房县","丹江口","西陵","伍家岗","点军","猇亭","夷陵","远安","兴山","秭归","长阳","五峰","宜都","当阳","
枝江","襄城","樊城","襄州","南漳","谷城","保康","老河口","枣阳","宜城","梁子湖","华容","鄂城","东宝","掇刀","沙洋","钟祥","京山","孝南","孝昌","大悟","云梦","应城","安陆","汉川","沙市","荆州","公安","监利","江陵","石首","洪湖","松滋","黄州","团风","红安","罗田","英山","浠水","蕲春","黄梅","麻城","武穴","咸安","嘉鱼","通城","崇阳","通山","赤壁","曾都","随县","广水","恩施","利川","建始","巴东","宣恩","咸丰","来凤","鹤峰","芙蓉","天心","岳麓","开福","雨花","望城","长沙","浏阳","宁乡","荷塘","芦淞","石峰","天元","渌口","攸县","茶陵","炎陵","醴陵","雨湖","岳塘","湘潭","湘乡","韶山","珠晖","雁峰","石鼓","蒸湘","南岳","衡阳","衡南","衡山","衡东","祁东","耒阳","常宁","双清","大祥","北塔","邵东","新邵","邵阳","隆回","洞口","绥宁","新宁","城步","武冈","岳阳楼","云溪","君山","岳阳","华容","湘阴","平江","汨罗","临湘","武陵","鼎城","安乡","汉寿","澧县","临澧","桃源","石门","津市","永定","武陵源","慈利","桑植","资阳","赫山","南县","桃江","安化","沅江","北湖","苏仙","桂阳","宜章","永兴","嘉禾","临武","汝城","桂东","安仁","资兴","零陵","冷水滩","祁阳","东安","双牌","道县","江永","宁远","蓝山","新田","江华","鹤城","中方","沅陵","辰溪","溆浦","会同","麻阳","新晃","芷江","靖州","通道","洪江","娄星","双峰","新化","冷水江","涟源","吉首","泸溪","凤凰","花垣","保靖","古丈","永顺","龙山","荔湾","越秀","海珠","天河","白云","黄埔","番禺","花都","南沙","从化","增城","武江","浈江","曲江","始兴","仁化","翁源","乳源","新丰","乐昌","南雄","罗湖","福田","南山","宝安","龙岗","盐田","龙华","坪山","光明","香洲","斗门","金湾","龙湖","金平","濠江","潮阳","潮南","澄海","南澳","禅城","南海","顺德","三水","高明","蓬江","江海","新会","台山","开平","鹤山","恩平","赤坎","霞山","坡头","麻章","遂溪","徐闻","廉江","雷州","吴川","茂南","电白","高州","化州","信宜","端州","鼎湖","高要","广宁","怀集","封开","德庆","四会","惠城","惠阳","博罗","惠东","龙门","梅江","梅县","大埔","丰顺","五华","平远","蕉岭","兴宁","城区","海丰","陆河","陆丰","源城","紫金","龙川","连平","和平","东源","江城","阳东","阳西","阳春","清城","清新","佛冈","阳山","连山","连南","英德","连州","湘桥","潮安","饶平","榕城","揭东","揭西","惠来","普宁","云城","云安","新兴","郁南","罗定","兴宁","青秀","江南","西乡塘","良庆","邕宁","武鸣","隆安","马山","上林","宾阳","横县","城中","鱼峰","柳南","柳北","柳江","柳城","鹿寨","融安","融水","三江","秀峰","叠彩","象山","七星","雁山","临桂","阳朔","灵川","全州","兴安","永福","灌阳","龙胜","资源","平乐","荔浦","恭城","万秀","长洲","龙圩","苍梧","藤县","蒙山","岑溪","海城","银海","铁山港","合浦","港口","防城","上思","东兴","钦南","钦北","灵山","浦北","港北","港南","覃塘","平南","桂平","玉州","福绵","容县","陆川","博白","兴业","北流","右江","田阳","田东","平果","德保","那坡","凌云","乐业","田林","西林","隆林","靖西","八步","平桂","昭平","钟山","富川","金城江","宜州","南丹","天峨","凤山","东兰","罗城","环
江","巴马","都安","大化","兴宾","忻城","象州","武宣","金秀","合山","江州","扶绥","宁明","龙州","大新","天等","凭祥","秀英","龙华","琼山","美兰","海棠","吉阳","天涯","崖州","万州","涪陵","渝中","大渡口","江北","沙坪坝","九龙坡","南岸","北碚","綦江","大足","渝北","巴南","黔江","长寿","江津","合川","永川","南川","璧山","铜梁","潼南","荣昌","开州","梁平","武隆","城口","丰都","垫江","忠县","云阳","奉节","巫山","巫溪","石柱","秀山","酉阳","彭水","锦江","青羊","金牛","武侯","成华","龙泉驿","青白江","新都","温江","双流","郫都","金堂","大邑","蒲江","新津","都江堰","彭州","邛崃","崇州","简阳","自流井","贡井","大安","沿滩","荣县","富顺","东区","西区","仁和","米易","盐边","江阳","纳溪","龙马潭","泸县","合江","叙永","古蔺","旌阳","罗江","中江","广汉","什邡","绵竹","涪城","游仙","安州","三台","盐亭","梓潼","北川","平武","江油","利州","昭化","朝天","旺苍","青川","剑阁","苍溪","船山","安居","蓬溪","射洪","大英","市中","东兴","威远","资中","隆昌","市中","沙湾","五通桥","金口河","犍为","井研","夹江","沐川","峨边","马边","峨眉山","顺庆","高坪","嘉陵","南部","营山","蓬安","仪陇","西充","阆中","东坡","彭山","仁寿","洪雅","丹棱","青神","翠屏","南溪","叙州","江安","长宁","高县","珙县","筠连","兴文","屏山","广安","前锋","岳池","武胜","邻水","华蓥","通川","达川","宣汉","开江","大竹","渠县","万源","雨城","名山","荥经","汉源","石棉","天全","芦山","宝兴","巴州","恩阳","通江","南江","平昌","雁江","安岳","乐至","马尔康","汶川","理县","茂县","松潘","九寨沟","金川","小金","黑水","壤塘","阿坝","若尔盖","红原","康定","泸定","丹巴","九龙","雅江","道孚","炉霍","甘孜","新龙","德格","白玉","石渠","色达","理塘","巴塘","乡城","稻城","得荣","西昌","木里","盐源","德昌","会理","会东","宁南","普格","布拖","金阳","昭觉","喜德","冕宁","越西","甘洛","美姑","雷波","南明","云岩","花溪","乌当","白云","观山湖","开阳","息烽","修文","清镇","钟山","六枝特","水城","盘州","红花岗","汇川","播州","桐梓","绥阳","正安","道真","务川","凤冈","湄潭","余庆","习水","赤水","仁怀","西秀","平坝","普定","镇宁","关岭","紫云","七星关","大方","黔西","金沙","织金","纳雍","威宁","赫章","碧江","万山","江口","玉屏","石阡","思南","印江","德江","沿河","松桃","兴义","兴仁","普安","晴隆","贞丰","望谟","册亨","安龙","凯里","黄平","施秉","三穗","镇远","岑巩","天柱","锦屏","剑河","台江","黎平","榕江","从江","雷山","麻江","丹寨","都匀","福泉","荔波","贵定","瓮安","独山","平塘","罗甸","长顺","龙里","惠水","三都","五华","盘龙","官渡","西山","东川","呈贡","晋宁","富民","宜良","石林","嵩明","禄劝","寻甸","安宁","麒麟","沾益","马龙","陆良","师宗","罗平","富源","会泽","宣威","红塔","江川","澄江","通海","华宁","易门","峨山","新平","元江","隆阳","施甸","龙陵","昌宁","腾冲","昭阳","鲁甸","巧家","盐津","大关","永善","绥江","镇雄","彝良","威信","水富","古城","玉龙","永胜","华坪","宁蒗","思茅","宁洱","墨江","景东","景谷","镇沅","江城","孟连","澜沧","西盟"
,"临翔","凤庆","云县","永德","镇康","双江","耿马","沧源","楚雄","双柏","牟定","南华","姚安","大姚","永仁","元谋","武定","禄丰","个旧","开远","蒙自","弥勒","屏边","建水","石屏","泸西","元阳","红河","金平","绿春","河口","文山","砚山","西畴","麻栗坡","马关","丘北","广南","富宁","景洪","勐海","勐腊","大理","漾濞","祥云","宾川","弥渡","南涧","巍山","永平","云龙","洱源","剑川","鹤庆","瑞丽","芒市","梁河","盈江","陇川","泸水","福贡","贡山","兰坪","香格里拉","德钦","维西","城关","堆龙德庆","达孜","林周","当雄","尼木","曲水","墨竹工卡","桑珠孜","南木林","江孜","定日","萨迦","拉孜","昂仁","谢通门","白朗","仁布","康马","定结","仲巴","亚东","吉隆","聂拉木","萨嘎","岗巴","卡若","江达","贡觉","类乌齐","丁青","察雅","八宿","左贡","芒康","洛隆","边坝","巴宜","工布江达","米林","墨脱","波密","察隅","朗县","乃东","扎囊","贡嘎","桑日","琼结","曲松","措美","洛扎","加查","隆子","错那","浪卡子","色尼","嘉黎","比如","聂荣","安多","申扎","索县","班戈","巴青","尼玛","双湖","普兰","札达","噶尔","日土","革吉","改则","措勤","新城","碑林","莲湖","灞桥","未央","雁塔","阎良","临潼","长安","高陵","鄠邑","蓝田","周至","王益","印台","耀州","宜君","渭滨","金台","陈仓","凤翔","岐山","扶风","眉县","陇县","千阳","麟游","凤县","太白","秦都","杨陵","渭城","三原","泾阳","乾县","礼泉","永寿","长武","旬邑","淳化","武功","兴平","彬州","临渭","华州","潼关","大荔","合阳","澄城","蒲城","白水","富平","韩城","华阴","宝塔","安塞","延长","延川","子长","志丹","吴起","甘泉","富县","洛川","宜川","黄龙","黄陵","汉台","南郑","城固","洋县","西乡","勉县","宁强","略阳","镇巴","留坝","佛坪","榆阳","横山","府谷","靖边","定边","绥德","米脂","佳县","吴堡","清涧","子洲","神木","汉滨","汉阴","石泉","宁陕","紫阳","岚皋","平利","镇坪","旬阳","白河","商州","洛南","丹凤","商南","山阳","镇安","柞水","城关","七里河","西固","安宁","红古","永登","皋兰","榆中","金川","永昌","白银","平川","靖远","会宁","景泰","秦州","麦积","清水","秦安","甘谷","武山","张家川","凉州","民勤","古浪","天祝","甘州","肃南","民乐","临泽","高台","山丹","崆峒","泾川","灵台","崇信","庄浪","静宁","华亭","肃州","金塔","瓜州","肃北","阿克塞","玉门","敦煌","西峰","庆城","环县","华池","合水","正宁","宁县","镇原","安定","通渭","陇西","渭源","临洮","漳县","岷县","武都","成县","文县","宕昌","康县","西和","礼县","徽县","两当","临夏","临夏","康乐","永靖","广河","和政","东乡","积石山","合作","临潭","卓尼","舟曲","迭部","玛曲","碌曲","夏河","城东","城中","城西","城北","大通","湟中","湟源","乐都","平安","民和","互助","化隆","循化","门源","祁连","海晏","刚察","同仁","尖扎","泽库","河南","共和","同德","贵德","兴海","贵南","玛沁","班玛","甘德","达日","久治","玛多","玉树","杂多","称多","治多","囊谦","曲麻莱","格尔木","德令哈","茫崖","乌兰","都兰","天峻","兴庆","西夏","金凤","永宁","贺兰","灵武","大武口","惠农","平罗","利通","红寺堡","盐池","同心","青铜峡","原州","西吉","隆德","泾源"
,"彭阳","沙坡头","中宁","海原","天山","沙依巴克","新市","水磨沟","头屯河","达坂城","米东","乌鲁木齐","独山子","克拉玛依","白碱滩","乌尔禾","高昌","鄯善","托克逊","伊州","巴里坤哈萨克","伊吾","昌吉","阜康","呼图壁","玛纳斯","奇台","吉木萨尔","木垒哈萨克","博乐","阿拉山口","精河","温泉","库尔勒","轮台","尉犁","若羌","且末","焉耆","和静","和硕","博湖","阿克苏","温宿","库车","沙雅","新和","拜城","乌什","阿瓦提","柯坪","阿图什","阿克陶","阿合奇","乌恰","喀什","疏附","疏勒","英吉沙","泽普","莎车","叶城","麦盖提","岳普湖","伽师","巴楚","塔什库尔干塔吉克","和田","和田","墨玉","皮山","洛浦","策勒","于田","民丰","伊宁","奎屯","霍尔果斯","伊宁","察布查尔锡伯","霍城","巩留","新源","昭苏","特克斯","尼勒克","塔城","乌苏","额敏","沙湾","托里","裕民","和布克赛尔","阿勒泰","布尔津","富蕴","福海","哈巴河","青河","吉木乃","内湖","南港","中正","万华","大同","中山","松山","大安","信义","文山","士林","北投","楠梓","左营","鼓山","三民","盐埕","前金","新兴","苓雅","前镇","旗津","小港","凤山","大寮","鸟松","林园","仁武","大树","大社","冈山","路竹","桥头","梓官","弥陀","永安","燕巢","阿莲","茄萣","湖内","田寮","旗山","美浓","内门","杉林","甲仙","六龟","茂林","桃源","那玛夏","桃园","中坜","平镇","八德","杨梅","芦竹","大溪","龙潭","龟山","大园","观音","新屋","复兴","中区","东区","南区","西区","北区","北屯","西屯","南屯","太平","大里","雾峰","乌日","丰原","后里","潭子","大雅","神冈","石冈","东势","新社","和平","大肚","沙鹿","龙井","梧栖","清水","大甲","外埔","大安","中西","东区","南区","北区","安平","安南","永康","归仁","新化","左镇","玉井","楠西","南化","仁德","关庙","龙崎","官田","麻豆","佳里","西港","七股","将军","学甲","北门","新营","后壁","白河","东山","六甲","下营","柳营","盐水","善化","大内","山上","新市","安定","板桥","汐止","新店","永和","中和","土城","树林","三重","新庄","芦洲","瑞芳","三峡","莺歌","淡水","万里","金山","深坑","石碇","平溪","双溪","贡寮","坪林","乌来","泰山","林口","五股","八里","三芝","石门","中正","信义","仁爱","中山","安乐","暖暖","七堵","东区","北区","香山","东区","西区","中西","东区","南区","湾仔","九龙城","观塘","深水埗","黄大仙","油尖旺","离岛","葵青","北区","西贡","沙田","大埔","荃湾","屯门","元朗","花地玛堂","圣安多尼堂","大堂","望德堂","风顺堂","嘉模堂","圣方济各堂"]]
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>middleware-automatic-center</artifactId>
<groupId>com.zhiwei</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>middleware-automatic-center-son</artifactId>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<redisson.version>3.17.3</redisson.version>
<json.version>1.2.47</json.version>
<push-log.version>2.17.0-SNAPSHOT</push-log.version>
<curator.version>2.12.0</curator.version>
<es.version>7.9.2</es.version>
<es-client.version>0.0.4-SNAPSHOT</es-client.version>
<qbjc-bean.version>1.1.4.1-SNAPSHOT</qbjc-bean.version>
<kafka.version>2.4.1.RELEASE</kafka.version>
<base.version>2.0.0-SNAPSHOT</base.version>
<automatic.version>1.0-SNAPSHOT</automatic.version>
<marker.version>1.2.3-SNAPSHOT</marker.version>
<filter.version>1.1.6-SNAPSHOT</filter.version>
<nlp-aggree.version>0.0.5-SNAPSHOT</nlp-aggree.version>
<dubbo-server.version>2.7.4.1</dubbo-server.version>
</properties>
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.dubbo/dubbo-spring-boot-starter -->
<dependency>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo-spring-boot-starter</artifactId>
<version>${dubbo-server.version}</version>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
<version>${dubbo-server.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<artifactId>fastjson</artifactId>
<groupId>com.alibaba</groupId>
</exclusion>
<exclusion>
<artifactId>snakeyaml</artifactId>
<groupId>org.yaml</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.zookeeper/zookeeper -->
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</dependency>
<dependency>
<groupId>com.zhiwei.nlp</groupId>
<artifactId>nlp-aggree</artifactId>
<version>${nlp-aggree.version}</version>
</dependency>
<!-- 日志依赖使用crawler-filter -->
<dependency>
<groupId>com.zhiwei.middleware</groupId>
<artifactId>cleaner-unified-filter</artifactId>
<version>${filter.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- 标注客户端 -->
<dependency>
<groupId>com.zhiwei.middleware</groupId>
<artifactId>marker-client</artifactId>
<version>${marker.version}</version>
<exclusions>
<exclusion>
<artifactId>fastjson</artifactId>
<groupId>com.alibaba</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>middleware-automatic-center-client</artifactId>
<version>${automatic.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo-spring-boot-starter</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.dubbo</groupId>
<artifactId>dubbo</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.zhiwei.base</groupId>
<artifactId>base-objects-application</artifactId>
<version>${base.version}</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.springframework.kafka</groupId>-->
<!-- <artifactId>spring-kafka</artifactId>-->
<!-- <version>${kafka.version}</version>-->
<!-- <exclusions>-->
<!-- <exclusion>-->
<!-- <groupId>org.springframework</groupId>-->
<!-- <artifactId>spring-context</artifactId>-->
<!-- </exclusion>-->
<!-- </exclusions>-->
<!-- </dependency>-->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
<version>${spring-boot.version}</version>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>qbjc-bean</artifactId>
<version>${qbjc-bean.version}</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>${es.version}</version>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>es-client</artifactId>
<version>${es-client.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-mongodb</artifactId>
<version>${spring-boot.version}</version>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</exclusion>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</exclusion>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>${curator.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<!--日志整合-->
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>push-log</artifactId>
<version>${push-log.version}</version>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.redisson</groupId>
<artifactId>redisson-spring-boot-starter</artifactId>
<version>${redisson.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-web -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<version>${spring-boot.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${json.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package com.zhiwei.middleware.automatic.son;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Spring Boot entry point for the {@code com.zhiwei.middleware.automatic.son} module.
 */
@SpringBootApplication
public class Application {

    /**
     * Starts the Spring application using this class as the primary source.
     *
     * @param args command-line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(Application.class);
        application.run(args);
    }
}
package com.zhiwei.middleware.automatic.server.config; package com.zhiwei.middleware.automatic.son.config;
import com.zhiwei.es.pojo.Address; import com.zhiwei.es.pojo.Address;
import com.zhiwei.es.util.IndexUtil; import com.zhiwei.es.util.IndexUtil;
......
package com.zhiwei.middleware.automatic.server.config; package com.zhiwei.middleware.automatic.son.config;
import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
......
package com.zhiwei.middleware.automatic.son.config;
import java.util.Set;
/**
 * Empty placeholder type: it currently declares no fields or methods.
 */
public class GlobalPojo {
}
package com.zhiwei.middleware.automatic.son.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Holder for the configuration properties declared under the {@code middleware} prefix.
 *
 * <p>Each property is exposed through a conventional getter/setter pair so it can be
 * populated by Spring's {@code @ConfigurationProperties} binding.
 */
@Component
@ConfigurationProperties(prefix = "middleware")
public class MiddlewareProperties {

    /** Value of {@code middleware.zookeeperAddress}. */
    private String zookeeperAddress;

    /** Value of {@code middleware.appName}. */
    private String appName;

    /** Value of {@code middleware.markGroup}. */
    private String markGroup;

    /** Value of {@code middleware.filterGroup}. */
    private String filterGroup;

    /** @return the configured ZooKeeper address, or {@code null} if unset */
    public String getZookeeperAddress() {
        return this.zookeeperAddress;
    }

    /** @param address the ZooKeeper address to store */
    public void setZookeeperAddress(String address) {
        this.zookeeperAddress = address;
    }

    /** @return the configured application name, or {@code null} if unset */
    public String getAppName() {
        return this.appName;
    }

    /** @param name the application name to store */
    public void setAppName(String name) {
        this.appName = name;
    }

    /** @return the configured mark group, or {@code null} if unset */
    public String getMarkGroup() {
        return this.markGroup;
    }

    /** @param group the mark group to store */
    public void setMarkGroup(String group) {
        this.markGroup = group;
    }

    /** @return the configured filter group, or {@code null} if unset */
    public String getFilterGroup() {
        return this.filterGroup;
    }

    /** @param group the filter group to store */
    public void setFilterGroup(String group) {
        this.filterGroup = group;
    }
}
package com.zhiwei.middleware.automatic.son.config;
import com.mongodb.ConnectionString;
import com.mongodb.MongoClientSettings;
import com.mongodb.client.MongoClients;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import org.springframework.data.mongodb.MongoDatabaseFactory;
import org.springframework.data.mongodb.SpringDataMongoDB;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.SimpleMongoClientDatabaseFactory;
import org.springframework.data.mongodb.core.convert.DbRefResolver;
import org.springframework.data.mongodb.core.convert.DefaultDbRefResolver;
import org.springframework.data.mongodb.core.convert.DefaultMongoTypeMapper;
import org.springframework.data.mongodb.core.convert.MappingMongoConverter;
import org.springframework.data.mongodb.core.mapping.MongoMappingContext;
import java.util.concurrent.TimeUnit;
/**
 * Spring configuration that exposes two {@link MongoTemplate} beans, each backed by its own
 * connection string and database name:
 * <ul>
 *   <li>{@code markerMongoTemplate} (primary) - {@code primary.uri.marker} / {@code mongo.dataBaseMarker}</li>
 *   <li>{@code hangzhouMongoTemplate} - {@code primary.uri.hangzhou} / {@code mongo.hangzhouMarker}</li>
 * </ul>
 *
 * <p>Both templates are configured with a type mapper that does not write the {@code _class}
 * field into stored documents.
 *
 * @author liu-yu
 */
@Configuration
public class MongoConfig {

    /** Socket connect timeout, interpreted as milliseconds. */
    @Value("${mongo.connectTimeout}")
    private int connectTimeout;

    /** Maximum time to wait for a pooled connection, interpreted as milliseconds. */
    @Value("${mongo.maxWaitTime}")
    private int maxWaitTime;

    @Value("${mongo.dataBaseMarker}")
    private String dataBaseMarker;

    @Value("${primary.uri.marker}")
    private String uriMarker;

    @Value("${primary.uri.hangzhou}")
    private String uriHangZhou;

    @Value("${mongo.hangzhouMarker}")
    private String dataBaseHangZhou;

    /**
     * Builds the primary template, connected to the "marker" database.
     *
     * @return template bound to {@code primary.uri.marker}
     */
    @Primary
    @Bean(name = "markerMongoTemplate")
    public MongoTemplate getMongoTemplateMarker() {
        return buildTemplate(buildFactory(uriMarker, dataBaseMarker));
    }

    /**
     * Builds the template connected to the "hangzhou" database.
     *
     * @return template bound to {@code primary.uri.hangzhou}
     */
    @Bean(name = "hangzhouMongoTemplate")
    public MongoTemplate getMongoTemplateHangZhou() {
        return buildTemplate(buildFactory(uriHangZhou, dataBaseHangZhou));
    }

    /**
     * Creates a client for {@code uri} and wraps it in a database factory for {@code database}.
     *
     * <p>The timeouts are applied in milliseconds. The previous code passed
     * {@code TimeUnit.MICROSECONDS}, which shrinks every configured value by a factor of 1000.
     * NOTE(review): confirm that the deployed {@code mongo.connectTimeout} and
     * {@code mongo.maxWaitTime} values are expressed in milliseconds.
     */
    private MongoDatabaseFactory buildFactory(String uri, String database) {
        MongoClientSettings.Builder builder = MongoClientSettings.builder();
        builder.applyConnectionString(new ConnectionString(uri));
        builder.applyToConnectionPoolSettings(pool -> pool.maxWaitTime(maxWaitTime, TimeUnit.MILLISECONDS));
        builder.applyToSocketSettings(socket -> socket.connectTimeout(connectTimeout, TimeUnit.MILLISECONDS));
        return new SimpleMongoClientDatabaseFactory(
                MongoClients.create(builder.build(), SpringDataMongoDB.driverInformation()), database);
    }

    /**
     * Builds a template on top of {@code factory}.
     *
     * <p>The same factory instance is used for the DBRef resolver and for the template itself.
     * Previously the factory method was invoked twice per bean, which created a second
     * {@code MongoClient} that was only referenced by the resolver and never closed.
     */
    private static MongoTemplate buildTemplate(MongoDatabaseFactory factory) {
        DbRefResolver dbRefResolver = new DefaultDbRefResolver(factory);
        MappingMongoConverter converter = new MappingMongoConverter(dbRefResolver, new MongoMappingContext());
        // A null type key disables writing the _class field.
        converter.setTypeMapper(new DefaultMongoTypeMapper(null));
        return new MongoTemplate(factory, converter);
    }
}
package com.zhiwei.middleware.automatic.son.config;
import org.springframework.context.annotation.Bean;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Component;
import java.util.concurrent.ThreadPoolExecutor;
/**
 * Declares the thread pools used by this module.
 *
 * <p>Both pools reject with {@link ThreadPoolExecutor.CallerRunsPolicy}: when the pool and its
 * queue are full, the submitting thread runs the task itself instead of the task being dropped.
 */
@Component
public class TaskPoolConfig {

    /**
     * Pool registered as {@code autMarkExecutor}: 15 core threads, 25 max threads, queue of 50.
     *
     * @return the initialized executor
     */
    @Bean("autMarkExecutor")
    public ThreadPoolTaskExecutor autMarkExecutor() {
        return newCallerRunsPool(15, 25, "autoMark-executor-", 50);
    }

    /**
     * Pool registered as {@code templateExecutor}: 8 core threads, 15 max threads, queue of 20.
     *
     * @return the initialized executor
     */
    @Bean("templateExecutor")
    public ThreadPoolTaskExecutor templateExecutor() {
        return newCallerRunsPool(8, 15, "template-executor-", 20);
    }

    /** Builds and initializes an executor with the given sizing and a caller-runs rejection policy. */
    private static ThreadPoolTaskExecutor newCallerRunsPool(int coreSize, int maxSize,
                                                            String namePrefix, int queueCapacity) {
        ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setCorePoolSize(coreSize);
        pool.setMaxPoolSize(maxSize);
        pool.setThreadNamePrefix(namePrefix);
        pool.setQueueCapacity(queueCapacity);
        pool.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
        pool.initialize();
        return pool;
    }
}
package com.zhiwei.middleware.automatic.server.dao; package com.zhiwei.middleware.automatic.son.dao;
import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilder;
......
package com.zhiwei.middleware.automatic.son.dao;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import org.springframework.data.mongodb.core.query.Query;
import java.util.List;
/**
 * Data-access contract for {@link TemplateRecord} documents.
 */
public interface TemplateRecordDao {

    /**
     * Finds the template records matching a query.
     *
     * @param query selection criteria
     * @return matching records
     */
    List<TemplateRecord> findTemplateRecord(Query query);

    /**
     * Stores a new template record.
     *
     * @param templateRecord record to insert
     */
    void insertTemplateRecord(TemplateRecord templateRecord);

    /**
     * Counts the template records matching a query.
     *
     * @param query selection criteria
     * @return number of matching records
     */
    long count(Query query);

    /**
     * Deletes the template records matching a query.
     *
     * @param query selection criteria
     */
    void removeTemplateRecord(Query query);
}
package com.zhiwei.middleware.automatic.server.dao.impl; package com.zhiwei.middleware.automatic.son.dao.impl;
import com.zhiwei.middleware.automatic.server.dao.EsDao; import com.zhiwei.middleware.automatic.son.dao.EsDao;
import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RequestOptions;
...@@ -21,7 +21,6 @@ import java.util.List; ...@@ -21,7 +21,6 @@ import java.util.List;
@Component @Component
public class EsDaoImpl implements EsDao { public class EsDaoImpl implements EsDao {
private final RestHighLevelClient esClient; private final RestHighLevelClient esClient;
public EsDaoImpl(RestHighLevelClient esClient) { public EsDaoImpl(RestHighLevelClient esClient) {
......
package com.zhiwei.middleware.automatic.son.dao.impl;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import com.zhiwei.middleware.automatic.son.dao.TemplateRecordDao;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * {@link TemplateRecordDao} backed by the {@code markerMongoTemplate} bean. Every operation
 * targets the {@code automaticmark_template_record} collection.
 */
@Component
public class TemplateRecordDaoImpl implements TemplateRecordDao {

    /** Name of the collection that stores template records. */
    private static final String COLLECTION = "automaticmark_template_record";

    private final MongoTemplate mongoTemplate;

    public TemplateRecordDaoImpl(@Qualifier("markerMongoTemplate") MongoTemplate mongoTemplate) {
        this.mongoTemplate = mongoTemplate;
    }

    /** {@inheritDoc} */
    @Override
    public List<TemplateRecord> findTemplateRecord(Query query) {
        List<TemplateRecord> records = mongoTemplate.find(query, TemplateRecord.class, COLLECTION);
        return records;
    }

    /** {@inheritDoc} */
    @Override
    public void insertTemplateRecord(TemplateRecord templateRecord) {
        mongoTemplate.insert(templateRecord, COLLECTION);
    }

    /** {@inheritDoc} */
    @Override
    public long count(Query query) {
        long total = mongoTemplate.count(query, TemplateRecord.class, COLLECTION);
        return total;
    }

    /** {@inheritDoc} */
    @Override
    public void removeTemplateRecord(Query query) {
        mongoTemplate.remove(query, TemplateRecord.class, COLLECTION);
    }
}
package com.zhiwei.middleware.automatic.server.dubbo.handle; package com.zhiwei.middleware.automatic.son.dubbo;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo; import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.base.filter.FilterInfo; import com.zhiwei.base.filter.FilterInfo;
import com.zhiwei.middleware.automatic.server.config.MiddlewareProperties; import com.zhiwei.middleware.automatic.son.config.MiddlewareProperties;
import com.zhiwei.middleware.automatic.server.util.DataCollectionUtil; import com.zhiwei.middleware.automatic.son.util.DataCollectionUtil;
import com.zhiwei.middleware.automatic.server.util.EventCollectionUtil; import com.zhiwei.middleware.automatic.son.util.EventCollectionUtil;
import com.zhiwei.middleware.automatic.server.util.MarkInfoUtil; import com.zhiwei.middleware.automatic.son.util.MarkInfoUtil;
import com.zhiwei.middleware.cleaner.filter.UnifiedFilterClient; import com.zhiwei.middleware.cleaner.filter.UnifiedFilterClient;
import com.zhiwei.middleware.mark.service.MarkerClient; import com.zhiwei.middleware.mark.service.MarkerClient;
import com.zhiwei.middleware.mark.vo.QueryResult; import com.zhiwei.middleware.mark.vo.QueryResult;
...@@ -19,7 +19,6 @@ import java.util.Map; ...@@ -19,7 +19,6 @@ import java.util.Map;
@Component @Component
public class DubboHandler { public class DubboHandler {
private static final Logger log = LogManager.getLogger(DubboHandler.class); private static final Logger log = LogManager.getLogger(DubboHandler.class);
private final UnifiedFilterClient unifiedFilterClient; private final UnifiedFilterClient unifiedFilterClient;
...@@ -73,7 +72,6 @@ public class DubboHandler { ...@@ -73,7 +72,6 @@ public class DubboHandler {
/** /**
* 事件采集标注清洗接口 * 事件采集标注清洗接口
* *
* @param consumers
* @return list(重新计算filterInfo后的数据) * @return list(重新计算filterInfo后的数据)
*/ */
public List<MarkInfo> eventCollectionUpsertWithSupplement(List<JSONObject> list, String mgroup, String mperson) { public List<MarkInfo> eventCollectionUpsertWithSupplement(List<JSONObject> list, String mgroup, String mperson) {
......
package com.zhiwei.middleware.automatic.son.mission;
/**
 * Placeholder type in the {@code son.mission} package; it currently declares no fields or methods.
 */
public class AsyncTask {
}
package com.zhiwei.middleware.automatic.son.mission;
/**
 * Placeholder type in the {@code son.mission} package; it currently declares no fields or methods.
 */
public class InitTask {
}
package com.zhiwei.middleware.automatic.son.mission;
/**
 * Placeholder type in the {@code son.mission} package; it currently declares no fields or methods.
 */
public class ScheduledMission {
}
package com.zhiwei.middleware.automatic.son.mission;
/**
 * Placeholder type in the {@code son.mission} package; it currently declares no fields or methods.
 */
public class StartTask {
}
package com.zhiwei.middleware.automatic.son.service;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import com.zhiwei.middleware.automatic.server.pojo.TemplateTitleVo;
import java.util.List;
import java.util.Map;
/**
 * Operations on the per-project text templates used for automatic marking.
 */
public interface TemplateTitleService {

    /**
     * Loads the text templates of a project.
     *
     * @param project project name
     * @return templates of the project, keyed by string
     */
    Map<String, TemplateTitleVo> getTemplateTitleByProject(String project);

    /**
     * Stores text templates for a project.
     *
     * @param project project name
     * @param vos     templates to store
     */
    void setTemplateTitleByProject(String project, Map<String, TemplateTitleVo> vos);

    /**
     * Corrects the mark tag of a template title.
     *
     * @param group         project group
     * @param templateTitle template title
     * @param fixTag        the correct tag
     * @return whether the tag was updated
     */
    boolean modifyTemplateTitle(String group, String templateTitle, String fixTag);

    /**
     * Stores a new template record.
     *
     * @param templateRecord record to insert
     */
    void insertTemplateRecord(TemplateRecord templateRecord);

    /**
     * Resets an automatic-marking template.
     *
     * @param group         project
     * @param templateTitle template title
     * @return whether the reset was accepted
     */
    boolean resetTemplate(String group, String templateTitle);
}
package com.zhiwei.middleware.automatic.son.service.impl;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.MarkInfo;
import com.zhiwei.es.index.Index;
import com.zhiwei.es.util.IndexUtil;
import com.zhiwei.middleware.automatic.server.common.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import com.zhiwei.middleware.automatic.server.pojo.TemplateTitleVo;
import com.zhiwei.middleware.automatic.server.pojo.enums.TemplateStatus;
import com.zhiwei.middleware.automatic.son.dao.EsDao;
import com.zhiwei.middleware.automatic.son.dao.TemplateRecordDao;
import com.zhiwei.middleware.automatic.son.dubbo.DubboHandler;
import com.zhiwei.middleware.automatic.son.service.TemplateTitleService;
import com.zhiwei.middleware.automatic.son.util.MarkInfoUtil;
import com.zhiwei.middleware.automatic.son.util.RedissonUtil;
import com.zhiwei.middleware.automatic.son.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.data.mongodb.core.query.Criteria;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;
/**
 * {@link TemplateTitleService} implementation that keeps templates in a Redis map (one map per
 * project, keyed through {@code GenericAttribute.REDIS_MAP_KEY}) and keeps template records in
 * MongoDB through {@link TemplateRecordDao}.
 */
@Service
public class TemplateTitleServiceImpl implements TemplateTitleService {

    private static final Logger log = LogManager.getLogger(TemplateTitleServiceImpl.class);

    /** Maximum number of hits requested from Elasticsearch for one batch of feature values. */
    private static final int ES_SEARCH_SIZE = 1000;

    private final RedissonUtil redissonUtil;
    private final EsDao esDao;
    private final IndexUtil.ESIndexes esIndexes;
    private final TemplateRecordDao templateRecordDao;
    private final DubboHandler dubboHandler;
    private final ThreadPoolTaskExecutor executor;

    public TemplateTitleServiceImpl(RedissonUtil redissonUtil, EsDao esDao,
                                    IndexUtil.ESIndexes esIndexes, TemplateRecordDao templateRecordDao,
                                    DubboHandler dubboHandler,
                                    @Qualifier("templateExecutor") ThreadPoolTaskExecutor executor) {
        this.redissonUtil = redissonUtil;
        this.esDao = esDao;
        this.esIndexes = esIndexes;
        this.templateRecordDao = templateRecordDao;
        this.dubboHandler = dubboHandler;
        this.executor = executor;
    }

    /**
     * Reads the project's template map from Redis.
     *
     * @param project project name
     * @return the restored templates, or a new empty map when nothing is stored
     */
    @Override
    public Map<String, TemplateTitleVo> getTemplateTitleByProject(String project) {
        Map<String, String> mapValue = redissonUtil.getMapValue(Tools.assembleKey(GenericAttribute.REDIS_MAP_KEY, project));
        if (Tools.isEmpty(mapValue)) {
            return new HashMap<>();
        }
        return Tools.restoreTMap(mapValue, TemplateTitleVo.class);
    }

    /**
     * Writes every entry of {@code vos} into the project's Redis map as JSON.
     *
     * @param project project name
     * @param vos     templates to write
     */
    @Override
    public synchronized void setTemplateTitleByProject(String project, Map<String, TemplateTitleVo> vos) {
        String key = Tools.assembleKey(GenericAttribute.REDIS_MAP_KEY, project);
        for (Map.Entry<String, TemplateTitleVo> entry : vos.entrySet()) {
            redissonUtil.setMapValue(key, entry.getKey(), JSONObject.toJSONString(entry.getValue()));
        }
    }

    /**
     * Replaces the tag of an existing template.
     *
     * @param group         project group
     * @param templateTitle template title; symbols are filtered before lookup
     * @param fixTag        the correct tag
     * @return {@code true} when the template existed and was rewritten; {@code false} when it
     *         does not exist or any exception occurred
     */
    @Override
    public boolean modifyTemplateTitle(String group, String templateTitle, String fixTag) {
        try {
            // Strip symbols so the lookup key matches how template titles are stored.
            templateTitle = Tools.filterSymbol(templateTitle);
            String key = Tools.assembleKey(GenericAttribute.REDIS_MAP_KEY, group);
            String mapKeyValue = redissonUtil.getMapKeyValue(key, templateTitle);
            if (Tools.isEmpty(mapKeyValue)) {
                return false;
            }
            TemplateTitleVo titleVo = JSONObject.parseObject(mapKeyValue, TemplateTitleVo.class);
            String oldTag = titleVo.getMtag();
            titleVo.setMtag(fixTag);
            redissonUtil.setMapValue(key, templateTitle, JSONObject.toJSONString(titleVo));
            log.info("修改模板标签成功: group:{} templateTitle:{} oldTag:{} fixTag:{}", group, templateTitle, oldTag,
                    fixTag);
            return true;
        } catch (Exception e) {
            // Report through the logger (with stack trace) instead of printing to stderr.
            log.error("修改模板标签失败: group:{} templateTitle:{} fixTag:{}", group, templateTitle, fixTag, e);
        }
        return false;
    }

    /**
     * Delegates to {@link TemplateRecordDao#insertTemplateRecord(TemplateRecord)}.
     *
     * @param templateRecord record to insert
     */
    @Override
    public void insertTemplateRecord(TemplateRecord templateRecord) {
        templateRecordDao.insertTemplateRecord(templateRecord);
    }

    /**
     * Starts an asynchronous reset of a template.
     *
     * <p>Only templates whose status is unset or "running" are accepted. The status is first
     * stored as "resetting"; the background job then re-submits the recorded marks, deletes the
     * template's records, and stores the final status ("reset" or "reset failed").
     *
     * @param group         project
     * @param templateTitle template title; symbols are filtered before lookup
     * @return {@code true} when the reset job was submitted, {@code false} when the template is
     *         missing or not in a resettable status
     */
    @Override
    public boolean resetTemplate(String group, String templateTitle) {
        // Strip symbols so the lookup key matches how template titles are stored.
        templateTitle = Tools.filterSymbol(templateTitle);
        Map<String, TemplateTitleVo> map = getTemplateTitleByProject(group);
        TemplateTitleVo templateTitleVo = map.get(templateTitle);
        if (Objects.isNull(templateTitleVo)) {
            return false;
        }
        boolean resettable = Objects.isNull(templateTitleVo.getStatus())
                || templateTitleVo.getStatus().name().equals(TemplateStatus.运行中.name());
        if (!resettable) {
            return false;
        }
        templateTitleVo.setStatus(TemplateStatus.重置中);
        setTemplateTitleByProject(group, map);
        executor.execute(() -> {
            try {
                modifyTemplateMarkerInfo(templateTitleVo, group);
                templateTitleVo.setStatus(TemplateStatus.已重置);
                templateRecordDao.removeTemplateRecord(new Query(Criteria.where("templateId").is(templateTitleVo.getId())));
            } catch (Exception e) {
                templateTitleVo.setStatus(TemplateStatus.重置失败);
                // Pass the exception so the cause of the failed reset is not lost.
                log.error("重置模板:修改聚和集错误,title:{},以加入重试队列", templateTitleVo.getTemplateTitle(), e);
            } finally {
                // Persist whichever final status was set above.
                setTemplateTitleByProject(group, map);
            }
        });
        return true;
    }

    /**
     * Re-submits the marks recorded for a template to the marking middleware, page by page.
     *
     * @param templateTitleVo template whose records are processed
     * @param project         project name, used for logging only
     * @return always {@code true}; failures surface as exceptions
     */
    private boolean modifyTemplateMarkerInfo(TemplateTitleVo templateTitleVo, String project) throws Exception {
        long now = System.currentTimeMillis();
        long count = templateRecordDao.count(new Query(Criteria.where("templateId").is(templateTitleVo.getId())));
        if (count == 0) {
            return true;
        }
        // Ceiling division: number of pages of POINT_SIZE records.
        int page = (int) ((count) + GenericAttribute.POINT_SIZE - 1) / GenericAttribute.POINT_SIZE;
        for (int i = 0; i < page; i++) {
            Query query = new Query(Criteria.where("templateId").is(templateTitleVo.getId()));
            query.skip(i * GenericAttribute.POINT_SIZE)
                    .limit(GenericAttribute.POINT_SIZE);
            // Load one page of records from MongoDB.
            List<TemplateRecord> templateRecord = templateRecordDao.findTemplateRecord(query);
            List<String> mupdates = templateRecord.stream().map(TemplateRecord::getMupdate).collect(Collectors.toList());
            if (mupdates.isEmpty()) {
                // An empty bool query has no clauses and would match every auto-marked document.
                continue;
            }
            // Look the feature values up in Elasticsearch.
            SearchHits hits = findByMupdateInfos(mupdates);
            // Convert the hits to MarkInfo, dropping any that cannot be converted.
            List<MarkInfo> collect = Arrays.stream(hits.getHits())
                    .map(e -> MarkInfoUtil.distinguishMarkInfo(e, null))
                    .filter(Objects::nonNull).collect(Collectors.toList());
            // Hand the batch to the marking middleware.
            dubboHandler.markUpsert(collect);
        }
        log.info("自动标注模板:模板已改动,项目:{},模板标题:{},特征值数量:{}, 耗时:{}"
                , project, templateTitleVo.getTemplateTitle(), count, System.currentTimeMillis() - now);
        return true;
    }

    /**
     * Searches the mark indexes for auto-marked documents whose {@code mupdate} equals any of the
     * given feature values.
     *
     * <p>NOTE(review): at most {@code ES_SEARCH_SIZE} hits are requested per call; confirm this is
     * not smaller than the number of documents one page of records can match.
     *
     * @param mupdate feature values; callers must not pass an empty list
     * @return Elasticsearch hits
     */
    private SearchHits findByMupdateInfos(List<String> mupdate) throws IOException {
        List<String> mark2 = esIndexes.getMarkIndexes(Index.mark2.name());
        String[] indexes = mark2.toArray(new String[0]);
        BoolQueryBuilder query = QueryBuilders.boolQuery();
        query.must(QueryBuilders.termQuery("mperson", GenericAttribute.AUTO_PERSON));
        // OR over all feature values.
        BoolQueryBuilder should = QueryBuilders.boolQuery();
        for (String value : mupdate) {
            should.should(QueryBuilders.termQuery("mupdate", value));
        }
        query.must(should);
        return esDao.search(indexes, null, query, null, 0, ES_SEARCH_SIZE, null);
    }
}
package com.zhiwei.middleware.automatic.son.task;
import com.alibaba.fastjson.JSONObject;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.zhiwei.middleware.automatic.server.common.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.AutoTask;
import com.zhiwei.middleware.automatic.server.pojo.enums.TaskType;
import com.zhiwei.middleware.automatic.son.task.holder.ApplicationContextHolder;
import com.zhiwei.middleware.automatic.son.task.holder.TaskServiceHandler;
import com.zhiwei.middleware.automatic.son.task.service.TaskService;
import com.zhiwei.middleware.automatic.son.util.RedissonUtil;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.Strings;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
/**
 * Task manager: after application start-up it polls a Redis queue on a fixed delay and hands
 * each pulled task to the {@link TaskService} registered for its type.
 */
@Component
public class TaskManager implements ApplicationRunner {

    private static final Logger log = LogManager.getLogger(TaskManager.class);

    /** Single daemon thread that runs the periodic queue poll. */
    private static final ScheduledExecutorService TASK_EXECUTOR = Executors.newScheduledThreadPool(1,
            new ThreadFactoryBuilder().setDaemon(true).setNameFormat("task-manager").build());

    /** Maximum number of tasks pulled from the queue per poll. */
    private static final int LIMIT = 5;

    private final RedissonUtil redissonUtil;

    public TaskManager(RedissonUtil redissonUtil) {
        this.redissonUtil = redissonUtil;
    }

    /** Schedules {@link #pullTask()} to run every 10 seconds, starting 10 seconds after start-up. */
    @Override
    public void run(ApplicationArguments args) {
        TASK_EXECUTOR.scheduleWithFixedDelay(this::pullTask, 10L, 10L, TimeUnit.SECONDS);
        log.info("定时线程构建完毕");
    }

    /**
     * Pulls up to {@link #LIMIT} tasks from the queue and dispatches them one by one.
     *
     * <p>Each task is parsed and run inside its own try/catch so that one malformed or failing
     * task does not discard the remaining tasks of the batch, which have already been pulled
     * from the queue. No exception escapes this method, so the schedule keeps running.
     */
    public void pullTask() {
        try {
            redissonUtil.pullQueue(GenericAttribute.KEY, LIMIT)
                    .stream()
                    .forEach(raw -> {
                        try {
                            dispatch(JSONObject.parseObject(raw).toJavaObject(AutoTask.class));
                        } catch (Exception e) {
                            log.error("任务管理器,任务执行失败,任务:{}", raw, e);
                        }
                    });
        } catch (Exception e) {
            log.error("任务管理器,任务执行失败:", e);
        }
    }

    /**
     * Runs a single task through the service registered for its type, then deletes the task's
     * cached list when the task type declares a cache id.
     */
    private void dispatch(AutoTask task) {
        TaskType taskType = TaskType.create(task.getType());
        if (taskType == null) {
            log.error("未知任务类型:{},任务已跳过", task.getType());
            return;
        }
        TaskService taskService = TaskServiceHandler.getInstance().getTaskService(taskType.getName());
        if (taskService == null) {
            // No TaskService bean reports this type name; skip instead of failing with an NPE.
            log.error("任务类型:{},未找到对应的任务处理器,任务已跳过", taskType.getName());
            return;
        }
        if (taskService.thresholdWarn()) {
            log.error("任务类型:{},当前运行任务已到达最大核心数", taskService.getTaskType());
        }
        log.info("任务类型:{},开始执行,信息:{}", taskService.getTaskType(), JSONObject.toJSONString(task));
        taskService.runTask(task);
        if (!Strings.isEmpty(taskType.getCacheId())) {
            redissonUtil.deleteList(task.getParamSource().getString(taskType.getCacheId()));
        }
    }
}
package com.zhiwei.middleware.automatic.server.listener; package com.zhiwei.middleware.automatic.son.task.holder;
import org.springframework.beans.BeansException; import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext; import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware; import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Component;
/**
* 获取ApplicationContext
*/
@Component
public class ApplicationContextHolder implements ApplicationContextAware { public class ApplicationContextHolder implements ApplicationContextAware {
private static ApplicationContext context; private static ApplicationContext context;
......
package com.zhiwei.middleware.automatic.son.task.holder;
import com.zhiwei.middleware.automatic.son.task.service.TaskService;
import org.springframework.context.ApplicationContext;
import java.util.HashMap;
import java.util.Map;
/**
 * Lazily created singleton that indexes every {@link TaskService} bean by the value of
 * {@link TaskService#getTaskType()}.
 */
public class TaskServiceHandler {

    /** Task type name to service. Filled once, when the singleton is constructed. */
    private static final Map<String, TaskService> SERVICE_MAP = new HashMap<>();

    /** Collects all {@link TaskService} beans from the application context. */
    private TaskServiceHandler() {
        ApplicationContext ctx = ApplicationContextHolder.getInstance();
        for (TaskService service : ctx.getBeansOfType(TaskService.class).values()) {
            SERVICE_MAP.put(service.getTaskType(), service);
        }
    }

    /**
     * @return the shared handler; it is constructed on first access
     */
    public static TaskServiceHandler getInstance() {
        return TaskServiceHandlerHolder.TASK_SERVICE_HANDLER;
    }

    /**
     * @param type task type name
     * @return the service registered under {@code type}, or {@code null} when there is none
     */
    public TaskService getTaskService(String type) {
        TaskService service = SERVICE_MAP.get(type);
        return service;
    }

    /** Holder class: the instance is created when this nested class is first initialized. */
    private static class TaskServiceHandlerHolder {
        private static final TaskServiceHandler TASK_SERVICE_HANDLER = new TaskServiceHandler();
    }
}
package com.zhiwei.middleware.automatic.son.task.service;
import com.zhiwei.middleware.automatic.server.pojo.AutoTask;
/**
 * A handler for one type of automatic-marking task.
 */
public interface TaskService {

    /**
     * Returns the name of the task type this service handles.
     *
     * @return task type name
     */
    String getTaskType();

    /**
     * Runs a task.
     *
     * @param autoTask the marking task to run
     */
    void runTask(AutoTask autoTask);

    /**
     * Reports whether the running-task threshold warning should be raised.
     *
     * @return {@code true} when a warning should be issued
     */
    boolean thresholdWarn();
}
package com.zhiwei.middleware.automatic.son.task.service;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.entity.subclass.mark.*;
import com.zhiwei.middleware.automatic.server.common.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.AutoTask;
import com.zhiwei.middleware.automatic.server.pojo.MarkInfoMulti;
import com.zhiwei.middleware.automatic.server.pojo.TemplateRecord;
import com.zhiwei.middleware.automatic.server.pojo.TemplateTitleVo;
import com.zhiwei.middleware.automatic.server.pojo.enums.TaskType;
import com.zhiwei.middleware.automatic.server.pojo.enums.TemplateStatus;
import com.zhiwei.middleware.automatic.son.dubbo.DubboHandler;
import com.zhiwei.middleware.automatic.son.service.TemplateTitleService;
import com.zhiwei.middleware.automatic.son.util.CosineSimilarity;
import com.zhiwei.middleware.automatic.son.util.MarkInfoUtil;
import com.zhiwei.middleware.automatic.son.util.RedissonUtil;
import com.zhiwei.middleware.automatic.son.util.Tools;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
/**
 * {@link TaskService} that automatically marks documents by comparing their titles with the
 * project's templates and applying the tag of the most similar template.
 */
@Service("TaskServiceCommon")
public class TaskServiceCommon implements TaskService {

    private static final Logger log = LogManager.getLogger(TaskServiceCommon.class);

    private static final String COUNT_KEY = "count";
    /** Key of the best similarity score in the map returned by {@code similarMapInfo}. */
    private static final String SIMILAR_KEY = "similar";
    /** Key of the best-matching template title in the map returned by {@code similarMapInfo}. */
    private static final String AGGRE_TITLE_KEY = "aggreTitle";

    private final RedissonUtil redissonUtil;
    private final TemplateTitleService templateTitleService;
    private final DubboHandler dubboHandler;
    private final ThreadPoolTaskExecutor autoMarkExecutor;

    public TaskServiceCommon(RedissonUtil redissonUtil, TemplateTitleService templateTitleService,
                             DubboHandler dubboHandler,
                             @Qualifier("autMarkExecutor") ThreadPoolTaskExecutor autoMarkExecutor) {
        this.redissonUtil = redissonUtil;
        this.templateTitleService = templateTitleService;
        this.dubboHandler = dubboHandler;
        this.autoMarkExecutor = autoMarkExecutor;
    }

    /** @return the name of {@code TaskType.COMMON_ONE} */
    @Override
    public String getTaskType() {
        return TaskType.COMMON_ONE.getName();
    }

    /**
     * Loads the task's source data on the calling thread and submits the marking work to the
     * executor. Nothing is submitted when no usable data was found.
     *
     * @param autoTask the marking task to run
     */
    @Override
    public void runTask(AutoTask autoTask) {
        Map<String, List<MarkInfo>> dataSourceInfo = getDataSourceInfo(autoTask);
        if (dataSourceInfo.isEmpty()) {
            log.error("本地任务可用数据为空,taskType:{}", autoTask.getType());
            return;
        }
        autoMarkExecutor.execute(() -> projectAutoMark(dataSourceInfo));
    }

    /**
     * @return {@code true} when the number of active threads has reached or passed the core pool
     *         size (the pool may grow beyond its core size, so an equality check would miss that)
     */
    @Override
    public boolean thresholdWarn() {
        return autoMarkExecutor.getActiveCount() >= autoMarkExecutor.getCorePoolSize();
    }

    /**
     * Loads the source data of a task.
     *
     * @param autoTask task
     * @return source data grouped by project; empty for unsupported task types
     */
    private Map<String, List<MarkInfo>> getDataSourceInfo(AutoTask autoTask) {
        TaskType taskType = TaskType.create(autoTask.getType());
        switch (Objects.requireNonNull(taskType)) {
            case COMMON_ONE:
                return getOneAutoInfo(autoTask.getParamSource().getString(taskType.getCacheId()));
            case COMMON_TWO:
                return getMultiAutoInfo(autoTask.getParamSource().getString(taskType.getCacheId()));
            default:
                return new HashMap<>();
        }
    }

    /**
     * Loads single-project source data. Entries that cannot be parsed are logged and dropped.
     *
     * @param key Redis cache key
     * @return source data grouped by the {@code mgroup} field
     */
    private Map<String, List<MarkInfo>> getOneAutoInfo(String key) {
        List<String> infos = redissonUtil.getList(key);
        List<MarkInfo> data = infos.stream()
                .map(e -> {
                    try {
                        JSONObject jsonObject = JSONObject.parseObject(e);
                        return getMarkInfo(jsonObject.getJSONObject("sourceObj"));
                    } catch (Exception exception) {
                        log.error("单项目自动标注失败,json转换异常,原数据:{}", e, exception);
                    }
                    return null;
                })
                .filter(Objects::nonNull)
                .collect(Collectors.toList());
        if (data.isEmpty()) {
            return new HashMap<>();
        }
        return data.stream()
                .collect(Collectors.groupingBy(markInfo -> markInfo.getSourceObj().getString("mgroup")));
    }

    /**
     * Loads multi-project source data. Each entry carries one document plus the list of projects
     * it belongs to; the document is emitted once per project with {@code mgroup} set to that
     * project. Entries that cannot be parsed are logged and dropped.
     *
     * @param key Redis cache key
     * @return source data grouped by project
     */
    private Map<String, List<MarkInfo>> getMultiAutoInfo(String key) {
        List<String> infos = redissonUtil.getList(key);
        List<MarkInfoMulti> data = infos.stream()
                .map(e -> {
                    try {
                        JSONObject jsonObject = JSONObject.parseObject(e);
                        return new MarkInfoMulti(getMarkInfo(jsonObject.getJSONObject("markInfo").getJSONObject("sourceObj")),
                                jsonObject.getJSONArray("projects").toJavaList(String.class));
                    } catch (Exception exception) {
                        log.error("多项目自动标注失败,json转换异常,原数据:{}", e, exception);
                    }
                    return null;
                })
                .filter(e -> Objects.nonNull(e) && Objects.nonNull(e.getMarkInfo()) && Objects.nonNull(e.getMarkInfo().getSourceObj()))
                .collect(Collectors.toList());
        if (data.isEmpty()) {
            return new HashMap<>();
        }
        Map<String, List<MarkInfo>> groupMap = new HashMap<>();
        data.forEach(markInfoMulti -> {
            // The incoming document does not carry a usable mgroup; it is set per project below.
            // NOTE(review): the same JSONObject is mutated for every project - confirm that
            // transformToMarkInfo copies it rather than keeping a reference.
            JSONObject example = markInfoMulti.getMarkInfo().getSourceObj();
            markInfoMulti.getProjects().forEach(project -> {
                example.put("mgroup", project);
                groupMap.computeIfAbsent(project, k -> new ArrayList<>())
                        .add(MarkInfoUtil.transformToMarkInfo(example));
            });
        });
        return groupMap;
    }

    /**
     * Marks every project's documents. A failure in one project is logged and does not stop the
     * remaining projects.
     *
     * @param groupMap documents grouped by project name
     */
    private void projectAutoMark(Map<String, List<MarkInfo>> groupMap) {
        for (Map.Entry<String, List<MarkInfo>> entry : groupMap.entrySet()) {
            try {
                Map<String, TemplateTitleVo> template = templateTitleService.getTemplateTitleByProject(entry.getKey());
                asyncTitleMark(entry.getKey(), entry.getValue(), template);
            } catch (Exception e) {
                log.error("自动标注处理失败,项目:{}", entry.getKey(), e);
            }
        }
    }

    /**
     * Splits the documents into chunks of 1000, marks the chunks concurrently, and blocks until
     * all chunks have finished.
     *
     * <p>NOTE(review): this method runs on {@code autoMarkExecutor} (see {@code runTask}) and
     * waits for chunk tasks submitted to the same executor; confirm that the pool cannot be
     * saturated by waiters.
     *
     * @param group      project
     * @param markInfos  documents
     * @param titleVoMap templates of the project
     * @throws Exception when waiting is interrupted or any chunk failed
     */
    private void asyncTitleMark(String group, List<MarkInfo> markInfos, Map<String, TemplateTitleVo> titleVoMap) throws Exception {
        List<List<MarkInfo>> splitList = Tools.spilt(markInfos, 1000);
        CompletableFuture<?>[] futures = splitList.stream()
                .map(e -> CompletableFuture.runAsync(() -> oneTitleMark(group, e, titleVoMap), autoMarkExecutor))
                .toArray(CompletableFuture[]::new);
        // The chunk tasks return nothing, so waiting on allOf is all that is needed.
        CompletableFuture.allOf(futures).get();
    }

    /**
     * Marks one chunk of documents. Documents whose title is missing or not longer than 6
     * characters are ignored.
     *
     * @param group      project
     * @param markInfos  documents
     * @param titleVoMap templates of the project
     */
    private void oneTitleMark(String group, List<MarkInfo> markInfos, Map<String, TemplateTitleVo> titleVoMap) {
        List<MarkInfo> newList = markInfos.stream().filter(markInfo -> {
            String title = markInfo.getSourceObj().getString(GenericAttribute.ES_TITLE);
            return null != title && title.length() > 6;
        }).collect(Collectors.toList());
        for (MarkInfo markInfo : newList) {
            JSONObject sourceObj = markInfo.getSourceObj();
            String title = Tools.filterSymbol(sourceObj.getString(GenericAttribute.ES_TITLE));
            Map<String, Object> similarMap = similarMapInfo(titleVoMap, title, group);
            if (similarMap.isEmpty()) {
                continue;
            }
            String aggreTitle = String.valueOf(similarMap.get(AGGRE_TITLE_KEY));
            TemplateTitleVo templateTitleVo = titleVoMap.get(aggreTitle);
            if (templateTitleVo == null) {
                // The template map is keyed independently of getTemplateTitle(); skip rather than NPE.
                log.error("自动标注失败,未找到模板,项目:{},模板标题:{}", group, aggreTitle);
                continue;
            }
            // Fill in the mark fields from the matched template.
            String aggreTag = templateTitleVo.getMtag();
            sourceObj.put(GenericAttribute.ES_M_TAG, aggreTag);
            sourceObj.put(GenericAttribute.ES_M_PERSON, "自动化机器人");
            sourceObj.put(GenericAttribute.ES_M_TIME, System.currentTimeMillis());
            log.info("模板标题:{} MarkSum:{} Tag:{}被标注标题:{}相似度:{}", aggreTitle, templateTitleVo.getMarkSum(), aggreTag,
                    title, similarMap.get(SIMILAR_KEY));
            // Refresh the template's mark statistics and record the document's feature value.
            try {
                String[] updates = dubboHandler.getMupdates(markInfo.filterInfo());
                templateTitleVo.refreshMark();
                templateTitleService.insertTemplateRecord(new TemplateRecord(templateTitleVo.getId(), updates[0]));
                redissonUtil.putCount(Tools.assembleKey(COUNT_KEY, group, templateTitleVo.getId()), 1);
            } catch (Exception e) {
                log.error("记录事件采集-标注数据特征值失败", e);
            }
        }
    }

    /**
     * Finds the template whose title is most similar to {@code title}.
     *
     * <p>Templates with status "reset" or with an empty tag are ignored. A template qualifies
     * only when its similarity is at least {@code GenericAttribute.SIMILAR_STANDARD}.
     *
     * @return a map holding {@code SIMILAR_KEY} (the score) and {@code AGGRE_TITLE_KEY} (the
     *         template title) for the best match, or an empty map when nothing qualifies
     */
    private Map<String, Object> similarMapInfo(Map<String, TemplateTitleVo> titleVoMap, String title, String group) {
        Map<String, Object> similarMap = new HashMap<>();
        for (TemplateTitleVo templateTitleVo : titleVoMap.values()) {
            if (Objects.isNull(templateTitleVo.getId())) {
                templateTitleVo.setId(group);
            }
            if (templateTitleVo.getStatus() == TemplateStatus.已重置 || Tools.isEmpty(templateTitleVo.getMtag())) {
                continue;
            }
            String aggreTitle = templateTitleVo.getTemplateTitle();
            double similar = CosineSimilarity.calculateTextSimWithBrand(aggreTitle, title);
            double currentSimilar = similarMap.get(SIMILAR_KEY) != null ? (double) similarMap.get(SIMILAR_KEY) : 0.0;
            // Keep only the highest-scoring template.
            if (similar >= GenericAttribute.SIMILAR_STANDARD && similar > currentSimilar) {
                similarMap.put(SIMILAR_KEY, similar);
                // Must be the same key oneTitleMark reads; the old "aggreeTitle" spelling never matched.
                similarMap.put(AGGRE_TITLE_KEY, aggreTitle);
            }
        }
        return similarMap;
    }

    /**
     * Restores a {@link MarkInfo} from an Elasticsearch source document, choosing the concrete
     * mark type from the document's {@code c2} field.
     *
     * @param hit source document
     * @return the restored mark info, or {@code null} when the {@code c2} type is not one of
     *         COMPLETE, INCOMPLETE, QA or VIDEO
     */
    public static MarkInfo getMarkInfo(JSONObject hit) {
        int c2 = Integer.parseInt(String.valueOf(hit.get("c2")));
        switch (ClassB.TypeB.fromEncode(c2)) {
            case COMPLETE:
                return new MarkInfo(CompleteTextMark.restoreFromEs(hit));
            case INCOMPLETE:
                return new MarkInfo(IncompleteTextMark.restoreFromEs(hit));
            case QA:
                // Previously the result was discarded and control fell through to "return null".
                return new MarkInfo(QATextMark.restoreFromEs(hit));
            case VIDEO:
                return new MarkInfo(VideoMark.restoreFromEs(hit));
            default:
                return null;
        }
    }
}
package com.zhiwei.middleware.automatic.son.task.service;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.entity.subclass.mark.CompleteTextMark;
import com.zhiwei.es.index.Index;
import com.zhiwei.es.util.IndexUtil;
import com.zhiwei.middleware.automatic.server.common.GenericAttribute;
import com.zhiwei.middleware.automatic.server.pojo.AutoTask;
import com.zhiwei.middleware.automatic.server.pojo.TemplateTitleVo;
import com.zhiwei.middleware.automatic.server.pojo.enums.TaskType;
import com.zhiwei.middleware.automatic.server.pojo.enums.TemplateStatus;
import com.zhiwei.middleware.automatic.son.dao.EsDao;
import com.zhiwei.middleware.automatic.son.service.TemplateTitleService;
import com.zhiwei.middleware.automatic.son.util.CosineSimilarity;
import com.zhiwei.middleware.automatic.son.util.Tools;
import com.zhiwei.nlp.AggreeBootStarter;
import com.zhiwei.nlp.vo.KResult;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.Calendar;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
/**
 * Task service for template tasks.
 *
 * <p>For a {@code TEMPLATE} task it loads recently marked documents of a project from ES,
 * clusters their titles into templates (cluster title + most frequent tag), merges them with
 * the templates already stored for the project and writes the result back through
 * {@link TemplateTitleService}. {@code TEMPLATE_RESET} and {@code TEMPLATE_MODIFY} tasks are
 * forwarded to {@link TemplateTitleService} directly. All work runs on the injected executor.
 */
@Service
public class TaskServiceTemplate implements TaskService {

    private static final Logger log = LogManager.getLogger(TaskServiceTemplate.class);

    /* Number of seconds in one day. */
    private static final int ONE_DAY = 60 * 60 * 24;

    /** Stored templates whose last update is older than this (7 days, in ms) are dropped. */
    private static final long TEMPLATE_TTL_MILLIS = ONE_DAY * 7 * 1000L;

    /** Clusters with fewer data points than this do not produce a template. */
    private static final int MIN_CLUSTER_SIZE = 3;

    /** Second argument handed to the clustering call; its exact meaning is defined by AggreeBootStarter. */
    private static final double CLUSTER_PARAM = 0.1;

    /** Minimum title similarity for two templates to be treated as the same template when merging. */
    private static final double MERGE_SIMILARITY = 0.96;

    private final TemplateTitleService templateTitleService;
    private final EsDao esDao;
    private final IndexUtil.ESIndexes esIndexes;
    private final ThreadPoolTaskExecutor executor;

    public TaskServiceTemplate(TemplateTitleService templateTitleService,
                               EsDao esDao, IndexUtil.ESIndexes esIndexes,
                               @Qualifier("templateExecutor") ThreadPoolTaskExecutor executor) {
        this.templateTitleService = templateTitleService;
        this.esDao = esDao;
        this.esIndexes = esIndexes;
        this.executor = executor;
    }

    @Override
    public String getTaskType() {
        return TaskType.TEMPLATE.getName();
    }

    /** Hands the task to the template executor; the caller does not wait for completion. */
    @Override
    public void runTask(AutoTask autoTask) {
        executor.execute(() -> switchTask(autoTask));
    }

    /**
     * Threshold warning is currently disabled and always reports {@code false}.
     * (A previous check compared the executor's active count with its core pool size.)
     */
    @Override
    public boolean thresholdWarn() {
        return false;
    }

    /**
     * Dispatches a task by its type.
     *
     * @throws NullPointerException if the task type cannot be resolved to a {@link TaskType}
     */
    private void switchTask(AutoTask autoTask) {
        JSONObject paramSource = autoTask.getParamSource();
        String group = paramSource.getString(GenericAttribute.GROUP_PARAM);
        switch (Objects.requireNonNull(TaskType.create(autoTask.getType()))) {
            case TEMPLATE_RESET:
                templateTitleService.resetTemplate(group, paramSource.getString(GenericAttribute.TEMPLATE_TITLE));
                break;
            case TEMPLATE_MODIFY:
                templateTitleService.modifyTemplateTitle(group, paramSource.getString(GenericAttribute.TEMPLATE_TITLE),
                        paramSource.getString(GenericAttribute.FIX_TAG));
                break;
            case TEMPLATE:
                runTask(group, paramSource.getLong(GenericAttribute.START_PARAM),
                        paramSource.getLong(GenericAttribute.END_PARAM));
                break;
            default:
                // Make unexpected routing visible instead of silently dropping the task.
                log.warn("不支持的模板任务类型:{}", autoTask.getType());
                break;
        }
    }

    /**
     * Loads the source data of a project for the given mark-time window and rebuilds its templates.
     * Any failure is logged and swallowed so that one project cannot break the executor thread.
     */
    private void runTask(String group, Long startTime, Long endTime) {
        try {
            List<Map<String, Object>> sourceList = findRecentTimeData(group, startTime, endTime);
            if (sourceList.isEmpty()) {
                return;
            }
            log.info("发现{}组数据{}条,聚合中...", group, sourceList.size());
            projectDataTemplate(group, sourceList);
        } catch (Exception e) {
            log.error("自动聚合模板更新失败,项目:{}", group, e);
        }
    }

    /**
     * Queries the mark index for documents of a project.
     *
     * <p>Conditions: {@code mtime} within {@code [startTime, endTime]}; {@code time} between
     * "now minus one day" and {@code endTime}; {@code mgroup} matches the project; documents
     * marked by the automatic robot and documents with {@code c2} equal to 25165824 or
     * 16777216 are excluded.
     *
     * @param mgroup    project
     * @param startTime lower bound of the mark time
     * @param endTime   upper bound of the mark time and of the article time
     * @return source maps of the matching documents
     * @throws IOException if the ES search fails
     */
    private List<Map<String, Object>> findRecentTimeData(String mgroup, Long startTime, Long endTime) throws IOException {
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        // Mark time inside the requested window.
        QueryBuilder mtimeBuilder = QueryBuilders.rangeQuery("mtime").from(startTime).to(endTime);
        Calendar calendar2 = Calendar.getInstance();
        calendar2.add(Calendar.DAY_OF_MONTH, -1);
        // Article time within the last day.
        QueryBuilder timeBuilder = QueryBuilders.rangeQuery("time").from(calendar2.getTime().getTime()).to(endTime);
        QueryBuilder mgroupBuilder = QueryBuilders.matchPhraseQuery("mgroup", mgroup);
        // Exclude data marked by the automatic robot.
        boolQueryBuilder.must(timeBuilder).must(mtimeBuilder).must(mgroupBuilder).mustNot(autoRobotQueryBuilder())
                .mustNot(QueryBuilders.termQuery("c2", 25165824)).mustNot(QueryBuilders.termQuery("c2", 16777216));
        sourceBuilder.query(boolQueryBuilder).size(10000)
                .fetchSource(new String[] { "ind_full_text", "mtime", "mtag", "mperson", "url", "id" }, null);
        return esDao.afterSearch(esIndexes.getIndexes(Index.mark.name()).toArray(new String[]{}), sourceBuilder, 1000)
                .stream().map(SearchHit::getSourceAsMap).collect(Collectors.toList());
    }

    /**
     * Builds templates from the project's data, merges them with the still-valid stored
     * templates and stores the merged result.
     *
     * @param group      project
     * @param sourceList source documents
     */
    private void projectDataTemplate(String group, List<Map<String, Object>> sourceList) {
        // Freshly aggregated templates.
        Map<String, TemplateTitleVo> aggregation = aggregation(transferMark(sourceList));
        // Previously stored templates, minus expired and reset ones.
        Map<String, TemplateTitleVo> templateTitleByProject = templateTitleService.getTemplateTitleByProject(group)
                .entrySet().stream()
                .filter(e -> keepStoredTemplate(group, e.getKey(), e.getValue()))
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
        templateTitleService.setTemplateTitleByProject(group, mergeTemplate(aggregation, templateTitleByProject));
    }

    /**
     * Decides whether a stored template is kept. As a side effect a template without an id
     * gets {@code group} as its id.
     *
     * @return {@code false} for templates not updated within the TTL or in the reset state
     */
    private boolean keepStoredTemplate(String group, String title, TemplateTitleVo templateTitleVo) {
        if (Objects.isNull(templateTitleVo.getId())) {
            templateTitleVo.setId(group);
        }
        long updateTime = templateTitleVo.getUpdateTime().getTime();
        if (System.currentTimeMillis() - updateTime > TEMPLATE_TTL_MILLIS) {
            log.info("{}-移除过期模板标题:{},最后更新时间:{}", group, title, updateTime);
            return false;
        }
        if (templateTitleVo.getStatus() == TemplateStatus.已重置) {
            log.info("已重置的模板从内存中删除,模板title:{}", title);
            return false;
        }
        return true;
    }

    /**
     * Clusters the document titles and turns every sufficiently large cluster into a template.
     *
     * @param sourceList documents to cluster
     * @return templates keyed by their symbol-stripped cluster title
     */
    private Map<String, TemplateTitleVo> aggregation(List<CompleteTextMark> sourceList) {
        Map<String, TemplateTitleVo> aggregationTitleTagMap = new ConcurrentHashMap<>();
        List<String> titles = sourceList.stream().map(CompleteTextMark::getTitle).collect(Collectors.toList());
        List<KResult<Integer>> kResult = AggreeBootStarter.getKResult(titles, CLUSTER_PARAM);
        for (KResult<Integer> result : kResult) {
            if (result.getDataPoints().size() < MIN_CLUSTER_SIZE) {
                continue;
            }
            // Count tags inside the cluster. Null tags are skipped because groupingBy
            // rejects null keys and would abort the whole aggregation.
            Map<String, Long> tagGroup = result.getDataPoints().stream()
                    .map(e -> sourceList.get(e).getMtag())
                    .filter(Objects::nonNull)
                    .collect(Collectors.groupingBy(mtag -> mtag, Collectors.counting()));
            // Most frequent tag; absent when no document in the cluster carries a tag.
            String tag = tagGroup.entrySet().stream()
                    .max(Map.Entry.comparingByValue())
                    .map(Map.Entry::getKey)
                    .orElse(null);
            String title = Tools.filterSymbol(result.getClusterName());
            // ConcurrentHashMap accepts neither null keys nor a template without a tag is useful.
            if (tag == null || title == null) {
                continue;
            }
            aggregationTitleTagMap.put(title,
                    new TemplateTitleVo(title, tag, sourceList.get(result.getDataPoints().get(0)).getUrl()));
        }
        return aggregationTitleTagMap;
    }

    /**
     * Merges {@code newTemplate} into {@code oldTemplate} and returns {@code oldTemplate}.
     *
     * <p>For every entry of {@code newTemplate}: if no key of {@code oldTemplate} is at least
     * {@code MERGE_SIMILARITY} similar, the entry is added; otherwise the tag of every similar
     * entry in {@code oldTemplate} is overwritten with the entry's tag.
     *
     * <p>NOTE(review): the only caller passes the freshly aggregated templates as
     * {@code oldTemplate} and the stored templates as {@code newTemplate}, i.e. the opposite of
     * what the parameter names say. As written, stored tags win over freshly aggregated tags
     * and a stored template with a similar fresh one is replaced by the fresh object. Confirm
     * which direction is intended before changing either side.
     *
     * @param oldTemplate map that is modified in place and returned
     * @param newTemplate templates merged into {@code oldTemplate}
     */
    private Map<String, TemplateTitleVo> mergeTemplate(Map<String, TemplateTitleVo> oldTemplate,
                                                       Map<String, TemplateTitleVo> newTemplate) {
        for (Map.Entry<String, TemplateTitleVo> newEntry : newTemplate.entrySet()) {
            List<String> templateKeys = oldTemplate.keySet().stream()
                    .filter(e -> CosineSimilarity.calculateTextSimWithBrand(newEntry.getKey(), e) >= MERGE_SIMILARITY)
                    .collect(Collectors.toList());
            if (templateKeys.isEmpty()) {
                oldTemplate.put(newEntry.getKey(), newEntry.getValue());
            } else {
                for (String oldKey : templateKeys) {
                    oldTemplate.get(oldKey).setMtag(newEntry.getValue().getMtag());
                }
            }
        }
        return oldTemplate;
    }

    /**
     * Converts ES source maps into {@link CompleteTextMark} objects.
     *
     * @param sourceMap ES source maps
     * @return restored marks, in the same order
     */
    private List<CompleteTextMark> transferMark(List<Map<String, Object>> sourceMap) {
        return sourceMap.stream().map(CompleteTextMark::restoreFromEs).collect(Collectors.toList());
    }

    /**
     * @return term query matching documents whose {@code mperson} is the automatic marking robot
     */
    private QueryBuilder autoRobotQueryBuilder() {
        return QueryBuilders.termQuery("mperson", GenericAttribute.AUTO_PERSON);
    }
}
package com.zhiwei.middleware.automatic.son.util;
import com.alibaba.fastjson.JSONObject;
import java.io.*;
import java.util.*;
/**
 * Character-frequency cosine similarity between two texts, gated by two pre-checks:
 * the texts must not differ too much in length, and they must contain the same set of
 * configured brand words.
 *
 * <p>Only characters in the range U+4E00..U+9FA5 that are encodable in GB2312 are counted.
 */
public class CosineSimilarity {
    /** Brand words loaded once at class initialisation; empty when the resource is unavailable. */
    private static final List<String> BRAND_WORDS = new ArrayList<>();
    private static final String path = "classpath:static/brandWords.json";
    private static final String CLASSPATH_PREFIX = "classpath:";

    static {
        loadBrandWords();
    }

    /**
     * Loads the brand words (a JSON array of string arrays) from the classpath.
     * Loading is best effort: on any problem the list stays empty, which disables the brand check.
     */
    private static void loadBrandWords() {
        // "classpath:" is a Spring resource prefix, not a file system path, so the resource
        // has to be resolved through the class loader rather than opened as a file.
        String resource = path.startsWith(CLASSPATH_PREFIX) ? path.substring(CLASSPATH_PREFIX.length()) : path;
        InputStream inputStream = CosineSimilarity.class.getResourceAsStream("/" + resource);
        if (inputStream == null) {
            System.err.println("CosineSimilarity: brand word resource not found on classpath: " + resource);
            return;
        }
        try {
            String jsonStr = readJsonFile(inputStream);
            if (null == jsonStr) {
                return;
            }
            List<List> array = JSONObject.parseArray(jsonStr, List.class);
            if (null == array) {
                return;
            }
            for (List<?> group : array) {
                if (group == null) {
                    continue;
                }
                for (Object word : group) {
                    if (word != null) {
                        BRAND_WORDS.add(String.valueOf(word));
                    }
                }
            }
        } catch (RuntimeException e) {
            // A malformed resource must not make the whole class unusable.
            System.err.println("CosineSimilarity: failed to parse brand word resource " + resource + ": " + e);
        }
    }

    /**
     * Similarity of two texts, or {@code 0.0} when either text is {@code null}, their lengths
     * differ too much, or they do not contain the same brand words.
     *
     * @param doc1 first text
     * @param doc2 second text
     * @return cosine similarity in {@code [0, 1]}
     */
    public static double calculateTextSimWithBrand(String doc1, String doc2) {
        if (handleByLength(doc1, doc2) && compareWithBrand(doc1, doc2)) {
            return calculateSimilar(doc1, doc2);
        }
        return 0.0;
    }

    /**
     * Cosine of the angle between the character-frequency vectors of the two texts.
     *
     * @param doc1 first text
     * @param doc2 second text
     * @return the similarity; {@code 0.0} when a text is null/blank or contains no countable character
     */
    private static double calculateSimilar(String doc1, String doc2) {
        if (doc1 == null || doc1.trim().isEmpty() || doc2 == null || doc2.trim().isEmpty()) {
            return 0.0;
        }
        // character id -> {count in doc1, count in doc2}
        Map<Integer, int[]> frequencies = new HashMap<>();
        countHanZi(doc1, frequencies, 0);
        countHanZi(doc2, frequencies, 1);

        double dot = 0;
        double squareSum1 = 0;
        double squareSum2 = 0;
        for (int[] counts : frequencies.values()) {
            dot += (double) counts[0] * counts[1];
            squareSum1 += (double) counts[0] * counts[0];
            squareSum2 += (double) counts[1] * counts[1];
        }
        double normProduct = squareSum1 * squareSum2;
        if (normProduct == 0) {
            // At least one text has no countable character; 0/0 would yield NaN.
            return 0.0;
        }
        return dot / Math.sqrt(normProduct);
    }

    /** Adds the countable characters of {@code doc} to slot {@code slot} (0 or 1) of the frequency table. */
    private static void countHanZi(String doc, Map<Integer, int[]> frequencies, int slot) {
        for (int i = 0; i < doc.length(); i++) {
            char ch = doc.charAt(i);
            if (!isHanZi(ch)) {
                continue;
            }
            int charIndex = getGB2312Id(ch);
            if (charIndex == -1) {
                continue;
            }
            frequencies.computeIfAbsent(charIndex, k -> new int[2])[slot]++;
        }
    }

    /**
     * Length pre-check: the length difference must not exceed half the length of the longer text.
     *
     * @return {@code false} when either text is {@code null} or the lengths differ too much
     */
    private static boolean handleByLength(String doc1, String doc2) {
        if (null == doc1 || null == doc2) {
            return false;
        }
        int standardLength = Math.max(doc1.length(), doc2.length()) / 2;
        return Math.abs(doc1.length() - doc2.length()) <= standardLength;
    }

    /**
     * Position of a character in the GB2312 table.
     *
     * @param ch character to look up
     * @return {@code row * 94 + column} derived from the two GB2312 bytes, or {@code -1} when
     *         the character does not encode to exactly two bytes in GB2312
     */
    public static short getGB2312Id(char ch) {
        try {
            byte[] buffer = Character.toString(ch).getBytes("GB2312");
            if (buffer.length != 2) {
                // Not a two-byte GB2312 character (ASCII, or unmappable and replaced by '?').
                return -1;
            }
            int b0 = (buffer[0] & 0x0FF) - 161; // both bytes start at 0xA1 = 161
            int b1 = (buffer[1] & 0x0FF) - 161; // 94 positions per row
            return (short) (b0 * 94 + b1);
        } catch (UnsupportedEncodingException e) {
            System.err.println("CosineSimilarity: GB2312 charset is not available: " + e);
        }
        return -1;
    }

    /**
     * Brand pre-check: both texts must contain exactly the same brand words.
     * Always passes when no brand words are loaded.
     *
     * @param doc1 first text, non-null
     * @param doc2 second text, non-null
     */
    private static boolean compareWithBrand(String doc1, String doc2) {
        if (BRAND_WORDS.isEmpty()) {
            return true;
        }
        Set<String> brands1 = new HashSet<>();
        Set<String> brands2 = new HashSet<>();
        for (String brand : BRAND_WORDS) {
            if (doc1.contains(brand)) {
                brands1.add(brand);
            }
            if (doc2.contains(brand)) {
                brands2.add(brand);
            }
        }
        return brands1.equals(brands2);
    }

    /**
     * @param ch character to test
     * @return {@code true} when {@code ch} lies in U+4E00..U+9FA5
     */
    public static boolean isHanZi(char ch) {
        return (ch >= 0x4E00 && ch <= 0x9FA5);
    }

    /**
     * Reads the whole stream as UTF-8 text and closes it.
     *
     * @param fileInputStream stream to read; closed before returning
     * @return the text, or {@code null} when the stream is {@code null} or reading fails
     */
    public static String readJsonFile(InputStream fileInputStream) {
        if (fileInputStream == null) {
            return null;
        }
        try (Reader reader = new InputStreamReader(fileInputStream, java.nio.charset.StandardCharsets.UTF_8)) {
            StringBuilder sb = new StringBuilder();
            char[] buffer = new char[4096];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                sb.append(buffer, 0, read);
            }
            return sb.toString();
        } catch (IOException e) {
            System.err.println("CosineSimilarity: failed to read JSON stream: " + e);
            return null;
        }
    }
}
package com.zhiwei.middleware.automatic.son.util;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.middleware.automatic.server.common.GenericAttribute;
import org.apache.commons.lang3.StringUtils;
import java.util.List;
/**
 * Helper that fills in the mark fields of collected documents before they are inserted.
 */
public class DataCollectionUtil {

    /**
     * Prepares every document of {@code list} in place: removes the {@code SON_ID} field,
     * sets {@code mgroup}, {@code mtag} and {@code mperson}, and sets the fixed
     * {@code cid}/{@code cname} values from {@link GenericAttribute}.
     *
     * @param list    documents to modify
     * @param group   value written to {@code mgroup}
     * @param mtag    value written to {@code mtag}
     * @param mperson value written to {@code mperson}; when null or empty,
     *                {@code GenericAttribute.AUTO_PERSON} is written instead
     */
    public static void supplementForInsert(List<JSONObject> list, String group, String mtag, String mperson) {
        // The marker is the same for every document, so resolve it once.
        final Object marker = StringUtils.isEmpty(mperson) ? GenericAttribute.AUTO_PERSON : mperson;
        list.forEach(doc -> {
            doc.remove(GenericAttribute.SON_ID);
            doc.put("mgroup", group);
            doc.put("mtag", mtag);
            doc.put("mperson", marker);
            // Fixed fields.
            doc.put("cid", GenericAttribute.AUTO_CID);
            doc.put("cname", GenericAttribute.AUTO_CNAME);
        });
    }
}
package com.zhiwei.middleware.automatic.son.util;
import com.alibaba.fastjson.JSONObject;
import java.util.List;
public class EventCollectionUtil {
public static void supplementForInsert(List<JSONObject> list, String group, String mperson) {
for (JSONObject obj : list) {
obj.put("mperson", mperson);
obj.put("mgroup", group);
// 固定字段
obj.put("cid", 100040002);
obj.put("cname", "上传标注补充采集");
}
}
}
package com.zhiwei.middleware.automatic.server.util; package com.zhiwei.middleware.automatic.son.util;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.base.category.ClassB; import com.zhiwei.base.category.ClassB;
import com.zhiwei.base.category.ClassB.TypeB;
import com.zhiwei.base.entity.subclass.mark.*; import com.zhiwei.base.entity.subclass.mark.*;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHit;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
/**
*
* @ClassName: MarkInfoUtil
* @Description: MarkInfo工具类
* @author shenjunjie
* @date 2019年7月26日 下午7:06:11
*/
public class MarkInfoUtil { public class MarkInfoUtil {
public static MarkInfo transformToMarkInfo(JSONObject json) { /**
int c2 = json.getIntValue("c2"); * 根据c2 区分markInfo
if (0 == c2) { * @return markInfo
return null; */
} public static MarkInfo distinguishMarkInfo (SearchHit hit, String mtag) {
TypeB typeB = TypeB.fromEncode(c2); int c2 = Integer.parseInt(String.valueOf(hit.getSourceAsMap().get("c2")));
if (typeB == TypeB.COMPLETE) { if (Objects.isNull(mtag)) {
return new MarkInfo(CompleteTextMark.restoreFromEs(json)); hit.getSourceAsMap().remove("mtag");
} else if (typeB == TypeB.INCOMPLETE) { } else {
return new MarkInfo(IncompleteTextMark.restoreFromEs(json)); hit.getSourceAsMap().put("mtag", mtag);
} else if (typeB == TypeB.QA) { }
return new MarkInfo(QATextMark.restoreFromEs(json)); switch (ClassB.TypeB.fromEncode(c2)) {
} else if (typeB == TypeB.VIDEO) { case COMPLETE:
return new MarkInfo(VideoMark.restoreFromEs(json)); return new MarkInfo(CompleteTextMark.restoreFromEs(hit.getSourceAsMap()));
} else { case INCOMPLETE:
return null; return new MarkInfo(IncompleteTextMark.restoreFromEs(hit.getSourceAsMap()));
} case QA:
} return new MarkInfo(QATextMark.restoreFromEs(hit.getSourceAsMap()));
case VIDEO:
new MarkInfo(VideoMark.restoreFromEs(hit.getSourceAsMap()));
default:
return null;
}
}
/** public static List<MarkInfo> transformToMarkInfo(List<JSONObject> list) {
* 根据c2 区分markInfo List<MarkInfo> resList = new ArrayList<>();
* @return markInfo list.forEach(obj -> {
*/ resList.add(transformToMarkInfo(obj));
public static MarkInfo distinguishMarkInfo (SearchHit hit, String mtag) { });
int c2 = Integer.parseInt(String.valueOf(hit.getSourceAsMap().get("c2"))); return resList;
if (Objects.isNull(mtag)) { }
hit.getSourceAsMap().remove("mtag");
} else {
hit.getSourceAsMap().put("mtag", mtag);
}
switch (ClassB.TypeB.fromEncode(c2)) {
case COMPLETE:
return new MarkInfo(CompleteTextMark.restoreFromEs(hit.getSourceAsMap()));
case INCOMPLETE:
return new MarkInfo(IncompleteTextMark.restoreFromEs(hit.getSourceAsMap()));
case QA:
return new MarkInfo(QATextMark.restoreFromEs(hit.getSourceAsMap()));
case VIDEO:
new MarkInfo(VideoMark.restoreFromEs(hit.getSourceAsMap()));
default:
return null;
}
}
/**
 * Converts every JSON document into a {@link MarkInfo}, keeping the input order.
 *
 * @param list documents to convert
 * @return one entry per input document; an entry is {@code null} when the single-document
 *         conversion returns {@code null}
 */
public static List<MarkInfo> transformToMarkInfo(List<JSONObject> list) {
    List<MarkInfo> converted = new ArrayList<>();
    for (JSONObject json : list) {
        converted.add(transformToMarkInfo(json));
    }
    return converted;
}
/**
 * Keeps only COMPLETE and VIDEO marks that have a non-empty title.
 *
 * @param list marks to filter; {@code null} elements are skipped
 * @return a new list with the marks that passed, in input order
 */
public static List<MarkInfo> filterTitleNon(List<MarkInfo> list) {
    List<MarkInfo> resList = new ArrayList<>();
    for (MarkInfo info : list) {
        if (info == null) {
            continue;
        }
        TypeB typeB = info.getTypeB();
        if (TypeB.COMPLETE != typeB && TypeB.VIDEO != typeB) {
            continue;
        }
        // Check the raw value first: concatenating a missing title with "" would produce
        // the non-empty text "null" and let untitled marks through.
        Object title = info.getSourceObj().get("title");
        if (title != null && StringUtils.isNotEmpty(String.valueOf(title))) {
            resList.add(info);
        }
    }
    return resList;
}
/**
 * Converts one JSON document into a {@link MarkInfo}, choosing the concrete mark type from
 * the document's {@code c2} field.
 *
 * @param json document to convert
 * @return the converted mark, or {@code null} when {@code c2} is 0 or decodes to a type
 *         other than COMPLETE, INCOMPLETE, QA or VIDEO
 */
public static MarkInfo transformToMarkInfo(JSONObject json) {
    int c2 = json.getIntValue("c2");
    if (c2 == 0) {
        return null;
    }
    ClassB.TypeB typeB = ClassB.TypeB.fromEncode(c2);
    if (typeB == null) {
        // A switch on null would throw; an undecodable type maps to null like any other unknown type.
        return null;
    }
    switch (typeB) {
        case COMPLETE:
            return new MarkInfo(CompleteTextMark.restoreFromEs(json));
        case INCOMPLETE:
            return new MarkInfo(IncompleteTextMark.restoreFromEs(json));
        case QA:
            return new MarkInfo(QATextMark.restoreFromEs(json));
        case VIDEO:
            return new MarkInfo(VideoMark.restoreFromEs(json));
        default:
            return null;
    }
}
} }
package com.zhiwei.middleware.automatic.son.util;
import org.redisson.api.*;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * Thin wrapper around {@link RedissonClient}. Every key passed in is prefixed with
 * {@code "auto:mark:"} before it is used.
 */
@Component
public class RedissonUtil {

    private static final String MARK_KEY = "auto:mark:";

    /** Lifetime, in days, set on a counter each time it is incremented. */
    private static final long COUNT_TTL_DAYS = 7;

    private final RedissonClient redissonClient;

    private RedissonUtil(RedissonClient redissonClient) {
        this.redissonClient = redissonClient;
    }

    /**
     * Polls up to {@code limit} elements from the queue stored under {@code key}.
     *
     * @param key   queue key (without prefix)
     * @param limit maximum number of elements to poll
     * @return the polled elements
     */
    public List<String> pullQueue(String key, int limit) {
        return redissonClient.<String>getQueue(redisKey(key)).poll(limit);
    }

    /**
     * Adds {@code count} to the counter stored under {@code key} and then sets the counter's
     * expiry to seven days.
     *
     * @param key   counter key (without prefix)
     * @param count amount to add
     */
    public void putCount(String key, long count) {
        RAtomicLong counter = redissonClient.getAtomicLong(redisKey(key));
        counter.addAndGet(count);
        counter.expire(COUNT_TTL_DAYS, TimeUnit.DAYS);
    }

    /**
     * @param key map key (without prefix)
     * @return the Redisson map stored under {@code key}
     */
    public Map<String, String> getMapValue(String key) {
        RMap<String, String> stored = redissonClient.getMap(redisKey(key));
        return stored;
    }

    /**
     * @param key   map key (without prefix)
     * @param group entry to read
     * @return the value stored for {@code group} in the map under {@code key}
     */
    public String getMapKeyValue(String key, String group) {
        return redissonClient.<String, String>getMap(redisKey(key)).get(group);
    }

    /**
     * Stores {@code value} for {@code group} in the map under {@code key}.
     *
     * @param key   map key (without prefix)
     * @param group entry to write
     * @param value value to store
     */
    public void setMapValue(String key, String group, String value) {
        redissonClient.<String, String>getMap(redisKey(key)).put(group, value);
    }

    /**
     * @param redisKey list key (without prefix)
     * @return the Redisson list stored under the key
     */
    public List<String> getList(String redisKey) {
        RList<String> stored = redissonClient.getList(redisKey(redisKey));
        return stored;
    }

    /**
     * Deletes the list stored under {@code key}.
     *
     * @param key list key (without prefix)
     */
    public void deleteList(String key) {
        redissonClient.getList(redisKey(key)).delete();
    }

    /** Prepends the common key prefix. */
    private String redisKey(String key) {
        return MARK_KEY + key;
    }
}
package com.zhiwei.middleware.automatic.son.util;
import com.alibaba.fastjson.JSON;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Small static helpers: emptiness check, JSON map restoration, title symbol stripping,
 * list partitioning and key assembly.
 */
public class Tools {
    /** Punctuation, a set of extra symbols and whitespace-like characters stripped from titles. */
    private static final Pattern SYMBOL_PATTERN = Pattern
            .compile("[\\p{P}+~$`^=丨|<>~`$^+=|<>¥×\\s\u200B\u200C\u200D\u00A0\u0020\u3000]");

    /**
     * Tells whether a value is "empty".
     *
     * @param obj value to test
     * @return {@code true} for {@code null}, an empty {@link List}, an empty {@link String}
     *         or an empty {@link Map}; {@code false} for everything else, including other
     *         kinds of collection
     */
    public static boolean isEmpty(Object obj) {
        if (obj == null) {
            return true;
        }
        if (obj instanceof List) {
            return CollectionUtils.isEmpty((List<?>) obj);
        }
        if (obj instanceof String) {
            return StringUtils.isEmpty((String) obj);
        }
        if (obj instanceof Map) {
            return MapUtils.isEmpty((Map<?, ?>) obj);
        }
        return false;
    }

    /**
     * Parses every JSON string value of a map into an instance of {@code clazz}.
     *
     * @param strMap map of key to JSON text; may be {@code null}
     * @param clazz  target type
     * @return a new map with the same keys and the parsed values; empty when {@code strMap} is {@code null}
     */
    public static <T> Map<String, T> restoreTMap(Map<String, String> strMap, Class<T> clazz) {
        Map<String, T> resMap = new HashMap<>();
        if (null == strMap) {
            return resMap;
        }
        // Iterate entries rather than keys + get(): one pass, no extra lookup per key.
        for (Map.Entry<String, String> entry : strMap.entrySet()) {
            resMap.put(entry.getKey(), JSON.parseObject(entry.getValue(), clazz));
        }
        return resMap;
    }

    /**
     * Removes punctuation, the configured symbols and whitespace from a title.
     *
     * @param title title to clean; may be {@code null}
     * @return the cleaned title, or {@code null} when {@code title} is {@code null}
     */
    public static String filterSymbol(String title) {
        if (null == title) {
            return null;
        }
        return SYMBOL_PATTERN.matcher(title).replaceAll("");
    }

    /**
     * Splits a list into consecutive chunks of at most {@code limit} elements.
     * The chunks are {@link List#subList} views of {@code list}, not copies.
     *
     * @param list  source list
     * @param limit maximum chunk size; must be positive
     * @param <T>   element type
     * @return the chunks in order; empty when {@code list} is empty
     */
    public static <T> List<List<T>> spilt(List<T> list, int limit) {
        int pageTotal = (list.size() + limit - 1) / limit;
        List<List<T>> result = new ArrayList<>(pageTotal);
        for (int i = 0; i < pageTotal; i++) {
            int end = i + 1 == pageTotal ? list.size() : (i + 1) * limit;
            result.add(list.subList(i * limit, end));
        }
        return result;
    }

    /**
     * Joins the non-null parts with {@code ":"}.
     *
     * <p>A separator is appended after every non-null part except the one in the last
     * position, so when the last part is {@code null} the result ends with {@code ":"}
     * (for example {@code ("a", null)} gives {@code "a:"}). This is kept as is because the
     * result is used as a storage key.
     *
     * @param keys parts of the key
     * @return the assembled key
     */
    public static String assembleKey(String... keys) {
        StringBuilder sb = new StringBuilder();
        int last = keys.length - 1;
        for (int i = 0; i < keys.length; i++) {
            if (keys[i] == null) {
                continue;
            }
            sb.append(keys[i]);
            if (i != last) {
                sb.append(':');
            }
        }
        return sb.toString();
    }
}
server.port=7770
#path to redisson.yaml or redisson.json
spring.redis.redisson.file=classpath:redisson-local.yaml
#mongo
#primary.uri.marker=mongodb://qbjc:asSADf5ffs@115.236.59.88:30001/qbjc?authSource=admin
primary.uri.marker=mongodb://localhost:27017/localhost
primary.uri.hangzhou=mongodb://qbjc:asSADf5ffs@115.236.59.88:30001/qbjc?authSource=admin
#primary.uri=mongodb://qbjc:asSADf5ffs@202.107.192.94:17150/qbjc?authSource=admin
mongo.connectTimeout=30000
mongo.maxWaitTime=50000
mongo.dataBaseMarker=marker
mongo.hangzhouMarker=qbjc
# es
es.esClientAddresses=202.107.192.94:1443:qbjc-back:yuqing.zhiweidata.com,202.107.192.94:29400:elastic:qWxZRW42OHkuOhmF5AXX
es.clusterNodes=202.107.192.94:1443
es.clusterName=zhiweidata-new-es
es.httpClusterNodes=202.107.192.94:1443:middleware-automaticmark:auto.zhiweidata.com
#es.username=middleware-automaticmark
#es.password=auto.zhiweidata.com
es.username=joker
es.password=jokerdevops
middleware.zookeeperAddress=zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181
middleware.appName=automatic-provider
middleware.markGroup=zhiwei-mark-local-liuyu
middleware.filterGroup=local-filter
\ No newline at end of file
server.port=7770
#path to redisson.yaml or redisson.json
spring.redis.redisson.file=classpath:redisson.yaml
#mongo
primary.uri.marker=mongodb://markeruser:marker1q2w3e4r@192.168.0.150:27017,192.168.0.151:27017,192.168.0.152:27017/marker?authSource=admin
primary.uri.hangzhou=mongodb://automarker:HtSIcjzZ@192.168.0.150:27017,192.168.0.151:27017,192.168.0.152:27017/qbjc?authSource=admin
mongo.connectTimeout=30000
mongo.maxWaitTime=50000
mongo.dataBaseMarker=marker
mongo.hangzhouMarker=qbjc
# es
es.esClientAddresses=192.168.0.130:9200:qbjc-back:yuqing.zhiweidata.com
es.clusterNodes=192.168.0.130:9200
es.clusterName=zhiweidata-new-es
es.httpClusterNodes=192.168.0.130:9200:middleware-automaticmark:auto.zhiweidata.com
es.username=middleware-automaticmark
es.password=auto.zhiweidata.com
middleware.zookeeperAddress=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182
middleware.appName=automatic-provider
middleware.markGroup=zhiwei-mark
middleware.filterGroup=zhiwei-bloom-filter
\ No newline at end of file
#spring.profiles.active=prod
#spring.profiles.active=dev
spring.profiles.active=local
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE xml>
<!-- Log level for log4j2's own internal logging -->
<Configuration status="WARN">
<Properties> <!-- Configure the log file output directory -->
<Property name="LOG_HOME">./log/</Property>
<property name="APP_NAME">automatic-center-server</property>
</Properties>
<Appenders>
<!-- Define where log output is written -->
<Console name="Console" target="SYSTEM_OUT">
<PatternLayout pattern="%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger{36} %L [%t] - %msg%n"/>
</Console>
<!--[%t]-->
<!-- INFO-level log -->
<RollingFile name="info_appender" fileName="${LOG_HOME}/${APP_NAME}.log"
filePattern="${LOG_HOME}/${APP_NAME}-%d{yyyy-MM-dd}-%i.log">
<PatternLayout pattern="%d{yyyy-MM-dd HH:mm:ss.SSS} %-5level %logger{36} %L [%t] - %msg%n"/>
<Filters>
<PushLogFilter onMatch="NEUTRAL" onMismatch="DENY"/>
</Filters>
<Policies>
<TimeBasedTriggeringPolicy/>
<SizeBasedTriggeringPolicy size="20 MB"/>
</Policies>
</RollingFile>
</Appenders>
<Loggers>
<Logger name="org.apache.curator" additivity="false"
level="trace">
<AppenderRef ref="Console" />
</Logger>
<Logger name="org.apache.zookeeper" additivity="false"
level="trace">
<AppenderRef ref="Console" />
</Logger>
<AsyncRoot level="info">
<AppenderRef ref="Console"/>
<AppenderRef ref="info_appender"/>
</AsyncRoot>
<Logger name="mylog" level="error" additivity="false">
<AppenderRef ref="Console"/>
</Logger>
</Loggers>
</Configuration>
prod.robot.push.address=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=e2218c6e-af6a-4296-9d75-7178b941a3b5
prod.robot.push.enable=false
prod.robot.push.filterclass=org.apache.dubbo.common.Version,com.alibaba.dubbo.common.Version,org.apache.dubbo.monitor.dubbo.DubboMonitor,com.alibaba.dubbo.monitor.dubbo.DubboMonitor
prod.robot.push.level=error
prod.robot.push.app.name=automatic-server-prod
dev.robot.push.address=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=e2218c6e-af6a-4296-9d75-7178b941a3b5
dev.robot.push.enable=false
dev.robot.push.filterclass=org.apache.dubbo.common.Version,com.alibaba.dubbo.common.Version,org.apache.dubbo.monitor.dubbo.DubboMonitor,com.alibaba.dubbo.monitor.dubbo.DubboMonitor
dev.robot.push.level=error
dev.robot.push.app.name=automatic-server-dev
local.robot.push.address=https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=e2218c6e-af6a-4296-9d75-7178b941a3b5
local.robot.push.enable=false
local.robot.push.filterclass=org.apache.dubbo.common.Version,com.alibaba.dubbo.common.Version,org.apache.dubbo.monitor.dubbo.DubboMonitor,com.alibaba.dubbo.monitor.dubbo.DubboMonitor
local.robot.push.level=error
local.robot.push.app.name=automatic-center-local
\ No newline at end of file
---
singleServerConfig:
idleConnectionTimeout: 10000
connectTimeout: 10000
timeout: 3000
retryAttempts: 3
retryInterval: 1500
subscriptionsPerConnection: 5
address: "redis://192.168.0.225:6379"
subscriptionConnectionMinimumIdleSize: 1
subscriptionConnectionPoolSize: 50
connectionMinimumIdleSize: 32
connectionPoolSize: 64
database: 4
dnsMonitoringInterval: 5000
threads: 0
nettyThreads: 0
codec: !<org.redisson.codec.JsonJacksonCodec> {}
transportMode: "NIO"
\ No newline at end of file
---
singleServerConfig:
idleConnectionTimeout: 60000
connectTimeout: 10000
timeout: 3000
retryAttempts: 3
retryInterval: 1500
subscriptionsPerConnection: 5
address: "redis://192.168.0.39:7386"
subscriptionConnectionMinimumIdleSize: 1
subscriptionConnectionPoolSize: 50
connectionMinimumIdleSize: 128
connectionPoolSize: 256
database: 3
dnsMonitoringInterval: 5000
threads: 0
nettyThreads: 0
codec: !<org.redisson.codec.JsonJacksonCodec> {}
transportMode: "NIO"
\ No newline at end of file
[["尊嘉证券","字节跳动","众邦银行","中商惠民","中软","中融金","中金所","中关村","智联","智慧星光","智纯","志程","知微数据","知微事见","知微传播分析","支付宝","证监会","臻稚","榛果","浙商银行","找钢网","掌鱼生鲜","掌合天下","掌柜宝","债券通","早早孕","云天励飞","云锋金融","云从科技","阅文集团","猿题库","猿辅导","毓婷","榆钱","余额宝","有象科技","优酷","应用宝","鹰眼监控","英雄联盟","易利贷","易酒批","易久批","易会满","易果生鲜","易贷","艺妙神州","蚁坊","依图","伊利","药神保","药明康德","亚洲一号","亚马逊","雅士利","雅培","雪球","学儿乐","旭航网贷","熊猫直播","熊猫债","熊猫tv","星选外卖","星星守护","星图","星阶优护","新氧","新通路","新能源","新美大","新浪微博","新京报","新华社","新高桥","心美力","携程","校园贷","校园白条","小猪短租","小猿搜题","小猿口算","小象生鲜","小米","小美果园","小红书","小程序","小白用车","小白信用","相互保","现金贷","现代快报","闲鱼","虾米音乐","喜康素","喜康力","喜康宝","喜翻","喜宝","西瓜视频","悟空问答","物联网","我买网","唯品金融","唯品会","唯品国际","微众银行","微云","微舆情","微医保","微信","微头条","微视","微牛证券","微民保险","微瓴","微粒贷","微博","网易","万斯","万事达","万科","推特","途众","途牛","途家","同仁堂","同盾","同程","通付盾","天天快报","天猫","腾讯","淘小铺","淘鲜达","淘点点","淘宝","速卖通","苏宁","顺丰","水钢医院","舒心美","首中投资","首长四方","首长国际","首长宝佳","首颐医疗","首师大","首钢","首都机场","圣元","商汤","软银","融创","荣耀","雀巢","趣头条","去哪儿","钱袋宝","前程无忧","千树资本","企鹅","奇虎","苹果","品骏","拼多多","皮皮虾","鹏源","鹏灏","鹏沨","鹏渤","欧冶云商","诺优能","宁波有象","宁波第三检测站","南苑机场","陌陌","摩拜","秒拍","苗建","喵鲜生","镁信","美赞成","美团","美素","美食林","美强生","美拍","美美证券","美啦","美菜","绿地","罗汉堂","龙湖","领英","猎聘","联想","利洁时","理想汽车","理财通","理财金","礼橙专车","老虎證券","老虎证券","蓝鲸征信","蓝鲸TMT","莱西","拉勾","旷视","快手","酷狗音乐","口碑网","可丽蓝","可丽兰","看准网","看准app","聚划算","聚财猫","巨量引擎","酒仙网","京东云","京东","金秀儿","火山直播","火山小视频","欢聚时代","华为","虎牙","红麦","黑珍珠","盒马","和易贷","合众","合生元","禾连健康","海囤全球","狗东","格灵深瞳","高德","富途","富民宝","斐讯","飞鹤","恩美力","恩加健","多闪","杜蕾斯","斗鱼","抖音","滴滴","大卫","创业黑马","车英慧","超级物种","菜鸟","北京字节跳动科技有限公司","斑马英语","百融","百度","安智宝","阿里","YY直播","youtube","UC","SenseTime","QQ","JD","Boss职聘","Boss直聘","A站"],["字节","主播","中金","中交路建","支付宝","政协","阅文","猿辅导","元素禾喜","元気森林","优酷","英特尔","银监会","一起学","央行","星图","新氧","小游戏","小象","小鹏","小桔","小红书","小程序","闲鱼","喜马拉雅","西瓜","沃尔玛","未成年","唯品会","微信","微视","微软","微博","微保","威马","网约车","网银","网易","王者荣耀","万科","土豆","途众","图虫","头条","统计局","同盾","同程","天眼查","天猫","腾讯","特斯拉","淘票票","淘宝","苏宁","尚德","商汤","山姆","三一五","三星","融创","荣耀","人人贷","人人车","人民日报","趣头条","区块链","清华",
"清北","轻聊","青桔","企鹅","麒麟","骑手","期权","期货","平安","拼多多","皮皮虾","朋友圈","拍拍乐","某音","某讯","某文","某手","某平台","某聘","某品会","某盾","某抖","某东","某大厂","秒针","秒杀","秒拍","米其林","孟晚舟","美赞臣","美团","蚂蚁","马云","马化腾","绿地","鲁班","龙湖","两会","联想","联合国","荔枝","快应用","快手","科慕","科技部","康希诺","凯迪拉克","绝地求生","巨量引擎","京东","金融","今日头条","建设银行","建行","机器人","火山","华为","花小猪","红包","恒大","嗨学","黑珍珠","黑马","盒马","好未来","哈啰","国双","国美","故宫","谷歌","狗东","公众号","工信部","工商局","跟谁学","高通","高榕","高德","富途","肺炎","飞猪","飞悦","飞书","飞慧","飞鹤","放心购","泛生子","多闪","短视频","斗鱼","抖音","懂车帝","定安","钉钉","滴滴","嘀嘀","大众点评","创业家","创业黑马","创新工场","穿山甲","超级物种","菜鸟","财新","比亚迪","比特币","贝因美","北银","北青网","北汽新能源","北排","邦盛","斑马","百融","百度","白条","安卓","爱奇艺","阿里","阿尔迪","阿迪达斯","youtube","wx","wechat","Walmart","UC","twitter","Trump","Tongdun","Tencent","sensetime","QQ","p2p","O2O","J东","JDD","JD+","JD","i黑马","IPO","Intel","huawei","Google","Futu","DOU+","dnf","COO","CEO","B站","BUG","Boss职聘","Boss直聘","bilibili","A站","Auchan","ATM","AMS","amazon","AI学","AI生态","AI启杭","AI财经社","AI","99公益","95公益","7鲜","7Fresh","714高炮","701计划","7 
Fresh","5G","58同城","58集团","58二手车","58到家","51VR,","51job","36氪","1药网","1号会员店","11_11","12315","2022","2021","2020","996","315","11.11","3.15"],["北京","天津","石家庄","唐山","秦皇岛","邯郸","邢台","保定","张家口","承德","沧州","廊坊","衡水","太原","大同","阳泉","长治","晋城","朔州","晋中","运城","忻州","临汾","吕梁","呼和浩特","包头","乌海","赤峰","通辽","鄂尔多斯","呼伦贝尔","巴彦淖尔","乌兰察布","兴安","锡林郭勒","阿拉善","沈阳","大连","鞍山","抚顺","本溪","丹东","锦州","营口","阜新","辽阳","盘锦","铁岭","朝阳","葫芦岛","长春","吉林","四平","辽源","通化","白山","松原","白城","延边","哈尔滨","齐齐哈尔","鸡西","鹤岗","双鸭山","大庆","伊春","佳木斯","七台河","牡丹江","黑河","绥化","大兴安岭","上海","南京","无锡","徐州","常州","苏州","南通","连云港","淮安","盐城","扬州","镇江","泰州","宿迁","杭州","宁波","温州","嘉兴","湖州","绍兴","金华","衢州","舟山","台州","丽水","合肥","芜湖","蚌埠","淮南","马鞍山","淮北","铜陵","安庆","黄山","滁州","阜阳","宿州","六安","亳州","池州","宣城","福州","厦门","莆田","三明","泉州","漳州","南平","龙岩","宁德","南昌","景德镇","萍乡","九江","新余","鹰潭","赣州","吉安","宜春","抚州","上饶","济南","青岛","淄博","枣庄","东营","烟台","潍坊","济宁","泰安","威海","日照","临沂","德州","聊城","滨州","菏泽","郑州","开封","洛阳","平顶山","安阳","鹤壁","新乡","焦作","濮阳","许昌","漯河","三门峡","南阳","商丘","信阳","周口","驻马店","武汉","黄石","十堰","宜昌","襄阳","鄂州","荆门","孝感","荆州","黄冈","咸宁","随州","恩施","长沙","株洲","湘潭","衡阳","邵阳","岳阳","常德","张家界","益阳","郴州","永州","怀化","娄底","湘西","广州","韶关","深圳","珠海","汕头","佛山","江门","湛江","茂名","肇庆","惠州","梅州","汕尾","河源","阳江","清远","东莞","中山","潮州","揭阳","云浮","南宁","柳州","桂林","梧州","北海","防城港","钦州","贵港","玉林","百色","贺州","河池","来宾","崇左","海口","三亚","三沙","儋州","重庆","成都","自贡","攀枝花","泸州","德阳","绵阳","广元","遂宁","内江","乐山","南充","眉山","宜宾","广安","达州","雅安","巴中","资阳","阿坝","甘孜","凉山","贵阳","六盘水","遵义","安顺","毕节","铜仁","黔西南","黔东南","黔南","昆明","曲靖","玉溪","保山","昭通","丽江","普洱","临沧","楚雄","红河","文山","西双版纳","大理","德宏","怒江","迪庆","拉萨","日喀则","昌都","林芝","山南","那曲","阿里","西安","铜川","宝鸡","咸阳","渭南","延安","汉中","榆林","安康","商洛","兰州","嘉峪关","金昌","白银","天水","武威","张掖","平凉","酒泉","庆阳","定西","陇南","临夏","甘南","西宁","海东","海北","黄南","海南","果洛","玉树","海西","银川","石嘴山","吴忠","固原","中卫","乌鲁木齐","克拉玛依","吐鲁番","哈密","昌吉","博州","巴州","阿克苏","克州","喀什","和田","伊犁","塔城","阿勒泰","台北","高雄","桃园","台中","台南","新北","基隆","新竹","嘉义","香港","澳门","华北","华中","华东","华南","西北","西南","东北","河北","山西","辽宁","黑龙江","江苏","浙江","
安徽","福建","江西","山东","河南","湖北","湖南","广东","四川","贵州","云南","陕西","甘肃","青海","台湾","内蒙古","广西壮族","西藏","宁夏回族","新疆维吾尔"],["东城","西城","朝阳","丰台","石景山","海淀","门头沟","房山","通州","顺义","昌平","大兴","怀柔","平谷","密云","延庆","和平","河东","河西","南开","河北","红桥","东丽","西青","津南","北辰","武清","宝坻","滨海","宁河","静海","蓟州","长安","桥西","新华","井陉矿区","裕华","藁城","鹿泉","栾城","井陉","正定","行唐","灵寿","高邑","深泽","赞皇","无极","平山","元氏","赵县","辛集","晋州","新乐","路南","路北","古冶","开平","丰南","丰润","曹妃甸","滦南","乐亭","迁西","玉田","遵化","迁安","滦州","海港","山海关","北戴河","抚宁","青龙","昌黎","卢龙","邯山","丛台","复兴","峰峰矿区","肥乡","永年","临漳","成安","大名","涉县","磁县","邱县","鸡泽","广平","馆陶","魏县","曲周","武安","桥东","桥西","邢台","临城","内丘","柏乡","隆尧","任县","南和","宁晋","巨鹿","新河","广宗","平乡","威县","清河","临西","南宫","沙河","竞秀","莲池","满城","清苑","徐水","涞水","阜平","定兴","唐县","高阳","容城","涞源","望都","安新","易县","曲阳","蠡县","顺平","博野","雄县","涿州","定州","安国","高碑店","桥东","桥西","宣化","下花园","万全","崇礼","张北","康保","沽源","尚义","蔚县","阳原","怀安","怀来","涿鹿","赤城","双桥","双滦","鹰手营子矿区","承德","兴隆","滦平","隆化","丰宁","宽城","围场","平泉","新华","运河","沧县","青县","东光","海兴","盐山","肃宁","南皮","吴桥","献县","孟村","泊头","任丘","黄骅","河间","安次","广阳","固安","永清","香河","大城","文安","大厂","霸州","三河","桃城","冀州","枣强","武邑","武强","饶阳","安平","故城","景县","阜城","深州","小店","迎泽","杏花岭","尖草坪","万柏林","晋源","清徐","阳曲","娄烦","古交","新荣","平城","云冈","云州","阳高","天镇","广灵","灵丘","浑源","左云","城区","矿区","郊区","平定","盂县","潞州","上党","屯留","潞城","襄垣","平顺","黎城","壶关","长子","武乡","沁县","沁源","城区","沁水","阳城","陵川","泽州","高平","朔城","平鲁","山阴","应县","右玉","怀仁","榆次","榆社","左权","和顺","昔阳","寿阳","太谷","祁县","平遥","灵石","介休","盐湖","临猗","万荣","闻喜","稷山","新绛","绛县","垣曲","夏县","平陆","芮城","永济","河津","忻府","定襄","五台","代县","繁峙","宁武","静乐","神池","五寨","岢岚","河曲","保德","偏关","原平","尧都","曲沃","翼城","襄汾","洪洞","古县","安泽","浮山","吉县","乡宁","大宁","隰县","永和","蒲县","汾西","侯马","霍州","离石","文水","交城","兴县","临县","柳林","石楼","岚县","方山","中阳","交口","孝义","汾阳","新城","回民区","玉泉","赛罕","土默特左旗","托克托","和林格尔","清水河","武川","东河","昆都仑","青山","石拐","白云鄂博矿","九原","土默特右旗","固阳","达尔罕茂明安联合旗","海勃湾","海南","乌达","红山","元宝山","松山","阿鲁科尔沁旗","巴林左旗","巴林右旗","林西","克什克腾旗","翁牛特旗","喀喇沁旗","宁城","敖汉旗","科尔沁","科尔沁左翼中旗","科尔沁左翼后旗","开鲁","库伦旗","奈曼旗","扎鲁特旗","霍林郭勒","东胜","康巴什","达拉特旗","准格尔旗","
鄂托克前旗","鄂托克旗","杭锦旗","乌审旗","伊金霍洛旗","海拉尔","扎赉诺尔","阿荣旗","莫力达瓦达斡尔族自治旗","鄂伦春自治旗","鄂温克族自治旗","陈巴尔虎旗","新巴尔虎左旗","新巴尔虎右旗","满洲里","牙克石","扎兰屯","额尔古纳","根河","临河","五原","磴口","乌拉特前旗","乌拉特中旗","乌拉特后旗","杭锦后旗","集宁","卓资","化德","商都","兴和","凉城","察哈尔右翼前旗","察哈尔右翼中旗","察哈尔右翼后旗","四子王旗","丰镇","乌兰浩特","阿尔山","科尔沁右翼前旗","科尔沁右翼中旗","扎赉特旗","突泉","二连浩特","锡林浩特","阿巴嘎旗","苏尼特左旗","苏尼特右旗","东乌珠穆沁旗","西乌珠穆沁旗","太仆寺旗","镶黄旗","正镶白旗","正蓝旗","多伦","阿拉善左旗","阿拉善右旗","额济纳旗","和平","沈河","大东","皇姑","铁西","苏家屯","浑南","沈北新","于洪","辽中","康平","法库","新民","中山","西岗","沙河口","甘井子","旅顺口","金州","普兰店","长海","瓦房店","庄河","铁东","铁西","立山","千山","台安","岫岩","海城","新抚","东洲","望花","顺城","抚顺","新宾","清原","平山","溪湖","明山","南芬","本溪","桓仁","元宝","振兴","振安","宽甸","东港","凤城","古塔","凌河","太和","黑山","义县","凌海","北镇","站前","西市","鲅鱼圈","老边","盖州","大石桥","海州","新邱","太平","清河门","细河","阜新","彰武","白塔","文圣","宏伟","弓长岭","太子河","辽阳","灯塔","双台子","兴隆台","大洼","盘山","银州","清河","铁岭","西丰","昌图","调兵山","开原","双塔","龙城","朝阳","建平","喀喇沁左翼","北票","凌源","连山","龙港","南票","绥中","建昌","兴城","南关","宽城","朝阳","二道","绿园","双阳","九台","农安","榆树","德惠","昌邑","龙潭","船营","丰满","永吉","蛟河","桦甸","舒兰","磐石","铁西","铁东","梨树","伊通","公主岭","双辽","龙山","西安","东丰","东辽","东昌","二道江","通化","辉南","柳河","梅河口","集安","浑江","江源","抚松","靖宇","长白","临江","宁江","前郭尔罗斯","长岭","乾安","扶余","洮北","镇赉","通榆","洮南","大安","延吉","图们","敦化","珲春","龙井","和龙","汪清","安图","道里","南岗","道外","平房","松北","香坊","呼兰","阿城","双城","依兰","方正","宾县","巴彦","木兰","通河","延寿","尚志","五常","龙沙","建华","铁锋","昂昂溪","富拉尔基","碾子山","梅里斯","龙江","依安","泰来","甘南","富裕","克山","克东","拜泉","讷河","鸡冠","恒山","滴道","梨树","城子河","麻山","鸡东","虎林","密山","向阳","工农","南山","兴安","东山","兴山","萝北","绥滨","尖山","岭东","四方台","宝山","集贤","友谊","宝清","饶河","萨尔图","龙凤","让胡路","红岗","大同","肇州","肇源","林甸","杜尔伯特","伊美","乌翠","友好","嘉荫","汤旺","丰林","大箐山","南岔","金林","铁力","向阳","前进","东风","郊区","桦南","桦川","汤原","同江","富锦","抚远","新兴","桃山","茄子河","勃利","东安","阳明","爱民","西安","林口","绥芬河","海林","宁安","穆棱","东宁","爱辉","嫩江","逊克","孙吴","北安","五大连池","北林","望奎","兰西","青冈","庆安","明水","绥棱","安达","肇东","海伦","漠河","呼玛","塔河","黄浦","徐汇","长宁","静安","普陀","虹口","杨浦","闵行","宝山","嘉定","浦东","金山","松江","青浦","奉贤","崇明","玄武","秦淮","建邺","鼓楼","浦口","栖霞","雨花台","江宁","六合","溧水","高淳","锡山",
"惠山","滨湖","梁溪","新吴","江阴","宜兴","鼓楼","云龙","贾汪","泉山","铜山","丰县","沛县","睢宁","新沂","邳州","天宁","钟楼","新北","武进","金坛","溧阳","虎丘","吴中","相城","姑苏","吴江","常熟","张家港","昆山","太仓","崇川","港闸","通州","如东","启东","如皋","海门","海安","连云","海州","赣榆","东海","灌云","灌南","淮安","淮阴","清江浦","洪泽","涟水","盱眙","金湖","亭湖","盐都","大丰","响水","滨海","阜宁","射阳","建湖","东台","广陵","邗江","江都","宝应","仪征","高邮","京口","润州","丹徒","丹阳","扬中","句容","海陵","高港","姜堰","兴化","靖江","泰兴","宿城","宿豫","沭阳","泗阳","泗洪","上城","下城","江干","拱墅","西湖","滨江","萧山","余杭","富阳","临安","桐庐","淳安","建德","海曙","江北","北仑","镇海","鄞州","奉化","象山","宁海","余姚","慈溪","鹿城","龙湾","瓯海","洞头","永嘉","平阳","苍南","文成","泰顺","瑞安","乐清","南湖","秀洲","嘉善","海盐","海宁","平湖","桐乡","吴兴","南浔","德清","长兴","安吉","越城","柯桥","上虞","新昌","诸暨","嵊州","婺城","金东","武义","浦江","磐安","兰溪","义乌","东阳","永康","柯城","衢江","常山","开化","龙游","江山","定海","普陀","岱山","嵊泗","椒江","黄岩","路桥","三门","天台","仙居","温岭","临海","玉环","莲都","青田","缙云","遂昌","松阳","云和","庆元","景宁","龙泉","瑶海","庐阳","蜀山","包河","长丰","肥东","肥西","庐江","巢湖","镜湖","弋江","鸠江","三山","芜湖","繁昌","南陵","无为","龙子湖","蚌山","禹会","淮上","怀远","五河","固镇","大通","田家庵","谢家集","八公山","潘集","凤台","寿县","花山","雨山","博望","当涂","含山","和县","杜集","相山","烈山","濉溪","铜官","义安","郊区","枞阳","迎江","大观","宜秀","怀宁","太湖","宿松","望江","岳西","桐城","潜山","屯溪","黄山","徽州","歙县","休宁","黟县","祁门","琅琊","南谯","来安","全椒","定远","凤阳","天长","明光","颍州","颍东","颍泉","临泉","太和","阜南","颍上","界首","埇桥","砀山","萧县","灵璧","泗县","金安","裕安","叶集","霍邱","舒城","金寨","霍山","谯城","涡阳","蒙城","利辛","贵池","东至","石台","青阳","宣州","郎溪","广德","泾县","绩溪","旌德","宁国","鼓楼","台江","仓山","马尾","晋安","长乐","闽侯","连江","罗源","闽清","永泰","平潭","福清","思明","海沧","湖里","集美","同安","翔安","城厢","涵江","荔城","秀屿","仙游","梅列","三元","明溪","清流","宁化","大田","尤溪","沙县","将乐","泰宁","建宁","永安","鲤城","丰泽","洛江","泉港","惠安","安溪","永春","德化","金门","石狮","晋江","南安","芗城","龙文","云霄","漳浦","诏安","长泰","东山","南靖","平和","华安","龙海","延平","建阳","顺昌","浦城","光泽","松溪","政和","邵武","武夷山","建瓯","新罗","永定","长汀","上杭","武平","连城","漳平","蕉城","霞浦","古田","屏南","寿宁","周宁","柘荣","福安","福鼎","东湖","西湖","青云谱","湾里","青山湖","新建","南昌","安义","进贤","昌江","珠山","浮梁","乐平","安源","湘东","莲花","上栗","芦溪","濂溪","浔阳","柴桑","武宁","修水","永修","德安","都昌","湖口","彭泽","瑞昌","共青城","庐山","渝水","分宜","月湖","余江",
"贵溪","章贡","南康","赣县","信丰","大余","上犹","崇义","安远","龙南","定南","全南","宁都","于都","兴国","会昌","寻乌","石城","瑞金","吉州","青原","吉安","吉水","峡江","新干","永丰","泰和","遂川","万安","安福","永新","井冈山","袁州","奉新","万载","上高","宜丰","靖安","铜鼓","丰城","樟树","高安","临川","东乡","南城","黎川","南丰","崇仁","乐安","宜黄","金溪","资溪","广昌","信州","广丰","上饶","玉山","铅山","横峰","弋阳","余干","鄱阳","万年","婺源","德兴","历下","市中","槐荫","天桥","历城","长清","章丘","济阳","莱芜","钢城","平阴","商河","市南","市北","黄岛","崂山","李沧","城阳","即墨","胶州","平度","莱西","淄川","张店","博山","临淄","周村","桓台","高青","沂源","市中","薛城","峄城","台儿庄","山亭","滕州","东营","河口","垦利","利津","广饶","芝罘","福山","牟平","莱山","长岛","龙口","莱阳","莱州","蓬莱","招远","栖霞","海阳","潍城","寒亭","坊子","奎文","临朐","昌乐","青州","诸城","寿光","安丘","高密","昌邑","任城","兖州","微山","鱼台","金乡","嘉祥","汶上","泗水","梁山","曲阜","邹城","泰山","岱岳","宁阳","东平","新泰","肥城","环翠","文登","荣成","乳山","东港","岚山","五莲","莒县","兰山","罗庄","河东","沂南","郯城","沂水","兰陵","费县","平邑","莒南","蒙阴","临沭","德城","陵城","宁津","庆云","临邑","齐河","平原","夏津","武城","乐陵","禹城","东昌府","阳谷","莘县","茌平","东阿","冠县","高唐","临清","滨城","沾化","惠民","阳信","无棣","博兴","邹平","牡丹","定陶","曹县","单县","成武","巨野","郓城","鄄城","东明","中原","二七","管城","金水","上街","惠济","中牟","巩义","荥阳","新密","新郑","登封","龙亭","顺河","鼓楼","禹王台","祥符","杞县","通许","尉氏","兰考","老城","西工","瀍河","涧西","吉利","洛龙","孟津","新安","栾川","嵩县","汝阳","宜阳","洛宁","伊川","偃师","新华","卫东","石龙","湛河","宝丰","叶县","鲁山","郏县","舞钢","汝州","文峰","北关","殷都","龙安","安阳","汤阴","滑县","内黄","林州","鹤山","山城","淇滨","浚县","淇县","红旗","卫滨","凤泉","牧野","新乡","获嘉","原阳","延津","封丘","长垣","卫辉","辉县","解放","中站","马村","山阳","修武","博爱","武陟","温县","沁阳","孟州","华龙","清丰","南乐","范县","台前","濮阳","魏都","建安","鄢陵","襄城","禹州","长葛","源汇","郾城","召陵","舞阳","临颍","湖滨","陕州","渑池","卢氏","义马","灵宝","宛城","卧龙","南召","方城","西峡","镇平","内乡","淅川","社旗","唐河","新野","桐柏","邓州","梁园","睢阳","民权","睢县","宁陵","柘城","虞城","夏邑","永城","浉河","平桥","罗山","光山","新县","商城","固始","潢川","淮滨","息县","川汇","扶沟","西华","商水","沈丘","郸城","淮阳","太康","鹿邑","项城","驿城","西平","上蔡","平舆","正阳","确山","泌阳","汝南","遂平","新蔡","江岸","江汉","硚口","汉阳","武昌","青山","洪山","东西湖","汉南","蔡甸","江夏","黄陂","新洲","黄石港","西塞山","下陆","铁山","阳新","大冶","茅箭","张湾","郧阳","郧西","竹山","竹溪","房县","丹江口","西陵","伍家岗","点军","猇亭","夷陵","远安","兴山","秭归","长阳","五峰","宜都","当阳","
枝江","襄城","樊城","襄州","南漳","谷城","保康","老河口","枣阳","宜城","梁子湖","华容","鄂城","东宝","掇刀","沙洋","钟祥","京山","孝南","孝昌","大悟","云梦","应城","安陆","汉川","沙市","荆州","公安","监利","江陵","石首","洪湖","松滋","黄州","团风","红安","罗田","英山","浠水","蕲春","黄梅","麻城","武穴","咸安","嘉鱼","通城","崇阳","通山","赤壁","曾都","随县","广水","恩施","利川","建始","巴东","宣恩","咸丰","来凤","鹤峰","芙蓉","天心","岳麓","开福","雨花","望城","长沙","浏阳","宁乡","荷塘","芦淞","石峰","天元","渌口","攸县","茶陵","炎陵","醴陵","雨湖","岳塘","湘潭","湘乡","韶山","珠晖","雁峰","石鼓","蒸湘","南岳","衡阳","衡南","衡山","衡东","祁东","耒阳","常宁","双清","大祥","北塔","邵东","新邵","邵阳","隆回","洞口","绥宁","新宁","城步","武冈","岳阳楼","云溪","君山","岳阳","华容","湘阴","平江","汨罗","临湘","武陵","鼎城","安乡","汉寿","澧县","临澧","桃源","石门","津市","永定","武陵源","慈利","桑植","资阳","赫山","南县","桃江","安化","沅江","北湖","苏仙","桂阳","宜章","永兴","嘉禾","临武","汝城","桂东","安仁","资兴","零陵","冷水滩","祁阳","东安","双牌","道县","江永","宁远","蓝山","新田","江华","鹤城","中方","沅陵","辰溪","溆浦","会同","麻阳","新晃","芷江","靖州","通道","洪江","娄星","双峰","新化","冷水江","涟源","吉首","泸溪","凤凰","花垣","保靖","古丈","永顺","龙山","荔湾","越秀","海珠","天河","白云","黄埔","番禺","花都","南沙","从化","增城","武江","浈江","曲江","始兴","仁化","翁源","乳源","新丰","乐昌","南雄","罗湖","福田","南山","宝安","龙岗","盐田","龙华","坪山","光明","香洲","斗门","金湾","龙湖","金平","濠江","潮阳","潮南","澄海","南澳","禅城","南海","顺德","三水","高明","蓬江","江海","新会","台山","开平","鹤山","恩平","赤坎","霞山","坡头","麻章","遂溪","徐闻","廉江","雷州","吴川","茂南","电白","高州","化州","信宜","端州","鼎湖","高要","广宁","怀集","封开","德庆","四会","惠城","惠阳","博罗","惠东","龙门","梅江","梅县","大埔","丰顺","五华","平远","蕉岭","兴宁","城区","海丰","陆河","陆丰","源城","紫金","龙川","连平","和平","东源","江城","阳东","阳西","阳春","清城","清新","佛冈","阳山","连山","连南","英德","连州","湘桥","潮安","饶平","榕城","揭东","揭西","惠来","普宁","云城","云安","新兴","郁南","罗定","兴宁","青秀","江南","西乡塘","良庆","邕宁","武鸣","隆安","马山","上林","宾阳","横县","城中","鱼峰","柳南","柳北","柳江","柳城","鹿寨","融安","融水","三江","秀峰","叠彩","象山","七星","雁山","临桂","阳朔","灵川","全州","兴安","永福","灌阳","龙胜","资源","平乐","荔浦","恭城","万秀","长洲","龙圩","苍梧","藤县","蒙山","岑溪","海城","银海","铁山港","合浦","港口","防城","上思","东兴","钦南","钦北","灵山","浦北","港北","港南","覃塘","平南","桂平","玉州","福绵","容县","陆川","博白","兴业","北流","右江","田阳","田东","平果","德保","那坡","凌云","乐业","田林","西林","隆林","靖西","八步","平桂","昭平","钟山","富川","金城江","宜州","南丹","天峨","凤山","东兰","罗城","环
江","巴马","都安","大化","兴宾","忻城","象州","武宣","金秀","合山","江州","扶绥","宁明","龙州","大新","天等","凭祥","秀英","龙华","琼山","美兰","海棠","吉阳","天涯","崖州","万州","涪陵","渝中","大渡口","江北","沙坪坝","九龙坡","南岸","北碚","綦江","大足","渝北","巴南","黔江","长寿","江津","合川","永川","南川","璧山","铜梁","潼南","荣昌","开州","梁平","武隆","城口","丰都","垫江","忠县","云阳","奉节","巫山","巫溪","石柱","秀山","酉阳","彭水","锦江","青羊","金牛","武侯","成华","龙泉驿","青白江","新都","温江","双流","郫都","金堂","大邑","蒲江","新津","都江堰","彭州","邛崃","崇州","简阳","自流井","贡井","大安","沿滩","荣县","富顺","东区","西区","仁和","米易","盐边","江阳","纳溪","龙马潭","泸县","合江","叙永","古蔺","旌阳","罗江","中江","广汉","什邡","绵竹","涪城","游仙","安州","三台","盐亭","梓潼","北川","平武","江油","利州","昭化","朝天","旺苍","青川","剑阁","苍溪","船山","安居","蓬溪","射洪","大英","市中","东兴","威远","资中","隆昌","市中","沙湾","五通桥","金口河","犍为","井研","夹江","沐川","峨边","马边","峨眉山","顺庆","高坪","嘉陵","南部","营山","蓬安","仪陇","西充","阆中","东坡","彭山","仁寿","洪雅","丹棱","青神","翠屏","南溪","叙州","江安","长宁","高县","珙县","筠连","兴文","屏山","广安","前锋","岳池","武胜","邻水","华蓥","通川","达川","宣汉","开江","大竹","渠县","万源","雨城","名山","荥经","汉源","石棉","天全","芦山","宝兴","巴州","恩阳","通江","南江","平昌","雁江","安岳","乐至","马尔康","汶川","理县","茂县","松潘","九寨沟","金川","小金","黑水","壤塘","阿坝","若尔盖","红原","康定","泸定","丹巴","九龙","雅江","道孚","炉霍","甘孜","新龙","德格","白玉","石渠","色达","理塘","巴塘","乡城","稻城","得荣","西昌","木里","盐源","德昌","会理","会东","宁南","普格","布拖","金阳","昭觉","喜德","冕宁","越西","甘洛","美姑","雷波","南明","云岩","花溪","乌当","白云","观山湖","开阳","息烽","修文","清镇","钟山","六枝特","水城","盘州","红花岗","汇川","播州","桐梓","绥阳","正安","道真","务川","凤冈","湄潭","余庆","习水","赤水","仁怀","西秀","平坝","普定","镇宁","关岭","紫云","七星关","大方","黔西","金沙","织金","纳雍","威宁","赫章","碧江","万山","江口","玉屏","石阡","思南","印江","德江","沿河","松桃","兴义","兴仁","普安","晴隆","贞丰","望谟","册亨","安龙","凯里","黄平","施秉","三穗","镇远","岑巩","天柱","锦屏","剑河","台江","黎平","榕江","从江","雷山","麻江","丹寨","都匀","福泉","荔波","贵定","瓮安","独山","平塘","罗甸","长顺","龙里","惠水","三都","五华","盘龙","官渡","西山","东川","呈贡","晋宁","富民","宜良","石林","嵩明","禄劝","寻甸","安宁","麒麟","沾益","马龙","陆良","师宗","罗平","富源","会泽","宣威","红塔","江川","澄江","通海","华宁","易门","峨山","新平","元江","隆阳","施甸","龙陵","昌宁","腾冲","昭阳","鲁甸","巧家","盐津","大关","永善","绥江","镇雄","彝良","威信","水富","古城","玉龙","永胜","华坪","宁蒗","思茅","宁洱","墨江","景东","景谷","镇沅","江城","孟连","澜沧","西盟"
,"临翔","凤庆","云县","永德","镇康","双江","耿马","沧源","楚雄","双柏","牟定","南华","姚安","大姚","永仁","元谋","武定","禄丰","个旧","开远","蒙自","弥勒","屏边","建水","石屏","泸西","元阳","红河","金平","绿春","河口","文山","砚山","西畴","麻栗坡","马关","丘北","广南","富宁","景洪","勐海","勐腊","大理","漾濞","祥云","宾川","弥渡","南涧","巍山","永平","云龙","洱源","剑川","鹤庆","瑞丽","芒市","梁河","盈江","陇川","泸水","福贡","贡山","兰坪","香格里拉","德钦","维西","城关","堆龙德庆","达孜","林周","当雄","尼木","曲水","墨竹工卡","桑珠孜","南木林","江孜","定日","萨迦","拉孜","昂仁","谢通门","白朗","仁布","康马","定结","仲巴","亚东","吉隆","聂拉木","萨嘎","岗巴","卡若","江达","贡觉","类乌齐","丁青","察雅","八宿","左贡","芒康","洛隆","边坝","巴宜","工布江达","米林","墨脱","波密","察隅","朗县","乃东","扎囊","贡嘎","桑日","琼结","曲松","措美","洛扎","加查","隆子","错那","浪卡子","色尼","嘉黎","比如","聂荣","安多","申扎","索县","班戈","巴青","尼玛","双湖","普兰","札达","噶尔","日土","革吉","改则","措勤","新城","碑林","莲湖","灞桥","未央","雁塔","阎良","临潼","长安","高陵","鄠邑","蓝田","周至","王益","印台","耀州","宜君","渭滨","金台","陈仓","凤翔","岐山","扶风","眉县","陇县","千阳","麟游","凤县","太白","秦都","杨陵","渭城","三原","泾阳","乾县","礼泉","永寿","长武","旬邑","淳化","武功","兴平","彬州","临渭","华州","潼关","大荔","合阳","澄城","蒲城","白水","富平","韩城","华阴","宝塔","安塞","延长","延川","子长","志丹","吴起","甘泉","富县","洛川","宜川","黄龙","黄陵","汉台","南郑","城固","洋县","西乡","勉县","宁强","略阳","镇巴","留坝","佛坪","榆阳","横山","府谷","靖边","定边","绥德","米脂","佳县","吴堡","清涧","子洲","神木","汉滨","汉阴","石泉","宁陕","紫阳","岚皋","平利","镇坪","旬阳","白河","商州","洛南","丹凤","商南","山阳","镇安","柞水","城关","七里河","西固","安宁","红古","永登","皋兰","榆中","金川","永昌","白银","平川","靖远","会宁","景泰","秦州","麦积","清水","秦安","甘谷","武山","张家川","凉州","民勤","古浪","天祝","甘州","肃南","民乐","临泽","高台","山丹","崆峒","泾川","灵台","崇信","庄浪","静宁","华亭","肃州","金塔","瓜州","肃北","阿克塞","玉门","敦煌","西峰","庆城","环县","华池","合水","正宁","宁县","镇原","安定","通渭","陇西","渭源","临洮","漳县","岷县","武都","成县","文县","宕昌","康县","西和","礼县","徽县","两当","临夏","临夏","康乐","永靖","广河","和政","东乡","积石山","合作","临潭","卓尼","舟曲","迭部","玛曲","碌曲","夏河","城东","城中","城西","城北","大通","湟中","湟源","乐都","平安","民和","互助","化隆","循化","门源","祁连","海晏","刚察","同仁","尖扎","泽库","河南","共和","同德","贵德","兴海","贵南","玛沁","班玛","甘德","达日","久治","玛多","玉树","杂多","称多","治多","囊谦","曲麻莱","格尔木","德令哈","茫崖","乌兰","都兰","天峻","兴庆","西夏","金凤","永宁","贺兰","灵武","大武口","惠农","平罗","利通","红寺堡","盐池","同心","青铜峡","原州","西吉","隆德","泾源"
,"彭阳","沙坡头","中宁","海原","天山","沙依巴克","新市","水磨沟","头屯河","达坂城","米东","乌鲁木齐","独山子","克拉玛依","白碱滩","乌尔禾","高昌","鄯善","托克逊","伊州","巴里坤哈萨克","伊吾","昌吉","阜康","呼图壁","玛纳斯","奇台","吉木萨尔","木垒哈萨克","博乐","阿拉山口","精河","温泉","库尔勒","轮台","尉犁","若羌","且末","焉耆","和静","和硕","博湖","阿克苏","温宿","库车","沙雅","新和","拜城","乌什","阿瓦提","柯坪","阿图什","阿克陶","阿合奇","乌恰","喀什","疏附","疏勒","英吉沙","泽普","莎车","叶城","麦盖提","岳普湖","伽师","巴楚","塔什库尔干塔吉克","和田","和田","墨玉","皮山","洛浦","策勒","于田","民丰","伊宁","奎屯","霍尔果斯","伊宁","察布查尔锡伯","霍城","巩留","新源","昭苏","特克斯","尼勒克","塔城","乌苏","额敏","沙湾","托里","裕民","和布克赛尔","阿勒泰","布尔津","富蕴","福海","哈巴河","青河","吉木乃","内湖","南港","中正","万华","大同","中山","松山","大安","信义","文山","士林","北投","楠梓","左营","鼓山","三民","盐埕","前金","新兴","苓雅","前镇","旗津","小港","凤山","大寮","鸟松","林园","仁武","大树","大社","冈山","路竹","桥头","梓官","弥陀","永安","燕巢","阿莲","茄萣","湖内","田寮","旗山","美浓","内门","杉林","甲仙","六龟","茂林","桃源","那玛夏","桃园","中坜","平镇","八德","杨梅","芦竹","大溪","龙潭","龟山","大园","观音","新屋","复兴","中区","东区","南区","西区","北区","北屯","西屯","南屯","太平","大里","雾峰","乌日","丰原","后里","潭子","大雅","神冈","石冈","东势","新社","和平","大肚","沙鹿","龙井","梧栖","清水","大甲","外埔","大安","中西","东区","南区","北区","安平","安南","永康","归仁","新化","左镇","玉井","楠西","南化","仁德","关庙","龙崎","官田","麻豆","佳里","西港","七股","将军","学甲","北门","新营","后壁","白河","东山","六甲","下营","柳营","盐水","善化","大内","山上","新市","安定","板桥","汐止","新店","永和","中和","土城","树林","三重","新庄","芦洲","瑞芳","三峡","莺歌","淡水","万里","金山","深坑","石碇","平溪","双溪","贡寮","坪林","乌来","泰山","林口","五股","八里","三芝","石门","中正","信义","仁爱","中山","安乐","暖暖","七堵","东区","北区","香山","东区","西区","中西","东区","南区","湾仔","九龙城","观塘","深水埗","黄大仙","油尖旺","离岛","葵青","北区","西贡","沙田","大埔","荃湾","屯门","元朗","花地玛堂","圣安多尼堂","大堂","望德堂","风顺堂","嘉模堂","圣方济各堂"]]
\ No newline at end of file
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
<module>middleware-automatic-center-webapi</module> <module>middleware-automatic-center-webapi</module>
<module>middleware-automatic-center-client-autoconfigure</module> <module>middleware-automatic-center-client-autoconfigure</module>
<module>middleware-automatic-center-server</module> <module>middleware-automatic-center-server</module>
<module>middleware-automatic-center-son</module>
</modules> </modules>
<name>middleware-automatic</name> <name>middleware-automatic</name>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment