Commit 2a9733b4 by shentao

Merge branch 'release' into 'master'

Release

See merge request !91
parents f2c21a5a 2fedede7
......@@ -11,13 +11,13 @@
</parent>
<artifactId>middleware-automatic-center-autoconfigure</artifactId>
<version>1.0.0.1-SNAPSHOT</version>
<version>1.0.0.3-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
<automatic.version>1.0.0.2-SNAPSHOT</automatic.version>
<automatic.version>1.0.0.3-SNAPSHOT</automatic.version>
</properties>
<dependencies>
......
......@@ -11,7 +11,7 @@
</parent>
<artifactId>middleware-automatic-center-client</artifactId>
<version>1.0.0.2-SNAPSHOT</version>
<version>1.0.0.3-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
......
......@@ -53,6 +53,10 @@ public class AutoMaticClient {
return autoMaticService.compareWithTemplateTileOL(project, title);
}
/**
 * Matches a title against the existing aggregated titles of a project by
 * forwarding the call unchanged to {@code autoMaticService}.
 *
 * @param project project to search in
 * @param title   title to match
 * @param isUse   passed through to the service as-is
 * @return whatever {@code autoMaticService.compareWithTemplateTileOL} returns
 */
public Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse) {
return autoMaticService.compareWithTemplateTileOL(project, title, isUse);
}
/**
 * Fetches one page of per-template daily statistics by forwarding the call
 * unchanged to {@code autoMaticService}.
 *
 * @param page       page number, passed through as-is
 * @param pageSize   page size, passed through as-is
 * @param templateId template id
 * @return whatever {@code autoMaticService.getTemplateDaySumData} returns
 */
public PageVO<JSONObject> getTemplateDaySumData(int page, int pageSize, String templateId) {
return autoMaticService.getTemplateDaySumData(page, pageSize, templateId);
}
......
......@@ -62,6 +62,17 @@ public interface AutoMaticService {
* @return 返回值
*/
Map<String, Object> compareWithTemplateTileOL(String project, String title);
/**
* Matches, online, a title against the existing aggregated titles of a project
* group (either the in-use set or the deprecated set).
*
* @param project project
* @param title title
* @param isUse whether to match against the in-use set
* @return the match result
*/
Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse);
/**
* 分页查询模板声量统计详细数据
* @param templateId 模板id
......
package com.zhiwei.middleware.automatic.server.graphs;
import java.util.List;
import java.util.Map;
/**
 * Contract for a keyword-matching graph: it is fed with definitions and later
 * queried with text.
 *
 * @param <T> type of the definitions accepted by {@link #addGraph(List)}
 * @param <O> type of the results produced by {@link #find(String)}
 */
public interface GraphsServer<T, O> {
// Supplies the definitions the matcher is built from.
// NOTE(review): whether this replaces or extends earlier definitions is up to the implementation.
void addGraph(List<T> t);
// Returns the results found for the given text.
List<O> find(String text);
}
package com.zhiwei.middleware.automatic.server.graphs;
import com.zhiwei.middleware.automatic.server.pojo.GroupTerm;
import com.zhiwei.middleware.automatic.server.pojo.MonitorKeyword;
import com.zhiwei.middleware.automatic.server.pojo.QbjcRuleMatchedInfo;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.stream.Collectors;
/**
 * {@link GraphsServer} implementation that matches monitor keyword rules against text.
 *
 * <p>Every rule string is lower-cased, split on {@code |} into alternatives, and each
 * alternative is split on runs of spaces into words. All words of one alternative must
 * occur in the text for that alternative (an "AND group") to count as a hit.
 */
public class MonitorGraphsImpl implements GraphsServer<MonitorKeyword, QbjcRuleMatchedInfo> {

    /** Matching graph containing every individual word of every rule. */
    private Graphs graphs;

    /** For each word, the AND groups (together with their owning rule) that contain it. */
    private Map<String, List<GroupTerm>> terms;

    public MonitorGraphsImpl() {
        this.graphs = new Graphs();
        this.terms = new HashMap<>();
    }

    /**
     * Rebuilds the graph and the word-to-group table from scratch and then replaces the
     * current ones. Entries whose monitor level is {@code null} are ignored.
     *
     * @param monitorKeyword rules to build the matcher from
     */
    @Override
    public void addGraph(List<MonitorKeyword> monitorKeyword) {
        Graphs tempGraphs = new Graphs();
        Map<String, List<GroupTerm>> tempTerms = new HashMap<>();
        for (MonitorKeyword keyword : monitorKeyword) {
            if (null != keyword.getMonitorLevel()) {
                preGraphs(keyword.getKeywords(), tempGraphs);
                preTerms(keyword, tempTerms);
            }
        }
        // Build into temporaries first, then swap, so a failed build leaves the old state intact.
        graphs = tempGraphs;
        terms = tempTerms;
    }

    /**
     * Finds every AND group whose words all occur in the (lower-cased) text.
     *
     * @param text text to scan
     * @return one {@link QbjcRuleMatchedInfo} per matched AND group
     * @throws IllegalStateException if the graph reports a word that has no entry in the
     *                               word-to-group table
     */
    @Override
    public List<QbjcRuleMatchedInfo> find(String text) {
        String lowered = StringUtils.lowerCase(text);
        List<QbjcRuleMatchedInfo> res = new ArrayList<>();
        // Raw matches from the graph.
        List<Keyword> results = graphs.find(lowered);
        // Matches grouped by their key.
        Map<String, List<Keyword>> kResults = results.stream().collect(Collectors.groupingBy(Keyword::getKey));
        // Words already processed; prevents a group such as "A B" from being reported once for A and once for B.
        Set<String> hasMatched = new HashSet<>();
        // Occurrence statistics per word.
        Map<String, Integer> statis = graphs.change2Statistics(results);
        statis.forEach((keyword, rate) -> {
            List<GroupTerm> list = terms.get(keyword);
            if (null == list) {
                throw new IllegalStateException("keyword不存在:" + keyword);
            }
            // Candidate groups containing this word.
            for (GroupTerm groupTerm : list) {
                // One hit set per group: sharing a set between groups would leak hits of other
                // (possibly unmatched) groups into this group's HitInfo and alias the same
                // mutable set across several results.
                Set<Keyword> hitKeywords = new HashSet<>();
                int count = -1;
                for (String checkWord : groupTerm.getAndKeywords()) {
                    // The group contains a word that was already processed, so it was already evaluated.
                    if (hasMatched.contains(checkWord)) {
                        count = -1;
                        break;
                    }
                    int current = statis.getOrDefault(checkWord, 0);
                    if (current > 0) {
                        hitKeywords.addAll(kResults.getOrDefault(checkWord, Collections.emptyList()));
                    }
                    // Track the minimum occurrence count over the group's words (-1 means "not set yet").
                    count = (count == -1 || current < count) ? current : count;
                }
                // A positive minimum means every word of the group occurred at least once.
                if (count > 0) {
                    res.add(buildMatchedInfo(groupTerm, hitKeywords, count));
                }
            }
            hasMatched.add(keyword);
        });
        return res;
    }

    /**
     * Creates the result object for one matched AND group: copies the basic attributes of the
     * owning rule and attaches a single {@code HitInfo} describing the hit.
     */
    private static QbjcRuleMatchedInfo buildMatchedInfo(GroupTerm groupTerm, Set<Keyword> hitKeywords, int count) {
        MonitorKeyword monitorKeyword = groupTerm.getMonitorKeyword();
        QbjcRuleMatchedInfo ruleMatchedInfo = new QbjcRuleMatchedInfo();
        // Basic information taken from the rule.
        ruleMatchedInfo.setId(monitorKeyword.getId());
        ruleMatchedInfo.setProject(monitorKeyword.getProject());
        ruleMatchedInfo.setRuleType(QbjcRuleMatchedInfo.RuleType.getByName(monitorKeyword.getType()));
        ruleMatchedInfo.setChannels(monitorKeyword.getChannels());
        ruleMatchedInfo.setPlatforms(monitorKeyword.getPlatforms());
        ruleMatchedInfo.setMonitorLevel(monitorKeyword.getMonitorLevel());
        // Information about the matched words.
        List<QbjcRuleMatchedInfo.HitInfo> infos = new ArrayList<>();
        infos.add(new QbjcRuleMatchedInfo.HitInfo(hitKeywords, groupTerm.getFullName(), count));
        ruleMatchedInfo.setHitInfos(infos);
        return ruleMatchedInfo;
    }

    /**
     * Splits an already lower-cased rule into its AND groups: {@code |} separates
     * alternatives, runs of spaces separate the words inside one alternative.
     */
    private static List<String[]> splitAndGroups(String loweredRule) {
        List<String[]> groups = new ArrayList<>();
        for (String andStr : loweredRule.split("\\|")) {
            groups.add(andStr.trim().split(" +"));
        }
        return groups;
    }

    /**
     * Registers the AND groups of a rule in the word-to-group table; every word of a group
     * serves as a lookup key for that group.
     */
    private void preTerms(MonitorKeyword monitorKeyword, Map<String, List<GroupTerm>> terms) {
        for (String rawKeyword : monitorKeyword.getKeywords()) {
            String usedKeyword = StringUtils.lowerCase(rawKeyword);
            for (String[] ands : splitAndGroups(usedKeyword)) {
                List<String> andWords = Arrays.asList(ands);
                for (String str : ands) {
                    // Create the bucket on first use, then append this group.
                    terms.computeIfAbsent(str, key -> new ArrayList<>())
                            .add(new GroupTerm(andWords, usedKeyword, monitorKeyword));
                }
            }
        }
    }

    /**
     * Adds every individual word of the given rules to the matching graph.
     *
     * @param usedKeywords rule strings to decompose
     * @param graphs       graph receiving the words
     */
    private void preGraphs(List<String> usedKeywords, Graphs graphs) {
        for (String rawKeyword : usedKeywords) {
            for (String[] ands : splitAndGroups(StringUtils.lowerCase(rawKeyword))) {
                for (String str : ands) {
                    graphs.addGraph(str);
                }
            }
        }
    }
}
......@@ -7,6 +7,7 @@ import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Date;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
public class TemplateTitleVo implements Serializable {
......@@ -19,6 +20,9 @@ public class TemplateTitleVo implements Serializable {
private long daySum;
private String mtag;
private String url;
private List<String> textNumbers;
private TemplateStatus status;
public String getId() {
......@@ -99,6 +103,14 @@ public class TemplateTitleVo implements Serializable {
this.url = url;
}
/**
 * Returns the stored {@code textNumbers} list (the reference itself, not a copy).
 */
public List<String> getTextNumbers() {
return textNumbers;
}
/**
 * Replaces the stored {@code textNumbers} list with the given reference (no copy is made).
 *
 * @param textNumbers new list to store
 */
public void setTextNumbers(List<String> textNumbers) {
this.textNumbers = textNumbers;
}
/**
 * Returns the current {@code status} of this template title.
 */
public TemplateStatus getStatus() {
return status;
}
......@@ -108,7 +120,7 @@ public class TemplateTitleVo implements Serializable {
}
public TemplateTitleVo(String templateTitle, String mtag, String url) {
public TemplateTitleVo(String templateTitle, String mtag, String url, List<String> textNumbers) {
this.updateTime = new Date();
this.createTime = new Date();
this.templateTitle = templateTitle;
......@@ -116,6 +128,7 @@ public class TemplateTitleVo implements Serializable {
this.daySum = 0L;
this.mtag = mtag;
this.url = url;
this.textNumbers = textNumbers;
this.status = TemplateStatus.运行中;
}
......
......@@ -21,7 +21,7 @@
<qbjc-bean.version>1.1.4.1-SNAPSHOT</qbjc-bean.version>
<nlp-aggree.version>0.0.5-SNAPSHOT</nlp-aggree.version>
<dubbo-server.version>2.7.4.1</dubbo-server.version>
<automatic.version>1.0.0.2-SNAPSHOT</automatic.version>
<automatic.version>1.0.0.3-SNAPSHOT</automatic.version>
<base.version>2.0.0-SNAPSHOT</base.version>
</properties>
......
......@@ -69,6 +69,17 @@ public interface AutoMaticService {
*/
Map<String, Object> compareWithTemplateTileOL(String project, String title);
/**
* Matches, online, a title against the existing aggregated titles of a project
* group (either the in-use set or the deprecated set).
*
* @param project project
* @param title title
* @param isUse whether to match against the in-use set
* @return the match result
*/
Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse);
/**
* 分页查询模板声量统计详细数据
* @param templateId 模板id
......
......@@ -68,7 +68,7 @@ public class AutoMaticServiceImpl implements AutoMaticService {
}
@Override
public Map<String, Object> getTemplateTitleByProjectByLive( String project, String searchInfo, List<String> tags, long startTime,
public Map<String, Object> getTemplateTitleByProjectByLive(String project, String searchInfo, List<String> tags, long startTime,
long endTime, int page, int size, String orderField, boolean isAsc) {
return templateTitleService.searchTemplateTitle(GenericAttribute.REDIS_MAP_KEY, project, searchInfo, tags, startTime, endTime, page, size, orderField, isAsc);
}
......@@ -86,7 +86,12 @@ public class AutoMaticServiceImpl implements AutoMaticService {
@Override
public Map<String, Object> compareWithTemplateTileOL(String project, String title) {
return templateTitleService.compareWithTemplateTileOL(project, title);
return templateTitleService.compareWithTemplateTileOL(project, title, true);
}
/**
 * Forwards the call unchanged to
 * {@code templateTitleService.compareWithTemplateTileOL(project, title, isUse)}.
 *
 * @param project project to search in
 * @param title   title to match
 * @param isUse   passed through to the service as-is
 * @return whatever the template title service returns
 */
@Override
public Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse) {
return templateTitleService.compareWithTemplateTileOL(project, title, isUse);
}
@Override
......
......@@ -56,7 +56,7 @@ public interface TemplateTitleService {
* @param title 标题
* @return 返回值
*/
Map<String, Object> compareWithTemplateTileOL(String project, String title);
Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse);
/**
* 分页查询模板声量统计详细数据
......
......@@ -126,21 +126,20 @@ public class TemplateTitleServiceImpl implements TemplateTitleService {
}
@Override
public Map<String, Object> compareWithTemplateTileOL(String project, String title) {
public Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse) {
Map<String, Object> res = new HashMap<>(3);
res.put("isMatched", false);
if (StringUtils.isEmpty(title) || StringUtils.isEmpty(project)) {
res.put("errorMessage", "标题或项目为空");
return res;
}
Map<String, TemplateTitleVo> templateTitleVoMap = getTemplateTitleByProjectLive(project);
Map<String, TemplateTitleVo> templateTitleVoMap = isUse ? getTemplateTitleByProjectLive(project) : getTemplateTitleByProjectLost(project);
if (templateTitleVoMap.isEmpty()) {
res.put("errorMessage", "该项目组未有聚合集");
return res;
}
TemplateTitleVo titleVo = null;
double similarity = 0.0;
String group = null;
for (Map.Entry<String, TemplateTitleVo> entry : templateTitleVoMap.entrySet()) {
if (TemplateStatus.已重置 == entry.getValue().getStatus()) {
continue;
......@@ -150,7 +149,6 @@ public class TemplateTitleServiceImpl implements TemplateTitleService {
if (currentSimilarity > GenericAttribute.SIMILAR_STANDARD && currentSimilarity > similarity) {
similarity = currentSimilarity;
titleVo = entry.getValue();
group = entry.getKey();
}
}
if (Objects.nonNull(titleVo)) {
......
......@@ -35,15 +35,22 @@ public class Tools {
* @return true:符合 false:不符合
*/
public static boolean filterTag(List<String> tags, String mtag) {
boolean res = true;
if (tags != null) {
for (String tag : tags) {
if (mtag.equals(Tools.sortTag(tag))) {
return true;
// if (mtag.equals(Tools.sortTag(tag))) {
// return true;
// }
if (!res) {
break;
}
res = mtag.contains(tag);
}
if (tags.isEmpty() && "".equals(mtag)) {
return true;
}
return tags.isEmpty() && "".equals(mtag);
}
return true;
return res;
}
......
......@@ -24,7 +24,7 @@
<qbjc-bean.version>1.1.4.1-SNAPSHOT</qbjc-bean.version>
<kafka.version>2.4.1.RELEASE</kafka.version>
<base.version>2.0.0-SNAPSHOT</base.version>
<automatic.version>1.0.0.2-SNAPSHOT</automatic.version>
<automatic.version>1.0.0.3-SNAPSHOT</automatic.version>
<marker.version>1.2.3-SNAPSHOT</marker.version>
<filter.version>1.1.6-SNAPSHOT</filter.version>
<nlp-aggree.version>0.0.5-SNAPSHOT</nlp-aggree.version>
......
package com.zhiwei.middleware.automatic.son.task.holder;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.middleware.automatic.server.pojo.AutoTask;
import com.zhiwei.middleware.automatic.server.pojo.enums.TaskType;
import com.zhiwei.middleware.automatic.son.task.service.TaskService;
......
......@@ -26,6 +26,8 @@ import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.util.CollectionUtils;
import java.util.*;
import java.util.stream.Collectors;
......@@ -34,26 +36,22 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo
private static final Logger log = LogManager.getLogger(TaskServiceCommon.class);
private final static double NUMBER_SIMILAR_STANDARD = 0.29;
private final RedissonUtil redissonUtil;
private final TemplateTitleService templateTitleService;
private final DubboHandler dubboHandler;
private final TemplateRecordDao templateRecordDao;
private final ThreadPoolTaskExecutor autoMarkExecutor;
private final static String COUNT_KEY = "count";
public TaskServiceCommon(RedissonUtil redissonUtil, TemplateTitleService templateTitleService,
DubboHandler dubboHandler,
TemplateRecordDao templateRecordDao,
@Qualifier("autMarkExecutor") ThreadPoolTaskExecutor autoMarkExecutor) {
this.redissonUtil = redissonUtil;
this.templateTitleService = templateTitleService;
this.dubboHandler = dubboHandler;
this.templateRecordDao = templateRecordDao;
this.autoMarkExecutor = autoMarkExecutor;
taskCache(TaskType.COMMON_TWO, this::getMultiAutoInfo);
taskCache(TaskType.COMMON_ONE, this::getOneAutoInfo);
......@@ -154,7 +152,7 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo
* 项目自动标注
* @param groupMap 数据集 项目名分类
*/
private void projectAutoMark(Map<String, List<MarkInfo>> groupMap) {
public void projectAutoMark(Map<String, List<MarkInfo>> groupMap) {
for (Map.Entry<String, List<MarkInfo>> entry : groupMap.entrySet()) {
try {
Map<String, TemplateTitleVo> template = templateTitleService.getTemplateTitleByProjectLive(entry.getKey());
......@@ -263,9 +261,6 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo
// // 特征值记录
templateTitleService.insertTemplateRecord(new TemplateRecord(templateTitleVo.getId(), title, id, getUrl(markInfo), getPlatform(markInfo),
sourceObj.getString("source"), sourceObj.getString("real_source"), Tools.TIME_FORMAT.format(sourceObj.getLong("time")), updates[0]));
// 测试环境临时添加,用于对比
// templateRecordDao.tempRecord(new TemplateTempRecord(templateTitleVo.getId(), templateTitleVo.getTemplateTitle(), group, getUrl(markInfo),
// id, templateTitleVo.getMtag()));
return true;
} catch (Exception e) {
log.error("记录事件采集-标注数据特征值失败", e);
......@@ -305,19 +300,41 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo
continue;
}
String aggreTitle = templateTitleVo.getTemplateTitle();
// templateTitleVo.setTextNumbers(Tools.numberMatch(Tools.filterSymbol(aggreTitle)));
// 过滤掉标题里面的特殊符号
double similar = CosineSimilarity.calculateTextSimWithBrand(aggreTitle, title);
double currentSimilar = similarMap.get("similar") != null ? (double) similarMap.get("similar") : 0.0;
double similarStandard = textNumberMatch(templateTitleVo.getTextNumbers(), title) ? GenericAttribute.SIMILAR_STANDARD : GenericAttribute.SIMILAR_STANDARD - NUMBER_SIMILAR_STANDARD;
// 选取相似度最大的标注
if (similar >= GenericAttribute.SIMILAR_STANDARD && similar > currentSimilar) {
if (similar >= similarStandard && similar > currentSimilar) {
similarMap.put("similar", similar);
similarMap.put("aggreeTitle", aggreTitle);
}
if (similar == 1.0) {
break;
}
}
return similarMap;
}
public static MarkInfo getMarkInfo(JSONObject hit) {
/**
 * Checks whether the numbers extracted from {@code text} line up with {@code textNumber}.
 *
 * <p>The result is {@code true} only when both lists are non-empty, have the same size,
 * and every entry of {@code textNumber} occurs among the numbers extracted from
 * {@code text}. Membership is tested per entry, so repeated entries are not counted.
 *
 * @param textNumber expected numbers
 * @param text       text whose numbers are extracted with {@code Tools.numberMatch}
 * @return {@code true} if the conditions above hold, {@code false} otherwise
 */
private boolean textNumberMatch(List<String> textNumber, String text) {
    if (CollectionUtils.isEmpty(textNumber)) {
        return false;
    }
    List<String> found = Tools.numberMatch(text);
    boolean comparable = !CollectionUtils.isEmpty(found) && found.size() == textNumber.size();
    return comparable && found.containsAll(textNumber);
}
private static MarkInfo getMarkInfo(JSONObject hit) {
int c2 = Integer.parseInt(String.valueOf(hit.get("c2")));
switch (ClassB.TypeB.fromEncode(c2)){
case COMPLETE:
......
......@@ -43,6 +43,8 @@ public class TaskServiceTemplate extends BaseTaskTypePair<TaskServiceTemplate.Ta
private final Logger log = LogManager.getLogger(TaskServiceTemplate.class);
private static final List<String> FILTER_TITLE = Arrays.asList("Notitle");
private final TemplateTitleService templateTitleService;
private final EsDao esDao;
......@@ -245,7 +247,11 @@ public class TaskServiceTemplate extends BaseTaskTypePair<TaskServiceTemplate.Ta
if (title.length() < 6) {
continue;
}
TemplateTitleVo templateTitleVo = new TemplateTitleVo(title, tag, sourceList.get(result.getDataPoints().get(0)).getUrl());
// 指定标题 不进行聚合
if (FILTER_TITLE.contains(title)) {
continue;
}
TemplateTitleVo templateTitleVo = new TemplateTitleVo(title, tag, sourceList.get(result.getDataPoints().get(0)).getUrl(), Tools.numberMatch(title));
templateTitleVo.buildId(group);
aggregationTitleTagMap.put(title, templateTitleVo);
}
......
package com.zhiwei.middleware.automatic.son.util;
import com.alibaba.fastjson.JSON;
import com.zhiwei.middleware.automatic.server.pojo.AutoTask;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.FastDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Tools {
private static final Pattern SYMBOL_PATTERN = Pattern
.compile("[\\p{P}+~$`^=丨|<>~`$^+=|<>¥×\\s\u200B\u200C\u200D\u00A0\u0020\u3000]");
private static final Pattern NUMBER_PATTERN = Pattern.compile("\\d+");
public static final FastDateFormat TIME_FORMAT = FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss");
/**
* 是否为空,数据为空
......@@ -116,4 +119,13 @@ public class Tools {
}
return res;
}
/**
 * Collects every substring of {@code text} matched by {@code NUMBER_PATTERN}
 * (runs of digits), in order of appearance.
 *
 * @param text text to scan; may be {@code null}
 * @return a new mutable list of the matched digit runs; empty when {@code text} is
 *         {@code null} or contains no digits
 */
public static List<String> numberMatch(String text) {
    List<String> res = new ArrayList<>();
    // Pattern.matcher(null) throws NullPointerException; treat a missing text as "no numbers".
    if (text == null) {
        return res;
    }
    Matcher matcher = NUMBER_PATTERN.matcher(text);
    while (matcher.find()) {
        res.add(matcher.group());
    }
    return res;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment