Commit 2a9733b4 by shentao

Merge branch 'release' into 'master'

Release

See merge request !91
parents f2c21a5a 2fedede7
...@@ -11,13 +11,13 @@ ...@@ -11,13 +11,13 @@
</parent> </parent>
<artifactId>middleware-automatic-center-autoconfigure</artifactId> <artifactId>middleware-automatic-center-autoconfigure</artifactId>
<version>1.0.0.1-SNAPSHOT</version> <version>1.0.0.3-SNAPSHOT</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<properties> <properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version> <java.version>1.8</java.version>
<automatic.version>1.0.0.2-SNAPSHOT</automatic.version> <automatic.version>1.0.0.3-SNAPSHOT</automatic.version>
</properties> </properties>
<dependencies> <dependencies>
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
</parent> </parent>
<artifactId>middleware-automatic-center-client</artifactId> <artifactId>middleware-automatic-center-client</artifactId>
<version>1.0.0.2-SNAPSHOT</version> <version>1.0.0.3-SNAPSHOT</version>
<properties> <properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
......
...@@ -53,6 +53,10 @@ public class AutoMaticClient { ...@@ -53,6 +53,10 @@ public class AutoMaticClient {
return autoMaticService.compareWithTemplateTileOL(project, title); return autoMaticService.compareWithTemplateTileOL(project, title);
} }
/**
 * Matches a title against the existing aggregated titles of a project by delegating to
 * {@code autoMaticService}.
 *
 * @param project project whose aggregated titles are searched
 * @param title   title to match
 * @param isUse   passed through unchanged to the service; selects which template set is
 *                compared (in use vs. deprecated)
 * @return the map returned by {@code autoMaticService}, unchanged
 */
public Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse) {
return autoMaticService.compareWithTemplateTileOL(project, title, isUse);
}
public PageVO<JSONObject> getTemplateDaySumData(int page, int pageSize, String templateId) { public PageVO<JSONObject> getTemplateDaySumData(int page, int pageSize, String templateId) {
return autoMaticService.getTemplateDaySumData(page, pageSize, templateId); return autoMaticService.getTemplateDaySumData(page, pageSize, templateId);
} }
......
...@@ -62,6 +62,17 @@ public interface AutoMaticService { ...@@ -62,6 +62,17 @@ public interface AutoMaticService {
* @return 返回值 * @return 返回值
*/ */
Map<String, Object> compareWithTemplateTileOL(String project, String title); Map<String, Object> compareWithTemplateTileOL(String project, String title);
/**
 * Matches a title online against the existing aggregated titles of a project group
 * (templates in use / deprecated).
 *
 * @param project project
 * @param title title
 * @param isUse {@code true} to match against templates in use, {@code false} for deprecated ones
 * @return the match result
 */
Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse);
/** /**
* 分页查询模板声量统计详细数据 * 分页查询模板声量统计详细数据
* @param templateId 模板id * @param templateId 模板id
......
package com.zhiwei.middleware.automatic.server.graphs;
import java.util.List;
import java.util.Map;
/**
 * Contract for a matching graph that is loaded from a list of source items and then queried
 * with free text.
 *
 * @param <T> type of the items the graph is built from
 * @param <O> type of the match results produced by {@link #find(String)}
 */
public interface GraphsServer<T, O> {
/**
 * Loads the given items into the graph.
 *
 * @param t items to build the graph from
 */
void addGraph(List<T> t);
/**
 * Searches the given text against the loaded graph.
 *
 * @param text text to search
 * @return the matches found for {@code text}
 */
List<O> find(String text);
}
package com.zhiwei.middleware.automatic.server.graphs;
import com.zhiwei.middleware.automatic.server.pojo.GroupTerm;
import com.zhiwei.middleware.automatic.server.pojo.MonitorKeyword;
import com.zhiwei.middleware.automatic.server.pojo.QbjcRuleMatchedInfo;
import org.apache.commons.lang3.StringUtils;
import java.util.*;
import java.util.stream.Collectors;
/**
 * {@link GraphsServer} implementation that matches text against monitor keyword rules.
 *
 * <p>Each rule keyword is an expression in which {@code |} separates alternatives (OR) and
 * runs of spaces separate tokens that must all be present (AND). Every token is registered in
 * the matching graph, and every token is mapped to the AND-groups that contain it.
 *
 * <p>NOTE(review): {@code graphs} and {@code terms} are plain fields assigned one after the
 * other in {@link #addGraph(List)}; if {@link #find(String)} can run concurrently with a
 * reload it may observe a mismatched pair. Confirm how callers synchronize.
 */
public class MonitorGraphsImpl implements GraphsServer<MonitorKeyword, QbjcRuleMatchedInfo> {

    /** Matching graph holding every individual keyword token. */
    private Graphs graphs;

    /** Maps a keyword token to the AND-groups (and their owning rule) that contain it. */
    private Map<String, List<GroupTerm>> terms;

    /** Creates an instance with an empty graph and no term bindings. */
    public MonitorGraphsImpl() {
        this.graphs = new Graphs();
        this.terms = new HashMap<>();
    }

    /**
     * Rebuilds the graph and the term bindings from the given rules, replacing whatever was
     * loaded before. Rules without a monitor level are ignored.
     *
     * @param monitorKeyword rules to load
     */
    @Override
    public void addGraph(List<MonitorKeyword> monitorKeyword) {
        Graphs tempGraphs = new Graphs();
        Map<String, List<GroupTerm>> tempTerms = new HashMap<>();
        monitorKeyword.forEach(keyword -> {
            if (null != keyword.getMonitorLevel()) {
                preGraphs(keyword.getKeywords(), tempGraphs);
                preTerms(keyword, tempTerms);
            }
        });
        // Build into temporaries first so a failure while parsing leaves the current state intact.
        graphs = tempGraphs;
        terms = tempTerms;
    }

    /**
     * Finds every rule AND-group whose tokens all occur in the given text.
     *
     * @param text text to match; it is lower-cased before matching
     * @return one {@link QbjcRuleMatchedInfo} per matched AND-group, each carrying a single hit
     *         info whose count is the smallest occurrence count among the group's tokens
     * @throws IllegalStateException if the graph reports a keyword that has no term binding
     */
    @Override
    public List<QbjcRuleMatchedInfo> find(String text) {
        text = StringUtils.lowerCase(text);
        List<QbjcRuleMatchedInfo> res = new ArrayList<>();
        // Raw matches from the graph.
        List<Keyword> results = graphs.find(text);
        // Matches grouped by their keyword token.
        Map<String, List<Keyword>> kResults = results.stream().collect(Collectors.groupingBy(Keyword::getKey));
        // Tokens already processed as the primary key, so that a group "A B" is not reported
        // once via A and again via B.
        Set<String> hasMatched = new HashSet<>();
        // Occurrence statistics per keyword token.
        Map<String, Integer> statis = graphs.change2Statistics(results);
        for (String keyword : statis.keySet()) {
            List<GroupTerm> candidates = terms.get(keyword);
            if (null == candidates) {
                throw new IllegalStateException("keyword不存在:" + keyword);
            }
            for (GroupTerm groupTerm : candidates) {
                // One hit set per AND-group: sharing a set across groups would leak the hits of
                // one rule (or of a failed group) into the hit info of another.
                Set<Keyword> hitKeywords = new HashSet<>();
                // -1 means "no token evaluated yet" or "group must be skipped".
                int count = -1;
                for (String checkWord : groupTerm.getAndKeywords()) {
                    // The group was already evaluated through an earlier primary token.
                    if (hasMatched.contains(checkWord)) {
                        count = -1;
                        break;
                    }
                    int current = statis.getOrDefault(checkWord, 0);
                    if (current > 0) {
                        hitKeywords.addAll(kResults.get(checkWord));
                    }
                    // Keep the minimum occurrence count over the group's tokens.
                    count = (count == -1 || current < count) ? current : count;
                }
                // A positive minimum means every token of the group occurred at least once.
                if (count > 0) {
                    res.add(toMatchedInfo(groupTerm, hitKeywords, count));
                }
            }
            hasMatched.add(keyword);
        }
        return res;
    }

    /** Builds the match result for one AND-group that was hit {@code count} times. */
    private QbjcRuleMatchedInfo toMatchedInfo(GroupTerm groupTerm, Set<Keyword> hitKeywords, int count) {
        MonitorKeyword monitorKeyword = groupTerm.getMonitorKeyword();
        QbjcRuleMatchedInfo ruleMatchedInfo = new QbjcRuleMatchedInfo();
        // Basic rule information.
        ruleMatchedInfo.setId(monitorKeyword.getId());
        ruleMatchedInfo.setProject(monitorKeyword.getProject());
        ruleMatchedInfo.setRuleType(QbjcRuleMatchedInfo.RuleType.getByName(monitorKeyword.getType()));
        ruleMatchedInfo.setChannels(monitorKeyword.getChannels());
        ruleMatchedInfo.setPlatforms(monitorKeyword.getPlatforms());
        ruleMatchedInfo.setMonitorLevel(monitorKeyword.getMonitorLevel());
        // Matched-keyword information.
        List<QbjcRuleMatchedInfo.HitInfo> infos = new ArrayList<>();
        infos.add(new QbjcRuleMatchedInfo.HitInfo(hitKeywords, groupTerm.getFullName(), count));
        ruleMatchedInfo.setHitInfos(infos);
        return ruleMatchedInfo;
    }

    /** Splits an already lower-cased keyword expression into its AND-groups of tokens. */
    private static List<String[]> splitAndGroups(String usedKeyword) {
        List<String[]> groups = new ArrayList<>();
        // "|" separates alternatives; runs of spaces separate the tokens of one AND-group.
        for (String andStr : usedKeyword.split("\\|")) {
            groups.add(andStr.trim().split(" +"));
        }
        return groups;
    }

    /**
     * Registers the term bindings of one rule: every token of every AND-group becomes a key
     * that points to that group, so any token can serve as the primary key during matching.
     */
    private void preTerms(MonitorKeyword monitorKeyword, Map<String, List<GroupTerm>> terms) {
        for (String rawKeyword : monitorKeyword.getKeywords()) {
            String usedKeyword = StringUtils.lowerCase(rawKeyword);
            for (String[] ands : splitAndGroups(usedKeyword)) {
                for (String str : ands) {
                    terms.computeIfAbsent(str, key -> new ArrayList<>())
                            .add(new GroupTerm(Arrays.asList(ands), usedKeyword, monitorKeyword));
                }
            }
        }
    }

    /**
     * Adds every token of the given keyword expressions to the matching graph.
     *
     * @param usedKeywords keyword expressions of one rule
     * @param graphs graph to add the tokens to
     */
    private void preGraphs(List<String> usedKeywords, Graphs graphs) {
        for (String rawKeyword : usedKeywords) {
            for (String[] ands : splitAndGroups(StringUtils.lowerCase(rawKeyword))) {
                for (String str : ands) {
                    graphs.addGraph(str);
                }
            }
        }
    }
}
...@@ -7,6 +7,7 @@ import java.math.BigInteger; ...@@ -7,6 +7,7 @@ import java.math.BigInteger;
import java.security.MessageDigest; import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException; import java.security.NoSuchAlgorithmException;
import java.util.Date; import java.util.Date;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
public class TemplateTitleVo implements Serializable { public class TemplateTitleVo implements Serializable {
...@@ -19,6 +20,9 @@ public class TemplateTitleVo implements Serializable { ...@@ -19,6 +20,9 @@ public class TemplateTitleVo implements Serializable {
private long daySum; private long daySum;
private String mtag; private String mtag;
private String url; private String url;
private List<String> textNumbers;
private TemplateStatus status; private TemplateStatus status;
public String getId() { public String getId() {
...@@ -99,6 +103,14 @@ public class TemplateTitleVo implements Serializable { ...@@ -99,6 +103,14 @@ public class TemplateTitleVo implements Serializable {
this.url = url; this.url = url;
} }
/**
 * Returns the {@code textNumbers} list held by this object.
 *
 * @return the stored list itself (not a copy); may be {@code null} if it was never set
 */
public List<String> getTextNumbers() {
return textNumbers;
}
/**
 * Replaces the {@code textNumbers} list held by this object.
 *
 * @param textNumbers list to store; the reference is kept as-is, without copying
 */
public void setTextNumbers(List<String> textNumbers) {
this.textNumbers = textNumbers;
}
public TemplateStatus getStatus() { public TemplateStatus getStatus() {
return status; return status;
} }
...@@ -108,7 +120,7 @@ public class TemplateTitleVo implements Serializable { ...@@ -108,7 +120,7 @@ public class TemplateTitleVo implements Serializable {
} }
public TemplateTitleVo(String templateTitle, String mtag, String url) { public TemplateTitleVo(String templateTitle, String mtag, String url, List<String> textNumbers) {
this.updateTime = new Date(); this.updateTime = new Date();
this.createTime = new Date(); this.createTime = new Date();
this.templateTitle = templateTitle; this.templateTitle = templateTitle;
...@@ -116,6 +128,7 @@ public class TemplateTitleVo implements Serializable { ...@@ -116,6 +128,7 @@ public class TemplateTitleVo implements Serializable {
this.daySum = 0L; this.daySum = 0L;
this.mtag = mtag; this.mtag = mtag;
this.url = url; this.url = url;
this.textNumbers = textNumbers;
this.status = TemplateStatus.运行中; this.status = TemplateStatus.运行中;
} }
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
<qbjc-bean.version>1.1.4.1-SNAPSHOT</qbjc-bean.version> <qbjc-bean.version>1.1.4.1-SNAPSHOT</qbjc-bean.version>
<nlp-aggree.version>0.0.5-SNAPSHOT</nlp-aggree.version> <nlp-aggree.version>0.0.5-SNAPSHOT</nlp-aggree.version>
<dubbo-server.version>2.7.4.1</dubbo-server.version> <dubbo-server.version>2.7.4.1</dubbo-server.version>
<automatic.version>1.0.0.2-SNAPSHOT</automatic.version> <automatic.version>1.0.0.3-SNAPSHOT</automatic.version>
<base.version>2.0.0-SNAPSHOT</base.version> <base.version>2.0.0-SNAPSHOT</base.version>
</properties> </properties>
......
...@@ -69,6 +69,17 @@ public interface AutoMaticService { ...@@ -69,6 +69,17 @@ public interface AutoMaticService {
*/ */
Map<String, Object> compareWithTemplateTileOL(String project, String title); Map<String, Object> compareWithTemplateTileOL(String project, String title);
/**
 * Matches a title online against the existing aggregated titles of a project group
 * (templates in use / deprecated).
 *
 * @param project project
 * @param title title
 * @param isUse {@code true} to match against templates in use, {@code false} for deprecated ones
 * @return the match result
 */
Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse);
/** /**
* 分页查询模板声量统计详细数据 * 分页查询模板声量统计详细数据
* @param templateId 模板id * @param templateId 模板id
......
...@@ -68,7 +68,7 @@ public class AutoMaticServiceImpl implements AutoMaticService { ...@@ -68,7 +68,7 @@ public class AutoMaticServiceImpl implements AutoMaticService {
} }
@Override @Override
public Map<String, Object> getTemplateTitleByProjectByLive( String project, String searchInfo, List<String> tags, long startTime, public Map<String, Object> getTemplateTitleByProjectByLive(String project, String searchInfo, List<String> tags, long startTime,
long endTime, int page, int size, String orderField, boolean isAsc) { long endTime, int page, int size, String orderField, boolean isAsc) {
return templateTitleService.searchTemplateTitle(GenericAttribute.REDIS_MAP_KEY, project, searchInfo, tags, startTime, endTime, page, size, orderField, isAsc); return templateTitleService.searchTemplateTitle(GenericAttribute.REDIS_MAP_KEY, project, searchInfo, tags, startTime, endTime, page, size, orderField, isAsc);
} }
...@@ -86,7 +86,12 @@ public class AutoMaticServiceImpl implements AutoMaticService { ...@@ -86,7 +86,12 @@ public class AutoMaticServiceImpl implements AutoMaticService {
@Override @Override
public Map<String, Object> compareWithTemplateTileOL(String project, String title) { public Map<String, Object> compareWithTemplateTileOL(String project, String title) {
return templateTitleService.compareWithTemplateTileOL(project, title); return templateTitleService.compareWithTemplateTileOL(project, title, true);
}
@Override
public Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse) {
return templateTitleService.compareWithTemplateTileOL(project, title, isUse);
} }
@Override @Override
......
...@@ -56,7 +56,7 @@ public interface TemplateTitleService { ...@@ -56,7 +56,7 @@ public interface TemplateTitleService {
* @param title 标题 * @param title 标题
* @return 返回值 * @return 返回值
*/ */
Map<String, Object> compareWithTemplateTileOL(String project, String title); Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse);
/** /**
* 分页查询模板声量统计详细数据 * 分页查询模板声量统计详细数据
......
...@@ -126,21 +126,20 @@ public class TemplateTitleServiceImpl implements TemplateTitleService { ...@@ -126,21 +126,20 @@ public class TemplateTitleServiceImpl implements TemplateTitleService {
} }
@Override @Override
public Map<String, Object> compareWithTemplateTileOL(String project, String title) { public Map<String, Object> compareWithTemplateTileOL(String project, String title, boolean isUse) {
Map<String, Object> res = new HashMap<>(3); Map<String, Object> res = new HashMap<>(3);
res.put("isMatched", false); res.put("isMatched", false);
if (StringUtils.isEmpty(title) || StringUtils.isEmpty(project)) { if (StringUtils.isEmpty(title) || StringUtils.isEmpty(project)) {
res.put("errorMessage", "标题或项目为空"); res.put("errorMessage", "标题或项目为空");
return res; return res;
} }
Map<String, TemplateTitleVo> templateTitleVoMap = getTemplateTitleByProjectLive(project); Map<String, TemplateTitleVo> templateTitleVoMap = isUse ? getTemplateTitleByProjectLive(project) : getTemplateTitleByProjectLost(project);
if (templateTitleVoMap.isEmpty()) { if (templateTitleVoMap.isEmpty()) {
res.put("errorMessage", "该项目组未有聚合集"); res.put("errorMessage", "该项目组未有聚合集");
return res; return res;
} }
TemplateTitleVo titleVo = null; TemplateTitleVo titleVo = null;
double similarity = 0.0; double similarity = 0.0;
String group = null;
for (Map.Entry<String, TemplateTitleVo> entry : templateTitleVoMap.entrySet()) { for (Map.Entry<String, TemplateTitleVo> entry : templateTitleVoMap.entrySet()) {
if (TemplateStatus.已重置 == entry.getValue().getStatus()) { if (TemplateStatus.已重置 == entry.getValue().getStatus()) {
continue; continue;
...@@ -150,7 +149,6 @@ public class TemplateTitleServiceImpl implements TemplateTitleService { ...@@ -150,7 +149,6 @@ public class TemplateTitleServiceImpl implements TemplateTitleService {
if (currentSimilarity > GenericAttribute.SIMILAR_STANDARD && currentSimilarity > similarity) { if (currentSimilarity > GenericAttribute.SIMILAR_STANDARD && currentSimilarity > similarity) {
similarity = currentSimilarity; similarity = currentSimilarity;
titleVo = entry.getValue(); titleVo = entry.getValue();
group = entry.getKey();
} }
} }
if (Objects.nonNull(titleVo)) { if (Objects.nonNull(titleVo)) {
......
...@@ -35,15 +35,22 @@ public class Tools { ...@@ -35,15 +35,22 @@ public class Tools {
* @return true:符合 false:不符合 * @return true:符合 false:不符合
*/ */
public static boolean filterTag(List<String> tags, String mtag) { public static boolean filterTag(List<String> tags, String mtag) {
boolean res = true;
if (tags != null) { if (tags != null) {
for (String tag : tags) { for (String tag : tags) {
if (mtag.equals(Tools.sortTag(tag))) { // if (mtag.equals(Tools.sortTag(tag))) {
return true; // return true;
// }
if (!res) {
break;
} }
res = mtag.contains(tag);
}
if (tags.isEmpty() && "".equals(mtag)) {
return true;
} }
return tags.isEmpty() && "".equals(mtag);
} }
return true; return res;
} }
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
<qbjc-bean.version>1.1.4.1-SNAPSHOT</qbjc-bean.version> <qbjc-bean.version>1.1.4.1-SNAPSHOT</qbjc-bean.version>
<kafka.version>2.4.1.RELEASE</kafka.version> <kafka.version>2.4.1.RELEASE</kafka.version>
<base.version>2.0.0-SNAPSHOT</base.version> <base.version>2.0.0-SNAPSHOT</base.version>
<automatic.version>1.0.0.2-SNAPSHOT</automatic.version> <automatic.version>1.0.0.3-SNAPSHOT</automatic.version>
<marker.version>1.2.3-SNAPSHOT</marker.version> <marker.version>1.2.3-SNAPSHOT</marker.version>
<filter.version>1.1.6-SNAPSHOT</filter.version> <filter.version>1.1.6-SNAPSHOT</filter.version>
<nlp-aggree.version>0.0.5-SNAPSHOT</nlp-aggree.version> <nlp-aggree.version>0.0.5-SNAPSHOT</nlp-aggree.version>
......
package com.zhiwei.middleware.automatic.son.task.holder; package com.zhiwei.middleware.automatic.son.task.holder;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.middleware.automatic.server.pojo.AutoTask; import com.zhiwei.middleware.automatic.server.pojo.AutoTask;
import com.zhiwei.middleware.automatic.server.pojo.enums.TaskType; import com.zhiwei.middleware.automatic.server.pojo.enums.TaskType;
import com.zhiwei.middleware.automatic.son.task.service.TaskService; import com.zhiwei.middleware.automatic.son.task.service.TaskService;
......
...@@ -26,6 +26,8 @@ import org.springframework.beans.factory.annotation.Qualifier; ...@@ -26,6 +26,8 @@ import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.tuple.Pair;
import org.springframework.util.CollectionUtils;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
...@@ -34,26 +36,22 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo ...@@ -34,26 +36,22 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo
private static final Logger log = LogManager.getLogger(TaskServiceCommon.class); private static final Logger log = LogManager.getLogger(TaskServiceCommon.class);
private final static double NUMBER_SIMILAR_STANDARD = 0.29;
private final RedissonUtil redissonUtil; private final RedissonUtil redissonUtil;
private final TemplateTitleService templateTitleService; private final TemplateTitleService templateTitleService;
private final DubboHandler dubboHandler; private final DubboHandler dubboHandler;
private final TemplateRecordDao templateRecordDao;
private final ThreadPoolTaskExecutor autoMarkExecutor; private final ThreadPoolTaskExecutor autoMarkExecutor;
private final static String COUNT_KEY = "count";
public TaskServiceCommon(RedissonUtil redissonUtil, TemplateTitleService templateTitleService, public TaskServiceCommon(RedissonUtil redissonUtil, TemplateTitleService templateTitleService,
DubboHandler dubboHandler, DubboHandler dubboHandler,
TemplateRecordDao templateRecordDao,
@Qualifier("autMarkExecutor") ThreadPoolTaskExecutor autoMarkExecutor) { @Qualifier("autMarkExecutor") ThreadPoolTaskExecutor autoMarkExecutor) {
this.redissonUtil = redissonUtil; this.redissonUtil = redissonUtil;
this.templateTitleService = templateTitleService; this.templateTitleService = templateTitleService;
this.dubboHandler = dubboHandler; this.dubboHandler = dubboHandler;
this.templateRecordDao = templateRecordDao;
this.autoMarkExecutor = autoMarkExecutor; this.autoMarkExecutor = autoMarkExecutor;
taskCache(TaskType.COMMON_TWO, this::getMultiAutoInfo); taskCache(TaskType.COMMON_TWO, this::getMultiAutoInfo);
taskCache(TaskType.COMMON_ONE, this::getOneAutoInfo); taskCache(TaskType.COMMON_ONE, this::getOneAutoInfo);
...@@ -154,7 +152,7 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo ...@@ -154,7 +152,7 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo
* 项目自动标注 * 项目自动标注
* @param groupMap 数据集 项目名分类 * @param groupMap 数据集 项目名分类
*/ */
private void projectAutoMark(Map<String, List<MarkInfo>> groupMap) { public void projectAutoMark(Map<String, List<MarkInfo>> groupMap) {
for (Map.Entry<String, List<MarkInfo>> entry : groupMap.entrySet()) { for (Map.Entry<String, List<MarkInfo>> entry : groupMap.entrySet()) {
try { try {
Map<String, TemplateTitleVo> template = templateTitleService.getTemplateTitleByProjectLive(entry.getKey()); Map<String, TemplateTitleVo> template = templateTitleService.getTemplateTitleByProjectLive(entry.getKey());
...@@ -263,9 +261,6 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo ...@@ -263,9 +261,6 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo
// // 特征值记录 // // 特征值记录
templateTitleService.insertTemplateRecord(new TemplateRecord(templateTitleVo.getId(), title, id, getUrl(markInfo), getPlatform(markInfo), templateTitleService.insertTemplateRecord(new TemplateRecord(templateTitleVo.getId(), title, id, getUrl(markInfo), getPlatform(markInfo),
sourceObj.getString("source"), sourceObj.getString("real_source"), Tools.TIME_FORMAT.format(sourceObj.getLong("time")), updates[0])); sourceObj.getString("source"), sourceObj.getString("real_source"), Tools.TIME_FORMAT.format(sourceObj.getLong("time")), updates[0]));
// 测试环境临时添加,用于对比
// templateRecordDao.tempRecord(new TemplateTempRecord(templateTitleVo.getId(), templateTitleVo.getTemplateTitle(), group, getUrl(markInfo),
// id, templateTitleVo.getMtag()));
return true; return true;
} catch (Exception e) { } catch (Exception e) {
log.error("记录事件采集-标注数据特征值失败", e); log.error("记录事件采集-标注数据特征值失败", e);
...@@ -305,19 +300,41 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo ...@@ -305,19 +300,41 @@ public class TaskServiceCommon extends BaseTaskTypePair<TaskServiceCommon.TaskCo
continue; continue;
} }
String aggreTitle = templateTitleVo.getTemplateTitle(); String aggreTitle = templateTitleVo.getTemplateTitle();
// templateTitleVo.setTextNumbers(Tools.numberMatch(Tools.filterSymbol(aggreTitle)));
// 过滤掉标题里面的特殊符号 // 过滤掉标题里面的特殊符号
double similar = CosineSimilarity.calculateTextSimWithBrand(aggreTitle, title); double similar = CosineSimilarity.calculateTextSimWithBrand(aggreTitle, title);
double currentSimilar = similarMap.get("similar") != null ? (double) similarMap.get("similar") : 0.0; double currentSimilar = similarMap.get("similar") != null ? (double) similarMap.get("similar") : 0.0;
double similarStandard = textNumberMatch(templateTitleVo.getTextNumbers(), title) ? GenericAttribute.SIMILAR_STANDARD : GenericAttribute.SIMILAR_STANDARD - NUMBER_SIMILAR_STANDARD;
// 选取相似度最大的标注 // 选取相似度最大的标注
if (similar >= GenericAttribute.SIMILAR_STANDARD && similar > currentSimilar) { if (similar >= similarStandard && similar > currentSimilar) {
similarMap.put("similar", similar); similarMap.put("similar", similar);
similarMap.put("aggreeTitle", aggreTitle); similarMap.put("aggreeTitle", aggreTitle);
} }
if (similar == 1.0) {
break;
}
} }
return similarMap; return similarMap;
} }
public static MarkInfo getMarkInfo(JSONObject hit) { private boolean textNumberMatch(List<String> textNumber, String text) {
boolean res = false;
if (CollectionUtils.isEmpty(textNumber)) {
return res;
}
List<String> numberMatch = Tools.numberMatch(text);
if (CollectionUtils.isEmpty(numberMatch) || numberMatch.size() != textNumber.size()) {
return res;
}
for (String number : textNumber) {
if (!numberMatch.contains(number)) {
return res;
}
}
return true;
}
private static MarkInfo getMarkInfo(JSONObject hit) {
int c2 = Integer.parseInt(String.valueOf(hit.get("c2"))); int c2 = Integer.parseInt(String.valueOf(hit.get("c2")));
switch (ClassB.TypeB.fromEncode(c2)){ switch (ClassB.TypeB.fromEncode(c2)){
case COMPLETE: case COMPLETE:
......
...@@ -43,6 +43,8 @@ public class TaskServiceTemplate extends BaseTaskTypePair<TaskServiceTemplate.Ta ...@@ -43,6 +43,8 @@ public class TaskServiceTemplate extends BaseTaskTypePair<TaskServiceTemplate.Ta
private final Logger log = LogManager.getLogger(TaskServiceTemplate.class); private final Logger log = LogManager.getLogger(TaskServiceTemplate.class);
private static final List<String> FILTER_TITLE = Arrays.asList("Notitle");
private final TemplateTitleService templateTitleService; private final TemplateTitleService templateTitleService;
private final EsDao esDao; private final EsDao esDao;
...@@ -245,7 +247,11 @@ public class TaskServiceTemplate extends BaseTaskTypePair<TaskServiceTemplate.Ta ...@@ -245,7 +247,11 @@ public class TaskServiceTemplate extends BaseTaskTypePair<TaskServiceTemplate.Ta
if (title.length() < 6) { if (title.length() < 6) {
continue; continue;
} }
TemplateTitleVo templateTitleVo = new TemplateTitleVo(title, tag, sourceList.get(result.getDataPoints().get(0)).getUrl()); // 指定标题 不进行聚合
if (FILTER_TITLE.contains(title)) {
continue;
}
TemplateTitleVo templateTitleVo = new TemplateTitleVo(title, tag, sourceList.get(result.getDataPoints().get(0)).getUrl(), Tools.numberMatch(title));
templateTitleVo.buildId(group); templateTitleVo.buildId(group);
aggregationTitleTagMap.put(title, templateTitleVo); aggregationTitleTagMap.put(title, templateTitleVo);
} }
......
package com.zhiwei.middleware.automatic.son.util; package com.zhiwei.middleware.automatic.son.util;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
import com.zhiwei.middleware.automatic.server.pojo.AutoTask;
import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MapUtils; import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.FastDateFormat; import org.apache.commons.lang3.time.FastDateFormat;
import java.util.*; import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
public class Tools { public class Tools {
private static final Pattern SYMBOL_PATTERN = Pattern private static final Pattern SYMBOL_PATTERN = Pattern
.compile("[\\p{P}+~$`^=丨|<>~`$^+=|<>¥×\\s\u200B\u200C\u200D\u00A0\u0020\u3000]"); .compile("[\\p{P}+~$`^=丨|<>~`$^+=|<>¥×\\s\u200B\u200C\u200D\u00A0\u0020\u3000]");
private static final Pattern NUMBER_PATTERN = Pattern.compile("\\d+");
public static final FastDateFormat TIME_FORMAT = FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss"); public static final FastDateFormat TIME_FORMAT = FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss");
/** /**
* 是否为空,数据为空 * 是否为空,数据为空
...@@ -116,4 +119,13 @@ public class Tools { ...@@ -116,4 +119,13 @@ public class Tools {
} }
return res; return res;
} }
/**
 * Extracts every run of consecutive digits from the given text, in order of appearance.
 *
 * @param text text to scan; may be {@code null}
 * @return a new mutable list of the substrings matched by {@code NUMBER_PATTERN}; empty when
 *         {@code text} is {@code null} or contains no digits
 */
public static List<String> numberMatch(String text) {
    List<String> res = new ArrayList<>();
    if (text == null) {
        // Pattern.matcher(null) throws NullPointerException; treat missing text as "no numbers".
        return res;
    }
    Matcher matcher = NUMBER_PATTERN.matcher(text);
    while (matcher.find()) {
        res.add(matcher.group());
    }
    return res;
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment