Commit 8c21bc05 by shentao

Merge branch 'feature' into 'dev'

2024/6/7 原发溯源-关键词匹配逻辑调整、事件影响力更新结果导出表头调整 test

See merge request !533
parents 9347af14 bea1ff0b
......@@ -16,15 +16,15 @@ public class ExportBytedanceEventComputeResultDTO {
@ExcelProperty("事件名")
private String name;
@ExcelProperty("渠道指标")
@ExcelProperty("综合影响力")
private Double finalInf;
@ExcelProperty("渠道指数")
private Double inf;
@ExcelProperty("流量指")
@ExcelProperty("流量指")
private Double RMI;
@ExcelProperty("综合影响力")
private Double finalInf;
@ExcelProperty("传播量")
private Integer dataCount;
}
......@@ -103,6 +103,8 @@ public class MarkDataServiceImpl implements MarkDataService {
// Document field names requested for event fetches.
// NOTE(review): presumably used as the _source include list; the usage is outside this view - confirm.
private static final String[] EVENT_FETCH_SOURCE = new String[]{"id", "ind_full_text", "c5", "real_source", "source", "mtime", "time", "url", "mtag", "foreign", "brandkbs_mark_cache_maps"};
// Regex character class for cleaning origin-search keywords. It matches one punctuation character (\p{P}),
// one of the listed math/currency symbols, one whitespace character (\s plus the explicitly listed
// zero-width, no-break and ideographic spaces) or one digit (\d).
// getAllIndexSearchOriginQuery replaces every match with a single space before splitting the keyword into words.
private static final String ORIGIN_SEARCH_PATTERN = "[\\p{P}+~$`^=|<>~`$^+=|<>¥×\\s\u200B\u200C\u200D\u00A0\u0020\u3000\\d]";
// Injected from the configuration property istarshine.addIStarShineKSData.url.
@Value("${istarshine.addIStarShineKSData.url}")
private String addIStarShineKSDataUrl;
......@@ -3515,7 +3517,6 @@ public class MarkDataServiceImpl implements MarkDataService {
try {
BoolQueryBuilder query = getAllIndexSearchOriginQuery(dto);
String[] indexes = esClientDao.getAllIndexes(dto.getStartTime(), dto.getEndTime());
Long total = esClientDao.count(indexes, query, null);
// platform query
if (Objects.equals("其他", dto.getPlatforms().get(0))){
List<String> platforms = getWholeSearchPlatformPercentage(dto).stream().map(json -> json.getString("platform")).collect(Collectors.toList());
......@@ -3526,9 +3527,10 @@ public class MarkDataServiceImpl implements MarkDataService {
query.must(EsQueryTools.assemblePlatformQuery(GlobalPojo.PLATFORMS.stream()
.filter(platform -> Objects.equals(dto.getPlatforms().get(0), platform.getName())).collect(Collectors.toList())));
}
Long total = esClientDao.count(indexes, query, null);
int page = dto.getPage();
int pageSize = dto.getPageSize();
FieldSortBuilder sort = new FieldSortBuilder("time").order(SortOrder.ASC);
FieldSortBuilder sort = new FieldSortBuilder(GenericAttribute.ES_TIME).order(SortOrder.DESC);
SearchHits hits = esClientDao.searchHits(indexes, query, null, null, sort, (page - 1) * pageSize, pageSize, null);
List<JSONObject> list = new ArrayList<>();
for (SearchHit hit : hits) {
......@@ -3537,7 +3539,7 @@ public class MarkDataServiceImpl implements MarkDataService {
jsonObject.put("platform", baseMap.getPlatform());
jsonObject.put("source", baseMap.getSource());
jsonObject.put("realSource", baseMap.getRealSource());
jsonObject.put("title", baseMap.getTitle());
jsonObject.put("title", baseMap.getTitleNullOptionalContent());
jsonObject.put("url", baseMap.getUrl());
jsonObject.put("time", baseMap.getTime());
list.add(jsonObject);
......@@ -3549,17 +3551,45 @@ public class MarkDataServiceImpl implements MarkDataService {
return null;
}
/**
 * Builds the Elasticsearch query used by origin tracing over all indexes.
 *
 * <p>Side effect: the time window on {@code dto} is overwritten. The end time becomes the current
 * time and the start time becomes the end time minus {@code Constant.ONE_MONTH}; callers that read
 * {@code dto.getStartTime()} / {@code dto.getEndTime()} afterwards see this window.
 *
 * <p>The returned query requires all of the following:
 * <ul>
 *   <li>{@code time} lies inside the window (both bounds inclusive);</li>
 *   <li>the keyword matches by at least one of two conditions:
 *     <ol>
 *       <li>the cleaned keyword (symbols and digits replaced by spaces, then passed through
 *           {@code Tools.canonicalKeyword}) is split on spaces and every word must match:
 *           phrase match for all words but the last, phrase-prefix match for the last one;</li>
 *       <li>the raw keyword matches as a phrase prefix;</li>
 *     </ol>
 *   </li>
 *   <li>the platform clause assembled from {@code GlobalPojo.PLATFORMS}.</li>
 * </ul>
 *
 * @param dto search request; its keyword must not be null (it is dereferenced here), and its
 *            start/end time are overwritten
 * @return the assembled bool query
 */
private BoolQueryBuilder getAllIndexSearchOriginQuery(MarkSearchDTO dto) {
    BoolQueryBuilder query = QueryBuilders.boolQuery();

    // Time: always search the month ending now, regardless of what the caller supplied.
    dto.setEndTime(System.currentTimeMillis());
    dto.setStartTime(dto.getEndTime() - Constant.ONE_MONTH);
    query.must(QueryBuilders.rangeQuery("time").gte(dto.getStartTime()).lte(dto.getEndTime()));

    // Keyword: replace special symbols and digits with spaces, then normalize.
    String keyword = dto.getKeyword();
    String processedKeyword = Tools.canonicalKeyword(keyword.replaceAll(ORIGIN_SEARCH_PATTERN, " "));
    BoolQueryBuilder keywordQuery = QueryBuilders.boolQuery();

    // Keyword condition 1: spaces act as AND, so every word has to match.
    if (StringUtils.isNotEmpty(processedKeyword)) {
        BoolQueryBuilder allWordsQuery = QueryBuilders.boolQuery();
        String[] words = processedKeyword.split(" +");
        for (int i = 0; i < words.length; i++) {
            BoolQueryBuilder wordQuery = QueryBuilders.boolQuery();
            if (i == words.length - 1) {
                // The last word may be truncated, so it is matched as a prefix.
                wordQuery.must(QueryBuilders.matchPhrasePrefixQuery(GenericAttribute.ES_IND_FULL_TEXT, words[i]));
            } else {
                wordQuery.must(QueryBuilders.matchPhraseQuery(GenericAttribute.ES_IND_FULL_TEXT, words[i]));
            }
            allWordsQuery.must(wordQuery);
        }
        keywordQuery.should(allWordsQuery);
    }

    // Keyword condition 2: the raw keyword as a phrase prefix, so a truncated word still matches.
    BoolQueryBuilder rawKeywordQuery = QueryBuilders.boolQuery();
    rawKeywordQuery.must(QueryBuilders.matchPhrasePrefixQuery(GenericAttribute.ES_IND_FULL_TEXT, keyword));

    // Satisfying either keyword condition is enough. No unconditional must-clause is added to
    // keywordQuery, otherwise condition 1 could never widen the result set.
    keywordQuery.should(rawKeywordQuery).minimumShouldMatch(1);
    query.must(keywordQuery);

    // Platform
    BoolQueryBuilder platformQuery = EsQueryTools.assemblePlatformQuery(GlobalPojo.PLATFORMS);
    query.must(platformQuery);
    return query;
}
private JSONObject getDailyReportSummary(JSONObject yuqingAmount, JSONObject yuqingEmotion, JSONObject platformPercentage){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment