Commit 63816bf1 by shentao

2018/7/4: 1. 自动标注加入; 2. es搜索语句优化

parent 46e516cf
......@@ -121,6 +121,12 @@
<artifactId>fastjson</artifactId>
<version>1.2.34</version>
</dependency>
<!-- 自动标注中间件 -->
<dependency>
<groupId>com.zhiwei.middleware</groupId>
<artifactId>automaticmark-client</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
</dependencies>
......
......@@ -43,6 +43,10 @@ public class ES4RedisStart {
// 遍历项目
for (Project project : projects) {
// if(!project.getProjectName().equals("证监会")) {
// continue;
// }
/**
* 项目全部平台(公共+私有)
......
......@@ -19,6 +19,7 @@ import com.zhiwei.messageflow.bean.MediaMessage;
import com.zhiwei.messageflow.bean.VideoMessage;
import com.zhiwei.messageflow.bean.WeiboMessage;
import com.zhiwei.messageflow.bean.ZhihuMessage;
import com.zhiwei.messageflow.es.service.AutoMarkService;
import com.zhiwei.messageflow.es.service.EarlyWarningService;
import com.zhiwei.messageflow.mongo.bean.KeywordNew;
import com.zhiwei.messageflow.mongo.bean.NoiseRule;
......@@ -36,6 +37,9 @@ import com.zhiwei.messageflow.service.DisposeMessageService;
public class ES4RedisTask {
private final static Logger log = LoggerFactory.getLogger(ES4RedisTask.class);
@Autowired
private AutoMarkService autoMarkService;
@Autowired
private DisposeMessageService disposeMessageService;
......@@ -145,6 +149,10 @@ public class ES4RedisTask {
}
allkeywords.addAll(kwn.getKeyWords());
}
//项目关键词为空
if(allkeywords.isEmpty()) {
continue;
}
// 根据不同平台获取数据(同一方法,统一封装为消息流实体
......@@ -159,7 +167,10 @@ public class ES4RedisTask {
// log.info("{}平台{}关键字词组无消息", platformName, allkeytitle);
continue;
}
//自动标注
autoMarkService.autoMarkMessages(messages,project);
// log.info("{}平台{}关键词数据获取{}条", platformName, "全部", messages.size());
// 记录新的rsid
......
......@@ -30,7 +30,8 @@ public class ES4RedisThreadNew extends Thread {
// 单个平台单个关键词组每次查询数量
private static final int count = 300;
// private static final int count = 50;
// private static final int max_Thread_num = 40;
// private static int Thread_num = 0;
// private static final int max_Running_num = 3;
......
package com.zhiwei.messageflow.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.PropertySource;
import org.springframework.stereotype.Component;
import lombok.Data;
import lombok.ToString;
/**
 * Middleware configuration holder.
 *
 * <p>Bound from properties carrying the {@code middleware} prefix, with
 * {@code classpath:middleware.properties} registered as a property source.
 *
 * @author shentao
 * @date 2018-06-27 11:23:46
 */
@Data
@ToString
@Component
@Configuration
@ConfigurationProperties(prefix = "middleware")
@PropertySource(value = "classpath:middleware.properties")
public class MiddlewareConfig {

    /**
     * ZooKeeper connection string used by middleware clients.
     *
     * <p>Kept {@code public static} so existing {@code MiddlewareConfig.zookeeperIp}
     * readers keep compiling. It is only populated once this bean has been bound,
     * so it must not be read from static initializers.
     */
    public static String zookeeperIp;

    /**
     * Returns the configured ZooKeeper connection string.
     *
     * @return the bound value, or {@code null} if binding has not happened yet
     */
    public String getZookeeperIp() {
        return zookeeperIp;
    }

    /**
     * Instance setter that gives property binding a way to reach the static field.
     *
     * <p>Lombok does not generate accessors for static fields, so without this
     * explicit setter the {@code middleware.zookeeperIp} property was never applied.
     *
     * @param zookeeperIp the ZooKeeper connection string from configuration
     */
    public void setZookeeperIp(String zookeeperIp) {
        MiddlewareConfig.zookeeperIp = zookeeperIp;
    }
}
package com.zhiwei.messageflow.es.service;
import java.util.List;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.messageflow.mongo.bean.Project;
/**
 * Service for automatically marking (labelling) messages.
 *
 * @author shentao
 * @date 2018-06-27 10:42:49
 */
public interface AutoMarkService {
/**
 * Automatically marks the given messages on behalf of a project.
 *
 * @param messages the messages to mark, as JSON objects
 * @param project the project the messages belong to
 */
void autoMarkMessages(List<JSONObject> messages, Project project);
}
package com.zhiwei.messageflow.es.service.impl;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.messageflow.config.MiddlewareConfig;
import com.zhiwei.messageflow.es.service.AutoMarkService;
import com.zhiwei.messageflow.mongo.bean.Project;
import com.zhiwei.messageflow.util.MatchingInfoUtil;
import com.zhiwei.middleware.automaticmark.Service.AutomaticMarkClient;
/**
 * {@link AutoMarkService} implementation that submits eligible messages of a
 * project to the automatic-mark middleware client.
 *
 * <p>For now only messages whose {@code markPt} is {@code "网媒"} are submitted,
 * and among those the ones classified as {@code "问答"} or {@code "贴吧论坛"}
 * are excluded.
 */
@Component
public class AutoMarkServiceImpl implements AutoMarkService {

    // NOTE(review): the ZooKeeper address is hard-coded although MiddlewareConfig is
    // imported by this file; consider wiring the address from configuration instead.
    private static final AutomaticMarkClient CLIENT =
            AutomaticMarkClient.getClient("zookeeper://192.168.0.234:2181");

    /** Value of the {@code markPt} field that selects a message for marking. */
    private static final String MEDIA_MARK_PT = "网媒";

    /** Second argument passed to the client's {@code autoMark} call. */
    private static final String MEDIA_MARK_TYPE = "media";

    /** Matches markup tags in titles; compiled once instead of once per message. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<[.[^>]]*>");

    /**
     * Submits the eligible messages of {@code project} for automatic marking.
     *
     * <p>Does nothing when there are no messages, when the project is {@code null},
     * or when the project does not have auto-marking enabled. Messages without a
     * {@code markPt} or without a {@code title} are skipped rather than causing a
     * {@link NullPointerException}. The client is not called for an empty batch.
     *
     * @param messages the candidate messages; may be {@code null} or empty
     * @param project the project whose name is sent as the {@code markGroup}
     */
    @Override
    public void autoMarkMessages(List<JSONObject> messages, Project project) {
        if (messages == null || messages.isEmpty() || project == null
                || !Boolean.TRUE.equals(project.getIsAutoMark())) {
            return;
        }

        List<DBObject> markRequests = new ArrayList<>();
        for (JSONObject msg : messages) {
            if (msg == null || !MEDIA_MARK_PT.equals(msg.getString("markPt")) || !canbeAutoMark(msg)) {
                continue;
            }
            String title = msg.getString("title");
            if (title == null) {
                // No title text to send for marking; skip instead of failing the batch.
                continue;
            }
            DBObject request = new BasicDBObject();
            request.put("_id", msg.getString("id"));
            request.put("title", TAG_PATTERN.matcher(title).replaceAll(""));
            request.put("markGroup", project.getProjectName());
            markRequests.add(request);
        }

        if (markRequests.isEmpty()) {
            return;
        }
        CLIENT.autoMark(markRequests, MEDIA_MARK_TYPE);
    }

    /**
     * Tells whether a message may be auto-marked: everything except the
     * {@code "问答"} and {@code "贴吧论坛"} categories.
     *
     * <p>A {@code null} category from the helper is treated as eligible.
     *
     * @param msg the message to classify
     * @return {@code false} for the two excluded categories, {@code true} otherwise
     */
    private boolean canbeAutoMark(JSONObject msg) {
        // NOTE(review): "type" is passed for both of the first two arguments, as in
        // the original code; confirm against MatchingInfoUtil.getBriefMediaPt.
        String pt = MatchingInfoUtil.getBriefMediaPt(msg.getString("type"), msg.getString("type"),
                msg.getString("source"));
        return !"问答".equals(pt) && !"贴吧论坛".equals(pt);
    }
}
......@@ -130,7 +130,7 @@ public class ES4BeanServiceImpl implements ES4BeanService {
messages = noiseProcessingService.allDenoising(noiseRules, searchHits, platform, project);
} catch (Exception e) {
log.error(e.getStackTrace() + " " + e.getMessage());
log.error("error:",e);
}
return messages;
......
......@@ -465,7 +465,9 @@ public class ESQueryUtil {
}
queryBuilder.should(mixboolQueryBuilder);
} else {
ESQueryUtil.matchPhraseQueryFields(queryBuilder, "should", keyword, fieldlist);
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
ESQueryUtil.matchPhraseQueryFields(boolQueryBuilder, "should", keyword, fieldlist);
queryBuilder.should(boolQueryBuilder);
}
}
return queryBuilder;
......
middleware.zookeeperIp=zookeeper://192.168.0.234:2181
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment