Commit c5040a08 by 303514581@qq.com

2019/5/15 自动标注启动改配置文件,词频计算添加

parent e3ceb231
...@@ -351,11 +351,6 @@ public class ES4RedisTask { ...@@ -351,11 +351,6 @@ public class ES4RedisTask {
continue; continue;
} }
//合并子词组
// log.info("in messages:{};messageskey{}", messages.size(), messageskey.size());
messages = disposeMessageService.accumulateMessage(messages,messageskey);
// log.info("out messages:{};messageskey{}", messages.size(), messageskey.size());
num += messageskey.size(); num += messageskey.size();
// 记录新的rsid // 记录新的rsid
keyrsid = ramkey.getRsid(); keyrsid = ramkey.getRsid();
...@@ -364,6 +359,11 @@ public class ES4RedisTask { ...@@ -364,6 +359,11 @@ public class ES4RedisTask {
redisService.setMessage2Redis(redisKey, messageskey, keywordscount); redisService.setMessage2Redis(redisKey, messageskey, keywordscount);
newRsidMap.put(redisKey, Integer.valueOf(keyrsid.toString())); newRsidMap.put(redisKey, Integer.valueOf(keyrsid.toString()));
//合并子词组
// log.info("in messages:{};messageskey{}", messages.size(), messageskey.size());
messages = disposeMessageService.accumulateMessage(messages,messageskey);
// log.info("out messages:{};messageskey{}", messages.size(), messageskey.size());
} // 遍历关键词组 } // 遍历关键词组
// 向redis写入数据 // 向redis写入数据
......
...@@ -3,6 +3,10 @@ package com.zhiwei.messageflow.es.service.impl; ...@@ -3,6 +3,10 @@ package com.zhiwei.messageflow.es.service.impl;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.PropertySource;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
...@@ -15,8 +19,14 @@ import com.zhiwei.messageflow.util.MatchingInfoUtil; ...@@ -15,8 +19,14 @@ import com.zhiwei.messageflow.util.MatchingInfoUtil;
import com.zhiwei.middleware.automaticmark.Service.AutomaticMarkClient; import com.zhiwei.middleware.automaticmark.Service.AutomaticMarkClient;
@Component @Component
@Configuration
@ConfigurationProperties(prefix = "middleware")
@PropertySource(value = "classpath:middleware.properties")
public class AutoMarkServiceImpl implements AutoMarkService { public class AutoMarkServiceImpl implements AutoMarkService {
@Value(("${middleware.isAutoMark}"))
private boolean isAutoMark;
private static AutomaticMarkClient client = AutomaticMarkClient.getClient(MiddlewareConfig.zookeeperIp); private static AutomaticMarkClient client = AutomaticMarkClient.getClient(MiddlewareConfig.zookeeperIp);
// private static AutomaticMarkClient client = AutomaticMarkClient.getClient("zookeeper://192.168.0.234:2181"); // private static AutomaticMarkClient client = AutomaticMarkClient.getClient("zookeeper://192.168.0.234:2181");
...@@ -49,12 +59,16 @@ public class AutoMarkServiceImpl implements AutoMarkService { ...@@ -49,12 +59,16 @@ public class AutoMarkServiceImpl implements AutoMarkService {
// System.err.println(dbObject.get("_id").toString()+"title:"+dbObject.get("title").toString()+"company"+dbObject.get("markGroup").toString()); // System.err.println(dbObject.get("_id").toString()+"title:"+dbObject.get("title").toString()+"company"+dbObject.get("markGroup").toString());
list.add(dbObject); list.add(dbObject);
} }
if (isAutoMark)
System.out.println("isAutoMark:"+isAutoMark);
client.autoMark(list, "media"); client.autoMark(list, "media");
} }
} }
/** /**
* 判断是否能被自动标注(问答,论坛除外 * 判断是否能被自动标注(问答,论坛除外
*
* @Title: canbeAutoMark * @Title: canbeAutoMark
* @Description: TODO(这里用一句话描述这个方法的作用) * @Description: TODO(这里用一句话描述这个方法的作用)
* @param @param msg * @param @param msg
......
package com.zhiwei.messageflow.es.service.impl; package com.zhiwei.messageflow.es.service.impl;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
...@@ -809,6 +810,7 @@ public class HighLightFillingServiceImpl implements HighLightFillingService { ...@@ -809,6 +810,7 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
String roottext = String.valueOf(sourceHitMap.get("roottext")); String roottext = String.valueOf(sourceHitMap.get("roottext"));
Map<String, Integer> highLCount = new HashMap<>(); Map<String, Integer> highLCount = new HashMap<>();
List<String> hitKws = new ArrayList<>();
keywords.stream().forEach(kw -> { keywords.stream().forEach(kw -> {
boolean ishit = true; boolean ishit = true;
if (kw.contains(" ")) { if (kw.contains(" ")) {
...@@ -823,13 +825,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService { ...@@ -823,13 +825,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit = Tools.approximateStringMatching(text + roottext, kw); ishit = Tools.approximateStringMatching(text + roottext, kw);
} }
if (ishit) { if (ishit) {
if (highLCount.containsKey(kw)) { hitKws.add(kw);
highLCount.put(kw, highLCount.get(kw) + 1); }
});
for (int i = 0; i < hitKws.size(); i++) {
String kw = hitKws.get(i);
int count = 0;
if (kw.contains(" ")) {
String[] keys = kw.split(" ");
for (int j = 0; j < keys.length; j++) {
String key = keys[j];
int c = Tools.appearNumber(text + roottext, key);
if ((c < count && c != 0) || count == 0)
count = c;
}
} else { } else {
highLCount.put(kw, 1); count = Tools.appearNumber(text + roottext, kw);
} }
highLCount.put(kw, count);
} }
});
Map<String, Integer> hLMap = highLCount.entrySet().stream() Map<String, Integer> hLMap = highLCount.entrySet().stream()
.sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue()) .sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue())
...@@ -886,6 +900,29 @@ public class HighLightFillingServiceImpl implements HighLightFillingService { ...@@ -886,6 +900,29 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
img = regxString(content2); img = regxString(content2);
} }
Map<String, Integer> highLCount = new HashMap<>(); Map<String, Integer> highLCount = new HashMap<>();
// keywords.stream().forEach(kw -> {
// boolean ishit = true;
// if (kw.contains(" ")) {
// String[] keys = kw.split(" ");
// for (int j = 0; j < keys.length; j++) {
// String key = keys[j];
// ishit = Tools.approximateStringMatching(questionTitle + questionContent + answerContent, key);
// if (!ishit)
// break;
// }
// } else {
// ishit = Tools.approximateStringMatching(questionTitle + questionContent + answerContent, kw);
// }
// if (ishit) {
// if (highLCount.containsKey(kw)) {
// highLCount.put(kw, highLCount.get(kw) + 1);
// } else {
// highLCount.put(kw, 1);
// }
// }
// });
List<String> hitKws = new ArrayList<>();
keywords.stream().forEach(kw -> { keywords.stream().forEach(kw -> {
boolean ishit = true; boolean ishit = true;
if (kw.contains(" ")) { if (kw.contains(" ")) {
...@@ -900,13 +937,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService { ...@@ -900,13 +937,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit = Tools.approximateStringMatching(questionTitle + questionContent + answerContent, kw); ishit = Tools.approximateStringMatching(questionTitle + questionContent + answerContent, kw);
} }
if (ishit) { if (ishit) {
if (highLCount.containsKey(kw)) { hitKws.add(kw);
highLCount.put(kw, highLCount.get(kw) + 1); }
});
for (int i = 0; i < hitKws.size(); i++) {
String kw = hitKws.get(i);
int count = 0;
if (kw.contains(" ")) {
String[] keys = kw.split(" ");
for (int j = 0; j < keys.length; j++) {
String key = keys[j];
int c = Tools.appearNumber(questionTitle + questionContent + answerContent, key);
if ((c < count && c != 0) || count == 0)
count = c;
}
} else { } else {
highLCount.put(kw, 1); count = Tools.appearNumber(questionTitle + questionContent + answerContent, kw);
} }
highLCount.put(kw, count);
} }
});
Map<String, Integer> hLMap = highLCount.entrySet().stream() Map<String, Integer> hLMap = highLCount.entrySet().stream()
.sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue()) .sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue())
...@@ -981,7 +1030,30 @@ public class HighLightFillingServiceImpl implements HighLightFillingService { ...@@ -981,7 +1030,30 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
String title = String.valueOf(sourceHitMap.get("title")); String title = String.valueOf(sourceHitMap.get("title"));
String content = sourceHitMap.containsKey("content") ? sourceHitMap.get("content") + "" : ""; String content = sourceHitMap.containsKey("content") ? sourceHitMap.get("content") + "" : "";
// Map<String, Integer> highLCount = new HashMap<>();
// keywords.stream().forEach(kw -> {
// boolean ishit = true;
// if (kw.contains(" ")) {
// String[] keys = kw.split(" ");
// for (int j = 0; j < keys.length; j++) {
// String key = keys[j];
// ishit = Tools.approximateStringMatching(title + content, key);
// if (!ishit)
// break;
// }
// } else {
// ishit = Tools.approximateStringMatching(title + content, kw);
// }
// if (ishit) {
// if (highLCount.containsKey(kw)) {
// highLCount.put(kw, highLCount.get(kw) + 1);
// } else {
// highLCount.put(kw, 1);
// }
// }
// });
Map<String, Integer> highLCount = new HashMap<>(); Map<String, Integer> highLCount = new HashMap<>();
List<String> hitKws = new ArrayList<>();
keywords.stream().forEach(kw -> { keywords.stream().forEach(kw -> {
boolean ishit = true; boolean ishit = true;
if (kw.contains(" ")) { if (kw.contains(" ")) {
...@@ -996,13 +1068,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService { ...@@ -996,13 +1068,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit = Tools.approximateStringMatching(title + content, kw); ishit = Tools.approximateStringMatching(title + content, kw);
} }
if (ishit) { if (ishit) {
if (highLCount.containsKey(kw)) { hitKws.add(kw);
highLCount.put(kw, highLCount.get(kw) + 1); }
});
for (int i = 0; i < hitKws.size(); i++) {
String kw = hitKws.get(i);
int count = 0;
if (kw.contains(" ")) {
String[] keys = kw.split(" ");
for (int j = 0; j < keys.length; j++) {
String key = keys[j];
int c = Tools.appearNumber(title + content, key);
if ((c < count && c != 0) || count == 0)
count = c;
}
} else { } else {
highLCount.put(kw, 1); count = Tools.appearNumber(title + content, kw);
} }
highLCount.put(kw, count);
} }
});
Map<String, Integer> hLMap = highLCount.entrySet().stream() Map<String, Integer> hLMap = highLCount.entrySet().stream()
.sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue()) .sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue())
...@@ -1051,7 +1135,31 @@ public class HighLightFillingServiceImpl implements HighLightFillingService { ...@@ -1051,7 +1135,31 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
} }
String title = String.valueOf(sourceHitMap.get("title")); String title = String.valueOf(sourceHitMap.get("title"));
String content = sourceHitMap.containsKey("content") ? sourceHitMap.get("content") + "" : ""; String content = sourceHitMap.containsKey("content") ? sourceHitMap.get("content") + "" : "";
// Map<String, Integer> highLCount = new HashMap<>();
// keywords.stream().forEach(kw -> {
// boolean ishit = true;
// if (kw.contains(" ")) {
// String[] keys = kw.split(" ");
// for (int j = 0; j < keys.length; j++) {
// String key = keys[j];
// ishit = Tools.approximateStringMatching(title + content, key);
// if (!ishit)
// break;
// }
// } else {
// ishit = Tools.approximateStringMatching(title + content, kw);
// }
// if (ishit) {
// if (highLCount.containsKey(kw)) {
// highLCount.put(kw, highLCount.get(kw) + 1);
// } else {
// highLCount.put(kw, 1);
// }
// }
// });
Map<String, Integer> highLCount = new HashMap<>(); Map<String, Integer> highLCount = new HashMap<>();
List<String> hitKws = new ArrayList<>();
keywords.stream().forEach(kw -> { keywords.stream().forEach(kw -> {
boolean ishit = true; boolean ishit = true;
if (kw.contains(" ")) { if (kw.contains(" ")) {
...@@ -1066,13 +1174,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService { ...@@ -1066,13 +1174,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit = Tools.approximateStringMatching(title + content, kw); ishit = Tools.approximateStringMatching(title + content, kw);
} }
if (ishit) { if (ishit) {
if (highLCount.containsKey(kw)) { hitKws.add(kw);
highLCount.put(kw, highLCount.get(kw) + 1); }
});
for (int i = 0; i < hitKws.size(); i++) {
String kw = hitKws.get(i);
int count = 0;
if (kw.contains(" ")) {
String[] keys = kw.split(" ");
for (int j = 0; j < keys.length; j++) {
String key = keys[j];
int c = Tools.appearNumber(title + content, key);
if ((c < count && c != 0) || count == 0)
count = c;
}
} else { } else {
highLCount.put(kw, 1); count = Tools.appearNumber(title + content, kw);
} }
highLCount.put(kw, count);
} }
});
Map<String, Integer> hLMap = highLCount.entrySet().stream() Map<String, Integer> hLMap = highLCount.entrySet().stream()
.sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue()) .sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue())
......
...@@ -1184,4 +1184,23 @@ public class Tools { ...@@ -1184,4 +1184,23 @@ public class Tools {
return tag; return tag;
} }
/**
 * Counts the non-overlapping, case-insensitive occurrences of a literal string.
 *
 * <p>{@code findText} is matched literally, never as a regular expression, so
 * keywords containing regex metacharacters (for example {@code a.b}, {@code c++}
 * or {@code (}) are counted exactly as written and cannot raise a
 * {@code PatternSyntaxException}.
 *
 * @param srcText  source text to search; {@code null} is treated as containing no match
 * @param findText literal text to look for; {@code null} or empty yields 0
 * @return number of non-overlapping occurrences of {@code findText} in {@code srcText}
 */
public static int appearNumber(String srcText, String findText) {
    // An empty needle would "match" at every position; treat it as no match.
    if (srcText == null || findText == null || findText.isEmpty()) {
        return 0;
    }
    // Locale.ROOT keeps case folding independent of the JVM's default locale.
    String haystack = srcText.toLowerCase(java.util.Locale.ROOT);
    String needle = findText.toLowerCase(java.util.Locale.ROOT);
    int count = 0;
    int from = haystack.indexOf(needle);
    while (from >= 0) {
        count++;
        // Resume after the current hit so that matches never overlap.
        from = haystack.indexOf(needle, from + needle.length());
    }
    return count;
}
} }
#middleware.zookeeperIp=zookeeper://192.168.0.36:2181 #middleware.zookeeperIp=zookeeper://192.168.0.36:2181
#middleware.isAutoMark=false
middleware.zookeeperIp=zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181; middleware.zookeeperIp=zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181;
middleware.isAutoMark=true
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment