Commit c5040a08 by 303514581@qq.com

2019/5/15 自动标注启动改配置文件,词频计算添加

parent e3ceb231
......@@ -351,11 +351,6 @@ public class ES4RedisTask {
continue;
}
//合并子词组
// log.info("in messages:{};messageskey{}", messages.size(), messageskey.size());
messages = disposeMessageService.accumulateMessage(messages,messageskey);
// log.info("out messages:{};messageskey{}", messages.size(), messageskey.size());
num += messageskey.size();
// 记录新的rsid
keyrsid = ramkey.getRsid();
......@@ -364,6 +359,11 @@ public class ES4RedisTask {
redisService.setMessage2Redis(redisKey, messageskey, keywordscount);
newRsidMap.put(redisKey, Integer.valueOf(keyrsid.toString()));
//合并子词组
// log.info("in messages:{};messageskey{}", messages.size(), messageskey.size());
messages = disposeMessageService.accumulateMessage(messages,messageskey);
// log.info("out messages:{};messageskey{}", messages.size(), messageskey.size());
} // 遍历关键词组
// 向redis写入数据
......
......@@ -3,6 +3,10 @@ package com.zhiwei.messageflow.es.service.impl;
import java.util.ArrayList;
import java.util.List;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.PropertySource;
import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSONObject;
......@@ -15,8 +19,14 @@ import com.zhiwei.messageflow.util.MatchingInfoUtil;
import com.zhiwei.middleware.automaticmark.Service.AutomaticMarkClient;
@Component
@Configuration
@ConfigurationProperties(prefix = "middleware")
@PropertySource(value = "classpath:middleware.properties")
public class AutoMarkServiceImpl implements AutoMarkService {
@Value(("${middleware.isAutoMark}"))
private boolean isAutoMark;
private static AutomaticMarkClient client = AutomaticMarkClient.getClient(MiddlewareConfig.zookeeperIp);
// private static AutomaticMarkClient client = AutomaticMarkClient.getClient("zookeeper://192.168.0.234:2181");
......@@ -49,12 +59,16 @@ public class AutoMarkServiceImpl implements AutoMarkService {
// System.err.println(dbObject.get("_id").toString()+"title:"+dbObject.get("title").toString()+"company"+dbObject.get("markGroup").toString());
list.add(dbObject);
}
if (isAutoMark)
System.out.println("isAutoMark:"+isAutoMark);
client.autoMark(list, "media");
}
}
/**
* 判断是否能被自动标注(问答,论坛除外
*
* @Title: canbeAutoMark
* @Description: TODO(这里用一句话描述这个方法的作用)
* @param @param msg
......
package com.zhiwei.messageflow.es.service.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
......@@ -809,6 +810,7 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
String roottext = String.valueOf(sourceHitMap.get("roottext"));
Map<String, Integer> highLCount = new HashMap<>();
List<String> hitKws = new ArrayList<>();
keywords.stream().forEach(kw -> {
boolean ishit = true;
if (kw.contains(" ")) {
......@@ -823,13 +825,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit = Tools.approximateStringMatching(text + roottext, kw);
}
if (ishit) {
if (highLCount.containsKey(kw)) {
highLCount.put(kw, highLCount.get(kw) + 1);
hitKws.add(kw);
}
});
for (int i = 0; i < hitKws.size(); i++) {
String kw = hitKws.get(i);
int count = 0;
if (kw.contains(" ")) {
String[] keys = kw.split(" ");
for (int j = 0; j < keys.length; j++) {
String key = keys[j];
int c = Tools.appearNumber(text + roottext, key);
if ((c < count && c != 0) || count == 0)
count = c;
}
} else {
highLCount.put(kw, 1);
count = Tools.appearNumber(text + roottext, kw);
}
highLCount.put(kw, count);
}
});
Map<String, Integer> hLMap = highLCount.entrySet().stream()
.sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue())
......@@ -886,6 +900,29 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
img = regxString(content2);
}
Map<String, Integer> highLCount = new HashMap<>();
// keywords.stream().forEach(kw -> {
// boolean ishit = true;
// if (kw.contains(" ")) {
// String[] keys = kw.split(" ");
// for (int j = 0; j < keys.length; j++) {
// String key = keys[j];
// ishit = Tools.approximateStringMatching(questionTitle + questionContent + answerContent, key);
// if (!ishit)
// break;
// }
// } else {
// ishit = Tools.approximateStringMatching(questionTitle + questionContent + answerContent, kw);
// }
// if (ishit) {
// if (highLCount.containsKey(kw)) {
// highLCount.put(kw, highLCount.get(kw) + 1);
// } else {
// highLCount.put(kw, 1);
// }
// }
// });
List<String> hitKws = new ArrayList<>();
keywords.stream().forEach(kw -> {
boolean ishit = true;
if (kw.contains(" ")) {
......@@ -900,13 +937,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit = Tools.approximateStringMatching(questionTitle + questionContent + answerContent, kw);
}
if (ishit) {
if (highLCount.containsKey(kw)) {
highLCount.put(kw, highLCount.get(kw) + 1);
hitKws.add(kw);
}
});
for (int i = 0; i < hitKws.size(); i++) {
String kw = hitKws.get(i);
int count = 0;
if (kw.contains(" ")) {
String[] keys = kw.split(" ");
for (int j = 0; j < keys.length; j++) {
String key = keys[j];
int c = Tools.appearNumber(questionTitle + questionContent + answerContent, key);
if ((c < count && c != 0) || count == 0)
count = c;
}
} else {
highLCount.put(kw, 1);
count = Tools.appearNumber(questionTitle + questionContent + answerContent, kw);
}
highLCount.put(kw, count);
}
});
Map<String, Integer> hLMap = highLCount.entrySet().stream()
.sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue())
......@@ -981,7 +1030,30 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
String title = String.valueOf(sourceHitMap.get("title"));
String content = sourceHitMap.containsKey("content") ? sourceHitMap.get("content") + "" : "";
// Map<String, Integer> highLCount = new HashMap<>();
// keywords.stream().forEach(kw -> {
// boolean ishit = true;
// if (kw.contains(" ")) {
// String[] keys = kw.split(" ");
// for (int j = 0; j < keys.length; j++) {
// String key = keys[j];
// ishit = Tools.approximateStringMatching(title + content, key);
// if (!ishit)
// break;
// }
// } else {
// ishit = Tools.approximateStringMatching(title + content, kw);
// }
// if (ishit) {
// if (highLCount.containsKey(kw)) {
// highLCount.put(kw, highLCount.get(kw) + 1);
// } else {
// highLCount.put(kw, 1);
// }
// }
// });
Map<String, Integer> highLCount = new HashMap<>();
List<String> hitKws = new ArrayList<>();
keywords.stream().forEach(kw -> {
boolean ishit = true;
if (kw.contains(" ")) {
......@@ -996,13 +1068,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit = Tools.approximateStringMatching(title + content, kw);
}
if (ishit) {
if (highLCount.containsKey(kw)) {
highLCount.put(kw, highLCount.get(kw) + 1);
hitKws.add(kw);
}
});
for (int i = 0; i < hitKws.size(); i++) {
String kw = hitKws.get(i);
int count = 0;
if (kw.contains(" ")) {
String[] keys = kw.split(" ");
for (int j = 0; j < keys.length; j++) {
String key = keys[j];
int c = Tools.appearNumber(title + content, key);
if ((c < count && c != 0) || count == 0)
count = c;
}
} else {
highLCount.put(kw, 1);
count = Tools.appearNumber(title + content, kw);
}
highLCount.put(kw, count);
}
});
Map<String, Integer> hLMap = highLCount.entrySet().stream()
.sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue())
......@@ -1051,7 +1135,31 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
}
String title = String.valueOf(sourceHitMap.get("title"));
String content = sourceHitMap.containsKey("content") ? sourceHitMap.get("content") + "" : "";
// Map<String, Integer> highLCount = new HashMap<>();
// keywords.stream().forEach(kw -> {
// boolean ishit = true;
// if (kw.contains(" ")) {
// String[] keys = kw.split(" ");
// for (int j = 0; j < keys.length; j++) {
// String key = keys[j];
// ishit = Tools.approximateStringMatching(title + content, key);
// if (!ishit)
// break;
// }
// } else {
// ishit = Tools.approximateStringMatching(title + content, kw);
// }
// if (ishit) {
// if (highLCount.containsKey(kw)) {
// highLCount.put(kw, highLCount.get(kw) + 1);
// } else {
// highLCount.put(kw, 1);
// }
// }
// });
Map<String, Integer> highLCount = new HashMap<>();
List<String> hitKws = new ArrayList<>();
keywords.stream().forEach(kw -> {
boolean ishit = true;
if (kw.contains(" ")) {
......@@ -1066,13 +1174,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit = Tools.approximateStringMatching(title + content, kw);
}
if (ishit) {
if (highLCount.containsKey(kw)) {
highLCount.put(kw, highLCount.get(kw) + 1);
hitKws.add(kw);
}
});
for (int i = 0; i < hitKws.size(); i++) {
String kw = hitKws.get(i);
int count = 0;
if (kw.contains(" ")) {
String[] keys = kw.split(" ");
for (int j = 0; j < keys.length; j++) {
String key = keys[j];
int c = Tools.appearNumber(title + content, key);
if ((c < count && c != 0) || count == 0)
count = c;
}
} else {
highLCount.put(kw, 1);
count = Tools.appearNumber(title + content, kw);
}
highLCount.put(kw, count);
}
});
Map<String, Integer> hLMap = highLCount.entrySet().stream()
.sorted((Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) -> o2.getValue() - o1.getValue())
......
......@@ -1184,4 +1184,23 @@ public class Tools {
return tag;
}
/**
 * Counts the non-overlapping, case-insensitive occurrences of a literal string.
 *
 * <p>{@code findText} is matched literally, never as a regular expression, so
 * keywords containing regex metacharacters such as {@code +}, {@code .},
 * {@code (} or {@code *} are counted correctly instead of being mis-parsed
 * or raising {@code PatternSyntaxException}.
 *
 * @param srcText  text to search in; {@code null} yields 0
 * @param findText literal text to look for; {@code null} or empty yields 0
 * @return number of non-overlapping occurrences of {@code findText} in {@code srcText}
 */
public static int appearNumber(String srcText, String findText) {
    // Nothing can be counted without both a text and a non-empty keyword.
    if (srcText == null || findText == null || findText.isEmpty()) {
        return 0;
    }
    // Lower-case both sides so the comparison ignores case.
    String haystack = srcText.toLowerCase();
    String needle = findText.toLowerCase();
    int count = 0;
    int from = haystack.indexOf(needle);
    while (from >= 0) {
        count++;
        // Resume after the current hit so matches never overlap.
        from = haystack.indexOf(needle, from + needle.length());
    }
    return count;
}
}
#middleware.zookeeperIp=zookeeper://192.168.0.36:2181
#middleware.isAutoMark=false
middleware.zookeeperIp=zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181;
middleware.isAutoMark=true
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment