Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
messageflow
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
虞诚毅
messageflow
Commits
c5040a08
Commit
c5040a08
authored
May 15, 2019
by
303514581@qq.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
2019/5/15 自动标注启动改配置文件,词频计算添加
parent
e3ceb231
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
177 additions
and
21 deletions
+177
-21
src/main/java/com/zhiwei/messageflow/ES4RedisTask.java
+5
-5
src/main/java/com/zhiwei/messageflow/es/service/impl/AutoMarkServiceImpl.java
+14
-0
src/main/java/com/zhiwei/messageflow/es/service/impl/HighLightFillingServiceImpl.java
+136
-16
src/main/java/com/zhiwei/messageflow/util/Tools.java
+19
-0
src/main/resources/middleware.properties
+3
-0
No files found.
src/main/java/com/zhiwei/messageflow/ES4RedisTask.java
View file @
c5040a08
...
...
@@ -351,11 +351,6 @@ public class ES4RedisTask {
continue
;
}
//合并子词组
// log.info("in messages:{};messageskey{}", messages.size(), messageskey.size());
messages
=
disposeMessageService
.
accumulateMessage
(
messages
,
messageskey
);
// log.info("out messages:{};messageskey{}", messages.size(), messageskey.size());
num
+=
messageskey
.
size
();
// 记录新的rsid
keyrsid
=
ramkey
.
getRsid
();
...
...
@@ -364,6 +359,11 @@ public class ES4RedisTask {
redisService
.
setMessage2Redis
(
redisKey
,
messageskey
,
keywordscount
);
newRsidMap
.
put
(
redisKey
,
Integer
.
valueOf
(
keyrsid
.
toString
()));
//合并子词组
// log.info("in messages:{};messageskey{}", messages.size(), messageskey.size());
messages
=
disposeMessageService
.
accumulateMessage
(
messages
,
messageskey
);
// log.info("out messages:{};messageskey{}", messages.size(), messageskey.size());
}
// 遍历关键词组
// 向redis写入数据
...
...
src/main/java/com/zhiwei/messageflow/es/service/impl/AutoMarkServiceImpl.java
View file @
c5040a08
...
...
@@ -3,6 +3,10 @@ package com.zhiwei.messageflow.es.service.impl;
import
java.util.ArrayList
;
import
java.util.List
;
import
org.springframework.beans.factory.annotation.Value
;
import
org.springframework.boot.context.properties.ConfigurationProperties
;
import
org.springframework.context.annotation.Configuration
;
import
org.springframework.context.annotation.PropertySource
;
import
org.springframework.stereotype.Component
;
import
com.alibaba.fastjson.JSONObject
;
...
...
@@ -15,8 +19,14 @@ import com.zhiwei.messageflow.util.MatchingInfoUtil;
import
com.zhiwei.middleware.automaticmark.Service.AutomaticMarkClient
;
@Component
@Configuration
@ConfigurationProperties
(
prefix
=
"middleware"
)
@PropertySource
(
value
=
"classpath:middleware.properties"
)
public
class
AutoMarkServiceImpl
implements
AutoMarkService
{
@Value
((
"${middleware.isAutoMark}"
))
private
boolean
isAutoMark
;
private
static
AutomaticMarkClient
client
=
AutomaticMarkClient
.
getClient
(
MiddlewareConfig
.
zookeeperIp
);
// private static AutomaticMarkClient client = AutomaticMarkClient.getClient("zookeeper://192.168.0.234:2181");
...
...
@@ -49,12 +59,16 @@ public class AutoMarkServiceImpl implements AutoMarkService {
// System.err.println(dbObject.get("_id").toString()+"title:"+dbObject.get("title").toString()+"company"+dbObject.get("markGroup").toString());
list
.
add
(
dbObject
);
}
if
(
isAutoMark
)
System
.
out
.
println
(
"isAutoMark:"
+
isAutoMark
);
client
.
autoMark
(
list
,
"media"
);
}
}
/**
* 判断是否能被自动标注(问答,论坛除外)
*
* @Title: canbeAutoMark
* @Description: TODO(这里用一句话描述这个方法的作用)
* @param msg
...
...
src/main/java/com/zhiwei/messageflow/es/service/impl/HighLightFillingServiceImpl.java
View file @
c5040a08
package
com
.
zhiwei
.
messageflow
.
es
.
service
.
impl
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
...
...
@@ -809,6 +810,7 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
String
roottext
=
String
.
valueOf
(
sourceHitMap
.
get
(
"roottext"
));
Map
<
String
,
Integer
>
highLCount
=
new
HashMap
<>();
List
<
String
>
hitKws
=
new
ArrayList
<>();
keywords
.
stream
().
forEach
(
kw
->
{
boolean
ishit
=
true
;
if
(
kw
.
contains
(
" "
))
{
...
...
@@ -823,13 +825,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit
=
Tools
.
approximateStringMatching
(
text
+
roottext
,
kw
);
}
if
(
ishit
)
{
if
(
highLCount
.
containsKey
(
kw
))
{
highLCount
.
put
(
kw
,
highLCount
.
get
(
kw
)
+
1
);
hitKws
.
add
(
kw
);
}
});
for
(
int
i
=
0
;
i
<
hitKws
.
size
();
i
++)
{
String
kw
=
hitKws
.
get
(
i
);
int
count
=
0
;
if
(
kw
.
contains
(
" "
))
{
String
[]
keys
=
kw
.
split
(
" "
);
for
(
int
j
=
0
;
j
<
keys
.
length
;
j
++)
{
String
key
=
keys
[
j
];
int
c
=
Tools
.
appearNumber
(
text
+
roottext
,
key
);
if
((
c
<
count
&&
c
!=
0
)
||
count
==
0
)
count
=
c
;
}
}
else
{
highLCount
.
put
(
kw
,
1
);
count
=
Tools
.
appearNumber
(
text
+
roottext
,
kw
);
}
highLCount
.
put
(
kw
,
count
);
}
});
Map
<
String
,
Integer
>
hLMap
=
highLCount
.
entrySet
().
stream
()
.
sorted
((
Map
.
Entry
<
String
,
Integer
>
o1
,
Map
.
Entry
<
String
,
Integer
>
o2
)
->
o2
.
getValue
()
-
o1
.
getValue
())
...
...
@@ -886,6 +900,29 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
img
=
regxString
(
content2
);
}
Map
<
String
,
Integer
>
highLCount
=
new
HashMap
<>();
// keywords.stream().forEach(kw -> {
// boolean ishit = true;
// if (kw.contains(" ")) {
// String[] keys = kw.split(" ");
// for (int j = 0; j < keys.length; j++) {
// String key = keys[j];
// ishit = Tools.approximateStringMatching(questionTitle + questionContent + answerContent, key);
// if (!ishit)
// break;
// }
// } else {
// ishit = Tools.approximateStringMatching(questionTitle + questionContent + answerContent, kw);
// }
// if (ishit) {
// if (highLCount.containsKey(kw)) {
// highLCount.put(kw, highLCount.get(kw) + 1);
// } else {
// highLCount.put(kw, 1);
// }
// }
// });
List
<
String
>
hitKws
=
new
ArrayList
<>();
keywords
.
stream
().
forEach
(
kw
->
{
boolean
ishit
=
true
;
if
(
kw
.
contains
(
" "
))
{
...
...
@@ -900,13 +937,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit
=
Tools
.
approximateStringMatching
(
questionTitle
+
questionContent
+
answerContent
,
kw
);
}
if
(
ishit
)
{
if
(
highLCount
.
containsKey
(
kw
))
{
highLCount
.
put
(
kw
,
highLCount
.
get
(
kw
)
+
1
);
hitKws
.
add
(
kw
);
}
});
for
(
int
i
=
0
;
i
<
hitKws
.
size
();
i
++)
{
String
kw
=
hitKws
.
get
(
i
);
int
count
=
0
;
if
(
kw
.
contains
(
" "
))
{
String
[]
keys
=
kw
.
split
(
" "
);
for
(
int
j
=
0
;
j
<
keys
.
length
;
j
++)
{
String
key
=
keys
[
j
];
int
c
=
Tools
.
appearNumber
(
questionTitle
+
questionContent
+
answerContent
,
key
);
if
((
c
<
count
&&
c
!=
0
)
||
count
==
0
)
count
=
c
;
}
}
else
{
highLCount
.
put
(
kw
,
1
);
count
=
Tools
.
appearNumber
(
questionTitle
+
questionContent
+
answerContent
,
kw
);
}
highLCount
.
put
(
kw
,
count
);
}
});
Map
<
String
,
Integer
>
hLMap
=
highLCount
.
entrySet
().
stream
()
.
sorted
((
Map
.
Entry
<
String
,
Integer
>
o1
,
Map
.
Entry
<
String
,
Integer
>
o2
)
->
o2
.
getValue
()
-
o1
.
getValue
())
...
...
@@ -981,7 +1030,30 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
String
title
=
String
.
valueOf
(
sourceHitMap
.
get
(
"title"
));
String
content
=
sourceHitMap
.
containsKey
(
"content"
)
?
sourceHitMap
.
get
(
"content"
)
+
""
:
""
;
// Map<String, Integer> highLCount = new HashMap<>();
// keywords.stream().forEach(kw -> {
// boolean ishit = true;
// if (kw.contains(" ")) {
// String[] keys = kw.split(" ");
// for (int j = 0; j < keys.length; j++) {
// String key = keys[j];
// ishit = Tools.approximateStringMatching(title + content, key);
// if (!ishit)
// break;
// }
// } else {
// ishit = Tools.approximateStringMatching(title + content, kw);
// }
// if (ishit) {
// if (highLCount.containsKey(kw)) {
// highLCount.put(kw, highLCount.get(kw) + 1);
// } else {
// highLCount.put(kw, 1);
// }
// }
// });
Map
<
String
,
Integer
>
highLCount
=
new
HashMap
<>();
List
<
String
>
hitKws
=
new
ArrayList
<>();
keywords
.
stream
().
forEach
(
kw
->
{
boolean
ishit
=
true
;
if
(
kw
.
contains
(
" "
))
{
...
...
@@ -996,13 +1068,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit
=
Tools
.
approximateStringMatching
(
title
+
content
,
kw
);
}
if
(
ishit
)
{
if
(
highLCount
.
containsKey
(
kw
))
{
highLCount
.
put
(
kw
,
highLCount
.
get
(
kw
)
+
1
);
hitKws
.
add
(
kw
);
}
});
for
(
int
i
=
0
;
i
<
hitKws
.
size
();
i
++)
{
String
kw
=
hitKws
.
get
(
i
);
int
count
=
0
;
if
(
kw
.
contains
(
" "
))
{
String
[]
keys
=
kw
.
split
(
" "
);
for
(
int
j
=
0
;
j
<
keys
.
length
;
j
++)
{
String
key
=
keys
[
j
];
int
c
=
Tools
.
appearNumber
(
title
+
content
,
key
);
if
((
c
<
count
&&
c
!=
0
)
||
count
==
0
)
count
=
c
;
}
}
else
{
highLCount
.
put
(
kw
,
1
);
count
=
Tools
.
appearNumber
(
title
+
content
,
kw
);
}
highLCount
.
put
(
kw
,
count
);
}
});
Map
<
String
,
Integer
>
hLMap
=
highLCount
.
entrySet
().
stream
()
.
sorted
((
Map
.
Entry
<
String
,
Integer
>
o1
,
Map
.
Entry
<
String
,
Integer
>
o2
)
->
o2
.
getValue
()
-
o1
.
getValue
())
...
...
@@ -1051,7 +1135,31 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
}
String
title
=
String
.
valueOf
(
sourceHitMap
.
get
(
"title"
));
String
content
=
sourceHitMap
.
containsKey
(
"content"
)
?
sourceHitMap
.
get
(
"content"
)
+
""
:
""
;
// Map<String, Integer> highLCount = new HashMap<>();
// keywords.stream().forEach(kw -> {
// boolean ishit = true;
// if (kw.contains(" ")) {
// String[] keys = kw.split(" ");
// for (int j = 0; j < keys.length; j++) {
// String key = keys[j];
// ishit = Tools.approximateStringMatching(title + content, key);
// if (!ishit)
// break;
// }
// } else {
// ishit = Tools.approximateStringMatching(title + content, kw);
// }
// if (ishit) {
// if (highLCount.containsKey(kw)) {
// highLCount.put(kw, highLCount.get(kw) + 1);
// } else {
// highLCount.put(kw, 1);
// }
// }
// });
Map
<
String
,
Integer
>
highLCount
=
new
HashMap
<>();
List
<
String
>
hitKws
=
new
ArrayList
<>();
keywords
.
stream
().
forEach
(
kw
->
{
boolean
ishit
=
true
;
if
(
kw
.
contains
(
" "
))
{
...
...
@@ -1066,13 +1174,25 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
ishit
=
Tools
.
approximateStringMatching
(
title
+
content
,
kw
);
}
if
(
ishit
)
{
if
(
highLCount
.
containsKey
(
kw
))
{
highLCount
.
put
(
kw
,
highLCount
.
get
(
kw
)
+
1
);
hitKws
.
add
(
kw
);
}
});
for
(
int
i
=
0
;
i
<
hitKws
.
size
();
i
++)
{
String
kw
=
hitKws
.
get
(
i
);
int
count
=
0
;
if
(
kw
.
contains
(
" "
))
{
String
[]
keys
=
kw
.
split
(
" "
);
for
(
int
j
=
0
;
j
<
keys
.
length
;
j
++)
{
String
key
=
keys
[
j
];
int
c
=
Tools
.
appearNumber
(
title
+
content
,
key
);
if
((
c
<
count
&&
c
!=
0
)
||
count
==
0
)
count
=
c
;
}
}
else
{
highLCount
.
put
(
kw
,
1
);
count
=
Tools
.
appearNumber
(
title
+
content
,
kw
);
}
highLCount
.
put
(
kw
,
count
);
}
});
Map
<
String
,
Integer
>
hLMap
=
highLCount
.
entrySet
().
stream
()
.
sorted
((
Map
.
Entry
<
String
,
Integer
>
o1
,
Map
.
Entry
<
String
,
Integer
>
o2
)
->
o2
.
getValue
()
-
o1
.
getValue
())
...
...
src/main/java/com/zhiwei/messageflow/util/Tools.java
View file @
c5040a08
...
...
@@ -1184,4 +1184,23 @@ public class Tools {
return
tag
;
}
/**
 * Counts the non-overlapping, case-insensitive occurrences of a literal string.
 *
 * <p>{@code findText} is matched literally, never as a regular expression, so
 * keywords containing characters such as {@code .}, {@code +}, {@code (} or
 * {@code *} are counted exactly as written instead of being miscounted or
 * raising a {@code PatternSyntaxException}.
 *
 * @param srcText  text to search; {@code null} yields 0
 * @param findText literal text to look for; {@code null} or empty yields 0
 * @return number of non-overlapping occurrences of {@code findText} in {@code srcText}
 */
public static int appearNumber(String srcText, String findText) {
    // Nothing can be counted without both a haystack and a non-empty needle.
    if (srcText == null || findText == null || findText.isEmpty()) {
        return 0;
    }
    // Locale.ROOT keeps the case folding independent of the JVM default locale.
    String haystack = srcText.toLowerCase(java.util.Locale.ROOT);
    String needle = findText.toLowerCase(java.util.Locale.ROOT);

    int count = 0;
    int from = haystack.indexOf(needle);
    while (from >= 0) {
        count++;
        // Resume after the current hit so that matches never overlap.
        from = haystack.indexOf(needle, from + needle.length());
    }
    return count;
}
}
src/main/resources/middleware.properties
View file @
c5040a08
#middleware.zookeeperIp=zookeeper://192.168.0.36:2181
#middleware.isAutoMark=false
middleware.zookeeperIp
=
zookeeper://192.168.0.203:2181;zookeeper://192.168.0.104:2181;zookeeper://192.168.0.105:2181;
middleware.isAutoMark
=
true
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment