Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
messageflow
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
虞诚毅
messageflow
Commits
63816bf1
Commit
63816bf1
authored
Jul 04, 2018
by
shentao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
2018/7/4 1自动标注加入2es搜索语句优化
parent
46e516cf
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
156 additions
and
4 deletions
+156
-4
pom.xml
+6
-0
src/main/java/com/zhiwei/messageflow/ES4RedisStart.java
+4
-0
src/main/java/com/zhiwei/messageflow/ES4RedisTask.java
+12
-1
src/main/java/com/zhiwei/messageflow/ES4RedisThreadNew.java
+2
-1
src/main/java/com/zhiwei/messageflow/config/MiddlewareConfig.java
+27
-0
src/main/java/com/zhiwei/messageflow/es/service/AutoMarkService.java
+25
-0
src/main/java/com/zhiwei/messageflow/es/service/impl/AutoMarkServiceImpl.java
+74
-0
src/main/java/com/zhiwei/messageflow/es/service/impl/ES4BeanServiceImpl.java
+1
-1
src/main/java/com/zhiwei/messageflow/util/ESQueryUtil.java
+3
-1
src/main/java/com/zhiwei/messageflow/util/MatchingInfoUtil.java
+0
-0
src/main/resources/middleware.properties
+2
-0
No files found.
pom.xml
View file @
63816bf1
...
@@ -121,6 +121,12 @@
...
@@ -121,6 +121,12 @@
<artifactId>
fastjson
</artifactId>
<artifactId>
fastjson
</artifactId>
<version>
1.2.34
</version>
<version>
1.2.34
</version>
</dependency>
</dependency>
<!-- 自动标注中间件 -->
<dependency>
<groupId>
com.zhiwei.middleware
</groupId>
<artifactId>
automaticmark-client
</artifactId>
<version>
1.0-SNAPSHOT
</version>
</dependency>
</dependencies>
</dependencies>
...
...
src/main/java/com/zhiwei/messageflow/ES4RedisStart.java
View file @
63816bf1
...
@@ -43,6 +43,10 @@ public class ES4RedisStart {
...
@@ -43,6 +43,10 @@ public class ES4RedisStart {
// 遍历项目
// 遍历项目
for
(
Project
project
:
projects
)
{
for
(
Project
project
:
projects
)
{
// if(!project.getProjectName().equals("证监会")) {
// continue;
// }
/**
/**
* 项目全部平台(公共+私有)
* 项目全部平台(公共+私有)
...
...
src/main/java/com/zhiwei/messageflow/ES4RedisTask.java
View file @
63816bf1
...
@@ -19,6 +19,7 @@ import com.zhiwei.messageflow.bean.MediaMessage;
...
@@ -19,6 +19,7 @@ import com.zhiwei.messageflow.bean.MediaMessage;
import
com.zhiwei.messageflow.bean.VideoMessage
;
import
com.zhiwei.messageflow.bean.VideoMessage
;
import
com.zhiwei.messageflow.bean.WeiboMessage
;
import
com.zhiwei.messageflow.bean.WeiboMessage
;
import
com.zhiwei.messageflow.bean.ZhihuMessage
;
import
com.zhiwei.messageflow.bean.ZhihuMessage
;
import
com.zhiwei.messageflow.es.service.AutoMarkService
;
import
com.zhiwei.messageflow.es.service.EarlyWarningService
;
import
com.zhiwei.messageflow.es.service.EarlyWarningService
;
import
com.zhiwei.messageflow.mongo.bean.KeywordNew
;
import
com.zhiwei.messageflow.mongo.bean.KeywordNew
;
import
com.zhiwei.messageflow.mongo.bean.NoiseRule
;
import
com.zhiwei.messageflow.mongo.bean.NoiseRule
;
...
@@ -36,6 +37,9 @@ import com.zhiwei.messageflow.service.DisposeMessageService;
...
@@ -36,6 +37,9 @@ import com.zhiwei.messageflow.service.DisposeMessageService;
public
class
ES4RedisTask
{
public
class
ES4RedisTask
{
private
final
static
Logger
log
=
LoggerFactory
.
getLogger
(
ES4RedisTask
.
class
);
private
final
static
Logger
log
=
LoggerFactory
.
getLogger
(
ES4RedisTask
.
class
);
@Autowired
private
AutoMarkService
autoMarkService
;
@Autowired
@Autowired
private
DisposeMessageService
disposeMessageService
;
private
DisposeMessageService
disposeMessageService
;
...
@@ -145,6 +149,10 @@ public class ES4RedisTask {
...
@@ -145,6 +149,10 @@ public class ES4RedisTask {
}
}
allkeywords
.
addAll
(
kwn
.
getKeyWords
());
allkeywords
.
addAll
(
kwn
.
getKeyWords
());
}
}
//项目关键词为空
if
(
allkeywords
.
isEmpty
())
{
continue
;
}
// 根据不同平台获取数据(同一方法,统一封装为消息流实体
// 根据不同平台获取数据(同一方法,统一封装为消息流实体
...
@@ -159,7 +167,10 @@ public class ES4RedisTask {
...
@@ -159,7 +167,10 @@ public class ES4RedisTask {
// log.info("{}平台{}关键字词组无消息", platformName, allkeytitle);
// log.info("{}平台{}关键字词组无消息", platformName, allkeytitle);
continue
;
continue
;
}
}
//自动标注
autoMarkService
.
autoMarkMessages
(
messages
,
project
);
// log.info("{}平台{}关键词数据获取{}条", platformName, "全部", messages.size());
// log.info("{}平台{}关键词数据获取{}条", platformName, "全部", messages.size());
// 记录新的rsid
// 记录新的rsid
...
...
src/main/java/com/zhiwei/messageflow/ES4RedisThreadNew.java
View file @
63816bf1
...
@@ -30,7 +30,8 @@ public class ES4RedisThreadNew extends Thread {
...
@@ -30,7 +30,8 @@ public class ES4RedisThreadNew extends Thread {
// 单个平台单个关键词组每次查询数量
// 单个平台单个关键词组每次查询数量
private
static
final
int
count
=
300
;
private
static
final
int
count
=
300
;
// private static final int count = 50;
// private static final int max_Thread_num = 40;
// private static final int max_Thread_num = 40;
// private static int Thread_num = 0;
// private static int Thread_num = 0;
// private static final int max_Running_num = 3;
// private static final int max_Running_num = 3;
...
...
src/main/java/com/zhiwei/messageflow/config/MiddlewareConfig.java
0 → 100644
View file @
63816bf1
package
com
.
zhiwei
.
messageflow
.
config
;
import
org.springframework.boot.context.properties.ConfigurationProperties
;
import
org.springframework.context.annotation.Configuration
;
import
org.springframework.context.annotation.PropertySource
;
import
org.springframework.stereotype.Component
;
import
lombok.Data
;
import
lombok.ToString
;
/**
 * Middleware configuration.
 *
 * <p>Properties are read from {@code classpath:middleware.properties} under the
 * {@code middleware} prefix.
 *
 * <p>{@link #zookeeperIp} is kept as a public static field so that existing static
 * accesses keep compiling. Static fields get no Lombok-generated accessors and are
 * not populated by property binding on their own, so an explicit instance setter is
 * provided that copies the bound value into the static field.
 *
 * @author shentao
 * @date 2018-06-27 11:23:46
 */
@Data
@ToString
@Component
@Configuration
@ConfigurationProperties(prefix = "middleware")
@PropertySource(value = "classpath:middleware.properties")
public class MiddlewareConfig {

    /**
     * Value of {@code middleware.zookeeperIp}; {@code null} until the configuration
     * bean has been bound.
     */
    public static String zookeeperIp;

    /**
     * Instance setter used by property binding to populate the static
     * {@link #zookeeperIp} field.
     *
     * @param zookeeperIp the configured value of {@code middleware.zookeeperIp}
     */
    public void setZookeeperIp(String zookeeperIp) {
        MiddlewareConfig.zookeeperIp = zookeeperIp;
    }
}
src/main/java/com/zhiwei/messageflow/es/service/AutoMarkService.java
0 → 100644
View file @
63816bf1
package
com
.
zhiwei
.
messageflow
.
es
.
service
;
import
java.util.List
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.messageflow.mongo.bean.Project
;
/**
 * Auto-mark (automatic annotation) service.
 * @ClassName: AutoMarkService
 * @Description: Contract for auto-marking the messages of a project.
 * @author shentao
 * @date 2018-06-27 10:42:49
 */
public
interface
AutoMarkService
{
/**
 * Auto-marks messages on a per-project basis.
 * @Title: autoMarkMessages
 * @param messages the messages to consider for auto-marking
 * @param project the project the messages belong to
 */
void
autoMarkMessages
(
List
<
JSONObject
>
messages
,
Project
project
);
}
src/main/java/com/zhiwei/messageflow/es/service/impl/AutoMarkServiceImpl.java
0 → 100644
View file @
63816bf1
package
com
.
zhiwei
.
messageflow
.
es
.
service
.
impl
;
import
java.util.ArrayList
;
import
java.util.List
;
import
org.springframework.stereotype.Component
;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.messageflow.config.MiddlewareConfig
;
import
com.zhiwei.messageflow.es.service.AutoMarkService
;
import
com.zhiwei.messageflow.mongo.bean.Project
;
import
com.zhiwei.messageflow.util.MatchingInfoUtil
;
import
com.zhiwei.middleware.automaticmark.Service.AutomaticMarkClient
;
/**
 * {@link AutoMarkService} implementation that hands eligible messages of a project
 * to the automatic-mark middleware client.
 *
 * <p>Only messages whose {@code markPt} is {@code "网媒"} are submitted for now, and
 * among those the ones whose brief platform resolves to {@code "问答"} or
 * {@code "贴吧论坛"} are excluded.
 */
@Component
public class AutoMarkServiceImpl implements AutoMarkService {

    /** Address used when {@link MiddlewareConfig#zookeeperIp} has not been populated. */
    private static final String DEFAULT_ZOOKEEPER_ADDRESS = "zookeeper://192.168.0.234:2181";

    /** The only {@code markPt} value that is auto-marked at the moment. */
    private static final String MEDIA_MARK_PT = "网媒";

    /** Type argument passed to the middleware client together with the batch. */
    private static final String MEDIA_MARK_TYPE = "media";

    /** Strips markup tags from titles; compiled once instead of once per message. */
    private static final java.util.regex.Pattern TAG_PATTERN =
            java.util.regex.Pattern.compile("<[.[^>]]*>");

    /**
     * Creates the middleware client on first use rather than at class-load time, so
     * that a value bound into {@link MiddlewareConfig#zookeeperIp} can be picked up.
     */
    private static final class ClientHolder {
        private static final AutomaticMarkClient CLIENT =
                AutomaticMarkClient.getClient(resolveZookeeperAddress());

        private ClientHolder() {
        }
    }

    /**
     * Returns the configured ZooKeeper address, or the built-in default when no
     * value is configured.
     */
    private static String resolveZookeeperAddress() {
        String configured = MiddlewareConfig.zookeeperIp;
        if (configured == null || configured.trim().isEmpty()) {
            return DEFAULT_ZOOKEEPER_ADDRESS;
        }
        return configured.trim();
    }

    /**
     * Submits the eligible messages of {@code project} to the auto-mark middleware.
     *
     * <p>Does nothing when the project or the message list is missing, when the
     * project does not have auto-marking switched on, or when no message is eligible.
     *
     * @param messages messages to consider; each one is read for {@code markPt},
     *                 {@code id}, {@code title}, {@code type} and {@code source}
     * @param project  project the messages belong to; its name is sent as
     *                 {@code markGroup}
     */
    @Override
    public void autoMarkMessages(List<JSONObject> messages, Project project) {
        if (project == null || messages == null || messages.isEmpty()) {
            return;
        }
        // Null-safe whether getIsAutoMark() returns boolean or Boolean.
        if (!Boolean.TRUE.equals(project.getIsAutoMark())) {
            return;
        }

        List<DBObject> batch = new ArrayList<>();
        for (JSONObject msg : messages) {
            if (msg == null) {
                continue;
            }
            // Constant-first comparison: a message without markPt is simply skipped
            // instead of failing the whole batch.
            if (MEDIA_MARK_PT.equals(msg.getString("markPt")) && canbeAutoMark(msg)) {
                batch.add(toMarkRequest(msg, project));
            }
        }

        // Nothing eligible: skip the middleware call entirely.
        if (batch.isEmpty()) {
            return;
        }
        ClientHolder.CLIENT.autoMark(batch, MEDIA_MARK_TYPE);
    }

    /**
     * Wraps one message into the document sent to the middleware: {@code _id},
     * tag-stripped {@code title} and {@code markGroup} (the project name).
     */
    private DBObject toMarkRequest(JSONObject msg, Project project) {
        String title = msg.getString("title");
        // A missing title is sent as an empty string rather than raising an NPE.
        String plainTitle = title == null ? "" : TAG_PATTERN.matcher(title).replaceAll("");

        DBObject dbObject = new BasicDBObject();
        dbObject.put("_id", msg.getString("id"));
        dbObject.put("title", plainTitle);
        dbObject.put("markGroup", project.getProjectName());
        return dbObject;
    }

    /**
     * Decides whether a message may be auto-marked; Q&amp;A ({@code "问答"}) and
     * forum ({@code "贴吧论坛"}) platforms are excluded.
     *
     * @param msg the message to check
     * @return {@code false} for the excluded platforms, {@code true} otherwise
     *         (including when the platform resolves to {@code null})
     */
    private boolean canbeAutoMark(JSONObject msg) {
        // NOTE(review): "type" is passed as both the first and the second argument,
        // exactly as in the original code; confirm against getBriefMediaPt's signature.
        String pt = MatchingInfoUtil.getBriefMediaPt(
                msg.getString("type"), msg.getString("type"), msg.getString("source"));
        return !("问答".equals(pt) || "贴吧论坛".equals(pt));
    }
}
src/main/java/com/zhiwei/messageflow/es/service/impl/ES4BeanServiceImpl.java
View file @
63816bf1
...
@@ -130,7 +130,7 @@ public class ES4BeanServiceImpl implements ES4BeanService {
...
@@ -130,7 +130,7 @@ public class ES4BeanServiceImpl implements ES4BeanService {
messages
=
noiseProcessingService
.
allDenoising
(
noiseRules
,
searchHits
,
platform
,
project
);
messages
=
noiseProcessingService
.
allDenoising
(
noiseRules
,
searchHits
,
platform
,
project
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
e
.
getStackTrace
()
+
" "
+
e
.
getMessage
()
);
log
.
error
(
"error:"
,
e
);
}
}
return
messages
;
return
messages
;
...
...
src/main/java/com/zhiwei/messageflow/util/ESQueryUtil.java
View file @
63816bf1
...
@@ -465,7 +465,9 @@ public class ESQueryUtil {
...
@@ -465,7 +465,9 @@ public class ESQueryUtil {
}
}
queryBuilder
.
should
(
mixboolQueryBuilder
);
queryBuilder
.
should
(
mixboolQueryBuilder
);
}
else
{
}
else
{
ESQueryUtil
.
matchPhraseQueryFields
(
queryBuilder
,
"should"
,
keyword
,
fieldlist
);
BoolQueryBuilder
boolQueryBuilder
=
QueryBuilders
.
boolQuery
();
ESQueryUtil
.
matchPhraseQueryFields
(
boolQueryBuilder
,
"should"
,
keyword
,
fieldlist
);
queryBuilder
.
should
(
boolQueryBuilder
);
}
}
}
}
return
queryBuilder
;
return
queryBuilder
;
...
...
src/main/java/com/zhiwei/messageflow/util/MatchingInfoUtil.java
0 → 100644
View file @
63816bf1
This diff is collapsed.
Click to expand it.
src/main/resources/middleware.properties
0 → 100644
View file @
63816bf1
middleware.zookeeperIp
=
zookeeper://192.168.0.234:2181
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment