Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
messageflow
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
虞诚毅
messageflow
Commits
63816bf1
Commit
63816bf1
authored
Jul 04, 2018
by
shentao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
2018/7/4 1自动标注加入2es搜索语句优化
parent
46e516cf
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
706 additions
and
4 deletions
+706
-4
pom.xml
+6
-0
src/main/java/com/zhiwei/messageflow/ES4RedisStart.java
+4
-0
src/main/java/com/zhiwei/messageflow/ES4RedisTask.java
+12
-1
src/main/java/com/zhiwei/messageflow/ES4RedisThreadNew.java
+2
-1
src/main/java/com/zhiwei/messageflow/config/MiddlewareConfig.java
+27
-0
src/main/java/com/zhiwei/messageflow/es/service/AutoMarkService.java
+25
-0
src/main/java/com/zhiwei/messageflow/es/service/impl/AutoMarkServiceImpl.java
+74
-0
src/main/java/com/zhiwei/messageflow/es/service/impl/ES4BeanServiceImpl.java
+1
-1
src/main/java/com/zhiwei/messageflow/util/ESQueryUtil.java
+3
-1
src/main/java/com/zhiwei/messageflow/util/MatchingInfoUtil.java
+550
-0
src/main/resources/middleware.properties
+2
-0
No files found.
pom.xml
View file @
63816bf1
...
...
@@ -121,6 +121,12 @@
<artifactId>
fastjson
</artifactId>
<version>
1.2.34
</version>
</dependency>
<!-- Automatic-marking (auto-annotation) middleware client -->
<dependency>
<groupId>
com.zhiwei.middleware
</groupId>
<artifactId>
automaticmark-client
</artifactId>
<version>
1.0-SNAPSHOT
</version>
</dependency>
</dependencies>
...
...
src/main/java/com/zhiwei/messageflow/ES4RedisStart.java
View file @
63816bf1
...
...
@@ -43,6 +43,10 @@ public class ES4RedisStart {
// 遍历项目
for
(
Project
project
:
projects
)
{
// if(!project.getProjectName().equals("证监会")) {
// continue;
// }
/**
* 项目全部平台(公共+私有)
...
...
src/main/java/com/zhiwei/messageflow/ES4RedisTask.java
View file @
63816bf1
...
...
@@ -19,6 +19,7 @@ import com.zhiwei.messageflow.bean.MediaMessage;
import
com.zhiwei.messageflow.bean.VideoMessage
;
import
com.zhiwei.messageflow.bean.WeiboMessage
;
import
com.zhiwei.messageflow.bean.ZhihuMessage
;
import
com.zhiwei.messageflow.es.service.AutoMarkService
;
import
com.zhiwei.messageflow.es.service.EarlyWarningService
;
import
com.zhiwei.messageflow.mongo.bean.KeywordNew
;
import
com.zhiwei.messageflow.mongo.bean.NoiseRule
;
...
...
@@ -36,6 +37,9 @@ import com.zhiwei.messageflow.service.DisposeMessageService;
public
class
ES4RedisTask
{
private
final
static
Logger
log
=
LoggerFactory
.
getLogger
(
ES4RedisTask
.
class
);
@Autowired
private
AutoMarkService
autoMarkService
;
@Autowired
private
DisposeMessageService
disposeMessageService
;
...
...
@@ -145,6 +149,10 @@ public class ES4RedisTask {
}
allkeywords
.
addAll
(
kwn
.
getKeyWords
());
}
//项目关键词为空
if
(
allkeywords
.
isEmpty
())
{
continue
;
}
// 根据不同平台获取数据(同一方法,统一封装为消息流实体
...
...
@@ -159,7 +167,10 @@ public class ES4RedisTask {
// log.info("{}平台{}关键字词组无消息", platformName, allkeytitle);
continue
;
}
//自动标注
autoMarkService
.
autoMarkMessages
(
messages
,
project
);
// log.info("{}平台{}关键词数据获取{}条", platformName, "全部", messages.size());
// 记录新的rsid
...
...
src/main/java/com/zhiwei/messageflow/ES4RedisThreadNew.java
View file @
63816bf1
...
...
@@ -30,7 +30,8 @@ public class ES4RedisThreadNew extends Thread {
// 单个平台单个关键词组每次查询数量
private
static
final
int
count
=
300
;
// private static final int count = 50;
// private static final int max_Thread_num = 40;
// private static int Thread_num = 0;
// private static final int max_Running_num = 3;
...
...
src/main/java/com/zhiwei/messageflow/config/MiddlewareConfig.java
0 → 100644
View file @
63816bf1
package
com
.
zhiwei
.
messageflow
.
config
;
import
org.springframework.boot.context.properties.ConfigurationProperties
;
import
org.springframework.context.annotation.Configuration
;
import
org.springframework.context.annotation.PropertySource
;
import
org.springframework.stereotype.Component
;
import
lombok.Data
;
import
lombok.ToString
;
/**
 * Middleware configuration loaded from {@code classpath:middleware.properties}.
 *
 * <p>Properties are bound with the {@code middleware} prefix, so the key
 * {@code middleware.zookeeperIp} populates {@link #zookeeperIp}.
 *
 * <p>The field stays {@code public static} so existing {@code MiddlewareConfig.zookeeperIp}
 * reads keep compiling. Lombok's {@code @Data} does not generate accessors for static fields,
 * and property binding goes through JavaBean setters, so the explicit instance accessors below
 * are what allow the value to be populated at startup.
 *
 * @author shentao
 * @date 2018-06-27 11:23:46
 */
@Data
@ToString
@Component
@Configuration
@ConfigurationProperties(prefix = "middleware")
@PropertySource(value = "classpath:middleware.properties")
public class MiddlewareConfig {

    /** Registry address of the middleware (e.g. {@code zookeeper://host:2181}); null until bound. */
    public static String zookeeperIp;

    /**
     * Returns the currently bound registry address.
     *
     * @return the registry address, or {@code null} if binding has not happened yet
     */
    public String getZookeeperIp() {
        return zookeeperIp;
    }

    /**
     * Instance setter used by property binding; stores the value in the shared static field.
     *
     * @param zookeeperIp registry address read from {@code middleware.zookeeperIp}
     */
    public void setZookeeperIp(String zookeeperIp) {
        MiddlewareConfig.zookeeperIp = zookeeperIp;
    }
}
src/main/java/com/zhiwei/messageflow/es/service/AutoMarkService.java
0 → 100644
View file @
63816bf1
package
com
.
zhiwei
.
messageflow
.
es
.
service
;
import
java.util.List
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.messageflow.mongo.bean.Project
;
/**
 * Service that submits a project's messages for automatic marking.
 *
 * @author shentao
 * @date 2018-06-27 10:42:49
 */
public interface AutoMarkService {

    /**
     * Auto-marks the given messages on behalf of a project.
     *
     * @param messages messages to consider for automatic marking
     * @param project  project the messages were collected for
     */
    void autoMarkMessages(List<JSONObject> messages, Project project);
}
src/main/java/com/zhiwei/messageflow/es/service/impl/AutoMarkServiceImpl.java
0 → 100644
View file @
63816bf1
package
com
.
zhiwei
.
messageflow
.
es
.
service
.
impl
;
import
java.util.ArrayList
;
import
java.util.List
;
import
org.springframework.stereotype.Component
;
import
com.alibaba.fastjson.JSONObject
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.messageflow.config.MiddlewareConfig
;
import
com.zhiwei.messageflow.es.service.AutoMarkService
;
import
com.zhiwei.messageflow.mongo.bean.Project
;
import
com.zhiwei.messageflow.util.MatchingInfoUtil
;
import
com.zhiwei.middleware.automaticmark.Service.AutomaticMarkClient
;
/**
 * {@link AutoMarkService} implementation that forwards eligible web-media messages of a project
 * to the automatic-marking middleware client.
 */
@Component
public class AutoMarkServiceImpl implements AutoMarkService {

    /** {@code markPt} value of web-media messages; only these are auto-marked for now. */
    private static final String MEDIA_MARK_PT = "网媒";

    /** Matches markup tags that are stripped from titles before submission; compiled once. */
    private static final java.util.regex.Pattern TAG_PATTERN =
            java.util.regex.Pattern.compile("<[.[^>]]*>");

    // NOTE(review): the registry address is hard-coded here although middleware.properties
    // declares middleware.zookeeperIp; consider injecting it once that binding is confirmed.
    // An alternate address (zookeeper://192.168.0.203:2181) was previously kept here as a comment.
    private static AutomaticMarkClient client =
            AutomaticMarkClient.getClient("zookeeper://192.168.0.234:2181");

    /**
     * Sends the auto-markable web-media messages of {@code project} to the marking middleware.
     *
     * <p>Nothing is sent when the project does not have auto-marking enabled, when there are no
     * messages, or when no message qualifies. Each submitted entry carries the message id, the
     * title with markup tags removed, and the project name as {@code markGroup}.
     *
     * @param messages messages to consider; {@code null} or empty is a no-op
     * @param project  owning project; {@code null} is a no-op
     */
    @Override
    public void autoMarkMessages(List<JSONObject> messages, Project project) {
        if (messages == null || messages.isEmpty() || project == null) {
            return;
        }
        // Boolean.TRUE.equals tolerates a null flag if the getter returns a boxed Boolean.
        if (!Boolean.TRUE.equals(project.getIsAutoMark())) {
            return;
        }

        List<DBObject> payload = new ArrayList<>();
        for (JSONObject msg : messages) {
            if (msg == null) {
                continue;
            }
            // Constant-first comparison: a message without markPt is simply not web media.
            if (!MEDIA_MARK_PT.equals(msg.getString("markPt")) || !canbeAutoMark(msg)) {
                continue;
            }

            String title = msg.getString("title");
            DBObject dbObject = new BasicDBObject();
            // NOTE(review): reads "id" from the message; confirm the message carries "id"
            // and not only "_id".
            dbObject.put("_id", msg.getString("id"));
            // A missing title is submitted as an empty string instead of failing the batch.
            dbObject.put("title", title == null ? "" : TAG_PATTERN.matcher(title).replaceAll(""));
            dbObject.put("markGroup", project.getProjectName());
            payload.add(dbObject);
        }

        // Avoid a remote call when nothing qualified.
        if (payload.isEmpty()) {
            return;
        }
        client.autoMark(payload, "media");
    }

    /**
     * Tells whether a web-media message may be auto-marked; Q&amp;A and tieba/forum sources
     * are excluded.
     *
     * @param msg message to inspect
     * @return {@code false} when the brief platform resolves to "问答" or "贴吧论坛"
     */
    private boolean canbeAutoMark(JSONObject msg) {
        // Null fields are treated as empty so the platform lookup cannot fail on them.
        String type = msg.getString("type") == null ? "" : msg.getString("type");
        String source = msg.getString("source") == null ? "" : msg.getString("source");

        // NOTE(review): the second argument of getBriefMediaPt is "mid" but "type" is passed,
        // as in the original code; confirm whether the message link was intended here.
        String pt = MatchingInfoUtil.getBriefMediaPt(type, type, source);
        return !("问答".equals(pt) || "贴吧论坛".equals(pt));
    }
}
src/main/java/com/zhiwei/messageflow/es/service/impl/ES4BeanServiceImpl.java
View file @
63816bf1
...
...
@@ -130,7 +130,7 @@ public class ES4BeanServiceImpl implements ES4BeanService {
messages
=
noiseProcessingService
.
allDenoising
(
noiseRules
,
searchHits
,
platform
,
project
);
}
catch
(
Exception
e
)
{
log
.
error
(
e
.
getStackTrace
()
+
" "
+
e
.
getMessage
()
);
log
.
error
(
"error:"
,
e
);
}
return
messages
;
...
...
src/main/java/com/zhiwei/messageflow/util/ESQueryUtil.java
View file @
63816bf1
...
...
@@ -465,7 +465,9 @@ public class ESQueryUtil {
}
queryBuilder
.
should
(
mixboolQueryBuilder
);
}
else
{
ESQueryUtil
.
matchPhraseQueryFields
(
queryBuilder
,
"should"
,
keyword
,
fieldlist
);
BoolQueryBuilder
boolQueryBuilder
=
QueryBuilders
.
boolQuery
();
ESQueryUtil
.
matchPhraseQueryFields
(
boolQueryBuilder
,
"should"
,
keyword
,
fieldlist
);
queryBuilder
.
should
(
boolQueryBuilder
);
}
}
return
queryBuilder
;
...
...
src/main/java/com/zhiwei/messageflow/util/MatchingInfoUtil.java
0 → 100644
View file @
63816bf1
package
com
.
zhiwei
.
messageflow
.
util
;
import
java.io.IOException
;
import
java.net.MalformedURLException
;
import
java.net.URL
;
import
java.text.ParseException
;
import
java.text.SimpleDateFormat
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Locale
;
import
java.util.Map
;
import
java.util.Set
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
org.apache.commons.lang3.StringUtils
;
import
org.elasticsearch.common.text.Text
;
import
org.elasticsearch.search.SearchHit
;
import
org.elasticsearch.search.SearchHits
;
import
org.elasticsearch.search.fetch.subphase.highlight.HighlightField
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.fasterxml.jackson.core.JsonProcessingException
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
import
redis.clients.jedis.Tuple
;
@SuppressWarnings
(
"unchecked"
)
public
class
MatchingInfoUtil
{
private
static
ObjectMapper
mapper
=
new
ObjectMapper
();
/**
 * Converts an Elasticsearch {@link SearchHit} into a {@link JSONObject}.
 *
 * <p>The hit's source map is serialized to JSON and parsed back into the result, which is then
 * handed to a platform-specific handler chosen by the hit type:
 * <ul>
 *   <li>{@code "status"} with a non-null {@code rstime} source field: weibo</li>
 *   <li>{@code "zhihu"}: zhihu</li>
 *   <li>{@code "video"}: video</li>
 *   <li>anything else (including a null type): web media</li>
 * </ul>
 *
 * <p>If serialization fails, the handlers still run on an empty result object.
 *
 * @param searchHit hit to convert
 * @return the converted message
 */
public static JSONObject getBean(SearchHit searchHit) {
    // Read the source once and reuse it.
    Map<String, Object> sourceHitMap = searchHit.getSource();

    JSONObject res = new JSONObject();
    try {
        res = JSONObject.parseObject(mapper.writeValueAsString(sourceHitMap));
    } catch (JsonProcessingException e) {
        // NOTE(review): should go through a logger; none is declared in this class.
        e.printStackTrace();
    }

    String type = searchHit.getType();
    // Only the presence of rstime matters, so it is not parsed as a number (parsing could
    // throw NumberFormatException on a non-integer value).
    boolean hasRstime = sourceHitMap.get("rstime") != null;

    // Constant-first comparisons keep a null type from throwing; it falls through to media.
    if ("status".equals(type) && hasRstime) {
        return handleWeiboBean(searchHit, res);
    }
    if ("zhihu".equals(type)) {
        return handleZhihuBean(searchHit, res);
    }
    if ("video".equals(type)) {
        return handleVideoBean(searchHit, res);
    }
    return handleMediaBean(searchHit, res);
}
/**
 * Fills weibo-specific fields on {@code res}.
 *
 * <p>{@code text} and {@code roottext} are taken from the concatenated highlight fragments when
 * present, otherwise from the string form of the corresponding source field. Also sets
 * {@code _id} to the hit id and both {@code markPt} and {@code pt} to "微博".
 *
 * @param searchHit hit being converted
 * @param res       result object to populate
 * @return {@code res}
 */
private static JSONObject handleWeiboBean(SearchHit searchHit, JSONObject res) {
    Map<String, Object> source = searchHit.getSource();
    Map<String, HighlightField> highlights = searchHit.getHighlightFields();

    Text[] textFragments = null;
    if (highlights.containsKey("text")) {
        textFragments = highlights.get("text").getFragments();
    }
    Text[] rootFragments = null;
    if (highlights.containsKey("roottext")) {
        rootFragments = highlights.get("roottext").getFragments();
    }

    StringBuilder textBuf = new StringBuilder();
    if (textFragments != null) {
        for (Text fragment : textFragments) {
            textBuf.append(fragment.string());
        }
    }
    StringBuilder rootBuf = new StringBuilder();
    if (rootFragments != null) {
        for (Text fragment : rootFragments) {
            rootBuf.append(fragment.string());
        }
    }

    // Fall back to the raw source value when no highlight text was produced.
    String text = textBuf.length() == 0 ? String.valueOf(source.get("text")) : textBuf.toString();
    String roottext =
            rootBuf.length() == 0 ? String.valueOf(source.get("roottext")) : rootBuf.toString();

    res.put("_id", searchHit.getId());
    res.put("text", text);
    res.put("roottext", roottext);
    res.put("markPt", "微博");
    res.put("pt", "微博");
    // The channel-influence lookup (channelIndex) is currently disabled.
    return res;
}
/**
 * Fills zhihu-specific fields on {@code res}.
 *
 * <p>Sets {@code _id}, {@code markPt} and {@code pt}, re-formats {@code insert_at} and
 * {@code update_at}, extracts a PNG image URL from the question/answer content, and replaces
 * {@code question_title}, {@code question_content} and {@code answer_content} with their
 * highlighted text (or a value truncated to 300 characters when there is no highlight), with
 * {@code <img>} tags removed. A non-empty {@code img_url} takes precedence over the extracted
 * image.
 *
 * @param searchHit hit being converted
 * @param res       result object, already holding the hit's source fields
 * @return {@code res}
 */
private static JSONObject handleZhihuBean(SearchHit searchHit, JSONObject res) {
    Map<String, HighlightField> highlights = searchHit.getHighlightFields();

    res.put("_id", searchHit.getId());
    res.put("markPt", "知乎");
    res.put("pt", "知乎");

    normalizeZhihuTime(res, "insert_at");
    normalizeZhihuTime(res, "update_at");

    String rawQuestion = res.getString("question_content") == null ? "" : res.getString("question_content");
    String rawAnswer = res.getString("answer_content") == null ? "" : res.getString("answer_content");
    String img = "";
    if (!"".equals(rawQuestion)) {
        img = regxString(rawQuestion);
    }
    // NOTE(review): a non-empty answer always replaces the question's image, even when the
    // answer contains none; kept as in the original, confirm this is intended.
    if (!"".equals(rawAnswer)) {
        img = regxString(rawAnswer);
    }

    Text[] titleFragments = highlights.containsKey("question_title")
            ? highlights.get("question_title").getFragments() : null;
    Text[] questionFragments = highlights.containsKey("question_content")
            ? highlights.get("question_content").getFragments() : null;
    Text[] answerFragments = highlights.containsKey("answer_content")
            ? highlights.get("answer_content").getFragments() : null;

    String questionTitle = joinZhihuFragments(titleFragments, false);
    String questionContent = joinZhihuFragments(questionFragments, true);
    String answerContent = joinZhihuFragments(answerFragments, true);

    if (questionTitle.isEmpty()) {
        questionTitle = zhihuSourceFallback(res, "question_title");
    }
    if (questionContent.isEmpty()) {
        questionContent = zhihuSourceFallback(res, "question_content");
    }
    if (answerContent.isEmpty()) {
        answerContent = zhihuSourceFallback(res, "answer_content");
    }

    res.put("question_title", questionTitle.replaceAll("<img[^>]*>", ""));
    res.put("question_content", questionContent.replaceAll("<img[^>]*>", ""));
    res.put("answer_content", answerContent.replaceAll("<img[^>]*>", ""));
    res.put("img", img);

    // Fixed: the original test was inverted ("".equals), so a real img_url was never applied
    // and an empty one overwrote the extracted image.
    String imgUrl = res.getString("img_url");
    if (imgUrl != null && !imgUrl.isEmpty()) {
        res.put("img", imgUrl);
    }
    return res;
}

/**
 * Re-formats the timestamp stored under {@code key}, if present: the first "000" run is removed
 * before parsing, and the field is overwritten with the parsed date's string form. The field is
 * left untouched when it is missing, empty, or cannot be parsed.
 */
private static void normalizeZhihuTime(JSONObject res, String key) {
    String raw = res.getString(key);
    if (raw == null || raw.isEmpty()) {
        return;
    }
    // NOTE(review): replaceFirst removes the first "000" anywhere in the value, not only a
    // trailing sub-millisecond part; confirm against the stored timestamp format.
    String trimmed = raw.replaceFirst("000", "");
    if (trimmed.isEmpty()) {
        return;
    }
    Date parsed = TimeUtil.parseTime(trimmed, "yyyy-MM-dd'T'HH:mm:ss.SSS");
    if (parsed != null) {
        res.put(key, parsed.toString());
    }
}

/**
 * Concatenates highlight fragments. With {@code cutLong}, a fragment longer than 150 characters
 * whose first {@code <font} tag starts after index 10 is cut to begin 10 characters before that
 * tag and prefixed with "……".
 */
private static String joinZhihuFragments(Text[] fragments, boolean cutLong) {
    if (fragments == null) {
        return "";
    }
    StringBuilder joined = new StringBuilder();
    for (Text fragment : fragments) {
        String piece = fragment.string();
        if (cutLong && piece.length() > 150) {
            int highlightAt = piece.indexOf("<font");
            if (highlightAt > 10) {
                piece = "……" + piece.substring(highlightAt - 10);
            }
        }
        joined.append(piece);
    }
    return joined.toString();
}

/** Returns the string form of {@code res[key]} truncated to at most 300 characters. */
private static String zhihuSourceFallback(JSONObject res, String key) {
    String value = res.getString(key) + "";
    return value.length() > 300 ? value.substring(0, 300) : value;
}
/**
 * Fills video-specific fields on {@code res}.
 *
 * <p>Sets {@code markPt} to "视频", copies {@code pt} from the source, sets {@code _id} to the
 * hit id, re-formats {@code time} (falling back to the current time when the source value
 * cannot be read or parsed), and sets {@code title} to the concatenated highlight fragments or,
 * when there are none, the string form of the source title.
 *
 * @param searchHit hit being converted
 * @param res       result object to populate
 * @return {@code res}
 */
private static JSONObject handleVideoBean(SearchHit searchHit, JSONObject res) {
    Map<String, Object> source = searchHit.getSource();
    Map<String, HighlightField> highlights = searchHit.getHighlightFields();

    res.put("markPt", "视频");
    res.put("pt", source.get("pt").toString());
    res.put("_id", searchHit.getId());

    try {
        String rawTime = source.get("time").toString().replaceFirst("000", "");
        res.put("time", TimeUtil.parseTime(rawTime, "yyyy-MM-dd'T'HH:mm:ss").toString());
    } catch (Exception e) {
        // Any failure reading or parsing the time falls back to "now".
        res.put("time", TimeUtil.formatDate(new Date()));
    }

    Text[] titleFragments = null;
    if (highlights.containsKey("title")) {
        titleFragments = highlights.get("title").getFragments();
    }

    StringBuilder titleBuf = new StringBuilder();
    if (titleFragments != null) {
        for (Text fragment : titleFragments) {
            titleBuf.append(fragment.string());
        }
    }

    String title = titleBuf.length() == 0
            ? String.valueOf(source.get("title"))
            : titleBuf.toString();
    res.put("title", title);
    return res;
}
/**
 * Fills web-media fields on {@code res}.
 *
 * <p>Sets {@code markPt} to "网媒", copies {@code pt} from the source, sets {@code _id} to the
 * hit id and re-formats {@code time} (falling back to the current time on any failure).
 * {@code title} and {@code content} come from the highlight fragments when present. Long content
 * fragments are cut to start shortly before the first highlight tag. Without highlights, the
 * source values are used, with content truncated to 300 characters.
 *
 * @param searchHit hit being converted
 * @param res       result object to populate
 * @return {@code res}
 */
private static JSONObject handleMediaBean(SearchHit searchHit, JSONObject res) {
    Map<String, Object> source = searchHit.getSource();
    Map<String, HighlightField> highlights = searchHit.getHighlightFields();

    res.put("markPt", "网媒");
    res.put("pt", source.get("pt").toString());
    res.put("_id", searchHit.getId());

    try {
        String rawTime = source.get("time").toString().replaceFirst("000", "");
        res.put("time", TimeUtil.parseTime(rawTime, "yyyy-MM-dd'T'HH:mm:ss").toString());
    } catch (Exception e) {
        // Any failure reading or parsing the time falls back to "now".
        res.put("time", TimeUtil.formatDate(new Date()));
    }

    Text[] titleFragments = null;
    if (highlights.containsKey("title")) {
        titleFragments = highlights.get("title").getFragments();
    }
    Text[] contentFragments = null;
    if (highlights.containsKey("content")) {
        contentFragments = highlights.get("content").getFragments();
    }

    StringBuilder titleBuf = new StringBuilder();
    if (titleFragments != null) {
        for (Text fragment : titleFragments) {
            titleBuf.append(fragment.string());
        }
    }

    StringBuilder contentBuf = new StringBuilder();
    if (contentFragments != null) {
        for (Text fragment : contentFragments) {
            String piece = fragment.string();
            // Cut an over-long fragment so it starts 10 characters before the highlight tag.
            if (piece.length() > 150) {
                int highlightAt = piece.indexOf("<font");
                if (highlightAt > 10) {
                    piece = "……" + piece.substring(highlightAt - 10, piece.length());
                }
            }
            contentBuf.append(piece);
        }
    }

    String content = contentBuf.toString();
    if (content.isEmpty()) {
        if (source.containsKey("content")) {
            content = source.get("content") + "";
        } else {
            content = "";
        }
        if (content.length() > 300) {
            content = content.substring(0, 300);
        }
    }

    String title = titleBuf.toString();
    if (title.isEmpty()) {
        if (source.containsKey("title")) {
            title = String.valueOf(source.get("title"));
        } else {
            title = "";
        }
    }

    res.put("content", content);
    res.put("title", title);
    // The per-source channel-influence lookup (channelIndex) is currently disabled.
    return res;
}
/**
 * Matches an http(s) URL ending in ".png". Compiled once because the pattern is constant.
 * The scheme is matched literally, the dot before "png" is escaped, and hyphens, percent signs
 * and tildes are accepted in the host/path.
 */
private static final Pattern PNG_URL_PATTERN =
        Pattern.compile("https?://[-0-9a-zA-Z_/.:%~]+\\.png");

/**
 * Returns the first PNG image URL found in the given text.
 *
 * @param str text to search; may be {@code null}
 * @return the first {@code http://} or {@code https://} URL ending in {@code .png}, or an
 *         empty string when {@code str} is {@code null} or contains no such URL
 */
public static String regxString(String str) {
    if (str == null) {
        return "";
    }
    Matcher matcher = PNG_URL_PATTERN.matcher(str);
    return matcher.find() ? matcher.group() : "";
}
/**
 * Tells whether a message belongs to the given fixed marking platform.
 *
 * <p>Old and new systems mix web media and Toutiao items under the same types, so those two
 * platforms are told apart by whether the link contains {@code toutiao.com}.
 *
 * @param type   message type
 * @param mid    message link/identifier; inspected for host substrings
 * @param source message source name
 * @param pt     platform to test against ("网媒", "微信", "平媒", "今日头条", "客户端",
 *               "贴吧论坛", "问答" or "视频")
 * @return {@code true} when the message matches {@code pt}; {@code false} otherwise, including
 *         for an unrecognized {@code pt}
 */
public static boolean mediaPt(String type, String mid, String source, String pt) {
    switch (pt) {
        case "网媒":
            return isNewsTypeOutsideWeixin(type, mid) && !mid.contains("toutiao.com");
        case "微信":
            return mid.contains("mp.weixin.qq.com");
        case "平媒":
            return type.equals("平媒");
        case "今日头条":
            return isNewsTypeOutsideWeixin(type, mid) && mid.contains("toutiao.com");
        case "客户端":
            return type.endsWith("客户端") && !type.equals("雪球客户端");
        case "贴吧论坛":
            return type.endsWith("贴吧") || type.endsWith("论坛");
        case "问答":
            return type.endsWith("问答")
                    || type.equals("脉脉")
                    || type.equals("雪球客户端")
                    || source.equals("悟空问答");
        case "视频":
            return source.equals("西瓜视频");
        default:
            return false;
    }
}

/** True for news-like types (web media, *新闻, *自媒体, plug-in, Yidian, *头条) not hosted on WeChat. */
private static boolean isNewsTypeOutsideWeixin(String type, String mid) {
    boolean newsLike = type.equals("网媒")
            || type.endsWith("新闻")
            || type.endsWith("自媒体")
            || type.equals("插件标注")
            || type.equals("一点资讯")
            || type.endsWith("头条");
    return newsLike && !mid.contains("mp.weixin.qq.com");
}
/**
 * Resolves the fixed marking platform of a message (used for downloads).
 *
 * <p>Unlike the brief variant, Toutiao links, client types and Q&amp;A types are reported with
 * the message's own {@code type} rather than a collapsed platform name.
 *
 * @param type   message type
 * @param mid    message link/identifier; inspected for host substrings
 * @param source message source name
 * @return the platform name; "网媒" when nothing more specific applies
 */
public static String getMediaPt(String type, String mid, String source) {
    boolean newsLike = type.equals("网媒")
            || type.endsWith("新闻")
            || type.endsWith("自媒体")
            || type.equals("插件标注")
            || type.equals("一点资讯")
            || type.endsWith("头条");

    // A WeChat link wins regardless of type.
    if (mid.contains("mp.weixin.qq.com")) {
        return "微信";
    }
    // Old and new systems mix web media and Toutiao; the link tells them apart.
    if (newsLike) {
        return mid.contains("toutiao.com") ? type : "网媒";
    }
    if (type.equals("平媒")) {
        return "平媒";
    }
    if (type.endsWith("客户端") && !type.equals("雪球客户端")) {
        return type;
    }
    if (type.endsWith("贴吧") || type.endsWith("论坛")) {
        return "贴吧论坛";
    }
    if (type.endsWith("问答")
            || type.equals("脉脉")
            || type.equals("雪球客户端")
            || source.equals("悟空问答")) {
        return type;
    }
    return "网媒";
}
/**
 * Resolves the fixed marking platform of a message (used for briefings).
 *
 * <p>Every message is collapsed into one of "网媒", "今日头条", "微信", "平媒", "客户端",
 * "贴吧论坛" or "问答".
 *
 * @param type   message type
 * @param mid    message link/identifier; inspected for host substrings
 * @param source message source name
 * @return the platform name; "网媒" when nothing more specific applies
 */
public static String getBriefMediaPt(String type, String mid, String source) {
    boolean newsLike = type.equals("网媒")
            || type.endsWith("新闻")
            || type.endsWith("自媒体")
            || type.equals("插件标注")
            || type.equals("一点资讯")
            || type.endsWith("头条");
    boolean onWeixin = mid.contains("mp.weixin.qq.com");

    if (onWeixin) {
        return "微信";
    }
    if (newsLike) {
        // Old and new systems mix web media and Toutiao; the link tells them apart.
        if (mid.contains("toutiao.com")) {
            return "今日头条";
        }
        return "网媒";
    }
    if (type.equals("平媒")) {
        return "平媒";
    }
    if (type.endsWith("客户端") && !type.equals("雪球客户端")) {
        return "客户端";
    }
    if (type.endsWith("贴吧") || type.endsWith("论坛")) {
        return "贴吧论坛";
    }
    boolean questionLike = type.endsWith("问答")
            || type.equals("脉脉")
            || type.equals("雪球客户端")
            || source.equals("悟空问答");
    return questionLike ? "问答" : "网媒";
}
/**
 * Maps a plug-in platform name to its marking type.
 *
 * @param pt platform name reported by the plug-in
 * @return {@code pt} itself for "百度新闻", "搜狗新闻" and "360新闻"; "网媒" for anything else
 */
public static String getPlugInType(String pt) {
    if (pt.equals("百度新闻")) {
        return "百度新闻";
    }
    if (pt.equals("搜狗新闻")) {
        return "搜狗新闻";
    }
    if (pt.equals("360新闻")) {
        return "360新闻";
    }
    return "网媒";
}
}
src/main/resources/middleware.properties
0 → 100644
View file @
63816bf1
middleware.zookeeperIp
=
zookeeper://192.168.0.234:2181
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment