Commit 832c8657 by shentao

2018/10/12 新版采集消息流线上版1.0.0

parent 01a0668c
......@@ -200,6 +200,14 @@
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>2.0.4.RELEASE</version>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
......
......@@ -41,7 +41,7 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
@Override
public WeiboMessage getWeiboBean(SearchHit searchHit) {
JSONObject wmJson = getBean(searchHit);
/**
......@@ -54,8 +54,8 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
WeiboMessage wm = new WeiboMessage();
try {
message = mapper.writeValueAsString(sourceHitMap);
// JSONObject mesJson = JSONObject.parseObject(message);
// System.out.println(searchHit.getType());
// JSONObject mesJson = JSONObject.parseObject(message);
// System.out.println(searchHit.getType());
wm = mapper.readValue(message, WeiboMessage.class);
} catch (IOException e) {
e.printStackTrace();
......@@ -95,23 +95,23 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
HuserInfoWeibo huserInfoWeibo = huserInfoDao.getHuserInfoWeibo(String.valueOf(sourceHitMap.get("user_id")));
wm.setChannelIndex(huserInfoWeibo == null ? 1.0 : huserInfoWeibo.getChannelIndex());
// System.err.println("json"+wmJson);
// try {
// System.err.println("mapper"+mapper.writeValueAsString(wm));
// } catch (JsonProcessingException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
// System.err.println("json"+wmJson);
// try {
// System.err.println("mapper"+mapper.writeValueAsString(wm));
// } catch (JsonProcessingException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
return wm;
}
@Override
public ZhihuMessage getZhihuBean(SearchHit searchHit) {
JSONObject wmJson = getBean(searchHit);
Map<String, Object> sourceHitMap = searchHit.getSource();
Map<String, HighlightField> highlightFieldsHitMap = searchHit.getHighlightFields();
......@@ -235,24 +235,23 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
zm.setImg(imgUrl);
}
}
// System.err.println("json"+wmJson);
// try {
// System.err.println("mapper"+mapper.writeValueAsString(zm));
// } catch (JsonProcessingException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
// System.err.println("json"+wmJson);
// try {
// System.err.println("mapper"+mapper.writeValueAsString(zm));
// } catch (JsonProcessingException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
return zm;
}
@Override
public VideoMessage getVideoBean(SearchHit searchHit) {
JSONObject wmJson = getBean(searchHit);
Map<String, Object> sourceHitMap = searchHit.getSource();
Map<String, HighlightField> highlightFieldsHitMap = searchHit.getHighlightFields();
......@@ -290,23 +289,23 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
title = vm.getTitle();
}
vm.setTitle(title);
// System.err.println("json"+wmJson);
// try {
// System.err.println("mapper"+mapper.writeValueAsString(vm));
// } catch (JsonProcessingException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
// System.err.println("json"+wmJson);
// try {
// System.err.println("mapper"+mapper.writeValueAsString(vm));
// } catch (JsonProcessingException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
return vm;
}
@Override
public MediaMessage getMediaBean(SearchHit searchHit) {
JSONObject wmJson = getBean(searchHit);
Map<String, Object> sourceHitMap = searchHit.getSource();
Map<String, HighlightField> highlightFieldsHitMap = searchHit.getHighlightFields();
......@@ -385,15 +384,15 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
mm.setChannelIndex(1.0);
}
}
// System.err.println("json"+wmJson);
// try {
// System.err.println("mapper"+mapper.writeValueAsString(mm));
// } catch (JsonProcessingException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
// System.err.println("json"+wmJson);
// try {
// System.err.println("mapper"+mapper.writeValueAsString(mm));
// } catch (JsonProcessingException e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
return mm;
}
......@@ -412,10 +411,11 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
* ES数据读取
*/
Map<String, Object> sourceHitMap = searchHit.getSource();
// Map<String, HighlightField> highlightFieldsHitMap = searchHit.getHighlightFields();
// Map<String, HighlightField> highlightFieldsHitMap =
// searchHit.getHighlightFields();
String message;
JSONObject res =new JSONObject();
JSONObject res = new JSONObject();
try {
message = mapper.writeValueAsString(sourceHitMap);
res = JSONObject.parseObject(message);
......@@ -424,32 +424,34 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
e.printStackTrace();
}
String type = searchHit.getType();
Long rstime = null!=searchHit.getSource().get("rstime")?Long.valueOf(searchHit.getSource().get("rstime")+""):null;
if(type.equals("status")&&null!=rstime) {
//微博
res = handleWeiboBean(searchHit,res);
}else if(type.equals("zhihu")) {
//zhihu
res = handleZhihuBean(searchHit,res);
}else if(type.equals("video")) {
//视频
res = handleVideoBean(searchHit,res);
}else {
//网媒
res = handleMediaBean(searchHit,res);
Long rstime = null != searchHit.getSource().get("rstime")
? Long.valueOf(searchHit.getSource().get("rstime") + "")
: null;
if (type.equals("status") && null != rstime) {
// 微博
res = handleWeiboBean(searchHit, res);
} else if (type.equals("zhihu")) {
// zhihu
res = handleZhihuBean(searchHit, res);
} else if (type.equals("video")) {
// 视频
res = handleVideoBean(searchHit, res);
} else {
// 网媒
res = handleMediaBean(searchHit, res);
}
return res;
}
private JSONObject handleWeiboBean(SearchHit searchHit, JSONObject res) {
/**
* ES数据读取
*/
Map<String, Object> sourceHitMap = searchHit.getSource();
Map<String, HighlightField> highlightFieldsHitMap = searchHit.getHighlightFields();
String text = "";
String roottext = "";
Text[] textlist = highlightFieldsHitMap.containsKey("text") ? highlightFieldsHitMap.get("text").getFragments()
......@@ -485,7 +487,7 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
HuserInfoWeibo huserInfoWeibo = huserInfoDao.getHuserInfoWeibo(String.valueOf(sourceHitMap.get("user_id")));
res.put("channelIndex", huserInfoWeibo == null ? 1.0 : huserInfoWeibo.getChannelIndex());
return res;
}
......@@ -495,18 +497,16 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
res.put("_id", searchHit.getId());
res.put("markPt", "知乎");
String insert_at = res.getString("insert_at");
if (insert_at != null && !insert_at.equals("") && !insert_at.replaceFirst("000", "").equals("")) {
Date insertDate = TimeUtil.parseTime(insert_at.replaceFirst("000", ""),
"yyyy-MM-dd'T'HH:mm:ss.SSS");
Date insertDate = TimeUtil.parseTime(insert_at.replaceFirst("000", ""), "yyyy-MM-dd'T'HH:mm:ss.SSS");
if (insertDate != null)
res.put("insert_at", insertDate.toString());
}
String update_at = res.getString("update_at");
if (update_at != null && !update_at.equals("") && !update_at.replaceFirst("000", "").equals("")) {
Date updateDate = TimeUtil.parseTime(update_at.replaceFirst("000", ""),
"yyyy-MM-dd'T'HH:mm:ss.SSS");
Date updateDate = TimeUtil.parseTime(update_at.replaceFirst("000", ""), "yyyy-MM-dd'T'HH:mm:ss.SSS");
if (updateDate != null)
res.put("update_at", updateDate.toString());
}
......@@ -574,14 +574,14 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
}
if (questionTitle.isEmpty()) {
questionTitle = res.getString("question_title")+"";
questionTitle = res.getString("question_title") + "";
if (questionTitle.length() > 300) {
questionTitle = questionTitle.substring(0, 300);
}
}
if (questionContent.isEmpty()) {
questionContent = res.getString("question_content")+"";
questionContent = res.getString("question_content") + "";
if (questionContent.length() > 300) {
questionContent = questionContent.substring(0, 300);
......@@ -589,7 +589,7 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
}
if (answerContent.isEmpty()) {
answerContent = res.getString("answer_content")+"";
answerContent = res.getString("answer_content") + "";
if (answerContent.length() > 300) {
answerContent = answerContent.substring(0, 300);
}
......@@ -599,12 +599,13 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
res.put("question_content", questionContent.replaceAll("<img[^>]*>", ""));
res.put("answer_content", answerContent.replaceAll("<img[^>]*>", ""));
res.put("img", img);
if (res.getString("img_url") != null) {
String imgUrl = res.getString("img_url");
if (null != imgUrl && "".equals(imgUrl)) {
res.put("img", imgUrl);;
res.put("img", imgUrl);
;
}
}
return res;
......@@ -615,20 +616,24 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
Map<String, HighlightField> highlightFieldsHitMap = searchHit.getHighlightFields();
res.put("markPt", "视频");
res.put("_id", searchHit.getId());
res.put("_id", searchHit.getId());
try {
String vtime = TimeUtil
.parseTime(sourceHitMap.get("time").toString().replaceFirst("000", ""), "yyyy-MM-dd'T'HH:mm:ss")
.toString();
.parseTime(sourceHitMap.get("time").toString().replaceFirst("000", ""), "yyyy-MM-dd'T'HH:mm:ss")
.toString();
res.put("time", vtime);
} catch (Exception e) {
log.error("videotime为空" + searchHit.getId());
}
String title = "";
String content = "";
Text[] titlelist = highlightFieldsHitMap.containsKey("title")
? highlightFieldsHitMap.get("title").getFragments()
: null;
Text[] contentlist = highlightFieldsHitMap.containsKey("content")
? highlightFieldsHitMap.get("content").getFragments()
: null;
if (titlelist != null && titlelist.length > 0) {
for (Text string : titlelist) {
// 通过高亮位置截取过长字符串
......@@ -636,12 +641,34 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
title = title + cutStr;
}
}
if (contentlist != null && contentlist.length > 0) {
for (Text string : contentlist) {
// 通过高亮位置截取过长字符串
String cutStr = string.string();
if (cutStr.length() > 150) {
int i = cutStr.indexOf("<font");
if (i > 10) {
cutStr = cutStr.substring(i - 10, cutStr.length());
cutStr = "……" + cutStr;
}
}
content = content + cutStr;
}
}
if (content.isEmpty()) {
content = sourceHitMap.containsKey("content") ? sourceHitMap.get("content") + "" : "";
if (content.length() > 300) {
content = content.substring(0, 300);
}
}
if (title.isEmpty()) {
title = String.valueOf(sourceHitMap.get("title"));
}
res.put("title", title);
res.put("content", content);
res.put("title", title);
return res;
}
......@@ -652,9 +679,9 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
res.put("markPt", "网媒");
res.put("_id", searchHit.getId());
try {
String mtime=TimeUtil
.parseTime(sourceHitMap.get("time").toString().replaceFirst("000", ""), "yyyy-MM-dd'T'HH:mm:ss")
.toString();
String mtime = TimeUtil
.parseTime(sourceHitMap.get("time").toString().replaceFirst("000", ""), "yyyy-MM-dd'T'HH:mm:ss")
.toString();
res.put("time", mtime);
} catch (Exception e) {
log.error("mediatime为空" + searchHit.getId());
......@@ -699,13 +726,14 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
}
}
if (title.isEmpty()) {
title = sourceHitMap.containsKey("title") ? String.valueOf(sourceHitMap.get("title")) : "";;
title = sourceHitMap.containsKey("title") ? String.valueOf(sourceHitMap.get("title")) : "";
;
}
res.put("content", content);
res.put("title", title);
String source = res.getString("source")+"";
String source = res.getString("source") + "";
if ((sourceHitMap.get("pt") + "").equals("微信")) {
HuserInfoWeixin huserInfoWeixin = huserInfoDao.getHuserInfoWeixin(source);
if (huserInfoWeixin != null) {
......@@ -724,5 +752,4 @@ public class HighLightFillingServiceImpl implements HighLightFillingService {
return res;
}
}
......@@ -10,12 +10,15 @@ import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.messageflow.DirectES4RedisThread;
import com.zhiwei.messageflow.bean.MediaMessage;
import com.zhiwei.messageflow.bean.VideoMessage;
import com.zhiwei.messageflow.bean.WeiboMessage;
......@@ -29,6 +32,7 @@ import com.zhiwei.messageflow.util.ESQueryUtil;
@Component
public class NoiseProcessingServiceImpl implements NoiseProcessingService {
private final static Logger log = LogManager.getLogger(NoiseProcessingServiceImpl.class);
@Autowired
private HighLightFillingService highLightFillingService;
......@@ -637,7 +641,7 @@ public class NoiseProcessingServiceImpl implements NoiseProcessingService {
private boolean isTimeOver(String time) {
long current=System.currentTimeMillis();//当前时间毫秒数
long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();//今天零点零分零秒的毫秒数
long zero=current-(24*3600*1000L);//前一天毫秒数
Date date = new Date();
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
// SimpleDateFormat dateFormat = new SimpleDateFormat("EEE MMM d HH:mm:ss 'CST'
......@@ -647,7 +651,7 @@ public class NoiseProcessingServiceImpl implements NoiseProcessingService {
try {
date = dateFormat.parse(time);
if ((date.getTime() + (8 * 3600 * 1000L)) < zero) {
System.err.println(date.getTime() + (8 * 3600 * 1000L)+"|"+zero);
log.info(date.getTime() + (8 * 3600 * 1000L)+"|"+zero);
return true;
}
} catch (ParseException e) {
......@@ -677,15 +681,15 @@ public class NoiseProcessingServiceImpl implements NoiseProcessingService {
String time = map.get("time") != null ? map.get("time").toString() : null;
if (isTimeOver(time)) {
System.err.println("weibo:time:" + time + map.get("id").toString());
return isnoise;
log.info("weibo:time:" + time +"\t"+ map.get("id").toString());
return true;
}
String text = map.get("text") != null ? map.get("text").toString() : null;
String roottext = map.get("roottext") != null ? map.get("roottext").toString() : null;
String username = map.get("username") != null ? map.get("username").toString() : null;
if (null == username) {
return isnoise;
return true;
}
if (noiseRules != null) {
for (NoiseRule n : noiseRules) {
......@@ -782,8 +786,8 @@ public class NoiseProcessingServiceImpl implements NoiseProcessingService {
String platform = platformNew.getPlatformName();
String time = map.get("created_at") != null ? map.get("created_at").toString() : null;
if (isTimeOver(time)) {
System.err.println("zhihu:time:" + time + map.get("id").toString());
return isnoise;
log.info("zhihu:time:" + time +"\t"+ map.get("id").toString());
return true;
}
String text = map.get("question_title") != null ? map.get("question_title").toString() : null;
String roottext = map.get("answer_content") != null ? map.get("answer_content").toString() : null;
......@@ -913,8 +917,8 @@ public class NoiseProcessingServiceImpl implements NoiseProcessingService {
String platform = platformNew.getPlatformName();
String time = map.get("time") != null ? map.get("time").toString() : null;
if (isTimeOver(time)) {
System.err.println("video:time:" + time + map.get("id").toString());
return isnoise;
log.info("video:time:" + time +"\t"+ map.get("id").toString());
return true;
}
String text = map.get("title") != null ? map.get("title").toString() : null;
String roottext = null;
......@@ -1043,15 +1047,15 @@ public class NoiseProcessingServiceImpl implements NoiseProcessingService {
String platform = platformNew.getPlatformName();
String time = map.get("time") != null ? map.get("time").toString() : null;
if (isTimeOver(time)) {
System.err.println("media:time:" + time + map.get("id").toString());
return isnoise;
log.info("media:time:" + time +"\t"+ map.get("id").toString());
return true;
}
String text = map.get("title") != null ? map.get("title").toString() : null;
String roottext = map.get("content") != null ? map.get("content").toString() : null;
String username = map.get("source") != null ? map.get("source").toString() : null;
if (null == username) {
return isnoise;
return true;
}
if (noiseRules != null) {
for (NoiseRule n : noiseRules) {
......
......@@ -136,7 +136,7 @@ public class RedisPoolAndTools {
}
/**
* 删除超出上限的数据
* 删除超出上限的数据(弃用
*
* @param key
* @param removeIndex
......@@ -153,9 +153,26 @@ public class RedisPoolAndTools {
jedis.zremrangeByRank(key, 0, removeIndex);
returnResource(jedis);
}
/**
* 获取有序集合消息数量
* 删除超出上限的数据
*
* @param key
* @param removeIndex
*/
public void removeDataByName(String key, int removeIndex) {
Jedis jedis = getJedis();
while (true) {
if (null != jedis) {
break;
} else {
jedis = getJedis();
}
}
jedis.zremrangeByRank(key, 0, removeIndex);
returnResource(jedis);
}
/**
* 获取有序集合消息数量(弃用
*
* @param key
* @return
......@@ -173,6 +190,25 @@ public class RedisPoolAndTools {
returnResource(jedis);
return nowCount;
}
/**
* 获取有序集合消息数量
*
* @param key
* @return
*/
public Long getNowCount(String key) {
Jedis jedis = getJedis();
while (true) {
if (null != jedis) {
break;
} else {
jedis = getJedis();
}
}
Long nowCount = jedis.zcard(key);
returnResource(jedis);
return nowCount;
}
/**
* 分页获取redis数据
......
......@@ -61,10 +61,10 @@ public class RedisServiceImpl implements RedisService {
/**
* 删除超出存储上限的数据
*/
long nowCount = redisPoolAndTools.getNowCount(redisKey.getBytes());// 当前数据量
long nowCount = redisPoolAndTools.getNowCount(redisKey);// 当前数据量
int removeIndex = (int) (nowCount - maxSize);// 需移除数据数量
if (maxSize > 0 && nowCount > maxSize) {
redisPoolAndTools.removeDataByName(redisKey.getBytes(), removeIndex);
redisPoolAndTools.removeDataByName(redisKey, removeIndex);
}
}
......@@ -84,10 +84,10 @@ public class RedisServiceImpl implements RedisService {
/**
* 删除超出存储上限的数据
*/
long nowCount = redisPoolAndTools.getNowCount(redisKey.getBytes());// 当前数据量
long nowCount = redisPoolAndTools.getNowCount(redisKey);// 当前数据量
int removeIndex = (int) (nowCount - maxSize);// 需移除数据数量
if (maxSize > 0 && nowCount > maxSize) {
redisPoolAndTools.removeDataByName(redisKey.getBytes(), removeIndex);
redisPoolAndTools.removeDataByName(redisKey, removeIndex);
}
}
......@@ -107,10 +107,10 @@ public class RedisServiceImpl implements RedisService {
/**
* 删除超出存储上限的数据
*/
long nowCount = redisPoolAndTools.getNowCount(redisKey.getBytes());// 当前数据量
long nowCount = redisPoolAndTools.getNowCount(redisKey);// 当前数据量
int removeIndex = (int) (nowCount - maxSize);// 需移除数据数量
if (maxSize > 0 && nowCount > maxSize) {
redisPoolAndTools.removeDataByName(redisKey.getBytes(), removeIndex);
redisPoolAndTools.removeDataByName(redisKey, removeIndex);
}
}
......@@ -130,10 +130,10 @@ public class RedisServiceImpl implements RedisService {
/**
* 删除超出存储上限的数据
*/
long nowCount = redisPoolAndTools.getNowCount(redisKey.getBytes());// 当前数据量
long nowCount = redisPoolAndTools.getNowCount(redisKey);// 当前数据量
int removeIndex = (int) (nowCount - maxSize);// 需移除数据数量
if (maxSize > 0 && nowCount > maxSize) {
redisPoolAndTools.removeDataByName(redisKey.getBytes(), removeIndex);
redisPoolAndTools.removeDataByName(redisKey, removeIndex);
}
}
......@@ -148,10 +148,10 @@ public class RedisServiceImpl implements RedisService {
/**
* 删除超出存储上限的数据
*/
long nowCount = redisPoolAndTools.getNowCount(redisKey.getBytes());// 当前数据量
long nowCount = redisPoolAndTools.getNowCount(redisKey);// 当前数据量
int removeIndex = (int) (nowCount - maxSize);// 需移除数据数量
if (maxSize > 0 && nowCount > maxSize) {
redisPoolAndTools.removeDataByName(redisKey.getBytes(), removeIndex);
redisPoolAndTools.removeDataByName(redisKey, removeIndex);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment