Commit cb0139b9 by leiliangliang

微博热搜新增真实字段

parent e07e6507
...@@ -28,12 +28,14 @@ public class HotSearchCacheDAO { ...@@ -28,12 +28,14 @@ public class HotSearchCacheDAO {
/** /**
* 存储数据 * 存储数据
*
* @param dataList * @param dataList
* @return * @return
*/ */
public List<Document> addData(List<HotSearchList> dataList){ public List<Document> addData(List<HotSearchList> dataList) {
List<Document> dataes = new ArrayList<>(); List<Document> dataes = new ArrayList<>();
dataList.forEach(hotSearch ->{ int realRank = 0;
for (HotSearchList hotSearch : dataList) {
Document document = new Document(); Document document = new Document();
document.put("_id", hotSearch.getId()); document.put("_id", hotSearch.getId());
document.put("name", hotSearch.getName()); document.put("name", hotSearch.getName());
...@@ -46,126 +48,140 @@ public class HotSearchCacheDAO { ...@@ -46,126 +48,140 @@ public class HotSearchCacheDAO {
document.put("type", hotSearch.getType()); document.put("type", hotSearch.getType());
document.put("icon", hotSearch.getIcon()); document.put("icon", hotSearch.getIcon());
document.put("rankPic", hotSearch.getRankPic()); document.put("rankPic", hotSearch.getRankPic());
if("微博话题".equals(hotSearch.getType())){ if ("微博话题".equals(hotSearch.getType())) {
document.put("topic_lead", hotSearch.getTopicLead()); document.put("topic_lead", hotSearch.getTopicLead());
document.put("comment_count", hotSearch.getCommentCount()); document.put("comment_count", hotSearch.getCommentCount());
} }
if("微博热搜".equals(hotSearch.getType())){ if ("微博热搜".equals(hotSearch.getType())) {
document.put("iconUrl", hotSearch.getIconUrl()); document.put("iconUrl", hotSearch.getIconUrl());
document.put("heatLabel", hotSearch.getHeatLabel()); document.put("heatLabel", hotSearch.getHeatLabel());
if (Objects.nonNull(document.getLong("count")) && 0L != document.getLong("count")) {
realRank = realRank + 1;
document.put("realRank", realRank);
} else if (Objects.isNull(document.getLong("count"))) {
document.put("realRank", 0);
} else if (0L == document.getLong("count")) {
document.put("realRank", -1);
}
} }
if("微博娱乐榜".equals(hotSearch.getType())){ if ("微博娱乐榜".equals(hotSearch.getType())) {
document.put("iconUrl", hotSearch.getIconUrl()); document.put("iconUrl", hotSearch.getIconUrl());
} }
// if("今日头条热搜".equals(hotSearch.getType())){ // if("今日头条热搜".equals(hotSearch.getType())){
// document.put("comment_count", hotSearch.getCommentCount()); // document.put("comment_count", hotSearch.getCommentCount());
// } // }
if("虎嗅热文推荐".equals(hotSearch.getType())){ if ("虎嗅热文推荐".equals(hotSearch.getType())) {
document.put("comment_count", hotSearch.getCommentCount()); document.put("comment_count", hotSearch.getCommentCount());
} }
if("微博要闻榜".equals(hotSearch.getType())){ if ("微博要闻榜".equals(hotSearch.getType())) {
document.put("comment_count", hotSearch.getCommentCount()); document.put("comment_count", hotSearch.getCommentCount());
document.put("iconUrl", hotSearch.getIconUrl()); document.put("iconUrl", hotSearch.getIconUrl());
document.put("downtext", hotSearch.getDowntext()); document.put("downtext", hotSearch.getDowntext());
} }
if("百度热搜".equals(hotSearch.getType())){ if ("百度热搜".equals(hotSearch.getType())) {
document.put("topic_lead", hotSearch.getTopicLead()); document.put("topic_lead", hotSearch.getTopicLead());
} }
if("腾讯较真榜".equals(hotSearch.getType())){ if ("腾讯较真榜".equals(hotSearch.getType())) {
document.put("topic_result",hotSearch.getTopicResult()); document.put("topic_result", hotSearch.getTopicResult());
} }
if ("B站排行榜".equals(hotSearch.getType())){ if ("B站排行榜".equals(hotSearch.getType())) {
document.put("topic_lead", hotSearch.getTopicLead()); document.put("topic_lead", hotSearch.getTopicLead());
document.put("view",hotSearch.getView()); document.put("view", hotSearch.getView());
document.put("barrage",hotSearch.getBarrage()); document.put("barrage", hotSearch.getBarrage());
document.put("pictureUrl",hotSearch.getPictureUrl()); document.put("pictureUrl", hotSearch.getPictureUrl());
} }
addAndUpdateData(document); addAndUpdateData(document);
if("百度热搜".equals(hotSearch.getType())){ if ("百度热搜".equals(hotSearch.getType())) {
document.remove("topic_lead"); document.remove("topic_lead");
} }
if("微博要闻榜".equals(hotSearch.getType())){ if ("微博要闻榜".equals(hotSearch.getType())) {
document.remove("downtext"); document.remove("downtext");
} }
dataes.add(document); dataes.add(document);
}); }
return dataes; return dataes;
} }
/** /**
* 添加及更新相应数据表中的数据 * 添加及更新相应数据表中的数据
* @param document * @param document
*/ */
public void addAndUpdateData(Document document){ public void addAndUpdateData(Document document) {
try { try {
String name = document.getString("name"); String name = document.getString("name");
String type = document.getString("type"); String type = document.getString("type");
Integer lastRank = document.getInteger("rank")!=null?document.getInteger("rank"): null; Integer lastRank = document.getInteger("rank") != null ? document.getInteger("rank") : null;
Long lastCount = document.getLong("count")!=null?document.getLong("count"): null; Integer realLastRank = document.getInteger("realRank") != null ? document.getInteger("realRank") : null;
Long lastCount = document.getLong("count") != null ? document.getLong("count") : null;
Date startTime = document.getDate("time"); Date startTime = document.getDate("time");
Date endTime = getEndTime(type, startTime); Date endTime = getEndTime(type, startTime);
String topicLead = document.getString("topic_lead")!=null?document.getString("topic_lead"):null; String topicLead = document.getString("topic_lead") != null ? document.getString("topic_lead") : null;
boolean hot = document.getBoolean("hot")!=null?document.getBoolean("hot"):true; boolean hot = document.getBoolean("hot") != null ? document.getBoolean("hot") : true;
String url = document.getString("url")!=null?document.getString("url"):null; String url = document.getString("url") != null ? document.getString("url") : null;
String topicResult = document.getString("topic_result")!=null?document.getString("topic_result"):null; String topicResult = document.getString("topic_result") != null ? document.getString("topic_result") : null;
String pictureUrl = document.getString("pictureUrl")!=null?document.getString("pictureUrl"):null; String pictureUrl = document.getString("pictureUrl") != null ? document.getString("pictureUrl") : null;
String id = name + "_" + type; String id = name + "_" + type;
boolean recommend = false; boolean recommend = false;
// Integer readCount = document.getInteger("comment_count"); // Integer readCount = document.getInteger("comment_count");
if("微博热搜".equals(type)){ if ("微博热搜".equals(type)) {
//排位标判断 例如 https://simg.s.weibo.com/20210408_search_point_orange.png //排位标判断 例如 https://simg.s.weibo.com/20210408_search_point_orange.png
String rankPic = document.getString("rankPic"); String rankPic = document.getString("rankPic");
if (nonNull(rankPic)&&rankPic.contains("search_point")){ if (nonNull(rankPic) && rankPic.contains("search_point")) {
recommend = true; recommend = true;
} }
//后标判断 //后标判断
String icon = document.getString("icon"); String icon = document.getString("icon");
if("recom".equals(icon) || "jian".equals(icon)){ if ("recom".equals(icon) || "jian".equals(icon)) {
recommend = true; recommend = true;
} }
} }
Document query = new Document("_id", id); Document query = new Document("_id", id);
//判断是否为微博推荐位,推荐位微博无排名,所以不纳入总的缓存表 //判断是否为微博推荐位,推荐位微博无排名,所以不纳入总的缓存表
if(nonNull(lastRank) && lastRank >0 ){ if (nonNull(lastRank) && lastRank > 0) {
Document nowDoc = (Document) collection.find(query).first(); Document nowDoc = (Document) collection.find(query).first();
if (nonNull(nowDoc)) { if (nonNull(nowDoc)) {
Integer highestRank = nowDoc.getInteger("highestRank"); Integer highestRank = nowDoc.getInteger("highestRank");
Long highestCount = nonNull(nowDoc.get("highestCount"))?Long.valueOf(nowDoc.get("highestCount").toString()):null; Integer realHighestRank = nowDoc.getInteger("realHighestRank");
Long highestCount = nonNull(nowDoc.get("highestCount")) ? Long.valueOf(nowDoc.get("highestCount").toString()) : null;
Integer preRank = nowDoc.getInteger("lastRank"); Integer preRank = nowDoc.getInteger("lastRank");
Long preCount = nonNull(nowDoc.get("lastCount"))?Long.valueOf(nowDoc.get("lastCount").toString()):null; Long preCount = nonNull(nowDoc.get("lastCount")) ? Long.valueOf(nowDoc.get("lastCount").toString()) : null;
String lastUrl = nowDoc.getString("url"); String lastUrl = nowDoc.getString("url");
//判断最大热度值 //判断最大热度值
if (nonNull(lastCount) && nonNull(highestCount) && lastCount > highestCount) { if (nonNull(lastCount) && nonNull(highestCount) && lastCount > highestCount) {
highestCount = lastCount; highestCount = lastCount;
} }
//判断最高排名 //判断最高排名
if (nonNull(lastRank) && highestRank<0){ if (nonNull(lastRank) && highestRank < 0) {
highestRank = lastRank; highestRank = lastRank;
} }
if (lastRank>0 && highestRank>0 && lastRank < highestRank) { if (lastRank > 0 && highestRank > 0 && lastRank < highestRank) {
highestRank = lastRank; highestRank = lastRank;
} }
//判断真实最高排名
if (nonNull(realLastRank) && realHighestRank < 0) {
realHighestRank = realLastRank;
}
if (realLastRank > 0 && realHighestRank > 0 && realLastRank < realHighestRank) {
realHighestRank = realLastRank;
}
//计算热搜时长 //计算热搜时长
int duration = nowDoc.getInteger("duration"); int duration = nowDoc.getInteger("duration");
int durationNow = getDuration(type, duration); int durationNow = getDuration(type, duration);
//计算上升速度 //计算上升速度
double riseSpeed = nowDoc.containsKey("riseSpeed")?nowDoc.getDouble("riseSpeed"):0.00; double riseSpeed = nowDoc.containsKey("riseSpeed") ? nowDoc.getDouble("riseSpeed") : 0.00;
if(nonNull(lastCount) && nowDoc.containsKey("firstCount")) { if (nonNull(lastCount) && nowDoc.containsKey("firstCount")) {
long firstCount =Long.parseLong(nowDoc.get("firstCount").toString()); long firstCount = Long.parseLong(nowDoc.get("firstCount").toString());
riseSpeed = ((double)(lastCount - firstCount)/(double)firstCount)*1000/((double)duration); riseSpeed = ((double) (lastCount - firstCount) / (double) firstCount) * 1000 / ((double) duration);
} }
// endTime = getEndTime(type, new Date()); // endTime = getEndTime(type, new Date());
//更新相应信息 //更新相应信息
if(url != null && !url.equals(lastUrl)){ if (url != null && !url.equals(lastUrl)) {
nowDoc.put("url",url); nowDoc.put("url", url);
} }
nowDoc.put("endTime", endTime); nowDoc.put("endTime", endTime);
nowDoc.put("lastRank", lastRank); nowDoc.put("lastRank", lastRank);
...@@ -175,17 +191,21 @@ public class HotSearchCacheDAO { ...@@ -175,17 +191,21 @@ public class HotSearchCacheDAO {
nowDoc.put("preRank", preRank); nowDoc.put("preRank", preRank);
nowDoc.put("preCount", preCount); nowDoc.put("preCount", preCount);
nowDoc.put("duration", durationNow); nowDoc.put("duration", durationNow);
nowDoc.put("recommend",recommend); nowDoc.put("recommend", recommend);
nowDoc.put("riseSpeed",riseSpeed); nowDoc.put("riseSpeed", riseSpeed);
if ("微博热搜".equals(type)){
nowDoc.put("realLastRank", realLastRank);
nowDoc.put("realHighestRank", realHighestRank);
}
// if(readCount != null){ // if(readCount != null){
// nowDoc.put("readCount",readCount); // nowDoc.put("readCount",readCount);
// } // }
if(topicResult != null){ if (topicResult != null) {
nowDoc.put("topicResult",topicResult); nowDoc.put("topicResult", topicResult);
} }
if(picTypes.contains(type)){ if (picTypes.contains(type)) {
if(Strings.isNotEmpty(pictureUrl)){ if (Strings.isNotEmpty(pictureUrl)) {
if(!nowDoc.containsKey("pictureUrl") || !nowDoc.getString("pictureUrl").equals(pictureUrl)) { if (!nowDoc.containsKey("pictureUrl") || !nowDoc.getString("pictureUrl").equals(pictureUrl)) {
nowDoc.put("pictureUrl", pictureUrl); nowDoc.put("pictureUrl", pictureUrl);
} }
} }
...@@ -202,6 +222,10 @@ public class HotSearchCacheDAO { ...@@ -202,6 +222,10 @@ public class HotSearchCacheDAO {
nowDoc.put("type", type); nowDoc.put("type", type);
nowDoc.put("lastRank", lastRank); nowDoc.put("lastRank", lastRank);
nowDoc.put("highestRank", lastRank); nowDoc.put("highestRank", lastRank);
if ("微博热搜".equals(type)){
nowDoc.put("realLastRank", realLastRank);
nowDoc.put("realHighestRank", realLastRank);
}
nowDoc.put("lastCount", lastCount); nowDoc.put("lastCount", lastCount);
nowDoc.put("highestCount", lastCount); nowDoc.put("highestCount", lastCount);
nowDoc.put("startTime", startTime); nowDoc.put("startTime", startTime);
...@@ -209,31 +233,31 @@ public class HotSearchCacheDAO { ...@@ -209,31 +233,31 @@ public class HotSearchCacheDAO {
nowDoc.put("duration", durationNow); nowDoc.put("duration", durationNow);
nowDoc.put("preRank", null); nowDoc.put("preRank", null);
nowDoc.put("preCount", null); nowDoc.put("preCount", null);
nowDoc.put("recommend",recommend); nowDoc.put("recommend", recommend);
nowDoc.put("firstCount",lastCount); nowDoc.put("firstCount", lastCount);
nowDoc.put("riseSpeed",0.00); nowDoc.put("riseSpeed", 0.00);
// if(readCount != null){ // if(readCount != null){
// nowDoc.put("readCount",readCount); // nowDoc.put("readCount",readCount);
// } // }
if("虎嗅热文推荐".equals(type)){ if ("虎嗅热文推荐".equals(type)) {
nowDoc.put("comment_count",document.getLong("comment_count")); nowDoc.put("comment_count", document.getLong("comment_count"));
} }
if("微博要闻榜".equals(type)){ if ("微博要闻榜".equals(type)) {
nowDoc.put("downtext",document.getString("downtext")); nowDoc.put("downtext", document.getString("downtext"));
nowDoc.put("comment_count",document.getLong("comment_count")); nowDoc.put("comment_count", document.getLong("comment_count"));
} }
if(topicResult != null){ if (topicResult != null) {
nowDoc.put("topicResult",topicResult); nowDoc.put("topicResult", topicResult);
} }
if(picTypes.contains(type)){ if (picTypes.contains(type)) {
nowDoc.put("pictureUrl",pictureUrl); nowDoc.put("pictureUrl", pictureUrl);
} }
if("微博热搜".equals(type)){ if ("微博热搜".equals(type)) {
nowDoc = WeiboHotSearchCrawler.weiboUpdate(nowDoc); nowDoc = WeiboHotSearchCrawler.weiboUpdate(nowDoc);
//更新微博话题贡献者,关于功能 //更新微博话题贡献者,关于功能
Document documentPC = WeiboHotSearchCrawler.weiboUpdatePC(nowDoc); Document documentPC = WeiboHotSearchCrawler.weiboUpdatePC(nowDoc);
if (documentPC.containsKey("分类")) { if (documentPC.containsKey("分类")) {
nowDoc.put("classify",documentPC.get("分类")); nowDoc.put("classify", documentPC.get("分类"));
} }
if (documentPC.containsKey("地区")) { if (documentPC.containsKey("地区")) {
nowDoc.put("region", documentPC.get("地区")); nowDoc.put("region", documentPC.get("地区"));
...@@ -241,24 +265,24 @@ public class HotSearchCacheDAO { ...@@ -241,24 +265,24 @@ public class HotSearchCacheDAO {
if (documentPC.containsKey("标签")) { if (documentPC.containsKey("标签")) {
nowDoc.put("label", documentPC.get("标签")); nowDoc.put("label", documentPC.get("标签"));
} }
if(nowDoc.containsKey("topicLead")){ if (nowDoc.containsKey("topicLead")) {
nowDoc.put("topicLead", nowDoc.getString("topicLead")); nowDoc.put("topicLead", nowDoc.getString("topicLead"));
} }
if(nowDoc.containsKey("readCount") && nowDoc.containsKey("discussCount")) { if (nowDoc.containsKey("readCount") && nowDoc.containsKey("discussCount")) {
nowDoc.put("readCount", nonNull(nowDoc.get("readCount"))?Long.valueOf(nowDoc.get("readCount").toString()):null); nowDoc.put("readCount", nonNull(nowDoc.get("readCount")) ? Long.valueOf(nowDoc.get("readCount").toString()) : null);
nowDoc.put("discussCount", nonNull(nowDoc.get("discussCount"))?Long.valueOf(nowDoc.get("discussCount").toString()):null); nowDoc.put("discussCount", nonNull(nowDoc.get("discussCount")) ? Long.valueOf(nowDoc.get("discussCount").toString()) : null);
} }
if (nowDoc.containsKey("pictureUrl")) { if (nowDoc.containsKey("pictureUrl")) {
nowDoc.put("pictureUrl",nowDoc.getString("pictureUrl")); nowDoc.put("pictureUrl", nowDoc.getString("pictureUrl"));
} }
if (nowDoc.containsKey("downtext")) { if (nowDoc.containsKey("downtext")) {
nowDoc.put("downtext",nowDoc.getString("downtext")); nowDoc.put("downtext", nowDoc.getString("downtext"));
} }
} }
collection.insertOne(nowDoc); collection.insertOne(nowDoc);
} }
} }
}catch (Exception e){ } catch (Exception e) {
log.error("数据存储时出错:", e); log.error("数据存储时出错:", e);
} }
} }
...@@ -267,12 +291,12 @@ public class HotSearchCacheDAO { ...@@ -267,12 +291,12 @@ public class HotSearchCacheDAO {
* 抖音链接更新 * 抖音链接更新
* @param document * @param document
*/ */
public void updateDouyinUrl(Document document){ public void updateDouyinUrl(Document document) {
String id = (String) document.get("id"); String id = (String) document.get("id");
Document query = new Document("_id", id); Document query = new Document("_id", id);
Document nowDoc = (Document) collection.find(query).first(); Document nowDoc = (Document) collection.find(query).first();
if (nonNull(nowDoc)) { if (nonNull(nowDoc)) {
nowDoc.put("url",document.get("url")); nowDoc.put("url", document.get("url"));
collection.replaceOne(query, nowDoc); collection.replaceOne(query, nowDoc);
} }
} }
...@@ -297,7 +321,7 @@ public class HotSearchCacheDAO { ...@@ -297,7 +321,7 @@ public class HotSearchCacheDAO {
* @param duration * @param duration
* @return * @return
*/ */
private int getDuration(String type, int duration){ private int getDuration(String type, int duration) {
// switch (type){ // switch (type){
// case "微博热搜" : // case "微博热搜" :
// duration = duration + 1; // duration = duration + 1;
...@@ -323,9 +347,9 @@ public class HotSearchCacheDAO { ...@@ -323,9 +347,9 @@ public class HotSearchCacheDAO {
// default : // default :
// duration = duration + 1; // duration = duration + 1;
// } // }
if("脉脉热榜".equals(type)){ if ("脉脉热榜".equals(type)) {
duration = duration + 30; duration = duration + 30;
}else { } else {
duration = duration + 1; duration = duration + 1;
} }
return duration; return duration;
...@@ -338,8 +362,8 @@ public class HotSearchCacheDAO { ...@@ -338,8 +362,8 @@ public class HotSearchCacheDAO {
* @param time * @param time
* @return * @return
*/ */
private Date getEndTime(String type, Date time){ private Date getEndTime(String type, Date time) {
long timeLong = time.getTime() + 1*60*1000; long timeLong = time.getTime() + 1 * 60 * 1000;
// switch (type){ // switch (type){
// case "微博热搜" : // case "微博热搜" :
// timeLong = timeLong + 1*60*1000; // timeLong = timeLong + 1*60*1000;
...@@ -373,34 +397,33 @@ public class HotSearchCacheDAO { ...@@ -373,34 +397,33 @@ public class HotSearchCacheDAO {
* @param id * @param id
* @return * @return
*/ */
public Document getHotSearchById(String id){ public Document getHotSearchById(String id) {
Document query = new Document("_id", id); Document query = new Document("_id", id);
return (Document) collection.find(query).first(); return (Document) collection.find(query).first();
} }
/** /**
*
* @param document * @param document
* @param id * @param id
*/ */
public void updateWeibo(Document document,String id){ public void updateWeibo(Document document, String id) {
Document query = new Document("_id", id); Document query = new Document("_id", id);
Document nowDoc = (Document) collection.find(query).first(); Document nowDoc = (Document) collection.find(query).first();
if (nonNull(nowDoc)) { if (nonNull(nowDoc)) {
if(Objects.isNull(nowDoc.get("topicLead"))) { if (Objects.isNull(nowDoc.get("topicLead"))) {
if (document.containsKey("topicLead") && document.getString("topicLead") != null) { if (document.containsKey("topicLead") && document.getString("topicLead") != null) {
nowDoc.put("topicLead", document.getString("topicLead")); nowDoc.put("topicLead", document.getString("topicLead"));
} }
} }
if(document.containsKey("readCount") && document.containsKey("discussCount")) { if (document.containsKey("readCount") && document.containsKey("discussCount")) {
nowDoc.put("readCount", document.getInteger("readCount")); nowDoc.put("readCount", document.getInteger("readCount"));
nowDoc.put("discussCount", document.getInteger("discussCount")); nowDoc.put("discussCount", document.getInteger("discussCount"));
} }
if(document.containsKey("pictureUrl")){ if (document.containsKey("pictureUrl")) {
nowDoc.put("pictureUrl",document.getString("pictureUrl")); nowDoc.put("pictureUrl", document.getString("pictureUrl"));
} }
if (Objects.isNull(nowDoc.get("downtext")) && document.containsKey("downtext")){ if (Objects.isNull(nowDoc.get("downtext")) && document.containsKey("downtext")) {
nowDoc.put("downtext",document.getString("downtext")); nowDoc.put("downtext", document.getString("downtext"));
} }
collection.replaceOne(query, nowDoc); collection.replaceOne(query, nowDoc);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment