Commit a1250ffa by chenweitao

Merge branch 'working' into 'master'

Working

See merge request !137
parents 3e5c72ea e30914f7
......@@ -31,9 +31,10 @@ public class HotSearchCacheDAO {
* @param dataList
* @return
*/
public List<Document> addData(List<HotSearchList> dataList){
public List<Document> addData(List<HotSearchList> dataList) {
List<Document> dataes = new ArrayList<>();
dataList.forEach(hotSearch ->{
int realRank = 0;
for (HotSearchList hotSearch : dataList) {
Document document = new Document();
document.put("_id", hotSearch.getId());
document.put("name", hotSearch.getName());
......@@ -46,126 +47,140 @@ public class HotSearchCacheDAO {
document.put("type", hotSearch.getType());
document.put("icon", hotSearch.getIcon());
document.put("rankPic", hotSearch.getRankPic());
if("微博话题".equals(hotSearch.getType())){
if ("微博话题".equals(hotSearch.getType())) {
document.put("topic_lead", hotSearch.getTopicLead());
document.put("comment_count", hotSearch.getCommentCount());
}
if("微博热搜".equals(hotSearch.getType())){
if ("微博热搜".equals(hotSearch.getType())) {
document.put("iconUrl", hotSearch.getIconUrl());
document.put("heatLabel", hotSearch.getHeatLabel());
if (Objects.nonNull(document.getLong("count")) && 0L != document.getLong("count")) {
realRank = realRank + 1;
document.put("realRank", realRank);
} else if (Objects.isNull(document.getLong("count"))) {
document.put("realRank", 0);
} else if (0L == document.getLong("count")) {
document.put("realRank", -1);
}
if("微博娱乐榜".equals(hotSearch.getType())){
}
if ("微博娱乐榜".equals(hotSearch.getType())) {
document.put("iconUrl", hotSearch.getIconUrl());
}
// if("今日头条热搜".equals(hotSearch.getType())){
// document.put("comment_count", hotSearch.getCommentCount());
// }
if("虎嗅热文推荐".equals(hotSearch.getType())){
if ("虎嗅热文推荐".equals(hotSearch.getType())) {
document.put("comment_count", hotSearch.getCommentCount());
}
if("微博要闻榜".equals(hotSearch.getType())){
if ("微博要闻榜".equals(hotSearch.getType())) {
document.put("comment_count", hotSearch.getCommentCount());
document.put("iconUrl", hotSearch.getIconUrl());
document.put("downtext", hotSearch.getDowntext());
}
if("百度热搜".equals(hotSearch.getType())){
if ("百度热搜".equals(hotSearch.getType())) {
document.put("topic_lead", hotSearch.getTopicLead());
}
if("腾讯较真榜".equals(hotSearch.getType())){
document.put("topic_result",hotSearch.getTopicResult());
if ("腾讯较真榜".equals(hotSearch.getType())) {
document.put("topic_result", hotSearch.getTopicResult());
}
if ("B站排行榜".equals(hotSearch.getType())){
if ("B站排行榜".equals(hotSearch.getType())) {
document.put("topic_lead", hotSearch.getTopicLead());
document.put("view",hotSearch.getView());
document.put("barrage",hotSearch.getBarrage());
document.put("pictureUrl",hotSearch.getPictureUrl());
document.put("view", hotSearch.getView());
document.put("barrage", hotSearch.getBarrage());
document.put("pictureUrl", hotSearch.getPictureUrl());
}
addAndUpdateData(document);
if("百度热搜".equals(hotSearch.getType())){
if ("百度热搜".equals(hotSearch.getType())) {
document.remove("topic_lead");
}
if("微博要闻榜".equals(hotSearch.getType())){
if ("微博要闻榜".equals(hotSearch.getType())) {
document.remove("downtext");
}
dataes.add(document);
});
}
return dataes;
}
/**
* 添加及更新相应数据表中的数据
* @param document
*/
public void addAndUpdateData(Document document){
public void addAndUpdateData(Document document) {
try {
String name = document.getString("name");
String type = document.getString("type");
Integer lastRank = document.getInteger("rank")!=null?document.getInteger("rank"): null;
Long lastCount = document.getLong("count")!=null?document.getLong("count"): null;
Integer lastRank = document.getInteger("rank") != null ? document.getInteger("rank") : null;
Integer realLastRank = document.getInteger("realRank") != null ? document.getInteger("realRank") : null;
Long lastCount = document.getLong("count") != null ? document.getLong("count") : null;
Date startTime = document.getDate("time");
Date endTime = getEndTime(type, startTime);
String topicLead = document.getString("topic_lead")!=null?document.getString("topic_lead"):null;
boolean hot = document.getBoolean("hot")!=null?document.getBoolean("hot"):true;
String url = document.getString("url")!=null?document.getString("url"):null;
String topicResult = document.getString("topic_result")!=null?document.getString("topic_result"):null;
String pictureUrl = document.getString("pictureUrl")!=null?document.getString("pictureUrl"):null;
String topicLead = document.getString("topic_lead") != null ? document.getString("topic_lead") : null;
boolean hot = document.getBoolean("hot") != null ? document.getBoolean("hot") : true;
String url = document.getString("url") != null ? document.getString("url") : null;
String topicResult = document.getString("topic_result") != null ? document.getString("topic_result") : null;
String pictureUrl = document.getString("pictureUrl") != null ? document.getString("pictureUrl") : null;
String id = name + "_" + type;
boolean recommend = false;
// Integer readCount = document.getInteger("comment_count");
if("微博热搜".equals(type)){
if ("微博热搜".equals(type)) {
//排位标判断 例如 https://simg.s.weibo.com/20210408_search_point_orange.png
String rankPic = document.getString("rankPic");
if (nonNull(rankPic)&&rankPic.contains("search_point")){
if (nonNull(rankPic) && rankPic.contains("search_point")) {
recommend = true;
}
//后标判断
String icon = document.getString("icon");
if("recom".equals(icon) || "jian".equals(icon)){
if ("recom".equals(icon) || "jian".equals(icon)) {
recommend = true;
}
}
Document query = new Document("_id", id);
//判断是否为微博推荐位,推荐位微博无排名,所以不纳入总的缓存表
if(nonNull(lastRank) && lastRank >0 ){
if (nonNull(lastRank) && lastRank > 0) {
Document nowDoc = (Document) collection.find(query).first();
if (nonNull(nowDoc)) {
Integer highestRank = nowDoc.getInteger("highestRank");
Long highestCount = nonNull(nowDoc.get("highestCount"))?Long.valueOf(nowDoc.get("highestCount").toString()):null;
Integer realHighestRank = nowDoc.getInteger("realHighestRank");
Long highestCount = nonNull(nowDoc.get("highestCount")) ? Long.valueOf(nowDoc.get("highestCount").toString()) : null;
Integer preRank = nowDoc.getInteger("lastRank");
Long preCount = nonNull(nowDoc.get("lastCount"))?Long.valueOf(nowDoc.get("lastCount").toString()):null;
Long preCount = nonNull(nowDoc.get("lastCount")) ? Long.valueOf(nowDoc.get("lastCount").toString()) : null;
String lastUrl = nowDoc.getString("url");
//判断最大热度值
if (nonNull(lastCount) && nonNull(highestCount) && lastCount > highestCount) {
highestCount = lastCount;
}
//判断最高排名
if (nonNull(lastRank) && highestRank<0){
if (nonNull(lastRank) && highestRank < 0) {
highestRank = lastRank;
}
if (lastRank>0 && highestRank>0 && lastRank < highestRank) {
if (lastRank > 0 && highestRank > 0 && lastRank < highestRank) {
highestRank = lastRank;
}
// Track the best "real" rank seen so far.
// realLastRank is null whenever the incoming document carries no "realRank"
// (in the visible code only the "微博热搜" branch of addData sets it), and
// realHighestRank is null for stored documents that lack "realHighestRank".
// Comparing either boxed Integer while it is null would throw a
// NullPointerException, which the surrounding catch-all only logs - the
// whole update for this entry would then be silently skipped.
if (nonNull(realLastRank)) {
    if (realHighestRank == null || realHighestRank < 0) {
        // No usable stored value yet: seed it with the current real rank.
        realHighestRank = realLastRank;
    } else if (realLastRank > 0 && realHighestRank > 0 && realLastRank < realHighestRank) {
        // A smaller positive number is a better rank.
        realHighestRank = realLastRank;
    }
}
//计算热搜时长
int duration = nowDoc.getInteger("duration");
int durationNow = getDuration(type, duration);
//计算上升速度
double riseSpeed = nowDoc.containsKey("riseSpeed")?nowDoc.getDouble("riseSpeed"):0.00;
if(nonNull(lastCount) && nowDoc.containsKey("firstCount")) {
long firstCount =Long.parseLong(nowDoc.get("firstCount").toString());
riseSpeed = ((double)(lastCount - firstCount)/(double)firstCount)*1000/((double)duration);
double riseSpeed = nowDoc.containsKey("riseSpeed") ? nowDoc.getDouble("riseSpeed") : 0.00;
if (nonNull(lastCount) && nowDoc.containsKey("firstCount")) {
long firstCount = Long.parseLong(nowDoc.get("firstCount").toString());
riseSpeed = ((double) (lastCount - firstCount) / (double) firstCount) * 1000 / ((double) duration);
}
// endTime = getEndTime(type, new Date());
//更新相应信息
if(url != null && !url.equals(lastUrl)){
nowDoc.put("url",url);
if (url != null && !url.equals(lastUrl)) {
nowDoc.put("url", url);
}
nowDoc.put("endTime", endTime);
nowDoc.put("lastRank", lastRank);
......@@ -175,17 +190,21 @@ public class HotSearchCacheDAO {
nowDoc.put("preRank", preRank);
nowDoc.put("preCount", preCount);
nowDoc.put("duration", durationNow);
nowDoc.put("recommend",recommend);
nowDoc.put("riseSpeed",riseSpeed);
nowDoc.put("recommend", recommend);
nowDoc.put("riseSpeed", riseSpeed);
if ("微博热搜".equals(type)){
nowDoc.put("realLastRank", realLastRank);
nowDoc.put("realHighestRank", realHighestRank);
}
// if(readCount != null){
// nowDoc.put("readCount",readCount);
// }
if(topicResult != null){
nowDoc.put("topicResult",topicResult);
if (topicResult != null) {
nowDoc.put("topicResult", topicResult);
}
if(picTypes.contains(type)){
if(Strings.isNotEmpty(pictureUrl)){
if(!nowDoc.containsKey("pictureUrl") || !nowDoc.getString("pictureUrl").equals(pictureUrl)) {
if (picTypes.contains(type)) {
if (Strings.isNotEmpty(pictureUrl)) {
if (!nowDoc.containsKey("pictureUrl") || !nowDoc.getString("pictureUrl").equals(pictureUrl)) {
nowDoc.put("pictureUrl", pictureUrl);
}
}
......@@ -202,6 +221,10 @@ public class HotSearchCacheDAO {
nowDoc.put("type", type);
nowDoc.put("lastRank", lastRank);
nowDoc.put("highestRank", lastRank);
if ("微博热搜".equals(type)){
nowDoc.put("realLastRank", realLastRank);
nowDoc.put("realHighestRank", realLastRank);
}
nowDoc.put("lastCount", lastCount);
nowDoc.put("highestCount", lastCount);
nowDoc.put("startTime", startTime);
......@@ -209,31 +232,31 @@ public class HotSearchCacheDAO {
nowDoc.put("duration", durationNow);
nowDoc.put("preRank", null);
nowDoc.put("preCount", null);
nowDoc.put("recommend",recommend);
nowDoc.put("firstCount",lastCount);
nowDoc.put("riseSpeed",0.00);
nowDoc.put("recommend", recommend);
nowDoc.put("firstCount", lastCount);
nowDoc.put("riseSpeed", 0.00);
// if(readCount != null){
// nowDoc.put("readCount",readCount);
// }
if("虎嗅热文推荐".equals(type)){
nowDoc.put("comment_count",document.getLong("comment_count"));
if ("虎嗅热文推荐".equals(type)) {
nowDoc.put("comment_count", document.getLong("comment_count"));
}
if("微博要闻榜".equals(type)){
nowDoc.put("downtext",document.getString("downtext"));
nowDoc.put("comment_count",document.getLong("comment_count"));
if ("微博要闻榜".equals(type)) {
nowDoc.put("downtext", document.getString("downtext"));
nowDoc.put("comment_count", document.getLong("comment_count"));
}
if(topicResult != null){
nowDoc.put("topicResult",topicResult);
if (topicResult != null) {
nowDoc.put("topicResult", topicResult);
}
if(picTypes.contains(type)){
nowDoc.put("pictureUrl",pictureUrl);
if (picTypes.contains(type)) {
nowDoc.put("pictureUrl", pictureUrl);
}
if("微博热搜".equals(type)){
if ("微博热搜".equals(type)) {
nowDoc = WeiboHotSearchCrawler.weiboUpdate(nowDoc);
//更新微博话题贡献者,关于功能
Document documentPC = WeiboHotSearchCrawler.weiboUpdatePC(nowDoc);
if (documentPC.containsKey("分类")) {
nowDoc.put("classify",documentPC.get("分类"));
nowDoc.put("classify", documentPC.get("分类"));
}
if (documentPC.containsKey("地区")) {
nowDoc.put("region", documentPC.get("地区"));
......@@ -241,24 +264,24 @@ public class HotSearchCacheDAO {
if (documentPC.containsKey("标签")) {
nowDoc.put("label", documentPC.get("标签"));
}
if(nowDoc.containsKey("topicLead")){
if (nowDoc.containsKey("topicLead")) {
nowDoc.put("topicLead", nowDoc.getString("topicLead"));
}
if(nowDoc.containsKey("readCount") && nowDoc.containsKey("discussCount")) {
nowDoc.put("readCount", nonNull(nowDoc.get("readCount"))?Long.valueOf(nowDoc.get("readCount").toString()):null);
nowDoc.put("discussCount", nonNull(nowDoc.get("discussCount"))?Long.valueOf(nowDoc.get("discussCount").toString()):null);
if (nowDoc.containsKey("readCount") && nowDoc.containsKey("discussCount")) {
nowDoc.put("readCount", nonNull(nowDoc.get("readCount")) ? Long.valueOf(nowDoc.get("readCount").toString()) : null);
nowDoc.put("discussCount", nonNull(nowDoc.get("discussCount")) ? Long.valueOf(nowDoc.get("discussCount").toString()) : null);
}
if (nowDoc.containsKey("pictureUrl")) {
nowDoc.put("pictureUrl",nowDoc.getString("pictureUrl"));
nowDoc.put("pictureUrl", nowDoc.getString("pictureUrl"));
}
if (nowDoc.containsKey("downtext")) {
nowDoc.put("downtext",nowDoc.getString("downtext"));
nowDoc.put("downtext", nowDoc.getString("downtext"));
}
}
collection.insertOne(nowDoc);
}
}
}catch (Exception e){
} catch (Exception e) {
log.error("数据存储时出错:", e);
}
}
......@@ -267,12 +290,12 @@ public class HotSearchCacheDAO {
* 抖音链接更新
* @param document
*/
public void updateDouyinUrl(Document document){
public void updateDouyinUrl(Document document) {
String id = (String) document.get("id");
Document query = new Document("_id", id);
Document nowDoc = (Document) collection.find(query).first();
if (nonNull(nowDoc)) {
nowDoc.put("url",document.get("url"));
nowDoc.put("url", document.get("url"));
collection.replaceOne(query, nowDoc);
}
}
......@@ -297,7 +320,7 @@ public class HotSearchCacheDAO {
* @param duration
* @return
*/
private int getDuration(String type, int duration){
private int getDuration(String type, int duration) {
// switch (type){
// case "微博热搜" :
// duration = duration + 1;
......@@ -323,9 +346,9 @@ public class HotSearchCacheDAO {
// default :
// duration = duration + 1;
// }
if("脉脉热榜".equals(type)){
if ("脉脉热榜".equals(type)) {
duration = duration + 30;
}else {
} else {
duration = duration + 1;
}
return duration;
......@@ -338,8 +361,8 @@ public class HotSearchCacheDAO {
* @param time
* @return
*/
private Date getEndTime(String type, Date time){
long timeLong = time.getTime() + 1*60*1000;
private Date getEndTime(String type, Date time) {
long timeLong = time.getTime() + 1 * 60 * 1000;
// switch (type){
// case "微博热搜" :
// timeLong = timeLong + 1*60*1000;
......@@ -373,34 +396,33 @@ public class HotSearchCacheDAO {
* @param id
* @return
*/
public Document getHotSearchById(String id){
public Document getHotSearchById(String id) {
Document query = new Document("_id", id);
return (Document) collection.find(query).first();
}
/**
*
* @param document
* @param id
*/
public void updateWeibo(Document document,String id){
public void updateWeibo(Document document, String id) {
Document query = new Document("_id", id);
Document nowDoc = (Document) collection.find(query).first();
if (nonNull(nowDoc)) {
if(Objects.isNull(nowDoc.get("topicLead"))) {
if (Objects.isNull(nowDoc.get("topicLead"))) {
if (document.containsKey("topicLead") && document.getString("topicLead") != null) {
nowDoc.put("topicLead", document.getString("topicLead"));
}
}
if(document.containsKey("readCount") && document.containsKey("discussCount")) {
if (document.containsKey("readCount") && document.containsKey("discussCount")) {
nowDoc.put("readCount", document.getInteger("readCount"));
nowDoc.put("discussCount", document.getInteger("discussCount"));
}
if(document.containsKey("pictureUrl")){
nowDoc.put("pictureUrl",document.getString("pictureUrl"));
if (document.containsKey("pictureUrl")) {
nowDoc.put("pictureUrl", document.getString("pictureUrl"));
}
if (Objects.isNull(nowDoc.get("downtext")) && document.containsKey("downtext")){
nowDoc.put("downtext",document.getString("downtext"));
if (Objects.isNull(nowDoc.get("downtext")) && document.containsKey("downtext")) {
nowDoc.put("downtext", document.getString("downtext"));
}
collection.replaceOne(query, nowDoc);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment