Commit 77a8f1d8 by 马黎滨

Merge branch 'mlb-template-local' into 'mlbWork'

Mlb template local

See merge request !64
parents 977f9678 5b443a53
...@@ -95,6 +95,19 @@ public class HotSearchCache { ...@@ -95,6 +95,19 @@ public class HotSearchCache {
*/ */
private String topicResult; private String topicResult;
/**
* 首次上榜热度
*/
private Integer firstCount;
/** 详情页图片(微博平台) */
private String pictureUrl;
/**
* 上升速度
*/
private double riseSpeed;
public HotSearchCache(String url, String name, String topicLead, Integer highestCount, Integer lastCount, Boolean hot, public HotSearchCache(String url, String name, String topicLead, Integer highestCount, Integer lastCount, Boolean hot,
Date startTime, Date endTime, Integer highestRank, Integer lastRank, String type, Integer duration){ Date startTime, Date endTime, Integer highestRank, Integer lastRank, String type, Integer duration){
this.id = name + "_" + type; this.id = name + "_" + type;
...@@ -113,35 +126,31 @@ public class HotSearchCache { ...@@ -113,35 +126,31 @@ public class HotSearchCache {
this.duration = duration; this.duration = duration;
} }
public Boolean getRecommend() { public Boolean getRecommend() { return recommend; }
return recommend;
}
public void setRecommend(Boolean recommend) { public void setRecommend(Boolean recommend) { this.recommend = recommend; }
this.recommend = recommend;
}
public Integer getReadCount() { public Integer getReadCount() { return readCount; }
return readCount;
}
public void setReadCount(Integer readCount) { public void setReadCount(Integer readCount) { this.readCount = readCount; }
this.readCount = readCount;
}
public Integer getDiscussCount() { public Integer getDiscussCount() { return discussCount; }
return discussCount;
}
public void setDiscussCount(Integer discussCount) { public void setDiscussCount(Integer discussCount) { this.discussCount = discussCount; }
this.discussCount = discussCount;
}
public String getTopicLead() { public String getTopicLead() { return topicLead; }
return topicLead;
}
public void setTopicLead(String topicLead) { public void setTopicLead(String topicLead) { this.topicLead = topicLead; }
this.topicLead = topicLead;
} public Integer getFirstCount() { return firstCount; }
/**
 * Stores the heat value recorded when the entry first appeared on the list.
 *
 * @param firstCount heat value at first appearance
 */
public void setFirstCount(Integer firstCount) {
    this.firstCount = firstCount;
}

/**
 * @return the detail-page picture URL
 */
public String getPictureUrl() {
    return this.pictureUrl;
}

/**
 * @param pictureUrl the detail-page picture URL to store
 */
public void setPictureUrl(String pictureUrl) {
    this.pictureUrl = pictureUrl;
}

/**
 * @return the stored rise speed
 */
public double getRiseSpeed() {
    return this.riseSpeed;
}

/**
 * @param riseSpeed the rise speed to store
 */
public void setRiseSpeed(double riseSpeed) {
    this.riseSpeed = riseSpeed;
}
} }
...@@ -84,6 +84,16 @@ public class HotSearchList implements Serializable{ ...@@ -84,6 +84,16 @@ public class HotSearchList implements Serializable{
*/ */
private String topicResult; private String topicResult;
/**
* 观看数(目前仅B站排行榜使用)
*/
private Integer view;
/**
* 弹幕数(目前仅B站排行榜使用)
*/
private Integer barrage;
public HotSearchList(){} public HotSearchList(){}
public HotSearchList(String url, String name, Integer count,Boolean hot,Integer rank,String type,String icon,Date date){ public HotSearchList(String url, String name, Integer count,Boolean hot,Integer rank,String type,String icon,Date date){
...@@ -140,4 +150,19 @@ public class HotSearchList implements Serializable{ ...@@ -140,4 +150,19 @@ public class HotSearchList implements Serializable{
this.icon = icon; this.icon = icon;
this.topicResult = topicResult; this.topicResult = topicResult;
} }
public HotSearchList(String url, String name, String topicLead, Integer count, Boolean hot, Date time, Integer rank, String type, Integer view, Integer barrage) {
this.id = name + "_" + new Date().getTime()+ "_" + type;
this.url = url;
this.name = name;
this.topicLead = topicLead;
this.count = count;
this.hot = hot;
this.time = time;
this.rank = rank;
this.day = TimeParse.dateFormartString(time, "yyyy-MM-dd");
this.type = type;
this.view = view;
this.barrage = barrage;
}
} }
...@@ -20,5 +20,7 @@ public enum HotSearchType { ...@@ -20,5 +20,7 @@ public enum HotSearchType {
网易跟帖热议, 网易跟帖热议,
微博预热榜, 微博预热榜,
腾讯较真榜, 腾讯较真榜,
脉脉热榜 脉脉热榜,
B站排行榜,
B站热搜
} }
package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
@Log4j2
public class BililiCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
* B站排行榜的采集
* @param date
* @return
*/
public static List<HotSearchList> getBilibiliHotSearch(Date date){
List<HotSearchList> hotSearchLists = new ArrayList<>();
log.info("bilibili排行榜开始采集...");
JSONArray dataJson = null;
String htmlBody = null;
String url = "https://api.bilibili.com/x/web-interface/ranking/v2?rid=0&type=all";
Request request = RequestUtils.wrapGet(url);
for(int t=0; t<3 && dataJson==null; t++){
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("B站排行榜页面连接失败",e.fillInStackTrace());
}
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")){
JSONObject jsonObject = JSONObject.parseObject(htmlBody).getJSONObject("data");
dataJson = jsonObject.getJSONArray("list");
if(dataJson != null) {
for (int i=0; i<dataJson.size(); i++) {
JSONObject data = dataJson.getJSONObject(i);
int rank = i+1;
String name = data.getString("title");
String topicLead = data.getString("desc");
int count = data.getIntValue("score");
String bvid = data.getString("bvid");
String bUrl = "https://www.bilibili.com/video/"+bvid;
Integer view = null;
Integer barrage = null;
if(data.containsKey("stat")) {
JSONObject stat = data.getJSONObject("stat");
view = stat.getIntValue("view");
barrage = stat.getIntValue("danmaku");
}
HotSearchList hotSearchList = new HotSearchList(bUrl,name,topicLead,count,null,date,rank,HotSearchType.B站排行榜.name(),view,barrage);
hotSearchLists.add(hotSearchList);
}
}
}
ZhiWeiTools.sleep(3000L);
}
log.info("{}, B站排行榜此轮采集到的数据量为:{}", new Date(), hotSearchLists != null ? hotSearchLists.size() : 0);
log.info("B站排行榜采集结束");
return hotSearchLists;
}
/**
* B站热搜的采集
* @param date
* @return
*/
public static List<HotSearchList> getBiHotData(Date date) {
List<HotSearchList> hotSearchLists = new ArrayList<>();
log.info("B站热搜榜开始采集...");
JSONArray dataJson = null;
String htmlBody = null;
String url = "https://app.biliapi.com/x/v2/search/square?build=616050&limit=10";
Request request = RequestUtils.wrapGet(url);
for(int t=0; t<3 && dataJson==null; t++){
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("B站热搜页面连接失败",e.fillInStackTrace());
}
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")){
dataJson = JSONObject.parseObject(htmlBody).getJSONArray("data").getJSONObject(0).getJSONObject("data").getJSONArray("list");
if(dataJson != null) {
for (int i=0; i<dataJson.size(); i++) {
JSONObject data = dataJson.getJSONObject(i);
int rank = i+1;
String name = data.getString("show_name");
String icon = null;
if(data.containsKey("icon")){
String iconPicture = data.getString("icon");
if(iconPicture.contains("e9e7a2d8497d4063421b685e72680bf1cfb99a0d")){
icon = "热";
}else if(iconPicture.contains("4d579fb61f9655316582db193118bba3a721eec0")){
icon = "新";
} else {
icon = "沸";
}
}
String keyWord = data.getString("keyword");
String biliUrl = "https://search.bilibili.com/all?keyword="+ URLCodeUtil.getURLEncode(keyWord,"utf-8");
HotSearchList hotSearchList = new HotSearchList(biliUrl,name,null,rank,HotSearchType.B站热搜.name(),date);
hotSearchLists.add(hotSearchList);
}
}
}
ZhiWeiTools.sleep(3000L);
}
log.info("{}, B站热搜榜此轮采集到的数据量为:{}", new Date(), hotSearchLists != null ? hotSearchLists.size() : 0);
log.info("B站热搜榜采集结束");
return hotSearchLists;
}
}
...@@ -254,8 +254,10 @@ public class WeiboHotSearchCrawler { ...@@ -254,8 +254,10 @@ public class WeiboHotSearchCrawler {
String midText = readJson.getJSONObject("head_data").getString("midtext"); String midText = readJson.getJSONObject("head_data").getString("midtext");
String read = midText.replaceAll("阅读", "").replaceAll("讨论.*", "").trim(); String read = midText.replaceAll("阅读", "").replaceAll("讨论.*", "").trim();
String discussCount = midText.replaceAll(".*讨论", "").replaceAll("详情.*", "").trim(); String discussCount = midText.replaceAll(".*讨论", "").replaceAll("详情.*", "").trim();
String pictureUrl = readJson.getJSONObject("head_data").getString("portrait_url");
document.put("readCount", TipsUtils.getHotCount(read)); document.put("readCount", TipsUtils.getHotCount(read));
document.put("discussCount", TipsUtils.getHotCount(discussCount)); document.put("discussCount", TipsUtils.getHotCount(discussCount));
document.put("pictureUrl",pictureUrl);
} }
} }
return document; return document;
......
...@@ -55,6 +55,11 @@ public class HotSearchCacheDAO { ...@@ -55,6 +55,11 @@ public class HotSearchCacheDAO {
if("腾讯较真榜".equals(hotSearch.getType())){ if("腾讯较真榜".equals(hotSearch.getType())){
document.put("topic_result",hotSearch.getTopicResult()); document.put("topic_result",hotSearch.getTopicResult());
} }
if ("B站排行榜".equals(hotSearch.getType())){
document.put("topic_lead", hotSearch.getTopicLead());
document.put("view",hotSearch.getView());
document.put("barrage",hotSearch.getBarrage());
}
addAndUpdateData(document); addAndUpdateData(document);
dataes.add(document); dataes.add(document);
}); });
...@@ -115,6 +120,12 @@ public class HotSearchCacheDAO { ...@@ -115,6 +120,12 @@ public class HotSearchCacheDAO {
//计算热搜时长 //计算热搜时长
int duration = nowDoc.getInteger("duration"); int duration = nowDoc.getInteger("duration");
int durationNow = getDuration(type, duration); int durationNow = getDuration(type, duration);
//计算上升速度
double riseSpeed = nowDoc.containsKey("riseSpeed")?nowDoc.getDouble("riseSpeed"):0.00;
if(Objects.nonNull(lastCount) && nowDoc.containsKey("firstCount")) {
int firstCount = nowDoc.getInteger("firstCount");
riseSpeed = ((double)(lastCount - firstCount)/(double)firstCount)*1000/((double)duration);
}
// endTime = getEndTime(type, new Date()); // endTime = getEndTime(type, new Date());
//更新相应信息 //更新相应信息
if(url != null && !url.equals(lastUrl)){ if(url != null && !url.equals(lastUrl)){
...@@ -129,6 +140,7 @@ public class HotSearchCacheDAO { ...@@ -129,6 +140,7 @@ public class HotSearchCacheDAO {
nowDoc.put("preCount", preCount); nowDoc.put("preCount", preCount);
nowDoc.put("duration", durationNow); nowDoc.put("duration", durationNow);
nowDoc.put("recommend",recommend); nowDoc.put("recommend",recommend);
nowDoc.put("riseSpeed",riseSpeed);
// if(readCount != null){ // if(readCount != null){
// nowDoc.put("readCount",readCount); // nowDoc.put("readCount",readCount);
// } // }
...@@ -155,6 +167,8 @@ public class HotSearchCacheDAO { ...@@ -155,6 +167,8 @@ public class HotSearchCacheDAO {
nowDoc.put("preRank", null); nowDoc.put("preRank", null);
nowDoc.put("preCount", null); nowDoc.put("preCount", null);
nowDoc.put("recommend",recommend); nowDoc.put("recommend",recommend);
nowDoc.put("firstCount",lastCount);
nowDoc.put("riseSpeed",0.00);
// if(readCount != null){ // if(readCount != null){
// nowDoc.put("readCount",readCount); // nowDoc.put("readCount",readCount);
// } // }
...@@ -170,6 +184,9 @@ public class HotSearchCacheDAO { ...@@ -170,6 +184,9 @@ public class HotSearchCacheDAO {
nowDoc.put("readCount", nowDoc.getInteger("readCount")); nowDoc.put("readCount", nowDoc.getInteger("readCount"));
nowDoc.put("discussCount", nowDoc.getInteger("discussCount")); nowDoc.put("discussCount", nowDoc.getInteger("discussCount"));
} }
if (nowDoc.containsKey("pictureUrl")) {
nowDoc.put("pictureUrl",nowDoc.getString("pictureUrl"));
}
} }
collection.insertOne(nowDoc); collection.insertOne(nowDoc);
} }
......
...@@ -373,6 +373,28 @@ public class GatherTimer { ...@@ -373,6 +373,28 @@ public class GatherTimer {
} }
/** /**
* B站排行榜采集
*/
@Async(value = "myScheduler")
@Scheduled(cron = "30 * * * * ? ")
public void crawlerBilibiliHotSearch(){
    // One timestamp is taken up front and handed to the crawler for this run.
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> rankingEntries = BililiCrawler.getBilibiliHotSearch(crawlTime);
    // Hand the crawled entries over under the ranking-board type name.
    final String boardName = HotSearchType.B站排行榜.name();
    TipsUtils.addHotList(boardName, rankingEntries);
}
/**
 * Scheduled crawl of the Bilibili hot-search list; the crawled entries are
 * passed to {@code TipsUtils.addHotList} under the hot-search type name.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "30 * * * * ? ")
public void crawlerBilibiliHotData() {
    // One timestamp is taken up front and handed to the crawler for this run.
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotEntries = BililiCrawler.getBiHotData(crawlTime);
    final String boardName = HotSearchType.B站热搜.name();
    TipsUtils.addHotList(boardName, hotEntries);
}
/**
* 微博超话的采集 * 微博超话的采集
*/ */
@Async(value = "myScheduler") @Async(value = "myScheduler")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment