Commit 77a8f1d8 by 马黎滨

Merge branch 'mlb-template-local' into 'mlbWork'

Mlb template local

See merge request !64
parents 977f9678 5b443a53
......@@ -95,6 +95,19 @@ public class HotSearchCache {
*/
private String topicResult;
/**
* 首次上榜热度
*/
private Integer firstCount;
/** 详情页图片(微博平台) */
private String pictureUrl;
/**
* 上升速度
*/
private double riseSpeed;
public HotSearchCache(String url, String name, String topicLead, Integer highestCount, Integer lastCount, Boolean hot,
Date startTime, Date endTime, Integer highestRank, Integer lastRank, String type, Integer duration){
this.id = name + "_" + type;
......@@ -113,35 +126,31 @@ public class HotSearchCache {
this.duration = duration;
}
public Boolean getRecommend() {
return recommend;
}
public Boolean getRecommend() { return recommend; }
public void setRecommend(Boolean recommend) {
this.recommend = recommend;
}
public void setRecommend(Boolean recommend) { this.recommend = recommend; }
public Integer getReadCount() {
return readCount;
}
public Integer getReadCount() { return readCount; }
public void setReadCount(Integer readCount) {
this.readCount = readCount;
}
public void setReadCount(Integer readCount) { this.readCount = readCount; }
public Integer getDiscussCount() {
return discussCount;
}
public Integer getDiscussCount() { return discussCount; }
public void setDiscussCount(Integer discussCount) {
this.discussCount = discussCount;
}
public void setDiscussCount(Integer discussCount) { this.discussCount = discussCount; }
public String getTopicLead() {
return topicLead;
}
public String getTopicLead() { return topicLead; }
public void setTopicLead(String topicLead) {
this.topicLead = topicLead;
}
public void setTopicLead(String topicLead) { this.topicLead = topicLead; }
/**
 * Returns the heat value recorded when the entry was first listed.
 *
 * @return the stored {@code firstCount}, possibly {@code null}
 */
public Integer getFirstCount() {
    return this.firstCount;
}
/**
 * Stores the heat value recorded when the entry was first listed.
 *
 * @param firstCount the value to keep in {@code firstCount}
 */
public void setFirstCount(Integer firstCount) {
    this.firstCount = firstCount;
}
/**
 * Returns the detail-page picture URL (Weibo platform).
 *
 * @return the stored {@code pictureUrl}, possibly {@code null}
 */
public String getPictureUrl() {
    return this.pictureUrl;
}
/**
 * Stores the detail-page picture URL (Weibo platform).
 *
 * @param pictureUrl the value to keep in {@code pictureUrl}
 */
public void setPictureUrl(String pictureUrl) {
    this.pictureUrl = pictureUrl;
}
/**
 * Returns the rise speed of the entry.
 *
 * @return the stored {@code riseSpeed}
 */
public double getRiseSpeed() {
    return this.riseSpeed;
}
/**
 * Stores the rise speed of the entry.
 *
 * @param riseSpeed the value to keep in {@code riseSpeed}
 */
public void setRiseSpeed(double riseSpeed) {
    this.riseSpeed = riseSpeed;
}
}
......@@ -84,6 +84,16 @@ public class HotSearchList implements Serializable{
*/
private String topicResult;
/**
* View count (currently used only by the Bilibili ranking list)
*/
private Integer view;
/**
* 弹幕数(目前仅B站排行榜使用)
*/
private Integer barrage;
public HotSearchList(){}
public HotSearchList(String url, String name, Integer count,Boolean hot,Integer rank,String type,String icon,Date date){
......@@ -140,4 +150,19 @@ public class HotSearchList implements Serializable{
this.icon = icon;
this.topicResult = topicResult;
}
public HotSearchList(String url, String name, String topicLead, Integer count, Boolean hot, Date time, Integer rank, String type, Integer view, Integer barrage) {
this.id = name + "_" + new Date().getTime()+ "_" + type;
this.url = url;
this.name = name;
this.topicLead = topicLead;
this.count = count;
this.hot = hot;
this.time = time;
this.rank = rank;
this.day = TimeParse.dateFormartString(time, "yyyy-MM-dd");
this.type = type;
this.view = view;
this.barrage = barrage;
}
}
......@@ -20,5 +20,7 @@ public enum HotSearchType {
网易跟帖热议,
微博预热榜,
腾讯较真榜,
脉脉热榜
脉脉热榜,
B站排行榜,
B站热搜
}
package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
@Log4j2
public class BililiCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
* B站排行榜的采集
* @param date
* @return
*/
public static List<HotSearchList> getBilibiliHotSearch(Date date){
List<HotSearchList> hotSearchLists = new ArrayList<>();
log.info("bilibili排行榜开始采集...");
JSONArray dataJson = null;
String htmlBody = null;
String url = "https://api.bilibili.com/x/web-interface/ranking/v2?rid=0&type=all";
Request request = RequestUtils.wrapGet(url);
for(int t=0; t<3 && dataJson==null; t++){
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("B站排行榜页面连接失败",e.fillInStackTrace());
}
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")){
JSONObject jsonObject = JSONObject.parseObject(htmlBody).getJSONObject("data");
dataJson = jsonObject.getJSONArray("list");
if(dataJson != null) {
for (int i=0; i<dataJson.size(); i++) {
JSONObject data = dataJson.getJSONObject(i);
int rank = i+1;
String name = data.getString("title");
String topicLead = data.getString("desc");
int count = data.getIntValue("score");
String bvid = data.getString("bvid");
String bUrl = "https://www.bilibili.com/video/"+bvid;
Integer view = null;
Integer barrage = null;
if(data.containsKey("stat")) {
JSONObject stat = data.getJSONObject("stat");
view = stat.getIntValue("view");
barrage = stat.getIntValue("danmaku");
}
HotSearchList hotSearchList = new HotSearchList(bUrl,name,topicLead,count,null,date,rank,HotSearchType.B站排行榜.name(),view,barrage);
hotSearchLists.add(hotSearchList);
}
}
}
ZhiWeiTools.sleep(3000L);
}
log.info("{}, B站排行榜此轮采集到的数据量为:{}", new Date(), hotSearchLists != null ? hotSearchLists.size() : 0);
log.info("B站排行榜采集结束");
return hotSearchLists;
}
/**
* B站热搜的采集
* @param date
* @return
*/
public static List<HotSearchList> getBiHotData(Date date) {
List<HotSearchList> hotSearchLists = new ArrayList<>();
log.info("B站热搜榜开始采集...");
JSONArray dataJson = null;
String htmlBody = null;
String url = "https://app.biliapi.com/x/v2/search/square?build=616050&limit=10";
Request request = RequestUtils.wrapGet(url);
for(int t=0; t<3 && dataJson==null; t++){
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("B站热搜页面连接失败",e.fillInStackTrace());
}
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")){
dataJson = JSONObject.parseObject(htmlBody).getJSONArray("data").getJSONObject(0).getJSONObject("data").getJSONArray("list");
if(dataJson != null) {
for (int i=0; i<dataJson.size(); i++) {
JSONObject data = dataJson.getJSONObject(i);
int rank = i+1;
String name = data.getString("show_name");
String icon = null;
if(data.containsKey("icon")){
String iconPicture = data.getString("icon");
if(iconPicture.contains("e9e7a2d8497d4063421b685e72680bf1cfb99a0d")){
icon = "热";
}else if(iconPicture.contains("4d579fb61f9655316582db193118bba3a721eec0")){
icon = "新";
} else {
icon = "沸";
}
}
String keyWord = data.getString("keyword");
String biliUrl = "https://search.bilibili.com/all?keyword="+ URLCodeUtil.getURLEncode(keyWord,"utf-8");
HotSearchList hotSearchList = new HotSearchList(biliUrl,name,null,rank,HotSearchType.B站热搜.name(),date);
hotSearchLists.add(hotSearchList);
}
}
}
ZhiWeiTools.sleep(3000L);
}
log.info("{}, B站热搜榜此轮采集到的数据量为:{}", new Date(), hotSearchLists != null ? hotSearchLists.size() : 0);
log.info("B站热搜榜采集结束");
return hotSearchLists;
}
}
......@@ -254,8 +254,10 @@ public class WeiboHotSearchCrawler {
String midText = readJson.getJSONObject("head_data").getString("midtext");
String read = midText.replaceAll("阅读", "").replaceAll("讨论.*", "").trim();
String discussCount = midText.replaceAll(".*讨论", "").replaceAll("详情.*", "").trim();
String pictureUrl = readJson.getJSONObject("head_data").getString("portrait_url");
document.put("readCount", TipsUtils.getHotCount(read));
document.put("discussCount", TipsUtils.getHotCount(discussCount));
document.put("pictureUrl",pictureUrl);
}
}
return document;
......
......@@ -55,6 +55,11 @@ public class HotSearchCacheDAO {
if("腾讯较真榜".equals(hotSearch.getType())){
document.put("topic_result",hotSearch.getTopicResult());
}
if ("B站排行榜".equals(hotSearch.getType())){
document.put("topic_lead", hotSearch.getTopicLead());
document.put("view",hotSearch.getView());
document.put("barrage",hotSearch.getBarrage());
}
addAndUpdateData(document);
dataes.add(document);
});
......@@ -115,6 +120,12 @@ public class HotSearchCacheDAO {
//计算热搜时长
int duration = nowDoc.getInteger("duration");
int durationNow = getDuration(type, duration);
// Compute the rise speed: relative growth of the heat value since the first listing,
// scaled by 1000 and divided by the stored duration.
// A missing or null stored value falls back to 0.00 instead of failing on unboxing.
double riseSpeed = nowDoc.get("riseSpeed") != null ? nowDoc.getDouble("riseSpeed") : 0.00;
if(Objects.nonNull(lastCount)) {
// firstCount is written from lastCount when the entry is created, so it may be stored as null.
Integer firstCount = nowDoc.getInteger("firstCount");
// Skip the update when a divisor would be zero; the result would be Infinity or NaN.
if(firstCount != null && firstCount != 0 && duration > 0) {
riseSpeed = ((double)(lastCount - firstCount)/(double)firstCount)*1000/((double)duration);
}
}
// endTime = getEndTime(type, new Date());
//更新相应信息
if(url != null && !url.equals(lastUrl)){
......@@ -129,6 +140,7 @@ public class HotSearchCacheDAO {
nowDoc.put("preCount", preCount);
nowDoc.put("duration", durationNow);
nowDoc.put("recommend",recommend);
nowDoc.put("riseSpeed",riseSpeed);
// if(readCount != null){
// nowDoc.put("readCount",readCount);
// }
......@@ -155,6 +167,8 @@ public class HotSearchCacheDAO {
nowDoc.put("preRank", null);
nowDoc.put("preCount", null);
nowDoc.put("recommend",recommend);
nowDoc.put("firstCount",lastCount);
nowDoc.put("riseSpeed",0.00);
// if(readCount != null){
// nowDoc.put("readCount",readCount);
// }
......@@ -170,6 +184,9 @@ public class HotSearchCacheDAO {
nowDoc.put("readCount", nowDoc.getInteger("readCount"));
nowDoc.put("discussCount", nowDoc.getInteger("discussCount"));
}
if (nowDoc.containsKey("pictureUrl")) {
nowDoc.put("pictureUrl",nowDoc.getString("pictureUrl"));
}
}
collection.insertOne(nowDoc);
}
......
......@@ -373,6 +373,28 @@ public class GatherTimer {
}
/**
 * Scheduled collection of the Bilibili ranking list.
 *
 * <p>Triggered by the cron expression at second 30 of every minute and executed through the
 * {@code myScheduler} async executor. The crawl result is handed to
 * {@code TipsUtils.addHotList} under the {@code B站排行榜} type name.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "30 * * * * ? ")
public void crawlerBilibiliHotSearch() {
    // Collection timestamp, normalised by DateUtils.getMillSecondTime before use.
    final Date collectedAt = DateUtils.getMillSecondTime(new Date());
    TipsUtils.addHotList(
            HotSearchType.B站排行榜.name(),
            BililiCrawler.getBilibiliHotSearch(collectedAt));
}
/**
 * Scheduled collection of the Bilibili hot-search list.
 *
 * <p>Triggered by the cron expression at second 30 of every minute and executed through the
 * {@code myScheduler} async executor. The crawl result is handed to
 * {@code TipsUtils.addHotList} under the {@code B站热搜} type name.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "30 * * * * ? ")
public void crawlerBilibiliHotData() {
    // Collection timestamp, normalised by DateUtils.getMillSecondTime before use.
    final Date collectedAt = DateUtils.getMillSecondTime(new Date());
    TipsUtils.addHotList(
            HotSearchType.B站热搜.name(),
            BililiCrawler.getBiHotData(collectedAt));
}
/**
* 微博超话的采集
*/
@Async(value = "myScheduler")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment