Commit 2782632a by chenweitao

Merge branch 'working' into 'master'

抖音娱乐榜,微博品牌(9个子榜单)榜上线,更新微博pc端游客cookie

See merge request !200
parents a7df23c8 ad09acf2
......@@ -150,6 +150,16 @@ public class HotSearchList implements Serializable{
*/
private Double exponent;
/**
* Read count (currently used by the Weibo brand boards).
*/
private Long readCount;
/**
* Discussion count (currently used by the Weibo brand boards).
*/
private Long discussCount;
public HotSearchList(){}
public HotSearchList(String url, String name, Long count,Boolean hot,Integer rank,String type,String icon,Date date){
......
......@@ -34,5 +34,15 @@ public enum HotSearchType {
微视热榜,
微博出圈榜,
微博视频榜,
抖音娱乐榜,
微博品牌总榜,
微博品牌汽车榜,
微博品牌手机榜,
微博品牌美妆榜,
微博品牌奢侈品榜,
微博品牌食品饮料榜,
微博品牌家电榜,
微博品牌服装鞋帽榜,
微博品牌母婴榜,
}
package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.*;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
......@@ -34,6 +32,8 @@ public class DouyinHotSearchCrawler {
// Shared, publicly mutable list of hot-search entries; its readers and writers are not visible in this excerpt.
public static List<HotSearchList> list = new ArrayList<>();
// Keywords recorded by getMobileDouyinEntertainmentList for every entry it parses. The set is public
// and mutable, so other classes can read and clear it directly.
// NOTE(review): this is a plain HashSet; if it is written and drained from different threads it needs
// external synchronization or a concurrent set - confirm against the callers.
public static Set<String> set = new HashSet<>();
/**
* @Title: getMobileDouyinHotList
* @author hero
......@@ -113,4 +113,48 @@ public class DouyinHotSearchCrawler {
return resultUrl;
}
/**
 * Fetches the Douyin entertainment hot-search board from the mobile API.
 *
 * <p>Each usable entry of the response's {@code data.word_list} array becomes one
 * {@link HotSearchList} of type {@code 抖音娱乐榜}, and its keyword is also recorded in the shared
 * static {@code set}. Entries that lack a keyword, or whose {@code position} / {@code hot_value}
 * is missing or not numeric, are skipped individually so that one malformed entry does not
 * discard the rest of the board.
 *
 * @author hero
 * @param date collection timestamp stamped on every returned entry
 * @return the parsed entries in response order; empty (never {@code null}) when the request
 *         failed or the payload carries no usable {@code word_list}
 */
public static List<HotSearchList> getMobileDouyinEntertainmentList(Date date){
    List<HotSearchList> entertainmentList = new ArrayList<>();
    String url = "https://api5-normal-c-lq.amemv.com/aweme/v1/hot/search/list/?board_type=2&board_sub_type=2&version_code=140900";
    String htmlBody = null;
    Request request = RequestUtils.wrapGet(url);
    Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
    if (response.hasCause()){
        Throwable cause = response.cause();
        log.debug("获取抖音娱乐榜榜时出现问题:{}", cause);
    }else {
        htmlBody = response.bodyString();
    }
    if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("word_list")) {
        JSONObject data = JSONObject.parseObject(htmlBody);
        // The substring check above does not guarantee the expected nesting, so every level is null-checked.
        JSONObject payload = data == null ? null : data.getJSONObject("data");
        JSONArray wordList = payload == null ? null : payload.getJSONArray("word_list");
        if (wordList == null) {
            return entertainmentList;
        }
        for (int i = 0; i < wordList.size(); i++) {
            JSONObject wl = wordList.getJSONObject(i);
            if (wl == null) {
                continue;
            }
            // Keyword
            String word = wl.getString("word");
            if (!StringUtils.isNotBlank(word)) {
                // An entry without a keyword cannot be identified downstream.
                continue;
            }
            Integer position;
            Long hotValue;
            try {
                // Rank; valueOf(null) also raises NumberFormatException, so a missing field lands in the catch.
                position = Integer.valueOf(wl.getString("position"));
                // Heat value
                hotValue = Long.valueOf(wl.getString("hot_value"));
            } catch (NumberFormatException e) {
                log.debug("抖音娱乐榜条目解析失败,已跳过:{}", wl);
                continue;
            }
            HotSearchList douyinEntertainment = new HotSearchList(null, word, hotValue, position, HotSearchType.抖音娱乐榜.name(),date);
            entertainmentList.add(douyinEntertainment);
            set.add(word);
        }
    }
    return entertainmentList;
}
}
......@@ -505,7 +505,7 @@ public class WeiboHotSearchCrawler {
String url = "https://s.weibo.com/weibo?q=" + encode + "&Refer=top";
String htmlBody = null;
Map<String, String> headerMap = new HashMap<>();
headerMap.put("Cookie", "SUB=_2AkMWEQNHf8NxqwFRmPwdzmrnaYl_zgzEieKgTfKcJRMxHRl-yT9jqmkjtRB6PZEtqE0muNq5OZJPytvesIwD-Kh1dwIz; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFKfwIoPvvYaew277IR3CUN");
headerMap.put("Cookie", "SUB=_2AkMUShJMf8NxqwJRmP0RyWvgb4RwwgnEieKiFuOXJRMxHRl-yT92qlQvtRB6P8o8oso9Ew-s6vf16fdCca-Xz6DwwAMH; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFdAobr6HdAbgQQ9vbUQKDx");
Request request = RequestUtils.wrapGet(url,headerMap);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
......
......@@ -120,7 +120,10 @@ public class HotSearchCacheDAO {
document.put("exponent", hotSearch.getExponent());
document.put("iconUrl", hotSearch.getIconUrl());
}
if (hotSearch.getType().contains("微博品牌")) {
document.put("readCount", hotSearch.getReadCount());
document.put("discussCount", hotSearch.getDiscussCount());
}
if ("微视热榜".equals(hotSearch.getType())) {
document.put("iconUrl", hotSearch.getIconUrl());
addAndUpdateData(document,true);
......@@ -140,6 +143,10 @@ public class HotSearchCacheDAO {
if ("B站排行榜".equals(hotSearch.getType())) {
document.remove("downtext");
}
if (hotSearch.getType().contains("微博品牌")) {
document.remove("readCount");
document.remove("discussCount");
}
dataes.add(document);
}
return dataes;
......@@ -245,6 +252,10 @@ public class HotSearchCacheDAO {
nowDoc.put("duration", durationNow);
nowDoc.put("recommend", recommend);
nowDoc.put("riseSpeed", riseSpeed);
if (type.contains("微博品牌")) {
nowDoc.put("readCount",nonNull(document.get("readCount")) ? Long.valueOf(document.get("readCount").toString()) : null);
nowDoc.put("discussCount",nonNull(document.get("discussCount")) ? Long.valueOf(document.get("discussCount").toString()) : null);
}
if ("微博热搜".equals(type)) {
nowDoc.put("realLastRank", realLastRank);
nowDoc.put("realHighestRank", realHighestRank);
......@@ -320,6 +331,10 @@ public class HotSearchCacheDAO {
nowDoc.put("tag", nonNull(document.get("tag")) ? document.getString("tag") : null);
nowDoc.put("downtext", nonNull(document.get("downtext")) ? document.getString("downtext") : null);
}
if (type.contains("微博品牌")) {
nowDoc.put("readCount",nonNull(document.get("readCount")) ? Long.valueOf(document.get("readCount").toString()) : null);
nowDoc.put("discussCount",nonNull(document.get("discussCount")) ? Long.valueOf(document.get("discussCount").toString()) : null);
}
if ("微博热搜".equals(type)) {
nowDoc = WeiboHotSearchCrawler.weiboUpdate(nowDoc);
//更新微博话题贡献者,关于功能
......@@ -423,7 +438,9 @@ public class HotSearchCacheDAO {
duration = duration + 30;
} else if ("B站综合热门".equals(type)) {
duration = duration + 60;
} else {
}else if(type.contains("微博品牌")){
duration = duration + 60;
}else {
duration = duration + 1;
}
return duration;
......
......@@ -696,4 +696,112 @@ public class GatherTimer {
TipsUtils.addHotList("微博视频榜",weiBoVideoList);
log.info("微博视频榜采集结束........");
}
/**
 * Scheduled collection of the Douyin entertainment board.
 *
 * <p>Triggered by the cron expression below on the {@code myScheduler} async executor. It
 * crawls the board, logs how many entries came back (0 when the crawler returned
 * {@code null}) and hands the list to {@code TipsUtils.addHotList} under the board name.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerDouYinEntertainment(){
    log.info("抖音娱乐榜开始采集...");
    final Date collectedAt = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> entries = DouyinHotSearchCrawler.getMobileDouyinEntertainmentList(collectedAt);
    // Only the logged size is guarded against null; the list itself is forwarded unchanged.
    int entryCount = 0;
    if (entries != null) {
        entryCount = entries.size();
    }
    log.info("{}, 抖音娱乐榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(entryCount));
    TipsUtils.addHotList(HotSearchType.抖音娱乐榜.name(), entries);
    log.info("抖音娱乐榜采集结束...");
}
/**
 * Scheduled refresh of the video-list URLs for Douyin entertainment board keywords.
 *
 * <p>Triggered by the cron expression below on the {@code myScheduler} async executor. It
 * takes a snapshot of the keywords accumulated in {@code DouyinHotSearchCrawler.set},
 * removes exactly those keywords from the shared set, resolves a URL for each one through
 * {@code DouyinHotSearchCrawler.getDouyinUrl} and, when a URL is returned, stores it via
 * {@code HotSearchCacheDAO.updateDouyinUrl} under the id {@code <word>_抖音娱乐榜}.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 0/5 * * * ? ")
public void updateDouYinEntertainmentUrl(){
    log.info("抖音娱乐榜链接更新开始...");
    HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
    Set<String> pendingWords = new HashSet<>(DouyinHotSearchCrawler.set);
    // Remove only what was snapshotted: a blanket clear() would silently drop keywords that
    // were added to the shared set after the copy was taken.
    // NOTE(review): if the shared set is not a concurrent collection, copying it while another
    // task adds to it is still unsafe - confirm how the set is declared and written.
    DouyinHotSearchCrawler.set.removeAll(pendingWords);
    if (pendingWords.isEmpty()) {
        log.info("抖音娱乐榜链接更新失败,抖音娱乐榜列表获取为空。");
        return;
    }
    for (String word : pendingWords) {
        String id = word+"_"+HotSearchType.抖音娱乐榜.name();
        // NOTE(review): the keyword is appended to the query string as-is; confirm that
        // getDouyinUrl (or the HTTP layer) takes care of URL-encoding.
        String url = DouyinHotSearchCrawler.getDouyinUrl("https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword="+word);
        if(url != null) {
            Document document = new Document();
            document.put("id", id);
            document.put("url", url);
            hotSearchCacheDAO.updateDouyinUrl(document);
        }
    }
    log.info("抖音娱乐榜链接更新结束");
}
/**
 * Scheduled collection of the Weibo brand boards (overall board plus eight category boards).
 *
 * <p>Triggered by the cron expression below on the {@code myScheduler} async executor. All
 * boards share one collection timestamp and are collected sequentially in a fixed order.
 * Each board is collected independently: a runtime failure in one board is logged and the
 * remaining boards are still attempted.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 0 0/1 * * ? ")
public void crawlerWeiBoBrandTotalList(){
    Date date = DateUtils.getMillSecondTime(new Date());
    collectWeiBoBrandBoard(HotSearchType.微博品牌总榜, WeiBoBrandCrawler::weiBoBrandTotalList, date);
    collectWeiBoBrandBoard(HotSearchType.微博品牌汽车榜, WeiBoBrandCrawler::weiBoBrandCar, date);
    collectWeiBoBrandBoard(HotSearchType.微博品牌手机榜, WeiBoBrandCrawler::weiBoBrandPhone, date);
    collectWeiBoBrandBoard(HotSearchType.微博品牌美妆榜, WeiBoBrandCrawler::weiBoBrandMakeup, date);
    collectWeiBoBrandBoard(HotSearchType.微博品牌奢侈品榜, WeiBoBrandCrawler::weiBoBrandLuxury, date);
    collectWeiBoBrandBoard(HotSearchType.微博品牌食品饮料榜, WeiBoBrandCrawler::weiBoBrandFood, date);
    collectWeiBoBrandBoard(HotSearchType.微博品牌家电榜, WeiBoBrandCrawler::weiBoBrandHomeAppliance, date);
    collectWeiBoBrandBoard(HotSearchType.微博品牌服装鞋帽榜, WeiBoBrandCrawler::weiBoBrandDress, date);
    collectWeiBoBrandBoard(HotSearchType.微博品牌母婴榜, WeiBoBrandCrawler::weiBoBrandMotherAndInfant, date);
}

/**
 * Collects one Weibo brand board: logs start, crawls, logs the entry count (0 for a
 * {@code null} result), publishes the list through {@code TipsUtils.addHotList}, logs end.
 *
 * @param board   board being collected; its enum name is the published key and the log label
 * @param crawler crawl function for this board, called with {@code date}
 * @param date    collection timestamp passed to the crawler
 */
private void collectWeiBoBrandBoard(HotSearchType board,
        java.util.function.Function<Date, List<HotSearchList>> crawler, Date date){
    String boardName = board.name();
    log.info(boardName + "采集开始........");
    try {
        List<HotSearchList> boardList = crawler.apply(date);
        log.info("{}, 此轮" + boardName + "采集到的数据量为:{}", new Date(), Integer.valueOf(boardList != null ? boardList.size() : 0));
        TipsUtils.addHotList(boardName, boardList);
    } catch (RuntimeException e) {
        // Isolate the failure so the boards that follow are not skipped until the next run.
        log.error(boardName + "采集失败", e);
    }
    log.info(boardName + "采集结束........");
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment