Commit a0b140a0 by leiliangliang

脉脉采集程序上线

parent 1312177e
......@@ -44,5 +44,6 @@ public enum HotSearchType {
微博品牌家电榜,
微博品牌服装鞋帽榜,
微博品牌母婴榜,
脉脉话题,
}
package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.HttpClientUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
@Log4j2
public class MaiMaiTopicCrawler {
/**
* 获取maimai话题移动端
* @return
*/
public static List<HotSearchList> getMaiMaiTopicData(Date date){
log.info("脉脉话题开始采集");
List<HotSearchList> list = new ArrayList<>();
String url = "https://open.taou.com/maimai/feed/v6/hot_list_entry/feeds?page_version=2&tab_id=topic&version=6.2.34&u=232258287&access_token=1.4c82e8ad6d6b4e03262a48f334dea336";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = HttpClientUtils.httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("脉脉话题页面连接异常...", cause);
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("topics")) {
JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("topics");
if (jsonArray != null) {
for (int i = 0; i < jsonArray.size(); i++) {
Integer rank = i + 1 ;
JSONObject jsonObject = jsonArray.getJSONObject(i);
String id = jsonObject.getString("id");
String name = jsonObject.getString("name");
String maiMaiTopicUrl = "https://maimai.cn/web/gossip/topic/global_topic_list?topic_id="+id;
Long count = null;
HotSearchList hotSearchList = new HotSearchList(maiMaiTopicUrl, name, count, rank, HotSearchType.脉脉话题.name(),date);
list.add(hotSearchList);
}
}
}
log.info("{}, 此轮脉脉话题采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
log.info("脉脉话题采集结束");
return list;
}
}
......@@ -438,6 +438,8 @@ public class HotSearchCacheDAO {
duration = duration + 30;
} else if ("B站综合热门".equals(type)) {
duration = duration + 60;
}else if ("脉脉话题".equals(type)) {
duration = duration + 60;
}else if(type.contains("微博品牌")){
duration = duration + 60;
}else {
......
......@@ -808,5 +808,15 @@ public class GatherTimer {
log.info("微博品牌母婴榜采集结束........");
}
/**
 * Scheduled MaiMai topic collection. The cron expression fires at second 30 of minute 0
 * of every hour; the crawled records are handed to {@code TipsUtils.addHotList} under the
 * MaiMai topic type name.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "30 0 0/1 * * ? ")
public void crawlerMaiMaiTopic(){
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> topics = MaiMaiTopicCrawler.getMaiMaiTopicData(crawlTime);
    final String typeName = HotSearchType.脉脉话题.name();
    TipsUtils.addHotList(typeName, topics);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment