Commit f23cfb22 by 马黎滨

脉脉热榜采集

parent c92f7282
...@@ -18,5 +18,6 @@ public enum HotSearchType { ...@@ -18,5 +18,6 @@ public enum HotSearchType {
网易热榜, 网易热榜,
网易跟帖热议, 网易跟帖热议,
微博预热榜, 微博预热榜,
腾讯较真榜 腾讯较真榜,
脉脉热榜
} }
package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
@Log4j2
public class MaiMaiHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).build();
/**
* 获取maimai热榜
* @return
*/
public static List<HotSearchList> getMaiMaiHotData(Date date){
log.info("脉脉热榜开始采集");
List<HotSearchList> list = new ArrayList<>();
String url = "https://open.taou.com/maimai/feed/v6/hot_posts_list?tab=profession&count=15&version=5.3.34&u=232258287&access_token=1.4c82e8ad6d6b4e03262a48f334dea336";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("脉脉热榜页面连接异常...", e);
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("feeds")) {
JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("feeds");
if (jsonArray != null) {
for (int i = 0; i < jsonArray.size(); i++) {
Integer rank = i + 1 ;
JSONObject jsonObject = jsonArray.getJSONObject(i).getJSONObject("style35");
if(jsonObject != null) {
String name = jsonObject.getString("text");
log.info(name);
String maimaiUrl = jsonObject.getString("share_url");
String icon = null;
if (jsonObject.containsKey("hot_type_card")) {
icon = jsonObject.getJSONObject("hot_type_card").getString("text");
}
String hotValue = jsonArray.getJSONObject(i).getJSONObject("common").getString("hot_info");
Integer count = hotValue.length() > 0 ? TipsUtils.getHotCount(hotValue) : 0;
HotSearchList hotSearchList = new HotSearchList(maimaiUrl, name, count, null, rank, HotSearchType.脉脉热榜.name(), icon, date);
list.add(hotSearchList);
}
}
}
}
log.info("{}, 此轮脉脉热榜采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
log.info("脉脉热榜采集结束");
return list;
}
}
...@@ -239,7 +239,11 @@ public class HotSearchCacheDAO { ...@@ -239,7 +239,11 @@ public class HotSearchCacheDAO {
// default : // default :
// duration = duration + 1; // duration = duration + 1;
// } // }
if("脉脉热榜".equals(type)){
duration = duration + 30;
}else {
duration = duration + 1; duration = duration + 1;
}
return duration; return duration;
} }
......
...@@ -348,6 +348,17 @@ public class GatherTimer { ...@@ -348,6 +348,17 @@ public class GatherTimer {
} }
/** /**
* maimai采集
*/
@Async(value = "myScheduler")
@Scheduled(cron = "30 0/30 * * * ? ")
public void crawlerMaiMaiHotSearch() {
    // One timestamp is taken up front and handed to the crawler, which stamps
    // every record of this run with it.
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final String hotType = HotSearchType.脉脉热榜.name();
    // Crawl the MaiMai hot list and hand the result over under its type name.
    TipsUtils.addHotList(hotType, MaiMaiHotSearchCrawler.getMaiMaiHotData(crawlTime));
}
/**
* 微博超话的采集 * 微博超话的采集
*/ */
@Async(value = "myScheduler") @Async(value = "myScheduler")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment