Commit 952feb78 by leiliangliang

头条财经,汽车,科技,数码榜采集程序上线

parent d1547bb8
......@@ -39,37 +39,37 @@ public class ToutiaoHotSearchCrawler {
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* @return void 返回类型
* @Title: weiboHotSearchByPhoneTest
* @author hero
* @Description: TODO(手机端Iphone 微博热搜采集)
* @return void 返回类型
*/
public static List<HotSearchList> toutiaoHotSearchByPhone(Date date){
public static List<HotSearchList> toutiaoHotSearchByPhone(Date date) {
String origin = "hot_board";
String jsUrl = "https://s3.pstatp.com/toutiao/feoffline/hot_list/resource/hot_list/js/index.45f50250.chunk.js";
Request jsRequest = RequestUtils.wrapGet(jsUrl);
String jsBody = null;
Response response = httpBoot.syncCall(jsRequest, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("获取今日头条实时热搜头部信息标识失败",cause);
}else {
log.error("获取今日头条实时热搜头部信息标识失败", cause);
} else {
jsBody = response.bodyString();
}
if(jsBody != null && jsBody.contains("origin")){
String s = jsBody.substring(jsBody.indexOf("origin:")+"origin:".length());
origin = s.substring(1,s.indexOf("}")-1);
if (jsBody != null && jsBody.contains("origin")) {
String s = jsBody.substring(jsBody.indexOf("origin:") + "origin:".length());
origin = s.substring(1, s.indexOf("}") - 1);
}
//采集头条内容
String url = "https://i.snssdk.com/hot-event/hot-board/?origin="+origin;
String url = "https://i.snssdk.com/hot-event/hot-board/?origin=" + origin;
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for(int count =0; count<=5; count++){
for (int count = 0; count <= 5; count++) {
Response response1 = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response1.hasCause()){
if (response1.hasCause()) {
Throwable cause = response1.cause();
log.error("解析今日头条实时热搜时出现连接失败",cause);
}else {
log.error("解析今日头条实时热搜时出现连接失败", cause);
} else {
htmlBody = response1.bodyString();
}
List<HotSearchList> result = new ArrayList<HotSearchList>();
......@@ -87,7 +87,7 @@ public class ToutiaoHotSearchCrawler {
String wordsType = word.getString("Label");
String icon = getIcon(wordsType);
HotSearchList hotSearch = new HotSearchList(link, name, hotCount, true, rank, HotSearchType.今日头条热搜.name(), icon,date);
HotSearchList hotSearch = new HotSearchList(link, name, hotCount, true, rank, HotSearchType.今日头条热搜.name(), icon, date);
result.add(hotSearch);
rank++;
} catch (Exception e) {
......@@ -155,10 +155,11 @@ public class ToutiaoHotSearchCrawler {
/**
* 更新今日头条热搜阅读量
*
* @param hotSearchList
* @return
*/
public static HotSearchList toutiaoReadCount(HotSearchList hotSearchList){
public static HotSearchList toutiaoReadCount(HotSearchList hotSearchList) {
HotSearchListDAO hotSearchListDAO = new HotSearchListDAO();
if (hotSearchList.getUrl() != null) {
String htmlBody = null;
......@@ -166,10 +167,10 @@ public class ToutiaoHotSearchCrawler {
Request request = RequestUtils.wrapGet(url);
for (int i = 0; i <= 5; i++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("解析今日头条热搜详情页面出现连接失败", cause);
}else {
} else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody)) {
......@@ -194,13 +195,14 @@ public class ToutiaoHotSearchCrawler {
/**
* 热搜类型
*
* @param wordsType
* @return
*/
private static String getIcon(String wordsType){
private static String getIcon(String wordsType) {
String icon = "无";
if(Objects.nonNull(wordsType)){
switch (wordsType){
if (Objects.nonNull(wordsType)) {
switch (wordsType) {
case "new":
icon = "新";
break;
......@@ -215,4 +217,88 @@ public class ToutiaoHotSearchCrawler {
return icon;
}
/**
* @return HashMap<String, List<HotSearchList>> 返回类型
* @Title: toutiaoOtherListByPhone
* @author lll
* @Description: 数码榜 科技榜 财经榜 汽车榜
*/
public static HashMap<String, List<HotSearchList>> toutiaoOtherListByPhone(Date date) {
String url = "https://api5-normal-lq.toutiaoapi.com/api/news/feed/v88/?list_count=20&category=news_hotspot&st_time=14761&refer=1&refresh_reason=1&session_refresh_idx=24&count=20&min_behot_time=1650525634&last_refresh_sub_entrance_interval=1650528640&last_ad_show_interval=-1&cached_item_num=0&last_response_extra=%7B%22data%22%3A%22eyJoYXNfZm9sbG93aW5nIjpmYWxzZSwib2Zmc2V0Ijo2MH0%22%7D&ad_ui_style=%7B%22is_crowd_generalization_style%22%3A2%2C%22van_package%22%3A11000005%7D&lynx_template_data=%5B%5D&lynx_version=2.3.1-rc.3.12-bugfix&tt_from=pull&client_extra_params=%7B%22last_ad_position%22%3A-1%2C%22har_state%22%3A-1%2C%22hand_state%22%3A0%2C%22playparam%22%3A%22codec_type%3A7%2Ccdn_type%3A1%2Cresolution%3A720*1520%2Cttm_version%3A78631%2Cenable_dash%3A0%2Cunwatermark%3A1%2Cv1_fitter_info%3A1%2Ctt_net_energy%3A3%2Cis_order_flow%3A-1%2Ctt_device_score%3A5.8%2Ctt_enable_adaptive%3A2%22%2C%22recommend_enable%22%3A1%2C%22immerse_pool_type%22%3A-2%2C%22immerse_candidate_version%22%3A0%2C%22forbid_loc_rec%22%3A2%2C%22forbid_search_history_rec%22%3A0%2C%22forbid_follow_user_rec%22%3A0%2C%22content_diversity_freq%22%3A0%2C%22ad_download%22%3A%7B%22su%22%3A33900%2C%22pure_mode%22%3A4%7D%2C%22catower_net_quality%22%3A3%2C%22catower_device_overall_performance%22%3A2%7D&device_platform=android&os=android&ssmix=a&_rticket=1650528640648&cdid=27e96bef-4359-4e95-ac6e-1a841c403ed0&channel=xiaomi_13_64&aid=13&app_name=news_article&version_code=877&version_name=8.7.7&manifest_version_code=8770&update_version_code=87707&ab_version=1859936%2C668779%2C3937400%2C668774%2C3937394%2C4004703%2C4025274%2C662176%2C3937387%2C662099%2C3937341%2C668775%2C3937405%2C3939384%2C4025638%2C4037284%2C4048192%2C660830%2C3937403%2C4009522%2C3658686%2C3817337%2C2235008%2C3470750%2C3567672%2C3596064&ab_feature=94563%2C102749&resolution=720*1369&dpi=320&device_type=Redmi+8&device_brand=Xiaomi&language=zh&os_api=28&os_version=9&ac=wifi&dq_param=0&plugin=0&client_vid=2816475%2C3691472%2C3194525%2C3406950%2C338355
3%2C3944177%2C2827920&isTTWebView=0&session_id=58eceb7b-faff-4a1f-93bc-812ecb06641d&host_abi=arm64-v8a&tma_jssdk_version=2.14.0.50&rom_version=miui_v10_v10.3.6.0.pcncnxm&iid=4279737538583935&device_id=3869892362909614&openudid=c5bfcb5b24d2d109&oaid=1d21fa54246b1600&cmwz=%2526-%2522%2523ws2K4KJGJPPINRRMRUUQDOF3%2528%252B%2528%252B%252F0%253F3%257D%257E.G0DHKDCNJOQO%255CTaOW_TB%253F&cp=672167141b180q1";
Map<String, Object> headerMap = new HashMap<>();
headerMap.put("User-Agent", "com.ss.android.article.news/8770 (Linux; U; Android 9; zh_CN; Redmi 8; Build/PKQ1.190319.001; Cronet/TTNetVersion:a867b489 2022-03-11 QuicVersion:b314d107 2021-11-24) Accept-Encoding: gzip, deflate, br");
Request request = RequestUtils.wrapGet(url, headerMap);
String htmlBody = null;
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("获取今日头条榜单出错", cause);
} else {
htmlBody = response.bodyString();
}
//四榜单目标数据
HashMap<String, List<HotSearchList>> map = new HashMap<>();
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("data");
//判断data中是否包含数据
if (Objects.nonNull(jsonArray) && !jsonArray.isEmpty()) {
//2代表财经榜,4代表科技榜,6代表汽车榜,15代表数码榜,
List<Integer> ty = Arrays.asList(2, 4, 6, 15);
for (Integer integer : ty) {
String type = null;
switch (integer) {
case 2:
type = "头条财经榜";
break;
case 4:
type = "头条科技榜";
break;
case 6:
type = "头条汽车榜";
break;
case 15:
type = "头条数码榜";
break;
}
//获取财经榜数据
JSONObject listObject = (JSONObject) jsonArray.get(integer);
String content = listObject.getString("content");
List<HotSearchList> list = parseTouTiaoList(content,date,type);
map.put(type,list);
}
} else {
log.info("页面结构有问题,目标数据为空");
}
} else {
log.info("解析头条榜单时出现解析错误,页面结构有问题");
}
return map;
}
/**
 * Parses one Toutiao ranking board (finance, technology, auto or digital).
 *
 * <p>Reads {@code raw_data.board[0].hot_board_items} from the given JSON text and
 * creates one {@code HotSearchList} per item from its {@code title}. The rank is
 * the item's 1-based position in the array.
 *
 * @param content JSON text of a single feed entry; may be null or blank
 * @param date    collection timestamp stored on every entry
 * @param type    board name stored on every entry and used in log messages
 * @return parsed entries in board order; empty when the expected structure is
 *         missing. Entries parsed before an unexpected failure are still returned.
 */
private static List<HotSearchList> parseTouTiaoList(String content, Date date, String type) {
    List<HotSearchList> hotSearchList = new ArrayList<>();
    try {
        JSONObject object = JSONObject.parseObject(content);
        JSONObject rawData = object == null ? null : object.getJSONObject("raw_data");
        JSONArray boardArray = rawData == null ? null : rawData.getJSONArray("board");
        JSONObject board = (boardArray == null || boardArray.isEmpty()) ? null : boardArray.getJSONObject(0);
        JSONArray items = board == null ? null : board.getJSONArray("hot_board_items");
        if (items == null) {
            // Report the structural problem directly instead of relying on a caught NullPointerException.
            log.error("今日" + type + "解析出错,返回内容缺少榜单数据");
            return hotSearchList;
        }
        for (int i = 0; i < items.size(); i++) {
            JSONObject item = items.getJSONObject(i);
            if (item == null) {
                // Skip a malformed item but keep the position-based rank of the ones after it.
                continue;
            }
            // Kept as a boxed Integer so the same HotSearchList constructor overload is selected.
            Integer rank = i + 1;
            String title = item.getString("title");
            hotSearchList.add(new HotSearchList(null, title, null, rank, type, date));
        }
    } catch (Exception e) {
        // Boundary catch: malformed JSON must not break collection of the other boards.
        log.error("今日" + type + "解析出错", e);
    }
    return hotSearchList;
}
}
......@@ -652,4 +652,22 @@ public class GatherTimer {
TipsUtils.addHotList(HotSearchType.微博出圈榜.name(), weiBoOutCircleList);
log.info(" 微博出圈榜采集结束........");
}
/**
 * Scheduled collection of the Toutiao finance, technology, auto and digital boards.
 *
 * <p>Calls {@code ToutiaoHotSearchCrawler.toutiaoOtherListByPhone} and passes each
 * returned board to {@code TipsUtils.addHotList} under its board name.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void toutiaoOtherListByPhone() {
    log.info(" 头条财经,科技,汽车,数码榜采集开始........");
    Date date = DateUtils.getMillSecondTime(new Date());
    Map<String, List<HotSearchList>> boards = ToutiaoHotSearchCrawler.toutiaoOtherListByPhone(date);
    for (Map.Entry<String, List<HotSearchList>> board : boards.entrySet()) {
        String boardName = board.getKey();
        List<HotSearchList> entries = board.getValue();
        int collected = entries != null ? entries.size() : 0;
        // The board name is passed as an argument so it can never be read as part of the format string.
        log.info("{}, {}此轮采集到的数据量为:{}", new Date(), boardName, collected);
        TipsUtils.addHotList(boardName, entries);
    }
    log.info(" 头条财经,科技,汽车,数码榜采集结束........");
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment