Commit 6941070e by 马黎滨

今日头条采集修改

parent 88b59f64
......@@ -3,12 +3,15 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.tools.URLCodeUtil;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.util.*;
......@@ -32,9 +35,20 @@ public class ToutiaoHotSearchCrawler {
* @return void 返回类型
*/
public static List<HotSearchList> toutiaoHotSearchByPhone(){
String origin = "hot_board";
String jsUrl = "https://s3.pstatp.com/toutiao/feoffline/hot_list/resource/hot_list/js/index.45f50250.chunk.js";
try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(jsUrl)).body().string();
if(htmlBody.contains("origin")){
String s = htmlBody.substring(htmlBody.indexOf("origin:")+"origin:".length());
origin = s.substring(1,s.indexOf("}")-1);
}
} catch (IOException e) {
log.error("获取今日头条实时热搜头部信息标识失败",e);
}
for(int count =0; count<=5; count++){
String url = "https://ib.snssdk.com/api/suggest_words/?business_id=10017";
String url = "https://i.snssdk.com/hot-event/hot-board/?origin="+origin;
Map<String,String> headerMap = new HashMap<>();
headerMap.put("User-Agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1");
headerMap.put("referer","https://ib.snssdk.com/rogue/aladdin_landingpage/template/aladdin_landingpage/hot_words.html?isBrowser=true&traffic_source=");
......@@ -42,17 +56,17 @@ public class ToutiaoHotSearchCrawler {
try {
List<HotSearchList> result = new ArrayList<HotSearchList>();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("words")){
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")){
try {
JSONArray words = JSONObject.parseObject(htmlBody).getJSONArray("data").getJSONObject(0).getJSONArray("words");
JSONArray words = JSONObject.parseObject(htmlBody).getJSONArray("data");
int rank = 1;
for(int i=0;i<words.size();i++){
try {
JSONObject word = words.getJSONObject(i);
String name = word.getString("word");
String name = word.getString("Title");
String link = "https://ib.snssdk.com/search/?keyword="+ URLCodeUtil.getURLEncode(name, "utf-8") +"&pd=synthesis&source=trending_list&traffic_source=";
Integer hotCount = word.getJSONObject("params").getInteger("fake_click_cnt");
Integer wordsType = word.getInteger("words_type");
Integer hotCount = word.getInteger("HotValue");
String wordsType = word.getString("Label");
String icon = getIcon(wordsType);
HotSearchList hotSearch = new HotSearchList(link, name, hotCount, true, rank, HotSearchType.今日头条热搜.name(), icon);
......@@ -83,17 +97,17 @@ public class ToutiaoHotSearchCrawler {
* @param wordsType
* @return
*/
private static String getIcon(Integer wordsType){
private static String getIcon(String wordsType){
String icon = "无";
if(Objects.nonNull(wordsType)){
switch (wordsType){
case 1:
case "new":
icon = "新";
break;
case 2:
case "hot":
icon = "热";
break;
case 3:
case "explode":
icon = "爆";
break;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment