Commit b20cc34b by chenweitao

百度热搜采集编码解析修改(数据稍后恢复)

parent 36a3d51c
package com.zhiwei.searchhotcrawler.crawler; package com.zhiwei.searchhotcrawler.crawler;
import java.net.URLDecoder;
import java.time.Duration; import java.time.Duration;
import java.util.*; import java.util.*;
...@@ -80,11 +81,20 @@ public class BaiDuHotSearchCrawler { ...@@ -80,11 +81,20 @@ public class BaiDuHotSearchCrawler {
if (StringUtils.isNoneBlank(rankStr)) { if (StringUtils.isNoneBlank(rankStr)) {
rank = Integer.valueOf(rankStr); rank = Integer.valueOf(rankStr);
} }
// 获取关键词相关链接everurl(String)
String everurl = element.select("td.keyword").select("a.list-title").attr("href");
// 获取关键词(String) // 获取关键词(String)
String kw = element.select("td.keyword").select("a.list-title").text(); String kw = element.select("td.keyword").select("a.list-title").text();
// logger.info("关键词:{}", kw); // logger.info("关键词:{}", kw);
// 获取关键词相关链接everurl(String) //从连接中获取正确编码关键词
String everurl = element.select("td.keyword").select("a.list-title").attr("href"); try{
if (!everurl.isEmpty()){
kw = URLDecoder.decode(everurl.substring(everurl.indexOf("&wd=")+4).split("&")[0], "GB2312" );
}
}catch (Exception e1){
log.error("解析百度风云榜,地址",e1);
}
// 获取搜索指数count(int) // 获取搜索指数count(int)
String hot = null; String hot = null;
// 判断热度值所在的规则是否为null // 判断热度值所在的规则是否为null
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment