Commit 51117558 by chenweitao

Merge branch 'working' into 'master'

百度热搜采集编码解析修改(数据稍后恢复)

See merge request !88
parents c8d280ad b20cc34b
package com.zhiwei.searchhotcrawler.crawler;
import java.net.URLDecoder;
import java.time.Duration;
import java.util.*;
......@@ -80,11 +81,20 @@ public class BaiDuHotSearchCrawler {
if (StringUtils.isNoneBlank(rankStr)) {
rank = Integer.valueOf(rankStr);
}
// 获取关键词相关链接everurl(String)
String everurl = element.select("td.keyword").select("a.list-title").attr("href");
// 获取关键词(String)
String kw = element.select("td.keyword").select("a.list-title").text();
// logger.info("关键词:{}", kw);
// 获取关键词相关链接everurl(String)
String everurl = element.select("td.keyword").select("a.list-title").attr("href");
//从链接中获取正确编码关键词
try{
if (!everurl.isEmpty()){
kw = URLDecoder.decode(everurl.substring(everurl.indexOf("&wd=")+4).split("&")[0], "GB2312" );
}
}catch (Exception e1){
log.error("解析百度风云榜,地址",e1);
}
// 获取搜索指数count(int)
String hot = null;
// 判断热度值所在的规则是否为null
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment