Commit efe7897c by chenweitao

Merge branch 'working' into 'master'

搜狗微信热搜采集程序更新

See merge request !204
parents 7577a1fa 4987c3f7
...@@ -40,10 +40,10 @@ public class SougoHotSearchCrawler { ...@@ -40,10 +40,10 @@ public class SougoHotSearchCrawler {
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<HotSearchList> sougoHotSearch(Date date) { public static List<HotSearchList> sougoHotSearch(Date date) {
String url = "https://weixin.sogou.com"; //String url = "https://weixin.sogou.com";
String url = "https://weixin.sogou.com/pcindex/pc/web/web.js?";
List<HotSearchList> list = new ArrayList<>(); List<HotSearchList> list = new ArrayList<>();
Map<String,String> headMap = HeaderTool.getCommonHead(); Request request = RequestUtils.wrapGet(url);
Request request = RequestUtils.wrapGet(url, headMap);
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
String htmlBody = null; String htmlBody = null;
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY); Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
...@@ -55,35 +55,15 @@ public class SougoHotSearchCrawler { ...@@ -55,35 +55,15 @@ public class SougoHotSearchCrawler {
} }
if (htmlBody != null && htmlBody.contains("topwords")) { if (htmlBody != null && htmlBody.contains("topwords")) {
try { try {
Document document = Jsoup.parse(htmlBody); int rank = 0;
Elements elements = document.select("ol#topwords").select("li"); JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("topwords");
for (Element element : elements) { for (Object object : jsonArray) {
try { rank++;
// 获取排名rank JSONObject json = (JSONObject)JSONObject.toJSON(object);
String rankStr = null; String kw = json.getString("word");
if (!element.select("li").select("i").isEmpty()) { String everurl="https://weixin.sogou.com/weixin?type=2&query="+kw;
rankStr = element.select("li").select("i").text(); HotSearchList hotSearch = new HotSearchList(everurl, kw, null, rank, HotSearchType.搜狗微信热搜.name(),date);
} list.add(hotSearch);
Integer rank = null;
if (StringUtils.isNoneBlank(rankStr)) {
rank = Integer.valueOf(rankStr);
}
// 获取关键词(String)
String kw = element.select("li").select("a").attr("title");
// log.info("关键词:{}", kw);
String everurl = element.select("li").select("a").attr("href");
HotSearchList hotSearch = new HotSearchList(everurl, kw, null, rank, HotSearchType.搜狗微信热搜.name(),date);
if (Objects.nonNull(rank)) {
list.add(hotSearch);
}
} catch (Exception e) {
log.error("解析搜狗微信时出现解析错误", e);
}
} }
break; break;
} catch (Exception e) { } catch (Exception e) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment