Commit d00d9860 by 马黎滨

Merge branch 'mlbWork' into 'master'

Mlb work

See merge request !7
parents d767f59c c209c204
...@@ -50,6 +50,11 @@ ...@@ -50,6 +50,11 @@
<artifactId>lombok</artifactId> <artifactId>lombok</artifactId>
<version>1.18.8</version> <version>1.18.8</version>
</dependency> </dependency>
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.6.0.4-RELEASE</version>
</dependency>
</dependencies> </dependencies>
<build> <build>
......
...@@ -8,5 +8,6 @@ public enum HotSearchType { ...@@ -8,5 +8,6 @@ public enum HotSearchType {
搜狗微信热搜, 搜狗微信热搜,
微博话题, 微博话题,
今日头条热搜, 今日头条热搜,
知乎热搜榜单 知乎热搜榜单,
腾讯新闻
} }
...@@ -7,6 +7,8 @@ import java.util.List; ...@@ -7,6 +7,8 @@ import java.util.List;
import java.util.Objects; import java.util.Objects;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
...@@ -29,7 +31,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType; ...@@ -29,7 +31,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
@Log4j2 @Log4j2
public class BaiDuHotSearchCrawler { public class BaiDuHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
/** /**
* @Title: BaiDuHotSearchTest * @Title: BaiDuHotSearchTest
...@@ -39,16 +41,18 @@ public class BaiDuHotSearchCrawler { ...@@ -39,16 +41,18 @@ public class BaiDuHotSearchCrawler {
*/ */
public static List<HotSearchList> baiduHotSearch() { public static List<HotSearchList> baiduHotSearch() {
String url = "http://top.baidu.com/buzz?b=1&fr=topindex"; String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
try { String htmlBody = null;
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_HEAVY_PROXY).body().string(); Request request = RequestUtils.wrapGet(url);
if (htmlBody != null && htmlBody.contains("mainBody")) { try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
return ansysData(htmlBody); htmlBody = response.body().string();
} else {
log.info("解析百度风云榜时出现解析错误,页面结构有问题");
}
} catch (Exception e) { } catch (Exception e) {
log.error("解析百度风云榜时出现解析错误,页面结构有问题", e); log.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
} }
if (htmlBody != null && htmlBody.contains("mainBody")) {
return ansysData(htmlBody);
} else {
log.info("解析百度风云榜时出现解析错误,页面结构有问题");
}
return Collections.emptyList(); return Collections.emptyList();
} }
......
...@@ -5,6 +5,8 @@ import java.util.ArrayList; ...@@ -5,6 +5,8 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -28,7 +30,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType; ...@@ -28,7 +30,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
@Log4j2 @Log4j2
public class DouyinHotSearchCrawler { public class DouyinHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).build();
/** /**
* @Title: getMobileDouyinHotList * @Title: getMobileDouyinHotList
...@@ -40,34 +42,36 @@ public class DouyinHotSearchCrawler { ...@@ -40,34 +42,36 @@ public class DouyinHotSearchCrawler {
public static List<HotSearchList> getMobileDouyinHotList(){ public static List<HotSearchList> getMobileDouyinHotList(){
List<HotSearchList> list = null; List<HotSearchList> list = null;
String url = "https://api.amemv.com/aweme/v1/hot/search/list/"; String url = "https://api.amemv.com/aweme/v1/hot/search/list/";
try { String htmlBody = null;
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_HEAVY_PROXY).body().string(); Request request = RequestUtils.wrapGet(url);
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("word_list")){ try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
list = new ArrayList<>(); htmlBody = response.body().string();
JSONObject data = JSONObject.parseObject(htmlBody); }catch (IOException e) {
JSONArray wordList = data.getJSONObject("data").getJSONArray("word_list"); log.debug("获取抖音热搜榜时出现问题:{}", e);
String positionStr = null; }
String word = null; if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("word_list")) {
String hotValueStr = null; list = new ArrayList<>();
for (int i = 0; i < wordList.size(); i++) { JSONObject data = JSONObject.parseObject(htmlBody);
JSONObject wl = wordList.getJSONObject(i); JSONArray wordList = data.getJSONObject("data").getJSONArray("word_list");
//获取排名 String positionStr = null;
positionStr = wl.getString("position"); String word = null;
Integer position = null; String hotValueStr = null;
position = Integer.valueOf(positionStr); for (int i = 0; i < wordList.size(); i++) {
//获取关键词 JSONObject wl = wordList.getJSONObject(i);
word = wl.getString("word"); //获取排名
//获取热度值 positionStr = wl.getString("position");
hotValueStr =wl.getString("hot_value"); Integer position = null;
Integer hotValue = null; position = Integer.valueOf(positionStr);
hotValue = Integer.valueOf(hotValueStr); //获取关键词
word = wl.getString("word");
//获取热度值
hotValueStr = wl.getString("hot_value");
Integer hotValue = null;
hotValue = Integer.valueOf(hotValueStr);
// logger.info("热度为:::{}", hot_value); // logger.info("热度为:::{}", hot_value);
HotSearchList douyin = new HotSearchList(null,word, hotValue, position,HotSearchType.抖音热搜.name()); HotSearchList douyin = new HotSearchList(null, word, hotValue, position, HotSearchType.抖音热搜.name());
list.add(douyin); list.add(douyin);
}
} }
} catch (IOException e) {
log.debug("获取抖音热搜榜时出现问题:{}", e);
} }
return list; return list;
} }
......
...@@ -7,6 +7,8 @@ import java.util.Map; ...@@ -7,6 +7,8 @@ import java.util.Map;
import java.util.Objects; import java.util.Objects;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
...@@ -31,7 +33,7 @@ import com.zhiwei.tools.httpclient.HeaderTool; ...@@ -31,7 +33,7 @@ import com.zhiwei.tools.httpclient.HeaderTool;
@Log4j2 @Log4j2
public class SougoHotSearchCrawler { public class SougoHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).build();
/** /**
* @Title: SougoHotSearchTest * @Title: SougoHotSearchTest
...@@ -41,55 +43,57 @@ public class SougoHotSearchCrawler { ...@@ -41,55 +43,57 @@ public class SougoHotSearchCrawler {
*/ */
public static List<HotSearchList> sougoHotSearch() { public static List<HotSearchList> sougoHotSearch() {
String url = "https://weixin.sogou.com"; String url = "https://weixin.sogou.com";
List<HotSearchList> list = new ArrayList<>(); List<HotSearchList> list = new ArrayList<>();
Map<String,String> headMap = HeaderTool.getCommonHead();
Request request = RequestUtils.wrapGet(url, headMap);
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
String htmlBody = null; String htmlBody = null;
try { try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
Map<String,String> headMap = HeaderTool.getCommonHead(); htmlBody = response.body().string();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headMap), ProxyHolder.NAT_HEAVY_PROXY).body().string(); }catch (Exception e) {
if (htmlBody != null && htmlBody.contains("topwords")) { log.error("解析搜狗微信时出现解析错误,页面结构有问题", e);
try { }
Document document = Jsoup.parse(htmlBody); if (htmlBody != null && htmlBody.contains("topwords")) {
Elements elements = document.select("ol#topwords").select("li"); try {
for (Element element : elements) { Document document = Jsoup.parse(htmlBody);
try { Elements elements = document.select("ol#topwords").select("li");
// 获取排名rank for (Element element : elements) {
String rankStr = null; try {
if (!element.select("li").select("i").isEmpty()) { // 获取排名rank
rankStr = element.select("li").select("i").text(); String rankStr = null;
} if (!element.select("li").select("i").isEmpty()) {
Integer rank = null; rankStr = element.select("li").select("i").text();
if (StringUtils.isNoneBlank(rankStr)) { }
rank = Integer.valueOf(rankStr); Integer rank = null;
} if (StringUtils.isNoneBlank(rankStr)) {
rank = Integer.valueOf(rankStr);
}
// 获取关键词(String) // 获取关键词(String)
String kw = element.select("li").select("a").attr("title"); String kw = element.select("li").select("a").attr("title");
// logger.info("关键词:{}", kw); // logger.info("关键词:{}", kw);
String everurl = element.select("li").select("a").attr("href"); String everurl = element.select("li").select("a").attr("href");
HotSearchList hotSearch = new HotSearchList(everurl, kw, null, rank, HotSearchType.搜狗微信热搜.name()); HotSearchList hotSearch = new HotSearchList(everurl, kw, null, rank, HotSearchType.搜狗微信热搜.name());
if (Objects.nonNull(rank)) { if (Objects.nonNull(rank)) {
list.add(hotSearch); list.add(hotSearch);
}
} catch (Exception e) {
log.error("解析搜狗微信时出现解析错误", e);
} }
} catch (Exception e) {
log.error("解析搜狗微信时出现解析错误", e);
} }
} catch (Exception e) {
log.error("解析搜狗微信时出现解析错误,数据不是json结构", e.fillInStackTrace());
return Collections.emptyList();
} }
} else { break;
log.info("解析搜狗微信时出现解析错误,页面结构有问题"); } catch (Exception e) {
log.error("解析搜狗微信时出现解析错误,数据不是json结构", e.fillInStackTrace());
return Collections.emptyList();
} }
break; } else {
} catch (Exception e) { log.info("解析搜狗微信时出现解析错误,页面结构有问题");
log.error("解析搜狗微信时出现解析错误,页面结构有问题", e);
} }
} }
return list; return list;
} }
......
package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
@Log4j2
public class TengXunCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
* 腾讯热榜数据采集
* @return
*/
public static List<HotSearchList> getTengXunHotList() {
log.info("腾讯新闻热榜开始采集...");
List<HotSearchList> list = new ArrayList<>();
JSONArray dataJson = null;
String htmlBody = null;
String url = "https://r.inews.qq.com/getWeiboRankingList?chlid=news_recommend_hot&appver=28_android_4.2.40&devid=&qn-rid=&qn-sig=f690e21095559203e3f55c42a04f8f15";
Request request = RequestUtils.wrapGet(url);
//采集为空最多重试3次
for (int t = 0; t < 3 && dataJson == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
e.printStackTrace();
}
if (htmlBody != null && htmlBody.contains("idlist")) {
JSONObject topSearch = JSONObject.parseObject(htmlBody);
dataJson = topSearch.getJSONArray("idlist").getJSONObject(0).getJSONArray("newslist");
for (int i = 1; i < dataJson.size(); i++) {
Integer rank = i;
String name = dataJson.getJSONObject(i).getString("title");
String tengxunUrl = "https://view.inews.qq.com/topic/" + dataJson.getJSONObject(i).getString("id");
Integer count = 0;
String icon = null;
if (dataJson.getJSONObject(i).containsKey("topic")) {
count = dataJson.getJSONObject(i).getJSONObject("topic").getIntValue("ranking_score");
if (dataJson.getJSONObject(i).getJSONObject("topic").containsKey("rec_icon")) {
icon = dataJson.getJSONObject(i).getJSONObject("topic").getString("rec_icon");
}
} else if (dataJson.getJSONObject(i).containsKey("hotEvent")) {
count = dataJson.getJSONObject(i).getJSONObject("hotEvent").getIntValue("hotScore");
if (dataJson.getJSONObject(i).getJSONObject("hotEvent").containsKey("rec_icon")) {
icon = dataJson.getJSONObject(i).getJSONObject("hotEvent").getString("rec_icon");
}
}
if (icon != null) {
if (icon.contains("11918331890")) {
icon = "热";
} else if (icon.contains("11918332271")) {
icon = "新";
}
}
HotSearchList hotSearchList = new HotSearchList(tengxunUrl, name, count, false, rank, HotSearchType.腾讯新闻.name(), icon);
list.add(hotSearchList);
}
}
ZhiWeiTools.sleep(3000L);
}
log.info("{}, 此轮腾讯新闻热榜采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
log.info("腾讯新闻采集结束");
return list;
}
}
...@@ -9,6 +9,8 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList; ...@@ -9,6 +9,8 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
...@@ -37,56 +39,60 @@ public class ToutiaoHotSearchCrawler { ...@@ -37,56 +39,60 @@ public class ToutiaoHotSearchCrawler {
public static List<HotSearchList> toutiaoHotSearchByPhone(){ public static List<HotSearchList> toutiaoHotSearchByPhone(){
String origin = "hot_board"; String origin = "hot_board";
String jsUrl = "https://s3.pstatp.com/toutiao/feoffline/hot_list/resource/hot_list/js/index.45f50250.chunk.js"; String jsUrl = "https://s3.pstatp.com/toutiao/feoffline/hot_list/resource/hot_list/js/index.45f50250.chunk.js";
try { Request jsRequest = RequestUtils.wrapGet(jsUrl);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(jsUrl)).body().string(); String jsBody = null;
if(htmlBody.contains("origin")){ try(Response response = httpBoot.syncCall(jsRequest)) {
String s = htmlBody.substring(htmlBody.indexOf("origin:")+"origin:".length()); jsBody = response.body().string();
origin = s.substring(1,s.indexOf("}")-1);
}
} catch (IOException e) { } catch (IOException e) {
log.error("获取今日头条实时热搜头部信息标识失败",e); log.error("获取今日头条实时热搜头部信息标识失败",e);
} }
if(jsBody != null && jsBody.contains("origin")){
String s = jsBody.substring(jsBody.indexOf("origin:")+"origin:".length());
origin = s.substring(1,s.indexOf("}")-1);
}
//采集头条内容
String url = "https://i.snssdk.com/hot-event/hot-board/?origin="+origin;
Map<String,String> headerMap = new HashMap<>();
headerMap.put("User-Agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1");
headerMap.put("referer","https://ib.snssdk.com/rogue/aladdin_landingpage/template/aladdin_landingpage/hot_words.html?isBrowser=true&traffic_source=");
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
for(int count =0; count<=5; count++){ for(int count =0; count<=5; count++){
String url = "https://i.snssdk.com/hot-event/hot-board/?origin="+origin; try(Response response = httpBoot.syncCall(request)) {
Map<String,String> headerMap = new HashMap<>(); htmlBody = response.body().string();
headerMap.put("User-Agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1"); } catch (IOException e1) {
headerMap.put("referer","https://ib.snssdk.com/rogue/aladdin_landingpage/template/aladdin_landingpage/hot_words.html?isBrowser=true&traffic_source="); log.error("解析今日头条实时热搜时出现连接失败",e1);
String htmlBody; }
try { List<HotSearchList> result = new ArrayList<HotSearchList>();
List<HotSearchList> result = new ArrayList<HotSearchList>(); if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string(); try {
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")){ JSONArray words = JSONObject.parseObject(htmlBody).getJSONArray("data");
try { int rank = 1;
JSONArray words = JSONObject.parseObject(htmlBody).getJSONArray("data"); for (int i = 0; i < words.size(); i++) {
int rank = 1; try {
for(int i=0;i<words.size();i++){ JSONObject word = words.getJSONObject(i);
try { String name = word.getString("Title");
JSONObject word = words.getJSONObject(i); String link = "https://ib.snssdk.com/search/?keyword=" + URLCodeUtil.getURLEncode(name, "utf-8") + "&pd=synthesis&source=trending_list&traffic_source=";
String name = word.getString("Title"); Integer hotCount = word.getInteger("HotValue");
String link = "https://ib.snssdk.com/search/?keyword="+ URLCodeUtil.getURLEncode(name, "utf-8") +"&pd=synthesis&source=trending_list&traffic_source="; String wordsType = word.getString("Label");
Integer hotCount = word.getInteger("HotValue"); String icon = getIcon(wordsType);
String wordsType = word.getString("Label");
String icon = getIcon(wordsType);
HotSearchList hotSearch = new HotSearchList(link, name, hotCount, true, rank, HotSearchType.今日头条热搜.name(), icon); HotSearchList hotSearch = new HotSearchList(link, name, hotCount, true, rank, HotSearchType.今日头条热搜.name(), icon);
result.add(hotSearch); result.add(hotSearch);
rank++; rank++;
} catch (Exception e) { } catch (Exception e) {
log.error("解析今日头条实时热搜时出现解析错误",e); log.error("解析今日头条实时热搜时出现解析错误", e);
continue; continue;
}
} }
return result;
} catch (Exception e) {
log.error("解析今日头条实时热搜时出现解析错误,数据不是json结构",e);
} }
}else{ return result;
log.info("解析今日头条实时热搜时出现解析错误,页面结构有问题"); } catch (Exception e) {
log.error("解析今日头条实时热搜时出现解析错误,数据不是json结构", e);
} }
} catch (IOException e1) { } else {
log.error("解析今日头条实时热搜时出现连接失败",e1); log.info("解析今日头条实时热搜时出现解析错误,页面结构有问题");
} }
} }
return Collections.emptyList(); return Collections.emptyList();
} }
......
...@@ -4,6 +4,8 @@ import java.io.IOException; ...@@ -4,6 +4,8 @@ import java.io.IOException;
import java.util.*; import java.util.*;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
...@@ -44,52 +46,52 @@ public class WeiboHotSearchCrawler { ...@@ -44,52 +46,52 @@ public class WeiboHotSearchCrawler {
List<HotSearchList> list = new ArrayList<HotSearchList>(); List<HotSearchList> list = new ArrayList<HotSearchList>();
for(int i =0; i<3; i++){ for(int i =0; i<3; i++){
String htmlBody = null; String htmlBody = null;
try { Request request = RequestUtils.wrapGet(url);
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url),ProxyHolder.NAT_HEAVY_PROXY).body().string(); try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
if(htmlBody!=null && htmlBody.contains("pl_top_realtimehot")){ htmlBody = response.body().string();
try { } catch (Exception e) {
if(i==2){
return list;
}else{
continue;
}
}
if (htmlBody != null && htmlBody.contains("pl_top_realtimehot")) {
try {
// String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0]; // String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0];
// script = script.replace("(", "").replace(")", ""); // script = script.replace("(", "").replace(")", "");
// JSONObject json = JSONObject.parseObject(script); // JSONObject json = JSONObject.parseObject(script);
// String html = json.getString("html"); // String html = json.getString("html");
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
Elements elements = document.select("div#pl_top_realtimehot").select("tbody").select("tr"); Elements elements = document.select("div#pl_top_realtimehot").select("tbody").select("tr");
for(Element element : elements){ for (Element element : elements) {
try { try {
String id = "http://s.weibo.com"+element.select("td.td-02").select("a").attr("href"); String id = "http://s.weibo.com" + element.select("td.td-02").select("a").attr("href");
String name = element.select("td.td-02").select("a").text(); String name = element.select("td.td-02").select("a").text();
String num = !element.select("td.td-02").select("span").text().equals("")?element.select("td.td-02").select("span").text():"0"; String num = !element.select("td.td-02").select("span").text().equals("") ? element.select("td.td-02").select("span").text() : "0";
String rank = !element.select("td[class=\"td-01 ranktop\"]").text().equals("")?element.select("td[class=\"td-01 ranktop\"]").text():"-1"; String rank = !element.select("td[class=\"td-01 ranktop\"]").text().equals("") ? element.select("td[class=\"td-01 ranktop\"]").text() : "-1";
int hotCount = Integer.valueOf(num); int hotCount = Integer.valueOf(num);
int rankCount = Integer.valueOf(rank); int rankCount = Integer.valueOf(rank);
HotSearchList hotSearch = new HotSearchList(id, name, hotCount,true, rankCount, HotSearchType.微博热搜.name(),null); HotSearchList hotSearch = new HotSearchList(id, name, hotCount, true, rankCount, HotSearchType.微博热搜.name(), null);
list.add(hotSearch); list.add(hotSearch);
} catch (Exception e) { } catch (Exception e) {
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
log.error("解析微博时时热搜时出现解析错误", e); log.error("解析微博时时热搜时出现解析错误", e);
continue; continue;
}
} }
} catch (Exception e) {
log.error("解析微博时时热搜时出现解析错误,数据不是json结构",e.fillInStackTrace());
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
return null;
} }
}else{ } catch (Exception e) {
log.error("解析微博时时热搜时出现解析错误,数据不是json结构", e.fillInStackTrace());
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
log.info("解析微博时时热搜时出现解析错误,页面结构有问题"); return null;
}
break;
} catch (Exception e) {
if(i==2){
return list;
}else{
continue;
} }
} else {
SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
log.info("解析微博时时热搜时出现解析错误,页面结构有问题");
} }
break;
} }
return list; return list;
} }
...@@ -103,61 +105,61 @@ public class WeiboHotSearchCrawler { ...@@ -103,61 +105,61 @@ public class WeiboHotSearchCrawler {
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<HotSearchList> weiboHotSearchByPhone(){ public static List<HotSearchList> weiboHotSearchByPhone(){
String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
Map<String,String> headerMap = new HashMap<>();
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
for(int count =0; count<=5; count++){ for(int count =0; count<=5; count++){
String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"; try(Response response = httpBoot.syncCall(request)) {
Map<String,String> headerMap = new HashMap<>(); htmlBody = response.body().string();
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"); } catch (IOException e1) {
String htmlBody; log.error("解析微博时时热搜时出现连接失败",e1);
try { }
List<HotSearchList> result = new ArrayList<HotSearchList>(); List<HotSearchList> result = new ArrayList<HotSearchList>();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string(); if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")){ try {
try { JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data");
JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data"); JSONArray cards = json.getJSONArray("cards");
JSONArray cards = json.getJSONArray("cards"); int rank = 0;
int rank = 0; for (int i = 0; i < cards.size(); i++) {
for(int i=0;i<cards.size();i++){ try {
try { JSONObject card = cards.getJSONObject(i);
JSONObject card = cards.getJSONObject(i); JSONArray cardGroup = card.getJSONArray("card_group");
JSONArray cardGroup = card.getJSONArray("card_group"); if (Objects.nonNull(cardGroup) && !cardGroup.isEmpty()) {
if(Objects.nonNull(cardGroup) && !cardGroup.isEmpty()){ String title = card.getString("title");
String title = card.getString("title"); boolean hot = true;
boolean hot = true; if (Objects.nonNull(title) && title.contains("实时上升热点")) {
if(Objects.nonNull(title) && title.contains("实时上升热点")){ hot = false;
hot = false; rank = 51;
rank = 51; }
} for (int j = 0; j < cardGroup.size(); j++) {
for(int j=0; j<cardGroup.size(); j++){ JSONObject cardInfo = cardGroup.getJSONObject(j);
JSONObject cardInfo = cardGroup.getJSONObject(j); String name = cardInfo.getString("desc");
String name = cardInfo.getString("desc"); int hotCount = cardInfo.getIntValue("desc_extr");
int hotCount = cardInfo.getIntValue("desc_extr"); String icon = cardInfo.getString("icon");
String icon = cardInfo.getString("icon"); if (StringUtils.isNotBlank(icon)) {
if(StringUtils.isNotBlank(icon)){ icon = icon.split("_")[1].split(".png")[0];
icon = icon.split("_")[1].split(".png")[0];
}
String id = "http://s.weibo.com/weibo/"+URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name(), icon);
result.add(hotSearch);
rank++;
} }
}else{ String id = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
log.info("card 数据结构为:{}", card); HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name(), icon);
result.add(hotSearch);
rank++;
} }
} catch (Exception e) { } else {
log.error("解析微博时时热搜时出现解析错误",e); log.info("card 数据结构为:{}", card);
continue;
} }
} catch (Exception e) {
log.error("解析微博时时热搜时出现解析错误", e);
continue;
} }
return result;
} catch (Exception e) {
log.error("解析微博时时热搜时出现解析错误,数据不是json结构",e);
} }
}else{ return result;
log.info("解析微博时时热搜时出现解析错误,页面结构有问题"); } catch (Exception e) {
log.error("解析微博时时热搜时出现解析错误,数据不是json结构", e);
} }
} catch (IOException e1) { } else {
log.error("解析微博时时热搜时出现连接失败",e1); log.info("解析微博时时热搜时出现解析错误,页面结构有问题");
} }
} }
return Collections.emptyList(); return Collections.emptyList();
......
...@@ -10,6 +10,8 @@ import java.util.Objects; ...@@ -10,6 +10,8 @@ import java.util.Objects;
import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic; import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -52,29 +54,29 @@ public class WeiboSuperTopicCrawler { ...@@ -52,29 +54,29 @@ public class WeiboSuperTopicCrawler {
urlMap.put("明星上升", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=up&from=&wm="); urlMap.put("明星上升", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=up&from=&wm=");
List<WeiboSuperTopic> topicList = new ArrayList<>(); List<WeiboSuperTopic> topicList = new ArrayList<>();
for(Entry<String,String> entry : urlMap.entrySet()) { for(Entry<String,String> entry : urlMap.entrySet()) {
String url = entry.getValue(); String url = entry.getValue();
String type = entry.getKey(); String type = entry.getKey();
for(int page= 1; page<=5; page++) { for(int page= 1; page<=5; page++) {
String pageUrl = url + "&page=" + page; String pageUrl = url + "&page=" + page;
Request request = RequestUtils.wrapGet(pageUrl, headMap);
String htmlBody = null;
//重试三次 //重试三次
for(int retryTimes = 1; retryTimes<=3; retryTimes++) { for(int retryTimes = 1; retryTimes<=3; retryTimes++) {
try { try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
// System.out.println("pageUrl=========="+pageUrl); htmlBody = response.body().string();
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl, headMap), ProxyHolder.NAT_HEAVY_PROXY).body().string(); }catch (Exception e) {
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("desc1")) {
topicList.addAll(parseTopicRankHtml(page, htmlBody, type));
break;
}else {
log.error("获取榜单列表页面时数据格式错误,页面为:{}", htmlBody);
}
} catch (Exception e) {
log.error("获取榜单列表页面时出现错误,错误为:{}", e); log.error("获取榜单列表页面时出现错误,错误为:{}", e);
continue; continue;
} }
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("desc1")) {
topicList.addAll(parseTopicRankHtml(page, htmlBody, type));
break;
} else {
log.error("获取榜单列表页面时数据格式错误,页面为:{}", htmlBody);
}
} }
} }
} }
return topicList; return topicList;
...@@ -136,23 +138,24 @@ public class WeiboSuperTopicCrawler { ...@@ -136,23 +138,24 @@ public class WeiboSuperTopicCrawler {
*/ */
private static WeiboSuperTopic getTopicInfo(String id, WeiboSuperTopic topic) { private static WeiboSuperTopic getTopicInfo(String id, WeiboSuperTopic topic) {
for(int retryTimes=1; retryTimes<=3; retryTimes++) { for(int retryTimes=1; retryTimes<=3; retryTimes++) {
try { String url = "https://m.weibo.cn/api/container/getIndex?containerid="+ id;
String url = "https://m.weibo.cn/api/container/getIndex?containerid="+ id; Request request = RequestUtils.wrapGet(url);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyHolder.NAT_HEAVY_PROXY).body().string(); String htmlBody = null;
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("desc_more")) { try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
String descMore = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONObject("pageInfo").getJSONArray("desc_more").getString(0); htmlBody = response.body().string();
if(StringUtils.isNotBlank(descMore)) {
String readNum = descMore.replaceAll("阅读|帖子.*", "").trim();
String postNum = descMore.replaceAll(".*帖子|粉丝.*", "").trim();
topic.setPostNum(postNum);
topic.setReadNum(readNum);
return topic;
}
}
} catch (Exception e) { } catch (Exception e) {
log.error("解析榜单详情页面时出现错误,错误为:{}", e); log.error("解析榜单详情页面时出现错误,错误为:{}", e);
} }
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("desc_more")) {
String descMore = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONObject("pageInfo").getJSONArray("desc_more").getString(0);
if (StringUtils.isNotBlank(descMore)) {
String readNum = descMore.replaceAll("阅读|帖子.*", "").trim();
String postNum = descMore.replaceAll(".*帖子|粉丝.*", "").trim();
topic.setPostNum(postNum);
topic.setReadNum(readNum);
return topic;
}
}
} }
return topic; return topic;
} }
......
...@@ -11,6 +11,8 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType; ...@@ -11,6 +11,8 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic; import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
...@@ -131,21 +133,23 @@ public class WeiboTopicCrawler { ...@@ -131,21 +133,23 @@ public class WeiboTopicCrawler {
List<HotSearchList> topicList = new ArrayList<>(); List<HotSearchList> topicList = new ArrayList<>();
for(int page=1; page<=6; page++){ for(int page=1; page<=6; page++){
String pageUrl = "https://api.weibo.cn/2/page?gsid=_2A25zJX_EDeRxGedH71YS8CzKzzmIHXVuc_QMrDV6PUJbkdANLXPbkWpNUK3OyitGCJsX8exvua-vfubUqCiaA4lb&from=10A1193010&c=iphone&s=2827eebe&count=20&containerid=106003type%253D25%2526t%253D3%2526disable_hot%253D1%2526filter_type%253Dtopicscene&page=" + page; String pageUrl = "https://api.weibo.cn/2/page?gsid=_2A25zJX_EDeRxGedH71YS8CzKzzmIHXVuc_QMrDV6PUJbkdANLXPbkWpNUK3OyitGCJsX8exvua-vfubUqCiaA4lb&from=10A1193010&c=iphone&s=2827eebe&count=20&containerid=106003type%253D25%2526t%253D3%2526disable_hot%253D1%2526filter_type%253Dtopicscene&page=" + page;
Request request = RequestUtils.wrapGet(pageUrl);
String htmlBody = null;
//重试三次 //重试三次
for(int retryTimes = 1; retryTimes<=5; retryTimes++) { for(int retryTimes = 1; retryTimes<=5; retryTimes++) {
try { try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
// log.info("pageUrl::{}", pageUrl); // log.info("pageUrl::{}", pageUrl);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl), ProxyHolder.NAT_HEAVY_PROXY).body().string(); htmlBody = response.body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
topicList.addAll(parseTopicHtml(htmlBody));
break;
}else {
log.info("下载榜单列表页面时数据格式错误,页面为:{}", htmlBody);
}
} catch (Exception e) { } catch (Exception e) {
log.error("下载榜单列表页面时出现错误,错误为:{}", e); log.error("下载榜单列表页面时出现错误,错误为:{}", e);
continue; continue;
} }
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
topicList.addAll(parseTopicHtml(htmlBody));
break;
} else {
log.info("下载榜单列表页面时数据格式错误,页面为:{}", htmlBody);
}
} }
} }
return topicList; return topicList;
......
package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Log4j2
public class ZhihuChildHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
* 知乎子级分类数据采集
* @param type
* @param typeName
* @return
*/
public static List<HotSearchList> getZhihuTopicSearch(String type,String typeName) {
List<HotSearchList> list = new ArrayList<>();
String url = "https://www.zhihu.com/api/v3/feed/topstory/hot-lists/"+type;
Map<String,String> headerMap = new HashMap<>();
headerMap.put("x-api-version","3.0.76");
JSONArray dataJson =null;
String htmlBody =null;
Request request = RequestUtils.wrapGet(url, headerMap);
//采集为空最多重试3次
for (int t = 0; t < 3 && dataJson == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
e.printStackTrace();
}
if (htmlBody != null && htmlBody.contains("data")) {
JSONObject topSearch = JSONObject.parseObject(htmlBody);
dataJson = topSearch.getJSONArray("data");
for (int i = 0; i < dataJson.size(); i++) {
JSONObject jsonObject = dataJson.getJSONObject(i).getJSONObject("target");
Integer rank = i + 1;
String name = jsonObject.getJSONObject("title_area").getString("text");
String hotCountString = jsonObject.getJSONObject("metrics_area").getString("text");
Integer count = getHotCount(hotCountString);
String childUrl = jsonObject.getJSONObject("link").getString("url");
HotSearchList hotSearchList = new HotSearchList(childUrl, name, count, rank, HotSearchType.知乎热搜.name() + typeName + "分类");
list.add(hotSearchList);
}
}
ZhiWeiTools.sleep(3000L);
}
return list;
}
/**
* 截取出热度值
* @param hotCountString
* @return
*/
private static Integer getHotCount(String hotCountString){
Integer count;
if(hotCountString.contains("万")){
hotCountString = hotCountString.replaceAll("万.*", "").trim();
count = (int)(Double.parseDouble(hotCountString)*10000);
}else if(hotCountString.contains("亿")){
hotCountString = hotCountString.replaceAll("亿.*", "").trim();
count = (int)(Double.parseDouble(hotCountString)*10000000);
}else{
count = Integer.getInteger(hotCountString.substring(0, hotCountString.indexOf("领域热度")));
}
return count;
}
}
...@@ -6,6 +6,8 @@ import java.util.List; ...@@ -6,6 +6,8 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -46,27 +48,28 @@ public class ZhihuHotSearchCrawler { ...@@ -46,27 +48,28 @@ public class ZhihuHotSearchCrawler {
headerMap.put("accept", "application/json, text/plain, */*"); headerMap.put("accept", "application/json, text/plain, */*");
headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20"); headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20");
headerMap.put("Referer", rerferer); headerMap.put("Referer", rerferer);
try { Request request = RequestUtils.wrapGet(url, headerMap);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), ProxyHolder.NAT_HEAVY_PROXY).body().string(); String htmlBody = null;
if(htmlBody != null && htmlBody.contains("words")){ try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
list = new ArrayList<>(); htmlBody = response.body().string();
JSONObject topSearch = JSONObject.parseObject(htmlBody); }catch (IOException e) {
JSONArray words = topSearch.getJSONObject("top_search").getJSONArray("words");
String link = null;
String displayQuery = null;
String query = null;
for (int i = 0; i < words.size(); i++) {
JSONObject word = words.getJSONObject(i);
query = word.getString("query");
displayQuery = word.getString("display_query");
link = "https://www.zhihu.com/search?q="+URLCodeUtil.getURLEncode(query, "utf-8")+"&utm_content=search_hot&utm_medium=organic&utm_source=zhihu&type=content";
HotSearchList zhihu = new HotSearchList(link, displayQuery, null, i, HotSearchType.知乎热搜.name());
list.add(zhihu);
}
}
} catch (IOException e) {
log.debug("获取知乎热搜时出现问题:{}", e); log.debug("获取知乎热搜时出现问题:{}", e);
return list; }
if (htmlBody != null && htmlBody.contains("words")) {
list = new ArrayList<>();
JSONObject topSearch = JSONObject.parseObject(htmlBody);
JSONArray words = topSearch.getJSONObject("top_search").getJSONArray("words");
String link = null;
String displayQuery = null;
String query = null;
for (int i = 0; i < words.size(); i++) {
JSONObject word = words.getJSONObject(i);
query = word.getString("query");
displayQuery = word.getString("display_query");
link = "https://www.zhihu.com/search?q=" + URLCodeUtil.getURLEncode(query, "utf-8") + "&utm_content=search_hot&utm_medium=organic&utm_source=zhihu&type=content";
HotSearchList zhihu = new HotSearchList(link, displayQuery, null, i, HotSearchType.知乎热搜.name());
list.add(zhihu);
}
} }
return list; return list;
} }
...@@ -81,7 +84,7 @@ public class ZhihuHotSearchCrawler { ...@@ -81,7 +84,7 @@ public class ZhihuHotSearchCrawler {
* @return List<ZhihuHotSearch> 返回类型 * @return List<ZhihuHotSearch> 返回类型
*/ */
public static List<HotSearchList> getMobileZhihuHotList(){ public static List<HotSearchList> getMobileZhihuHotList(){
List<HotSearchList> list = new ArrayList<>();; List<HotSearchList> list = new ArrayList<>();
String url = "https://api.zhihu.com/topstory/hot-list?limit=50&reverse_order=0"; String url = "https://api.zhihu.com/topstory/hot-list?limit=50&reverse_order=0";
Map<String,String> headerMap = HeaderTool.getCommonHead(); Map<String,String> headerMap = HeaderTool.getCommonHead();
headerMap.put("Host", "api.zhihu.com"); headerMap.put("Host", "api.zhihu.com");
...@@ -89,43 +92,44 @@ public class ZhihuHotSearchCrawler { ...@@ -89,43 +92,44 @@ public class ZhihuHotSearchCrawler {
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"); headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36");
headerMap.put("X-UDID", "AFAC3hv3vgyPTt9ZmNmqTm0yv_8NKY3S3z8="); headerMap.put("X-UDID", "AFAC3hv3vgyPTt9ZmNmqTm0yv_8NKY3S3z8=");
headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20"); headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20");
String htmlBody = null;
try { Request request = RequestUtils.wrapGet(url, headerMap);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), ProxyHolder.NAT_HEAVY_PROXY).body().string(); try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
if(htmlBody != null && htmlBody.contains("author")){ htmlBody = response.body().string();
JSONObject topSearch = JSONObject.parseObject(htmlBody); } catch (IOException e) {
JSONArray dataJson = topSearch.getJSONArray("data"); log.debug("获取知乎热搜时出现问题:{}", e);
String link = null; return list;
String displayQuery = null; }
Integer hotCount = null; if (htmlBody != null && htmlBody.contains("author")) {
String hotText = null; JSONObject topSearch = JSONObject.parseObject(htmlBody);
for (int i = 0; i < dataJson.size(); i++) { JSONArray dataJson = topSearch.getJSONArray("data");
JSONObject data = dataJson.getJSONObject(i).getJSONObject("target"); String link = null;
displayQuery = data.getString("title"); String displayQuery = null;
link = "https://www.zhihu.com/question/" + data.getLongValue("id"); Integer hotCount = null;
hotText = dataJson.getJSONObject(i).getString("detail_text"); String hotText = null;
for (int i = 0; i < dataJson.size(); i++) {
JSONObject data = dataJson.getJSONObject(i).getJSONObject("target");
displayQuery = data.getString("title");
link = "https://www.zhihu.com/question/" + data.getLongValue("id");
hotText = dataJson.getJSONObject(i).getString("detail_text");
//计算热度 //计算热度
try { try {
if(hotText.contains("万")){ if (hotText.contains("万")) {
hotText = hotText.replaceAll("万.*", "").trim(); hotText = hotText.replaceAll("万.*", "").trim();
hotCount = (int)(Double.parseDouble(hotText)*10000); hotCount = (int) (Double.parseDouble(hotText) * 10000);
}else if(hotText.contains("亿")){ } else if (hotText.contains("亿")) {
hotText = hotText.replaceAll("亿.*", "").trim(); hotText = hotText.replaceAll("亿.*", "").trim();
hotCount = (int)(Double.parseDouble(hotText)*10000000); hotCount = (int) (Double.parseDouble(hotText) * 10000000);
}else{ } else {
hotCount = Integer.getInteger(hotText); hotCount = Integer.getInteger(hotText);
}
}catch (Exception e){
e.printStackTrace();
} }
HotSearchList zhihu = new HotSearchList(link, displayQuery, hotCount, i+1, HotSearchType.知乎热搜.name()); } catch (Exception e) {
list.add(zhihu); e.printStackTrace();
} }
HotSearchList zhihu = new HotSearchList(link, displayQuery, hotCount, i + 1, HotSearchType.知乎热搜.name());
list.add(zhihu);
} }
} catch (IOException e) {
log.debug("获取知乎热搜时出现问题:{}", e);
return list;
} }
return list; return list;
} }
......
...@@ -10,6 +10,8 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType; ...@@ -10,6 +10,8 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
...@@ -30,35 +32,34 @@ public class ZhihuTopicSearchCrawler { ...@@ -30,35 +32,34 @@ public class ZhihuTopicSearchCrawler {
List<HotSearchList> list = new ArrayList<>(); List<HotSearchList> list = new ArrayList<>();
String url = "https://www.zhihu.com/topsearch"; String url = "https://www.zhihu.com/topsearch";
JSONObject jsonObject = null; JSONObject jsonObject = null;
try { String htmlBody = null;
for(int t=0 ;t<3 && jsonObject== null;t++) Request request = RequestUtils.wrapGet(url);
{ for (int t = 0; t < 3 && jsonObject == null; t++) {
// ZhiWeiTools.sleep(10000L); try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), htmlBody = response.body().string();
ProxyHolder.NAT_HEAVY_PROXY).body().string(); } catch (IOException e) {
// log.info("页面内容获取:{}",htmlBody); log.error("知乎热搜页面连接异常", e);
}
if (htmlBody != null) {
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
String html = document.getElementsByTag("script").select("#js-initialData").html(); String html = document.getElementsByTag("script").select("#js-initialData").html();
jsonObject = JSONObject.parseObject(html); jsonObject = JSONObject.parseObject(html);
} if (jsonObject != null) {
if(jsonObject != null) { JSONArray dataJson = jsonObject.getJSONObject("initialState").getJSONObject("topsearch").getJSONArray("data");
JSONArray dataJson = jsonObject.getJSONObject("initialState").getJSONObject("topsearch").getJSONArray("data"); for (int i = 0; i < dataJson.size(); i++) {
for (int i = 0; i < dataJson.size(); i++) { Integer rank = i + 1;
Integer rank = i + 1; JSONObject data = dataJson.getJSONObject(i);
JSONObject data = dataJson.getJSONObject(i); String name = data.getString("queryDisplay");
String name = data.getString("queryDisplay"); String realQuery = data.getString("realQuery");
String realQuery = data.getString("realQuery"); String zhihuUrl = "https://www.zhihu.com/search?q=" + realQuery + "&utm_content=search_hot&type=content";
String zhihuUrl = "https://www.zhihu.com/search?q=" + realQuery + "&utm_content=search_hot&type=content"; HotSearchList hotSearchList = new HotSearchList(zhihuUrl, name, null, rank, HotSearchType.知乎热搜榜单.name());
HotSearchList hotSearchList = new HotSearchList(zhihuUrl, name, null, rank, HotSearchType.知乎热搜榜单.name()); list.add(hotSearchList);
list.add(hotSearchList); }
return list;
} }
return list; } else {
}else{ log.error("知乎热搜榜单页面获取异常");
log.error("知乎热搜榜单页面获取异常,404");
log.error(jsonObject);
} }
} catch (IOException e) {
log.error("知乎热搜获取异常", e);
} }
return Collections.emptyList(); return Collections.emptyList();
} }
......
...@@ -6,10 +6,9 @@ import java.util.Date; ...@@ -6,10 +6,9 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import com.mongodb.client.ListIndexesIterable; import com.mongodb.client.*;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.client.model.IndexOptions; import com.mongodb.client.model.IndexOptions;
import com.mongodb.client.model.Sorts;
import com.zhiwei.searchhotcrawler.config.DBConfig; import com.zhiwei.searchhotcrawler.config.DBConfig;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate; import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
...@@ -64,5 +63,20 @@ public class HotSearchListDAO{ ...@@ -64,5 +63,20 @@ public class HotSearchListDAO{
log.error("存储数据时出错,错误为:{}", e); log.error("存储数据时出错,错误为:{}", e);
} }
} }
public Date getLastTimeByType(String type){
try {
BasicDBObject basicDBObject = new BasicDBObject();
basicDBObject.put("type", type);
MongoCursor<Document> cursor = mongoCollection.find(basicDBObject).sort(
Sorts.orderBy(Sorts.descending("time"))).skip(0).limit(1).iterator();
while (cursor.hasNext()) {
return (Date) cursor.next().get("time");
}
}catch (Exception e){
log.error("查询数据时出错,错误为:{}",e);
}
return null;
}
} }
...@@ -52,5 +52,7 @@ public class HotSearchRun { ...@@ -52,5 +52,7 @@ public class HotSearchRun {
new WeiboTopicRun().start(); new WeiboTopicRun().start();
new ToutiaoHotSearchRun().start(); new ToutiaoHotSearchRun().start();
new ZhihuTopSearchRun().start(); new ZhihuTopSearchRun().start();
new ZhihuChildHotSearchRun().start();
new ThreadOneRun().start();
} }
} }
...@@ -7,6 +7,7 @@ import java.util.Objects; ...@@ -7,6 +7,7 @@ import java.util.Objects;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -47,6 +48,8 @@ public class BaiduHotSearchRun extends Thread{ ...@@ -47,6 +48,8 @@ public class BaiduHotSearchRun extends Thread{
if(Objects.nonNull(list) && !list.isEmpty()) { if(Objects.nonNull(list) && !list.isEmpty()) {
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
} else {
TipsUtils.sendTips("百度热搜",new Date());
} }
log.info("百度风云榜采集结束........"); log.info("百度风云榜采集结束........");
} }
......
...@@ -6,6 +6,7 @@ import java.util.List; ...@@ -6,6 +6,7 @@ import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -47,6 +48,9 @@ public class DouyinHotSearchRun extends Thread{ ...@@ -47,6 +48,9 @@ public class DouyinHotSearchRun extends Thread{
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO(); HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList(); List<HotSearchList> list = DouyinHotSearchCrawler.getMobileDouyinHotList();
log.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 抖音热搜榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("抖音热搜",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
log.info("抖音热搜榜采集结束........"); log.info("抖音热搜榜采集结束........");
......
...@@ -7,6 +7,7 @@ import java.util.concurrent.TimeUnit; ...@@ -7,6 +7,7 @@ import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -43,6 +44,9 @@ public class SougoHotSearchRun extends Thread { ...@@ -43,6 +44,9 @@ public class SougoHotSearchRun extends Thread {
log.info("搜狗微信采集开始........"); log.info("搜狗微信采集开始........");
List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch(); List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch();
log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("搜狗微信热搜",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
log.info("搜狗微信采集结束........"); log.info("搜狗微信采集结束........");
......
package com.zhiwei.searchhotcrawler.timer;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.TengXunCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import org.bson.Document;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
@Log4j2
public class ThreadOneRun extends Thread {
@Override
public void run() {
boolean f = true;
while(f) {
try {
getHotList();
TimeUnit.MINUTES.sleep(1);
} catch (Exception e) {
e.fillInStackTrace();
ZhiWeiTools.sleep(60*1000);
}
ZhiWeiTools.sleep(50);
}
}
private void getHotList(){
HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = TengXunCrawler.getTengXunHotList();
if(list == null || list.size() == 0){
TipsUtils.sendTips("腾讯新闻",new Date());
} else {
List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data);
}
}
}
...@@ -5,6 +5,7 @@ import com.zhiwei.searchhotcrawler.crawler.ToutiaoHotSearchCrawler; ...@@ -5,6 +5,7 @@ import com.zhiwei.searchhotcrawler.crawler.ToutiaoHotSearchCrawler;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
...@@ -39,6 +40,9 @@ public class ToutiaoHotSearchRun extends Thread{ ...@@ -39,6 +40,9 @@ public class ToutiaoHotSearchRun extends Thread{
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO(); HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(); List<HotSearchList> list = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone();
log.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("今日头条热搜",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
log.info("今日头条热搜采集结束........"); log.info("今日头条热搜采集结束........");
......
...@@ -6,6 +6,7 @@ import java.util.List; ...@@ -6,6 +6,7 @@ import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
...@@ -37,6 +38,9 @@ public class WeiboHotSearchRun extends Thread{ ...@@ -37,6 +38,9 @@ public class WeiboHotSearchRun extends Thread{
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO(); HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone(); List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone();
log.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("微博热搜",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
} }
......
...@@ -4,6 +4,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList; ...@@ -4,6 +4,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.WeiboTopicCrawler; import com.zhiwei.searchhotcrawler.crawler.WeiboTopicCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
...@@ -37,6 +38,9 @@ public class WeiboTopicRun extends Thread{ ...@@ -37,6 +38,9 @@ public class WeiboTopicRun extends Thread{
log.info("微博话题采集开始........"); log.info("微博话题采集开始........");
List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone(); List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone();
log.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("微博话题",new Date());
}
List<Document> data = new ArrayList<>(); List<Document> data = new ArrayList<>();
for(HotSearchList topic : list){ for(HotSearchList topic : list){
Document doc = new Document(); Document doc = new Document();
......
package com.zhiwei.searchhotcrawler.timer;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.ZhihuChildHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import org.bson.Document;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
@Log4j2
public class ZhihuChildHotSearchRun extends Thread {
private List<String> childType = Arrays.asList("digital","focus","depth");
@Override
public void run() {
boolean f = true;
while(f) {
try {
getHotList();
TimeUnit.MINUTES.sleep(1);
} catch (Exception e) {
e.fillInStackTrace();
ZhiWeiTools.sleep(60*60*1000);
}
ZhiWeiTools.sleep(50);
}
}
private void getHotList() {
HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
for (int i = 0; i < childType.size(); i++) {
String name = this.getTypeName(childType.get(i));
if (!"".equals(name)) {
log.info("知乎{}话题热榜采集开始...", name);
List<HotSearchList> list = ZhihuChildHotSearchCrawler.getZhihuTopicSearch(childType.get(i), name);
log.info("{}, 知乎{}话题此轮采集到的数据量为:{}", new Date(),name, Integer.valueOf(list != null ? list.size() : 0));
if (list == null || list.size() == 0) {
TipsUtils.sendTips("知乎热搜"+name+"分类", new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data);
log.info("知乎{}话题热榜采集结束...", name);
ZhiWeiTools.sleep(3000);
}
}
}
private String getTypeName(String type){
String name;
switch (type) {
case "digital":
name = "数码";
break;
case "focus":
name = "国际";
break;
case "depth":
name = "时事";
break;
default:
name = "";
}
return name;
}
}
...@@ -6,6 +6,7 @@ import java.util.List; ...@@ -6,6 +6,7 @@ import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -44,6 +45,9 @@ public class ZhihuHotSearchRun extends Thread{ ...@@ -44,6 +45,9 @@ public class ZhihuHotSearchRun extends Thread{
// List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList(); // List<HotSearchList> list = ZhihuHotSearchCrawler.getZhihuHotList();
List<HotSearchList> list = ZhihuHotSearchCrawler.getMobileZhihuHotList(); List<HotSearchList> list = ZhihuHotSearchCrawler.getMobileZhihuHotList();
log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("知乎热搜",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
log.info("知乎话题采集结束........"); log.info("知乎话题采集结束........");
......
...@@ -4,6 +4,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList; ...@@ -4,6 +4,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.crawler.ZhihuTopicSearchCrawler; import com.zhiwei.searchhotcrawler.crawler.ZhihuTopicSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.bson.Document; import org.bson.Document;
...@@ -36,6 +37,9 @@ public class ZhihuTopSearchRun extends Thread { ...@@ -36,6 +37,9 @@ public class ZhihuTopSearchRun extends Thread {
log.info("知乎热搜采集开始...,当前线程名字:{}", Thread.currentThread().getName()); log.info("知乎热搜采集开始...,当前线程名字:{}", Thread.currentThread().getName());
List<HotSearchList> list = ZhihuTopicSearchCrawler.getZhihuTopicSearch(); List<HotSearchList> list = ZhihuTopicSearchCrawler.getZhihuTopicSearch();
log.info("{}, 知乎热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0)); log.info("{}, 知乎热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
if(list == null || list.size() == 0){
TipsUtils.sendTips("知乎热搜榜单",new Date());
}
List<Document> data = hotSearchCacheDAO.addData(list); List<Document> data = hotSearchCacheDAO.addData(list);
hotSearchDAO.addHotSearchList(data); hotSearchDAO.addHotSearchList(data);
log.info("知乎热搜话题采集结束........"); log.info("知乎热搜话题采集结束........");
......
package com.zhiwei.searchhotcrawler.util;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import okhttp3.MediaType;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Map;
/**
 * Utility class for issuing HTTP requests through a shared {@link HttpBoot} client.
 */
public final class HttpClientUtils {
    private static final Logger LOGGER = LogManager.getLogger(HttpClientUtils.class);
    /** Base media type of every body sent by this class. */
    private static final String JSON_MEDIA_TYPE = "application/json";
    private static final HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(2).build();

    /**
     * Posts a JSON body to {@code url} without extra headers, encoded as UTF-8.
     *
     * @param url       target address
     * @param jsonParam JSON text to send as the request body
     * @return the response body text, or {@code null} when the request could not be made or read
     */
    public static String sendPost(String url, String jsonParam) {
        return sendPost(url, jsonParam, null, Charset.forName("UTF-8"));
    }

    /**
     * Posts a JSON body to {@code url}.
     *
     * @param url       target address; an empty or null value is rejected
     * @param jsonParam JSON text to send as the request body; null is rejected
     * @param headers   extra request headers, handed to {@code RequestUtils.wrapPost} as-is (may be null)
     * @param charset   charset used to encode the request body; when null the OkHttp default applies
     * @return the response body text, or {@code null} when the input is invalid or the call fails
     */
    public static String sendPost(String url, String jsonParam, Map<String, String> headers, final Charset charset) {
        if (StringUtils.isEmpty(url)) {
            LOGGER.error("URL can not be empty or null.");
            // Nothing sensible can be requested without a target, so stop here instead of failing later.
            return null;
        }
        if (jsonParam == null) {
            // RequestBody.create rejects null content; report it instead of throwing.
            LOGGER.error("Post body can not be null, url:{}", url);
            return null;
        }
        LOGGER.debug("Post Request:{}", url);

        // Carry the requested charset in the content type so OkHttp encodes the body with it.
        MediaType mediaType = charset == null
                ? MediaType.get(JSON_MEDIA_TYPE)
                : MediaType.get(JSON_MEDIA_TYPE + "; charset=" + charset.name());
        Request request = RequestUtils.wrapPost(url, headers, RequestBody.create(mediaType, jsonParam));

        try (Response response = httpBoot.syncCall(request)) {
            // NOTE(review): the client is built with throwException(false); presumably a failed call can
            // yield no response object - confirm against HttpBoot. Guarding keeps this method NPE-free.
            if (response == null || response.body() == null) {
                LOGGER.error("http connection error : empty response, url:{}", url);
                return null;
            }
            return response.body().string();
        } catch (IOException e) {
            LOGGER.error("http connection error :" + e.getMessage(), e);
            return null;
        }
    }
}
package com.zhiwei.searchhotcrawler.util;
import com.alibaba.fastjson.JSONObject;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Pushes messages to an Enterprise WeChat (WeCom) group robot through its webhook.
 *
 * @ClassName: QYWechatUtil
 * @author: 陈炜涛
 * @date: 2019-07-17 14:33:12
 *
 * @Copyright: 2019 www.zhiweidata.com
 */
public class QYWechatUtil {
    /** Webhook address; the robot key is appended to it. **/
    private static final String SEND_URL = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=";
    /** Markdown message type. **/
    public static final String MSGTYPE_MARKDOWN = "markdown";
    /** Plain text message type. **/
    public static final String MSGTYPE_TEXT = "text";
    /** Image message type; needs its own payload wrapper. **/
    public static final String MSGTYPE_IMAGE = "image";
    /** News (image + text) message type; needs its own payload wrapper. **/
    public static final String MSGTYPE_NEWS = "news";

    /**
     * Sends a message to the robot identified by {@code key}.
     *
     * @param key                 webhook key of the target robot
     * @param msgtype             message type; null or empty falls back to {@link #MSGTYPE_TEXT}
     * @param content             message content
     * @param mentionedList       ids of the members to '@' (may be null)
     * @param mentionedMobileList mobile numbers of the members to '@' (may be null)
     * @return whatever {@code HttpClientUtils.sendPost} returns for the webhook call
     * @author: 陈炜涛
     * @date: 2019-07-17 14:56:40
     */
    public static String send(String key, String msgtype, String content, List<String> mentionedList,
            List<String> mentionedMobileList) {
        String effectiveType = msgtype != null && !msgtype.isEmpty() ? msgtype : MSGTYPE_TEXT;
        TextBody text = new TextBody(content, mentionedList, mentionedMobileList);
        Map<String, Object> dataMap = new HashMap<>();
        dataMap.put("msgtype", effectiveType);
        // The body is keyed by the message type itself, e.g. {"msgtype":"text","text":{...}}.
        dataMap.put(effectiveType, toPayload(text));
        return HttpClientUtils.sendPost(SEND_URL + key, JSONObject.toJSONString(dataMap));
    }

    /**
     * Converts the transfer object into the field names used on the wire.
     * Serializing {@code TextBody} directly would emit its bean property names
     * ({@code mentionedList}, {@code mentionedMobileList}); the webhook documents
     * {@code mentioned_list} and {@code mentioned_mobile_list}, so mentions would be ignored.
     * Absent values are left out of the payload.
     */
    private static Map<String, Object> toPayload(TextBody body) {
        Map<String, Object> payload = new HashMap<>();
        if (body.getContent() != null) {
            payload.put("content", body.getContent());
        }
        if (body.getMentionedList() != null) {
            payload.put("mentioned_list", body.getMentionedList());
        }
        if (body.getMentionedMobileList() != null) {
            payload.put("mentioned_mobile_list", body.getMentionedMobileList());
        }
        return payload;
    }
}
/**
 * Transfer object holding the parts of a robot message: its content and the
 * members to notify. Used only by the robot push utility in this file.
 *
 * @ClassName: Body
 * @author: 陈炜涛
 * @date: 2019-07-17 14:50:19
 *
 * @Copyright: 2019 www.zhiweidata.com
 */
class TextBody {
    /** Message content. */
    private String content;
    /** Ids of the members to notify. */
    private List<String> mentionedList;
    /** Mobile numbers of the members to notify. */
    private List<String> mentionedMobileList;

    /** Creates an empty body; every property starts out as {@code null}. */
    public TextBody() {
        this(null, null, null);
    }

    /**
     * Creates a fully populated body.
     *
     * @param content             message content
     * @param mentionedList       ids of the members to notify
     * @param mentionedMobileList mobile numbers of the members to notify
     */
    public TextBody(String content, List<String> mentionedList, List<String> mentionedMobileList) {
        this.content = content;
        this.mentionedList = mentionedList;
        this.mentionedMobileList = mentionedMobileList;
    }

    /** @return the message content */
    public String getContent() {
        return this.content;
    }

    /** @param content the message content */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return ids of the members to notify */
    public List<String> getMentionedList() {
        return this.mentionedList;
    }

    /** @param mentionedList ids of the members to notify */
    public void setMentionedList(List<String> mentionedList) {
        this.mentionedList = mentionedList;
    }

    /** @return mobile numbers of the members to notify */
    public List<String> getMentionedMobileList() {
        return this.mentionedMobileList;
    }

    /** @param mentionedMobileList mobile numbers of the members to notify */
    public void setMentionedMobileList(List<String> mentionedMobileList) {
        this.mentionedMobileList = mentionedMobileList;
    }

    /** Renders all three properties in a bracketed, comma-separated form. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("TextBody [content=");
        text.append(this.content);
        text.append(", mentionedList=").append(this.mentionedList);
        text.append(", mentionedMobileList=").append(this.mentionedMobileList);
        return text.append("]").toString();
    }
}
package com.zhiwei.searchhotcrawler.util;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Date;
/**
 * Sends alert notifications when a crawler round collects no data.
 */
public class TipsUtils {
    /** An alert is only sent once the last stored record is older than this many milliseconds (5 minutes). */
    private static final long TIME_DIFFERENCE_MILLIS = 5 * 60 * 1000L;
    // NOTE(review): this robot webhook key is a credential committed to source control;
    // consider loading it from configuration instead.
    private static final String KEY = "a8e26ce3-8aaa-4d3e-bcf6-30b81526050b";
    private static final Logger LOGGER = LoggerFactory.getLogger(TipsUtils.class);

    /**
     * Sends an alert when no data of {@code type} has been stored for longer than the threshold.
     * The age is measured from the time returned by {@code HotSearchListDAO.getLastTimeByType(type)}
     * to {@code time}.
     *
     * @param type crawler type name used for the lookup and quoted in the alert text
     * @param time the moment the empty crawl round was observed
     */
    public static void sendTips(String type, Date time) {
        if (type == null || time == null) {
            LOGGER.warn("sendTips skipped, type or time is null. type:{}, time:{}", type, time);
            return;
        }
        HotSearchListDAO hotSearchListDAO = new HotSearchListDAO();
        // Look up the most recent stored record to work out how long nothing has been collected.
        Date lastTime = hotSearchListDAO.getLastTimeByType(type);
        if (lastTime == null) {
            // NOTE(review): presumably the DAO returns null when nothing is stored for this type - confirm.
            // Without a reference time the gap cannot be computed; skip rather than fail the caller's run.
            LOGGER.warn("sendTips skipped, no last record time found for type:{}", type);
            return;
        }
        long elapsedMillis = time.getTime() - lastTime.getTime();
        if (elapsedMillis > TIME_DIFFERENCE_MILLIS) {
            // Send the alert, reporting the gap in whole minutes.
            String crawlerContent = String.format("%s已经连续%s分钟未采集到数据", type, elapsedMillis / 1000 / 60);
            QYWechatUtil.send(KEY, QYWechatUtil.MSGTYPE_TEXT, crawlerContent, null, null);
        }
    }
}
package com.zhiwei.searchhotcrawler.util; package com.zhiwei.searchhotcrawler.util;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.commons.lang3.StringUtils; import okhttp3.Request;
import org.slf4j.Logger; import okhttp3.Response;
import org.slf4j.LoggerFactory; import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import com.alibaba.fastjson.JSONArray; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.alibaba.fastjson.JSONArray;
import com.zhiwei.crawler.utils.RequestUtils; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import okhttp3.MediaType; import com.zhiwei.tools.httpclient.HeaderTool;
import okhttp3.RequestBody;
import okhttp3.MediaType;
public class WechatCodeUtil { import okhttp3.RequestBody;
private static Logger logger = LoggerFactory.getLogger(WechatCodeUtil.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); public class WechatCodeUtil {
/** private static Logger logger = LoggerFactory.getLogger(WechatCodeUtil.class);
* @Title: getToken private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
* @author hero /**
* @Description: 获取token * @Title: getToken
* @param @return * @author hero
* 设定文件 * @Description: 获取token
* @return String 返回类型 * @param @return
*/ * 设定文件
private static String getToken() { * @return String 返回类型
String token = ""; */
String appId = "wx2f555218d66e5948"; private static String getToken() {
String jmAppId = AESUtils.encrypt("wechat", appId); String token = "";
String url = "http://yuqing.zhiweidata.com/WechatPublic/common/getToken?appId=" + jmAppId; String appId = "wx2f555218d66e5948";
Map<String, String> headerMap = HeaderTool.getCommonHead(); String jmAppId = AESUtils.encrypt("wechat", appId);
try { String url = "http://yuqing.zhiweidata.com/WechatPublic/common/getToken?appId=" + jmAppId;
String result = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string(); Map<String, String> headerMap = HeaderTool.getCommonHead();
if (result != null) { Request request = RequestUtils.wrapGet(url, headerMap);
JSONObject jsonObject = JSONObject.parseObject(result); String result = null;
if(jsonObject.containsKey("data")) { try(Response response = httpBoot.syncCall(request)) {
JSONObject inJson = JSONObject.parseObject(jsonObject.getString("data")); result = response.body().string();
token = inJson.getString("accessToken"); } catch (IOException e) {
} e.printStackTrace();
} logger.error("获取微信公众号推送token失败,问题为:::{}", e.fillInStackTrace());
} catch (IOException e) { return null;
e.printStackTrace(); }
logger.error("获取微信公众号推送token失败,问题为:::{}", e.fillInStackTrace()); if (result != null) {
return null; JSONObject jsonObject = JSONObject.parseObject(result);
} if (jsonObject.containsKey("data")) {
return token; JSONObject inJson = JSONObject.parseObject(jsonObject.getString("data"));
} token = inJson.getString("accessToken");
}
/** }
* @Title: sendDataJson return token;
* @author hero }
* @Description: t推送模版消息数据
* @param @param /**
* templateJson * @Title: sendDataJson
* @param @return * @author hero
* 设定文件 * @Description: t推送模版消息数据
* @return int 返回类型 * @param @param
*/ * templateJson
public static int sendDataJson(JSONObject templateJson) { * @param @return
int msgid = 0; * 设定文件
String url = WechatConstant.WECHAT_TEMPLET_SEND_URL.replace("ACCESS_TOKEN", getToken()); * @return int 返回类型
try { */
RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), templateJson.toJSONString()); public static int sendDataJson(JSONObject templateJson) {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapPost(url,requestBody)).body().string(); int msgid = 0;
if(StringUtils.isNotBlank(htmlBody)) { String url = WechatConstant.WECHAT_TEMPLET_SEND_URL.replace("ACCESS_TOKEN", getToken());
JSONObject jsonObject = JSONObject.parseObject(htmlBody); RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), templateJson.toJSONString());
if (null != jsonObject) { Request request = RequestUtils.wrapPost(url,requestBody);
if ("ok".equals(jsonObject.getString("errmsg"))) { String htmlBody = null;
msgid = jsonObject.getIntValue("msgid"); try(Response response = httpBoot.syncCall(request)) {
}else { htmlBody = response.body().string();
msgid = 0; } catch (Exception e) {
logger.info("消息推送失败,错误为::{}",jsonObject.toString()); logger.error("消息推送失败,错误为::{}",e.fillInStackTrace());
} msgid = 0;
} }
} if (StringUtils.isNotBlank(htmlBody)) {
} catch (Exception e) { JSONObject jsonObject = JSONObject.parseObject(htmlBody);
logger.error("消息推送失败,错误为::{}",e.fillInStackTrace()); if (null != jsonObject) {
msgid = 0; if ("ok".equals(jsonObject.getString("errmsg"))) {
} msgid = jsonObject.getIntValue("msgid");
return msgid; } else {
} msgid = 0;
logger.info("消息推送失败,错误为::{}", jsonObject.toString());
/** }
* @Title: getUserList }
* @author hero }
* @Description: 根据用户分组名称拉取用户openid return msgid;
* @param @param }
* groupName
* @param @return /**
* 设定文件 * @Title: getUserList
* @return List<String> 返回类型 * @author hero
*/ * @Description: 根据用户分组名称拉取用户openid
@SuppressWarnings("unchecked") * @param @param
public static List<String> getUserListByGroupName(String groupName) { * groupName
try { * @param @return
String token = getToken(); * 设定文件
if(token!=null){ * @return List<String> 返回类型
String url = "https://api.weixin.qq.com/cgi-bin/user/tag/get?access_token="+token; */
JSONObject postData = new JSONObject(); @SuppressWarnings("unchecked")
postData.put("tagid", getGroupIp(groupName)); public static List<String> getUserListByGroupName(String groupName) {
postData.put("next_openid", ""); try {
RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), postData.toJSONString()); String token = getToken();
String htmlBody = httpBoot.syncCall(RequestUtils.wrapPost(url,requestBody)).body().string(); if(token!=null){
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) { String url = "https://api.weixin.qq.com/cgi-bin/user/tag/get?access_token="+token;
JSONObject jsonObject = JSONObject.parseObject(htmlBody); JSONObject postData = new JSONObject();
if (null != jsonObject) { postData.put("tagid", getGroupIp(groupName));
if(jsonObject.containsKey("data")) { postData.put("next_openid", "");
return (List<String>) jsonObject.getJSONObject("data").getObject("openid", List.class); RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), postData.toJSONString());
}else{ Request request = RequestUtils.wrapPost(url,requestBody);
logger.info("拉取用户列表时,出现问题{}", jsonObject); String htmlBody = null;
} try(Response response = httpBoot.syncCall(request)) {
} htmlBody = response.body().string();
} }catch (IOException e){
}else{ logger.error("页面连接获取失败",e);
logger.info("token 获取失败"); return null;
} }
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
} catch (Exception e) { JSONObject jsonObject = JSONObject.parseObject(htmlBody);
e.printStackTrace(); if (null != jsonObject) {
return null; if(jsonObject.containsKey("data")) {
} return (List<String>) jsonObject.getJSONObject("data").getObject("openid", List.class);
return null; }else{
} logger.info("拉取用户列表时,出现问题{}", jsonObject);
}
}
}
public static List<String> getUserListByGroupId(Integer groupId) { }else{
try { logger.info("token 获取失败");
String token = getToken(); }
if(token!=null){ } catch (Exception e) {
String url = "https://api.weixin.qq.com/cgi-bin/user/tag/get?access_token="+token; e.printStackTrace();
JSONObject postData = new JSONObject(); return null;
postData.put("tagid", groupId); }
postData.put("next_openid", ""); return null;
RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), postData.toJSONString()); }
String htmlBody = httpBoot.syncCall(RequestUtils.wrapPost(url,requestBody)).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
JSONObject jsonObject = JSONObject.parseObject(htmlBody);
if (null != jsonObject) { public static List<String> getUserListByGroupId(Integer groupId) {
if(jsonObject.containsKey("data")) { try {
return (List<String>) jsonObject.getJSONObject("data").getObject("openid", List.class); String token = getToken();
}else{ if(token!=null){
logger.info("拉取用户列表时,出现问题{}", jsonObject); String url = "https://api.weixin.qq.com/cgi-bin/user/tag/get?access_token="+token;
} JSONObject postData = new JSONObject();
} postData.put("tagid", groupId);
} postData.put("next_openid", "");
}else{ RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), postData.toJSONString());
logger.info("token 获取失败"); Request request = RequestUtils.wrapPost(url,requestBody);
} String htmlBody = null;
} catch (Exception e) { try(Response response = httpBoot.syncCall(request)){
e.printStackTrace(); htmlBody = response.body().string();
return null; }catch (IOException e){
} logger.error("页面链接获取失败",e);
return null; return null;
} }
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
/*** JSONObject jsonObject = JSONObject.parseObject(htmlBody);
* if (null != jsonObject) {
* @Title: getGroupIp if(jsonObject.containsKey("data")) {
* @author hero return (List<String>) jsonObject.getJSONObject("data").getObject("openid", List.class);
* @Description: 根据分组名称获取分组id }else{
* @param @param logger.info("拉取用户列表时,出现问题{}", jsonObject);
* groupName }
* @param @return }
* 设定文件 }
* @return Integer 返回类型 }else{
*/ logger.info("token 获取失败");
public static Integer getGroupIp(String groupName) { }
String url = "https://api.weixin.qq.com/cgi-bin/tags/get?access_token=" + getToken(); } catch (Exception e) {
Integer groupId = null; e.printStackTrace();
Map<String, String> headerMap = HeaderTool.getCommonHead(); return null;
try { }
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string(); return null;
if (htmlBody != null) { }
if(htmlBody.contains("tags")) {
JSONArray jsonArry = JSONObject.parseObject(htmlBody).getJSONArray("tags"); /***
for (int i = 0; i < jsonArry.size(); i++) { *
JSONObject data = jsonArry.getJSONObject(i); * @Title: getGroupIp
Integer id = data.getInteger("id"); * @author hero
String name = data.getString("name"); * @Description: 根据分组名称获取分组id
if (name.equals(groupName)) { * @param @param
groupId = id; * groupName
break; * @param @return
} * 设定文件
} * @return Integer 返回类型
} */
} public static Integer getGroupIp(String groupName) {
} catch (IOException e) { String url = "https://api.weixin.qq.com/cgi-bin/tags/get?access_token=" + getToken();
logger.error("获取分组id时出现错误",e.fillInStackTrace()); Integer groupId = null;
return null; Map<String, String> headerMap = HeaderTool.getCommonHead();
} Request request = RequestUtils.wrapGet(url, headerMap);
return groupId; String htmlBody = null;
} try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
/** } catch (IOException e) {
* 查询公众号下的所有分组 logger.error("获取分组id时出现错误",e.fillInStackTrace());
* @return return null;
*/ }
public static Map<String,Integer> getAllGroupIp() { if (htmlBody != null) {
String url = "https://api.weixin.qq.com/cgi-bin/tags/get?access_token=" + getToken(); if (htmlBody.contains("tags")) {
Map<String,Integer> resultMap = new HashMap<String,Integer>(); JSONArray jsonArry = JSONObject.parseObject(htmlBody).getJSONArray("tags");
Map<String, String> headerMap = HeaderTool.getCommonHead(); for (int i = 0; i < jsonArry.size(); i++) {
try { JSONObject data = jsonArry.getJSONObject(i);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string(); Integer id = data.getInteger("id");
if (htmlBody != null) { String name = data.getString("name");
if(htmlBody.contains("tags")) { if (name.equals(groupName)) {
JSONArray jsonArry = JSONObject.parseObject(htmlBody).getJSONArray("tags"); groupId = id;
for (int i = 0; i < jsonArry.size(); i++) { break;
JSONObject data = jsonArry.getJSONObject(i); }
Integer id = data.getInteger("id"); }
String name = data.getString("name"); }
resultMap.put(name, id); }
} return groupId;
}else{ }
logger.info("获取分组id时出现错误,数据为:::{}", htmlBody);
} /**
} * 查询公众号下的所有分组
} catch (IOException e) { * @return
logger.error("获取分组id时出现错误",e.fillInStackTrace()); */
return null; public static Map<String,Integer> getAllGroupIp() {
} String url = "https://api.weixin.qq.com/cgi-bin/tags/get?access_token=" + getToken();
return resultMap; Map<String,Integer> resultMap = new HashMap<String,Integer>();
} Map<String, String> headerMap = HeaderTool.getCommonHead();
Request request = RequestUtils.wrapGet(url, headerMap);
} String htmlBody = null;
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
} catch (IOException e) {
logger.error("获取分组id时出现错误",e.fillInStackTrace());
return null;
}
if (htmlBody != null) {
if (htmlBody.contains("tags")) {
JSONArray jsonArry = JSONObject.parseObject(htmlBody).getJSONArray("tags");
for (int i = 0; i < jsonArry.size(); i++) {
JSONObject data = jsonArry.getJSONObject(i);
Integer id = data.getInteger("id");
String name = data.getString("name");
resultMap.put(name, id);
}
} else {
logger.info("获取分组id时出现错误,数据为:::{}", htmlBody);
}
}
return resultMap;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment