Commit 3a0f95c0 by leiliangliang

升级核心包Http-boot

parent 672054e6
......@@ -6,7 +6,7 @@
<name>各平台热搜榜单采集程序</name>
<version>0.0.6-SNAPSHOT</version>
<description>各平台热搜榜单采集程序
目前包含:1.微博时时热搜采集程序、2.知乎热搜采集程序</description>
目前包含:1.微博时时热搜采集程序、2.知乎热搜采集程序</description>
<developers>
<developer>
<id>Bewilder</id>
......@@ -38,6 +38,15 @@
</filters>
<transformers>
<transformer>
<resource>META-INF/spring.handlers</resource>
</transformer>
<transformer>
<resource>META-INF/spring.schemas</resource>
</transformer>
<transformer>
<resource>META-INF/spring.tooling</resource>
</transformer>
<transformer>
<mainClass>com.zhiwei.searchhotcrawler.run.HotSearchRun</mainClass>
</transformer>
</transformers>
......@@ -73,32 +82,22 @@
</build>
<dependencies>
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.6.7.2-RELEASE</version>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
<exclusions>
<exclusion>
<artifactId>hamcrest-core</artifactId>
<groupId>org.hamcrest</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.20</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
<version>5.3.6</version>
<scope>test</scope>
</dependency>
</dependencies>
<properties>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<spring.version>4.2.2.RELEASE</spring.version>
<log4j.version>2.15.0</log4j.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
</project>
......
......@@ -43,10 +43,16 @@
<artifactId>zhiwei-tools</artifactId>
<version>0.1.6-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.6.7.4-SNAPSHOT</version>
<!--<dependency>-->
<!--<groupId>com.zhiwei.crawler</groupId>-->
<!--<artifactId>crawler-core</artifactId>-->
<!--<version>0.6.7.4-SNAPSHOT</version>-->
<!--</dependency>-->
<!-- http知微核心包 -->
<dependency>
<groupId>com.zhiwei.http</groupId>
<artifactId>http-boot</artifactId>
<version>0.0.5.9-SNAPSHOT</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.conscrypt/conscrypt-openjdk-uber -->
<dependency>
......
......@@ -73,18 +73,17 @@ public class WeiboSuperTopic {
/** No-argument constructor; its body is empty, so it assigns no fields itself. */
public WeiboSuperTopic() {}
public WeiboSuperTopic(String url, String topicName, Integer rank, String score,
public WeiboSuperTopic(String url, String topicName, Integer rank, String postNum,
String fensi, String type) {
this.url = url;
this.topicName = topicName;
this.rank = rank;
this.score = score;
this.postNum = postNum;
this.fensi = fensi;
this.type = type;
this.time = new Date();
this.day = TimeParse.dateFormartString(new Date(), "yyyy-MM-dd");
this.id = topicName + "_" + type + "_" + day;
this.id = topicName + "_" + type + "_" + time.getTime();
}
}
......@@ -4,18 +4,17 @@ import java.net.URLDecoder;
import java.time.Duration;
import java.util.*;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
......@@ -28,7 +27,8 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
@Log4j2
public class BaiDuHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
//private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
/**
* @return void 返回类型
......@@ -40,10 +40,12 @@ public class BaiDuHotSearchCrawler {
String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析百度风云榜时出现解析错误,页面结构有问题", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("container-bg_lQ801")) {
return ansysNewData(htmlBody, date);
......
......@@ -2,17 +2,16 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
......@@ -27,7 +26,8 @@ import java.util.*;
@Log4j2
public class BiliComprehensiveHotCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
//private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
......@@ -53,10 +53,12 @@ public class BiliComprehensiveHotCrawler {
for (int i = 0; i < urlList.size(); i++) {
Request request = RequestUtils.wrapGet(urlList.get(i));
//发送请求每次获取20条数据
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error(fmt.format(date)+":第"+i+1+"次请求解析B站综合热门时出现连接失败", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error(fmt.format(date)+":第"+i+1+"次请求解析B站综合热门时出现连接失败", cause);
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
try {
......
......@@ -2,16 +2,17 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
......@@ -20,12 +21,13 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@Log4j2
public class BililiCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* B站排行榜的采集
......@@ -41,10 +43,12 @@ public class BililiCrawler {
String url = "https://api.bilibili.com/x/web-interface/ranking/v2?rid=0&type=all";
Request request = RequestUtils.wrapGet(url);
for(int t=0; t<3 && dataJson==null; t++){
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("B站排行榜页面连接失败",e.fillInStackTrace());
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("B站排行榜页面连接失败",cause.fillInStackTrace());
}else {
htmlBody = response.bodyString();
}
try {
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")){
......@@ -129,8 +133,8 @@ public class BililiCrawler {
Request request = RequestUtils.wrapGet(url);
try {
System.setProperty("https.protocols", "TLSv1,TLSv1.1,TLSv1.2,SSLv3");
Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY);
String htmlBody = response.body().string();
Response response = httpBoot.syncCall(request, ProxySupplier.FOREIGN_INNER_PROXY);
String htmlBody = response.bodyString();
if (htmlBody != null && htmlBody.contains("v-wrap")) {
Document document = Jsoup.parse(htmlBody);
//获取标签
......@@ -139,16 +143,20 @@ public class BililiCrawler {
hotSearchList.setTag(tag);
//获取粉丝数
if (htmlBody.contains("v_upinfo")) {
String text = document.select("div.follow-btn").select("span").text();
String fan = text.split(" ")[2];
Long fanCount =null;
if (fan.contains("万")){
double dou = Double.parseDouble(fan.replaceAll("万", " "));
fanCount =new Double(dou*10000).longValue();
}else {
fanCount =Long.valueOf(fan);
String text = document.select("div.follow-btn").select("span").last().text();
if (StringUtils.isNotEmpty(text)&& Objects.nonNull(text)) {
Long fanCount = null;
if (text.contains("关注")){
text =text.replaceAll("关注"," ").trim();
}
if (text.contains("万")) {
double dou = Double.parseDouble(text.replaceAll("万", " ").trim());
fanCount = new Double(dou * 10000).longValue();
} else {
fanCount = Long.valueOf(text);
}
hotSearchList.setFans(fanCount);
}
hotSearchList.setFans(fanCount);
}
return hotSearchList;
} else {
......@@ -173,10 +181,12 @@ public class BililiCrawler {
String url = "https://app.biliapi.com/x/v2/search/square?build=616050&limit=10";
Request request = RequestUtils.wrapGet(url);
for(int t=0; t<3 && dataJson==null; t++){
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("B站热搜页面连接失败",e.fillInStackTrace());
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("B站热搜页面连接失败",cause.fillInStackTrace());
}else {
htmlBody = response.bodyString();
}
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")){
dataJson = JSONObject.parseObject(htmlBody).getJSONArray("data").getJSONObject(0).getJSONObject("data").getJSONArray("list");
......
......@@ -5,17 +5,17 @@ import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
......@@ -30,7 +30,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
@Log4j2
public class DouyinHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
public static List<HotSearchList> list = new ArrayList<>();
......@@ -46,10 +46,12 @@ public class DouyinHotSearchCrawler {
String url = "https://api.amemv.com/aweme/v1/hot/search/list/";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
}catch (IOException e) {
log.debug("获取抖音热搜榜时出现问题:{}", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.debug("获取抖音热搜榜时出现问题:{}", cause);
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("word_list")) {
list = new ArrayList<>();
......@@ -87,10 +89,12 @@ public class DouyinHotSearchCrawler {
String resultUrl = null;
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
}catch (IOException e) {
log.debug("获取抖音热搜榜链接时出现问题:{}", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.debug("获取抖音热搜榜链接时出现问题:{}", cause);
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("aweme_list")){
JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("aweme_list");
......
......@@ -2,15 +2,16 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
......@@ -20,7 +21,7 @@ import java.util.List;
@Log4j2
public class FengHuangSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 获取凤凰新闻热榜
......@@ -33,10 +34,12 @@ public class FengHuangSearchCrawler {
String url = "https://nine.ifeng.com/hotspotlist?gv=7.9.1&page="+page;
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("凤凰新闻热榜页面连接异常...", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("凤凰新闻热榜页面连接异常...", cause);
}else {
htmlBody = response.bodyString();
}
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")){
JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONArray("list");
......@@ -71,10 +74,12 @@ public class FengHuangSearchCrawler {
String url = "https://shankapi.ifeng.com/autumn/sogouSearchHotword";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("凤凰新闻热搜页面连接异常...", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("凤凰新闻热搜页面连接异常...", cause);
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("data").getJSONObject(0).getJSONArray("item");
......
package com.zhiwei.searchhotcrawler.crawler;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
......@@ -26,7 +27,7 @@ import java.util.*;
@Log4j2
public class HotSearch36KrCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
/**
* @return void 返回类型
......@@ -38,13 +39,13 @@ public class HotSearch36KrCrawler {
String url = "https://www.36kr.com/hot-list/catalog";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析36Kr人气榜时出现解析错误,页面结构有问题", e);
}
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析36Kr人气榜时出现解析错误,页面结构有问题", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("article-list")) {
return ansysData(htmlBody,date);
} else {
......
......@@ -2,14 +2,16 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
......@@ -27,17 +29,19 @@ import java.util.*;
*/
@Log4j2
public class HuXiuHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
public static List<HotSearchList> HuXiuHotArticleRecommended(Date date){
String url = "https://www.huxiu.com/";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析虎嗅热文推荐时出现解析错误,页面结构有问题", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析虎嗅热文推荐时出现解析错误,页面结构有问题", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("hot__list")) {
return ansysData(htmlBody,date);
......
package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import java.time.Duration;
import java.util.*;
......@@ -22,7 +24,7 @@ import java.util.*;
*/
@Log4j2
public class KuaiShouHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
/**
* @return void 返回类型
......@@ -34,11 +36,12 @@ public class KuaiShouHotSearchCrawler {
String url = "https://video.kuaishou.com/?utm_source=aa&utm_medium=05&utm_campaign=aa_05_pp_yr&plan_id=138090084&unit_id=5205658029&creative_id=43661481717&keyword_id=202928529242&keyword=202928529242&bd_vid=11937382025080724791";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析快手热榜时出现解析错误,页面结构有问题", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析快手热榜时出现解析错误,页面结构有问题", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("APOLLO_STATE")) {
return ansysData(htmlBody,date);
......
......@@ -2,15 +2,17 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
......@@ -21,7 +23,7 @@ import java.util.List;
@Log4j2
public class MaiMaiHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 获取maimai热榜
......@@ -33,10 +35,12 @@ public class MaiMaiHotSearchCrawler {
String url = "https://open.taou.com/maimai/feed/v6/hot_posts_list?tab=profession&count=15&version=5.3.34&u=232258287&access_token=1.4c82e8ad6d6b4e03262a48f334dea336";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("脉脉热榜页面连接异常...", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("脉脉热榜页面连接异常...", cause);
}else {
htmlBody = response.bodyString();
}
//1024 - 26(时间戳+type) = 998 -> name.getBytes(StandardCharsets.UTF_8).length<998 -> 998/3 = 332
int nameLengthMax = 300;
......
......@@ -4,10 +4,13 @@ import java.util.*;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.tools.tools.URLCodeUtil;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
......@@ -15,9 +18,6 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool;
......@@ -31,7 +31,7 @@ import com.zhiwei.tools.httpclient.HeaderTool;
@Log4j2
public class SougoHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* @Title: SougoHotSearchTest
......@@ -46,10 +46,12 @@ public class SougoHotSearchCrawler {
Request request = RequestUtils.wrapGet(url, headMap);
for (int i = 0; i < 3; i++) {
String htmlBody = null;
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
}catch (Exception e) {
log.error("解析搜狗微信时出现解析错误,页面结构有问题", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析搜狗微信时出现解析错误,页面结构有问题", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("topwords")) {
try {
......@@ -108,10 +110,12 @@ public class SougoHotSearchCrawler {
Request request = RequestUtils.wrapGet(url, headMap);
for (int i = 0; i < 3; i++) {
String htmlBody = null;
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析搜狗微信时出现解析错误,页面结构有问题", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析搜狗微信时出现解析错误,页面结构有问题", cause);
}else {
htmlBody = response.bodyString();
}
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")){
JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("data");
......
......@@ -2,19 +2,18 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
......@@ -22,7 +21,7 @@ import java.util.List;
@Log4j2
public class SouhuTopicCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
public static List<HotSearchList> getSouhuTopic(Date date){
List<HotSearchList> hotSearchLists = new ArrayList<>();
......@@ -32,10 +31,12 @@ public class SouhuTopicCrawler {
String url = "https://api.k.sohu.com/api/news/moment/v2/list.go?pageSize=50&v=6.4.4";
Request request = RequestUtils.wrapGet(url);
for(int t=0; t<3 && dataJson==null; t++){
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("搜狐话题页面连接失败",e.fillInStackTrace());
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("搜狐话题页面连接失败",cause.fillInStackTrace());
}else {
htmlBody = response.bodyString();
}
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")){
JSONObject jsonObject = JSONObject.parseObject(htmlBody).getJSONObject("data");
......
......@@ -2,15 +2,16 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.MD5Util;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import java.time.Duration;
import java.util.*;
......@@ -23,7 +24,7 @@ import java.util.*;
*/
@Log4j2
public class TaoBaoHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
public static List<HotSearchList> taoBaoHotSearch(Date date) {
Map<String, String> headerMap = new HashMap<>();
......@@ -34,8 +35,12 @@ public class TaoBaoHotSearchCrawler {
String urls = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=" + time + "&sign=&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
Request request1 = RequestUtils.wrapGet(urls);
String token = null;
try (Response response = httpBoot.syncCall(request1, ProxyHolder.NAT_HEAVY_PROXY)) {
List<String> values = response.networkResponse().headers().values("Set-Cookie");
Response response = httpBoot.syncCall(request1, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", cause);
}else {
List<String> values = response.headers().values("Set-Cookie");
String tk = values.get(1);
String[] splitTk = tk.split(";");
String _m_h5_tk = splitTk[0];
......@@ -44,18 +49,18 @@ public class TaoBaoHotSearchCrawler {
String[] splitEnc = enc.split(";");
String _m_h5_tk_enc = splitEnc[0];
headerMap.put("cookie", _m_h5_tk + ";" + _m_h5_tk_enc);
} catch (Exception e) {
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", e);
}
String signs = token + "&" + time + "&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
String sign = MD5Util.getMD5(signs).toLowerCase();
String url = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=" + time + "&sign=" + sign + "&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
Request request = RequestUtils.wrapGet(url, headerMap);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
Response response1 = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response1.hasCause()){
Throwable cause = response1.cause();
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", cause);
}else {
htmlBody = response1.bodyString();
ht = !htmlBody.contains("非法请求");
} catch (Exception e) {
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", e);
}
if (htmlBody != null && htmlBody.contains("data")) {
return ansysData(htmlBody, date);
......
......@@ -2,15 +2,15 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import java.io.IOException;
import java.util.ArrayList;
......@@ -20,7 +20,7 @@ import java.util.List;
@Log4j2
public class TengXunCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 腾讯热榜数据采集
......@@ -35,10 +35,12 @@ public class TengXunCrawler {
Request request = RequestUtils.wrapGet(url);
//采集为空最多重试3次
for (int t = 0; t < 3 && dataJson == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
e.printStackTrace();
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
response.cause().printStackTrace();
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("idlist")) {
JSONObject topSearch = JSONObject.parseObject(htmlBody);
......@@ -96,10 +98,12 @@ public class TengXunCrawler {
Request request = RequestUtils.wrapGet(url);
//采集为空最多重试3次
for (int t = 0; t < 3; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
e.printStackTrace();
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
response.cause().printStackTrace();
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("data")){
JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("data");
......
......@@ -3,9 +3,11 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
......@@ -14,7 +16,7 @@ import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
......@@ -34,7 +36,7 @@ import java.util.*;
*/
@Log4j2
public class ToutiaoHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* @Title: weiboHotSearchByPhoneTest
......@@ -47,10 +49,12 @@ public class ToutiaoHotSearchCrawler {
String jsUrl = "https://s3.pstatp.com/toutiao/feoffline/hot_list/resource/hot_list/js/index.45f50250.chunk.js";
Request jsRequest = RequestUtils.wrapGet(jsUrl);
String jsBody = null;
try(Response response = httpBoot.syncCall(jsRequest,ProxyHolder.NAT_HEAVY_PROXY)) {
jsBody = response.body().string();
} catch (IOException e) {
log.error("获取今日头条实时热搜头部信息标识失败",e);
Response response = httpBoot.syncCall(jsRequest, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("获取今日头条实时热搜头部信息标识失败",cause);
}else {
jsBody = response.bodyString();
}
if(jsBody != null && jsBody.contains("origin")){
String s = jsBody.substring(jsBody.indexOf("origin:")+"origin:".length());
......@@ -61,10 +65,12 @@ public class ToutiaoHotSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for(int count =0; count<=5; count++){
try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e1) {
log.error("解析今日头条实时热搜时出现连接失败",e1);
Response response1 = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response1.hasCause()){
Throwable cause = response1.cause();
log.error("解析今日头条实时热搜时出现连接失败",cause);
}else {
htmlBody = response1.bodyString();
}
List<HotSearchList> result = new ArrayList<HotSearchList>();
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
......@@ -159,10 +165,12 @@ public class ToutiaoHotSearchCrawler {
String url = hotSearchList.getUrl();
Request request = RequestUtils.wrapGet(url);
for (int i = 0; i <= 5; i++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e1) {
log.error("解析今日头条热搜详情页面出现连接失败", e1);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析今日头条热搜详情页面出现连接失败", cause);
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody)) {
Document document = Jsoup.parse(htmlBody);
......
......@@ -2,16 +2,18 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
......@@ -26,7 +28,7 @@ import java.util.List;
*/
@Log4j2
public class WangYiHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 网易新闻实时热榜的采集
......@@ -39,10 +41,12 @@ public class WangYiHotSearchCrawler {
Request request = RequestUtils.wrapGet(url);
String htmlBody = null;
for(int t=0 ;t<3; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("网易新闻实时热榜页面连接异常...", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("网易新闻实时热榜页面连接异常...",cause);
}else {
htmlBody = response.bodyString();
}
if(htmlBody!=null && htmlBody.contains("data")) {
JSONObject bodyObject = JSONObject.parseObject(htmlBody).getJSONObject("data");
......@@ -80,10 +84,12 @@ public class WangYiHotSearchCrawler {
Request request = RequestUtils.wrapGet(url);
String htmlBody = null;
for(int t=0 ;t<3; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("网易新闻跟贴热议页面连接异常...", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("网易新闻跟贴热议页面连接异常...",cause);
}else {
htmlBody = response.bodyString();
}
if(htmlBody!=null && htmlBody.contains("data")) {
JSONObject bodyObject = JSONObject.parseObject(htmlBody).getJSONObject("data");
......
......@@ -2,14 +2,15 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.WeiBoSearchBoxHotWords;
import com.zhiwei.searchhotcrawler.dao.WeiBoSearchBoxHotWordsDao;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import java.time.Duration;
import java.util.ArrayList;
......@@ -26,7 +27,7 @@ import java.util.Objects;
*/
@Log4j2
public class WeiBoSearchBoxHotWordsCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
static WeiBoSearchBoxHotWordsDao weiBoSearchDao = new WeiBoSearchBoxHotWordsDao();
public static void weiBoSearchBoxHotWords(Date date){
......@@ -35,11 +36,12 @@ public class WeiBoSearchBoxHotWordsCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 5; count++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析微博搜索框热词时出现解析错误,页面结构有问题", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博搜索框热词时出现解析错误,页面结构有问题",cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("hotwords")) {
int num = ansysData(htmlBody, date);
......
......@@ -2,17 +2,16 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.util.*;
/**
......@@ -24,7 +23,7 @@ import java.util.*;
@Log4j2
public class WeiboEntertainmentCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* @return void 返回类型
......@@ -38,10 +37,12 @@ public class WeiboEntertainmentCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 5; count++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博娱乐榜时出现连接失败", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博娱乐榜时出现连接失败",cause);
}else {
htmlBody = response.bodyString();
}
List<HotSearchList> result = new ArrayList();
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
......
......@@ -9,6 +9,10 @@ import java.util.*;
import java.util.stream.Collectors;
import com.alibaba.fastjson.JSON;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.*;
import com.zhiwei.searchhotcrawler.config.RedisConfig;
import com.zhiwei.searchhotcrawler.dao.RedisDao;
......@@ -17,7 +21,6 @@ import com.zhiwei.searchhotcrawler.dao.WeiBoUserDao;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.bson.Document;
import org.jsoup.Jsoup;
......@@ -26,9 +29,6 @@ import org.jsoup.select.Elements;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.mail.SendMailWeibo;
import com.zhiwei.tools.tools.URLCodeUtil;
......@@ -45,7 +45,7 @@ import static java.util.Objects.nonNull;
@Log4j2
public class WeiboHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
private static RedisDao redisDao = new RedisDao();
......@@ -66,14 +66,15 @@ public class WeiboHotSearchCrawler {
for (int i = 0; i < 3; i++) {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
if (i == 2) {
return list;
} else {
continue;
}
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("pl_top_realtimehot")) {
try {
......@@ -261,10 +262,12 @@ public class WeiboHotSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
for (int count = 0; count <= 5; count++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博时热搜时出现连接失败", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博时热搜时出现连接失败",cause);
}else {
htmlBody = response.bodyString();
}
List<HotSearchList> result = new ArrayList<HotSearchList>();
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
......@@ -349,10 +352,12 @@ public class WeiboHotSearchCrawler {
String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博热搜时出现连接失败", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博热搜时出现连接失败",cause);
}else {
htmlBody = response.bodyString();
}
List<HotSearchList> result = new ArrayList<>();
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
......@@ -405,10 +410,12 @@ public class WeiboHotSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 5; count++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博热搜详情页面时出现连接失败", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博热搜详情页面时出现连接失败",cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("data")) {
JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONObject("cardlistInfo");
......@@ -500,10 +507,12 @@ public class WeiboHotSearchCrawler {
Map<String, String> headerMap = new HashMap<>();
headerMap.put("Cookie", "SUB=_2AkMWEQNHf8NxqwFRmPwdzmrnaYl_zgzEieKgTfKcJRMxHRl-yT9jqmkjtRB6PZEtqE0muNq5OZJPytvesIwD-Kh1dwIz; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFKfwIoPvvYaew277IR3CUN");
Request request = RequestUtils.wrapGet(url,headerMap);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博时热搜时出现连接失败", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博时热搜时出现连接失败",cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("m-main")) {
Document docm = new Document();
......
......@@ -2,14 +2,16 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
......@@ -25,7 +27,7 @@ import java.util.*;
@Log4j2
public class WeiboNewsCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
......@@ -46,11 +48,13 @@ public class WeiboNewsCrawler {
for (int count = 0; count <= 5; count++) {
List<HotSearchList> result = new ArrayList();
//发送第一次请求获取前20条数据
try (Response response = httpBoot.syncCall(request1, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("第一次请求解析微博要闻榜时出现连接失败", e);
Response response = httpBoot.syncCall(request1, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("第一次请求解析微博要闻榜时出现连接失败", cause);
continue;
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
try {
......@@ -66,11 +70,13 @@ public class WeiboNewsCrawler {
continue;
}
//发送第二次请求获取中间20条数据
try (Response response = httpBoot.syncCall(request2, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("第二次请求解析微博要闻榜时出现连接失败", e);
Response response1 = httpBoot.syncCall(request2, ProxySupplier.NAT_HEAVY_PROXY);
if (response1.hasCause()){
Throwable cause = response1.cause();
log.error("第二次请求解析微博要闻榜时出现连接失败",cause);
continue;
}else {
htmlBody = response1.bodyString();
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
try {
......@@ -87,11 +93,13 @@ public class WeiboNewsCrawler {
continue;
}
//发送第三次请求获取最后10条数据
try (Response response = httpBoot.syncCall(request3, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("第三次请求解析微博要闻榜时出现连接失败", e);
Response response2 = httpBoot.syncCall(request3, ProxySupplier.NAT_HEAVY_PROXY);
if (response2.hasCause()){
Throwable cause = response2.cause();
log.error("第三次请求解析微博要闻榜时出现连接失败",cause);
continue;
}else {
htmlBody = response2.bodyString();
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
try {
......
......@@ -8,18 +8,19 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
/**
*
......@@ -31,7 +32,7 @@ import com.zhiwei.crawler.core.utils.RequestUtils;
@Log4j2
public class WeiboSuperTopicCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
private static Map<String,String> headMap = new HashMap<>();
static {
......@@ -63,13 +64,15 @@ public class WeiboSuperTopicCrawler {
String htmlBody = null;
//重试三次
for(int retryTimes = 1; retryTimes<=3; retryTimes++) {
try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
}catch (Exception e) {
log.error("获取榜单列表页面时出现错误,错误为:{}", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("获取榜单列表页面时出现错误,错误为:{}",cause);
continue;
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("desc1")) {
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
topicList.addAll(parseTopicRankHtml(page, htmlBody, type));
break;
} else {
......@@ -99,21 +102,18 @@ public class WeiboSuperTopicCrawler {
Integer toprank = null;
String topicName = null;
String id = null;
String score = null;
String desc1 = null;
String postNum = null;
String fensi = null;
String url = null;
for(int i=0;i<list.size();i++) {
JSONObject data = list.getJSONObject(i);
toprank = page + data.getInteger("toprank");
toprank = ++page;
topicName = data.getString("display_name");
id = data.getString("page_id");
score = data.getString("score");
desc1 = data.getString("desc1");
fensi = desc1.replaceAll(".*影响力|粉丝", "").trim();
postNum = data.getString("status_count");
fensi = data.getString("fans_count");
url = data.getString("link");
WeiboSuperTopic topic = new WeiboSuperTopic(url, topicName, toprank, score, fensi, type);
WeiboSuperTopic topic = new WeiboSuperTopic(url, topicName, toprank, postNum, fensi, type);
topic = getTopicInfo(id, topic);
topicList.add(topic);
}
......@@ -140,17 +140,19 @@ public class WeiboSuperTopicCrawler {
String url = "https://m.weibo.cn/api/container/getIndex?containerid="+ id;
Request request = RequestUtils.wrapGet(url);
String htmlBody = null;
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析榜单详情页面时出现错误,错误为:{}", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析榜单详情页面时出现错误,错误为:{}",cause);
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("desc_more")) {
String descMore = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONObject("pageInfo").getJSONArray("desc_more").getString(0);
if (StringUtils.isNotBlank(descMore)) {
String readNum = descMore.replaceAll("阅读|帖子.*", "").trim();
String postNum = descMore.replaceAll(".*帖子|粉丝.*", "").trim();
topic.setPostNum(postNum);
//String postNum = descMore.replaceAll(".*帖子|粉丝.*", "").trim();
//topic.setPostNum(postNum);
topic.setReadNum(readNum);
return topic;
}
......
......@@ -3,9 +3,10 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
......@@ -13,7 +14,6 @@ import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.URLCodeUtil;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
......@@ -31,7 +31,7 @@ import java.util.regex.Pattern;
*/
@Log4j2
public class WeiboTopicCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
private static Map<String,String> headMap = new HashMap<>();
static {
......@@ -137,11 +137,13 @@ public class WeiboTopicCrawler {
String htmlBody = null;
//重试三次
for(int retryTimes = 1; retryTimes<=5; retryTimes++) {
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("下载榜单列表页面时出现错误,错误为:{}", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("下载榜单列表页面时出现错误,错误为:{}", cause);
continue;
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody)) {
topicList.addAll(parseTopicHtml(htmlBody,date));
......
......@@ -2,9 +2,10 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
......@@ -12,7 +13,7 @@ import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
......@@ -24,7 +25,7 @@ import java.util.*;
@Log4j2
public class XinLangHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 新浪热榜的采集
......@@ -38,10 +39,12 @@ public class XinLangHotSearchCrawler {
String htmlBody = null;
JSONObject jsonObject = null;
for(int t=0 ;t<3&&jsonObject==null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("新浪热榜页面连接异常...", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("新浪热榜页面连接异常...",cause);
}else {
htmlBody = response.bodyString();
}
if(htmlBody!=null) {
Document document = Jsoup.parse(htmlBody);
......@@ -111,10 +114,12 @@ public class XinLangHotSearchCrawler {
String htmlBody = null;
JSONArray dataJson = null;
for(int t=0 ;t<3&&dataJson==null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("新浪热点页面连接异常...", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("新浪热点页面连接异常...",cause);
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
JSONObject jsonObject = JSONObject.parseObject(htmlBody).getJSONObject("data");
......
......@@ -4,16 +4,18 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import java.io.IOException;
import java.util.*;
......@@ -21,7 +23,7 @@ import java.util.*;
@Log4j2
public class ZhihuChildHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 知乎子级分类数据采集
......@@ -39,10 +41,11 @@ public class ZhihuChildHotSearchCrawler {
Request request = RequestUtils.wrapGet(url, headerMap);
//采集为空最多重试3次
for (int t = 0; t < 3 && dataJson == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
e.printStackTrace();
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
response.cause().printStackTrace();
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("data")) {
JSONObject topSearch = JSONObject.parseObject(htmlBody);
......
......@@ -3,17 +3,18 @@ package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException;
import java.util.*;
import com.zhiwei.crawler.core.config.SslProvider;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import io.netty.handler.ssl.SslProvider;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool;
......@@ -34,7 +35,8 @@ import static java.util.Objects.nonNull;
@Log4j2
public class ZhihuHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().sslProvider(SslProvider.CONSCRYPT).retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
//private static HttpBoot httpBoot = HttpBoot.newBuilder().sslProvider(SslProvider.CONSCRYPT).retryTimes(3).build();
/**
* @Title: getZhihuHotList
* @author hero
......@@ -98,11 +100,13 @@ public class ZhihuHotSearchCrawler {
headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20");
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.debug("获取知乎热搜时出现问题:{}", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.debug("获取知乎热搜时出现问题:{}",cause);
return list;
}else {
htmlBody = response.bodyString();
}
try {
if (htmlBody != null && htmlBody.contains("author")) {
......@@ -160,17 +164,22 @@ public class ZhihuHotSearchCrawler {
Map<String,String> Map = HeaderTool.getCommonHead();
Map.put("cookie", "_xsrf=7NFWM5qBcOutfs8MaW7bhQQH65t3Xia4");
Request request = RequestUtils.wrapGet(url,Map);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
String htmlBody = response.body().string();
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("单条知乎热搜数据页面连接失败",cause);
return doc;
}else {
String htmlBody = response.bodyString();
if (htmlBody != null && htmlBody.contains("QuestionHeader")) {
Document document = Jsoup.parse(htmlBody);
//获取标签
String label="";
Elements select = document.select("div.Tag");
for (Element element : select) {
String text = "`"+element.select("div.Popover").text()+";";
label=label+text;
}
String label="";
Elements select = document.select("div.Tag");
for (Element element : select) {
String text = "`"+element.select("div.Popover").text()+";";
label=label+text;
}
doc.put("tag",label.trim());
String strong = document.select("div.NumberBoard-itemInner").select("strong").text();
String[] count = strong.split(" ");
......@@ -182,9 +191,6 @@ public class ZhihuHotSearchCrawler {
}else {
return doc;
}
} catch (Exception e) {
log.error("单条知乎热搜数据页面连接失败",e);
return doc;
}
}
......
......@@ -2,29 +2,21 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.Data;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.w3c.dom.Element;
import java.io.IOException;
import java.util.*;
@Log4j2
public class ZhihuTopicSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
public static List<HotSearchList> getZhihuTopicSearch(Date date){
List<HotSearchList> list = new ArrayList<>();
......@@ -33,10 +25,12 @@ public class ZhihuTopicSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int t = 0; t < 3 && jsonObject == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("知乎热搜页面连接异常", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("知乎热搜页面连接异常",cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null) {
Document document = Jsoup.parse(htmlBody);
......
package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.*;
......
package com.zhiwei.searchhotcrawler.test;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
......@@ -26,7 +27,8 @@ import java.util.*;
@Log4j2
public class HotSearch36KrCrawlerTest {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
//private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
/**
* @return void 返回类型
......@@ -38,12 +40,13 @@ public class HotSearch36KrCrawlerTest {
String url = "https://www.36kr.com/hot-list/catalog";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析36Kr人气榜时出现解析错误,页面结构有问题", e);
}
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析36Kr人气榜时出现解析错误,页面结构有问题", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("article-list")) {
return ansysData(htmlBody,date);
......
package com.zhiwei.searchhotcrawler.test;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun;
......
package com.zhiwei.searchhotcrawler.test;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
......@@ -31,17 +31,20 @@ import java.util.*;
*/
@Log4j2
public class HuXiuHotSearchCrawlerTest {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
//private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
public static List<HotSearchList> HuXiuHotArticleRecommended(Date date){
String url = "https://www.huxiu.com/";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析虎嗅热文推荐时出现解析错误,页面结构有问题", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析虎嗅热文推荐时出现解析错误,页面结构有问题", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("hot__list")) {
return ansysData(htmlBody,date);
......
......@@ -4,10 +4,12 @@ package com.zhiwei.searchhotcrawler.test;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.client.MongoDatabase;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
......@@ -17,7 +19,7 @@ import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBLocalTemplate;
import com.zhiwei.searchhotcrawler.util.HttpClientUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.context.ApplicationContext;
......@@ -38,7 +40,7 @@ public class Job51Test {
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
// MongoDatabase mongoDBLocal = MongoDBLocalTemplate.getDB(DBConfig.dbName);
List<HotSearchList> list = new ArrayList<>();
......@@ -62,11 +64,12 @@ public class Job51Test {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url,header);
for (int t = 0; t < 1 && jsonObject == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("知乎热搜页面连接异常", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("知乎热搜页面连接异常", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null) {
Document document = Jsoup.parse(htmlBody);
......
......@@ -3,14 +3,15 @@ package com.zhiwei.searchhotcrawler.test;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import java.time.Duration;
import java.util.*;
......@@ -23,7 +24,8 @@ import java.util.*;
*/
@Log4j2
public class KuaiShouHotSearchCrawlerTest {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
//private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
/**
* @return void 返回类型
......@@ -35,11 +37,12 @@ public class KuaiShouHotSearchCrawlerTest {
String url = "https://video.kuaishou.com/?utm_source=aa&utm_medium=05&utm_campaign=aa_05_pp_yr&plan_id=138090084&unit_id=5205658029&creative_id=43661481717&keyword_id=202928529242&keyword=202928529242&bd_vid=11937382025080724791";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析快手热榜时出现解析错误,页面结构有问题", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析快手热榜时出现解析错误,页面结构有问题", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("APOLLO_STATE")) {
return ansysData(htmlBody,date);
......
......@@ -2,15 +2,17 @@ package com.zhiwei.searchhotcrawler.test;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.MD5Util;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
;
import java.time.Duration;
import java.util.*;
......@@ -23,7 +25,8 @@ import java.util.*;
*/
@Log4j2
public class TaoBaoHotSearchCrawlerTest {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
//private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
public static List<HotSearchList> taoBaoHotSearch(Date date) {
Map<String, String> headerMap = new HashMap<>();
......@@ -34,8 +37,12 @@ public class TaoBaoHotSearchCrawlerTest {
String urls = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=" + time + "&sign=&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
Request request1 = RequestUtils.wrapGet(urls);
String token = null;
try (Response response = httpBoot.syncCall(request1, ProxyHolder.NAT_HEAVY_PROXY)) {
List<String> values = response.networkResponse().headers().values("Set-Cookie");
Response response1 = httpBoot.syncCall(request1, ProxySupplier.NAT_HEAVY_PROXY);
if (response1.hasCause()){
Throwable cause = response1.cause();
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", cause);
}else {
List<String> values = response1.headers().values("Set-Cookie");
String tk = values.get(1);
String[] splitTk = tk.split(";");
String _m_h5_tk = splitTk[0];
......@@ -44,18 +51,18 @@ public class TaoBaoHotSearchCrawlerTest {
String[] splitEnc = enc.split(";");
String _m_h5_tk_enc = splitEnc[0];
headerMap.put("cookie", _m_h5_tk + ";" + _m_h5_tk_enc);
} catch (Exception e) {
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", e);
}
String signs = token + "&" + time + "&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
String sign = MD5Util.getMD5(signs).toLowerCase();
String url = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=" + time + "&sign=" + sign + "&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
Request request = RequestUtils.wrapGet(url, headerMap);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", cause);
}else {
htmlBody = response.bodyString();
ht = !htmlBody.contains("非法请求");
} catch (Exception e) {
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", e);
}
if (htmlBody != null && htmlBody.contains("data")) {
return ansysData(htmlBody, date);
......
package com.zhiwei.searchhotcrawler.test;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.WeiboSuperTopicRun;
import com.zhiwei.searchhotcrawler.timer.WeiboTopicRun;
import java.text.ParseException;
......@@ -18,8 +20,10 @@ public class TaoBaoRunTest {
//微博热搜开始采集
// new WeiboHotSearchRun().start();
//快手热榜开始采集
// new KuaiShouHotSearchRun().start();
//new KuaiShouHotSearchRun().start();
//淘宝热搜 (Taobao hot search)
// new TaoBaoHotSearchRun().run();
//new TaoBaoHotSearchRun().run();
//超话测试
//new WeiboSuperTopicRun().run();
}
}
......@@ -2,14 +2,16 @@ package com.zhiwei.searchhotcrawler.test;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.util.*;
......@@ -24,7 +26,8 @@ import java.util.*;
@Log4j2
public class WeiboEntertainmentCrawlerTest {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
//private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
......@@ -38,10 +41,12 @@ public class WeiboEntertainmentCrawlerTest {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 5; count++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博娱乐榜时出现连接失败", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博娱乐榜时出现连接失败", cause);
}else {
htmlBody = response.bodyString();
}
List<HotSearchList> result = new ArrayList();
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
......
......@@ -43,7 +43,7 @@ public class WeiboSuperTopicRun extends Thread{
doc.put("_id", topic.getId());
doc.put("name", topic.getTopicName());
doc.put("rank", topic.getRank());
doc.put("score_num", topic.getScore());
doc.put("read_Num", topic.getReadNum());
doc.put("fensi_num", topic.getFensi());
doc.put("post_num", topic.getPostNum());
doc.put("type", topic.getType());
......@@ -53,7 +53,7 @@ public class WeiboSuperTopicRun extends Thread{
data.add(doc);
}
weiboTopicDAO.addTopicList(data);
log.info("微博话题采集结束........");
log.info("微博超话采集结束........");
}
}
......@@ -309,7 +309,7 @@ public class GatherTimer {
* 腾讯较真辟谣榜采集
*/
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
//@Scheduled(cron = "10 * * * * ? ")
public void crawlerTengXunVerificationHotSearch(){
log.info("{},腾讯较真辟谣榜开始采集", new Date());
Date date = DateUtils.getMillSecondTime(new Date());
......@@ -371,7 +371,7 @@ public class GatherTimer {
* 知乎热搜国际分类采集
*/
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
//@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuFocus(){
this.crawlerZhiHuChild(FOCUS);
}
......@@ -380,7 +380,7 @@ public class GatherTimer {
* 知乎热搜时事分类采集
*/
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
//@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuDepth(){
this.crawlerZhiHuChild(DEPTH);
}
......@@ -442,7 +442,7 @@ public class GatherTimer {
doc.put("_id", topic.getId());
doc.put("name", topic.getTopicName());
doc.put("rank", topic.getRank());
doc.put("score_num", topic.getScore());
doc.put("read_Num", topic.getReadNum());
doc.put("fensi_num", topic.getFensi());
doc.put("post_num", topic.getPostNum());
doc.put("type", topic.getType());
......@@ -452,7 +452,7 @@ public class GatherTimer {
data.add(doc);
}
weiboTopicDAO.addTopicList(data);
log.info("微博话题采集结束........");
log.info("微博超话采集结束........");
}
......
package com.zhiwei.searchhotcrawler.util;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.util.RequestUtils;
import okhttp3.MediaType;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
......@@ -24,7 +24,8 @@ public final class HttpClientUtils {
private static final String QUERY_PARAM_SEP = "&";
private static final String URL_QUERY_PARAM_SEPARATOR = "?";
private static final HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(2).build();
//private static final HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(2).build();
private static final HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(2).build();
/**
 * Convenience overload: posts {@code jsonParam} to {@code url} by delegating to the
 * four-argument {@code sendPost}, passing {@code null} as the third argument and
 * UTF-8 as the charset argument.
 *
 * <p>NOTE(review): the third argument is presumably the extra request headers
 * (none here) - confirm against the four-argument overload.
 *
 * @param url       target URL of the POST request
 * @param jsonParam JSON text to send as the request body
 * @return whatever the four-argument overload returns for this request
 */
public static String sendPost(String url, String jsonParam){
    // Use the JDK constant rather than looking the charset up by name at runtime;
    // fully qualified so the file's import block does not need to change.
    return sendPost(url, jsonParam, null, java.nio.charset.StandardCharsets.UTF_8);
}
......@@ -39,12 +40,13 @@ public final class HttpClientUtils {
String result = null;
Request request= RequestUtils.wrapPost(url, headers, RequestBody.create(MediaType.get("application/json"),
jsonParam));
try(Response response = httpBoot.syncCall(request)) {
result = response.body().string();
}catch (IOException e) {
LOGGER.error("http connection error :" + e.getMessage(), e);
}
Response response = httpBoot.syncCall(request);
if (response.hasCause()){
Throwable cause = response.cause();
LOGGER.error("http connection error :" + cause.getMessage(), cause);
}else {
result = response.bodyString();
}
return result;
}
}
......@@ -4,18 +4,15 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.util.RequestUtils;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.httpclient.HeaderTool;
import okhttp3.MediaType;
......@@ -23,7 +20,8 @@ import okhttp3.RequestBody;
public class WechatCodeUtil {
private static Logger log = LogManager.getLogger(WechatCodeUtil.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
//private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* @Title: getToken
* @author hero
......@@ -40,12 +38,13 @@ public class WechatCodeUtil {
Map<String, String> headerMap = HeaderTool.getCommonHead();
Request request = RequestUtils.wrapGet(url, headerMap);
String result = null;
try(Response response = httpBoot.syncCall(request)) {
result = response.body().string();
} catch (IOException e) {
e.printStackTrace();
log.error("获取微信公众号推送token失败,问题为:::{}", e.fillInStackTrace());
Response response = httpBoot.syncCall(request);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("获取微信公众号推送token失败,问题为:::{}", cause.fillInStackTrace());
return null;
}else {
result = response.bodyString();
}
if (result != null) {
JSONObject jsonObject = JSONObject.parseObject(result);
......@@ -73,11 +72,13 @@ public class WechatCodeUtil {
RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), templateJson.toJSONString());
Request request = RequestUtils.wrapPost(url,requestBody);
String htmlBody = null;
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("消息推送失败,错误为::{}",e.fillInStackTrace());
Response response = httpBoot.syncCall(request);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("消息推送失败,错误为::{}",cause.fillInStackTrace());
msgid = 0;
}else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody)) {
JSONObject jsonObject = JSONObject.parseObject(htmlBody);
......@@ -115,11 +116,13 @@ public class WechatCodeUtil {
RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), postData.toJSONString());
Request request = RequestUtils.wrapPost(url,requestBody);
String htmlBody = null;
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
}catch (IOException e){
log.error("页面连接获取失败",e);
Response response = httpBoot.syncCall(request);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("页面连接获取失败",cause);
return null;
}else {
htmlBody = response.bodyString();
}
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
JSONObject jsonObject = JSONObject.parseObject(htmlBody);
......@@ -154,11 +157,13 @@ public class WechatCodeUtil {
RequestBody requestBody = RequestBody.create(MediaType.get("application/json"), postData.toJSONString());
Request request = RequestUtils.wrapPost(url,requestBody);
String htmlBody = null;
try(Response response = httpBoot.syncCall(request)){
htmlBody = response.body().string();
}catch (IOException e){
log.error("页面链接获取失败",e);
Response response = httpBoot.syncCall(request);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("页面链接获取失败",cause);
return null;
}else {
htmlBody = response.bodyString();
}
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("data")) {
JSONObject jsonObject = JSONObject.parseObject(htmlBody);
......@@ -197,11 +202,13 @@ public class WechatCodeUtil {
Map<String, String> headerMap = HeaderTool.getCommonHead();
Request request = RequestUtils.wrapGet(url, headerMap);
String htmlBody = null;
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("获取分组id时出现错误",e.fillInStackTrace());
Response response = httpBoot.syncCall(request);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("获取分组id时出现错误",cause.fillInStackTrace());
return null;
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null) {
if (htmlBody.contains("tags")) {
......@@ -230,11 +237,13 @@ public class WechatCodeUtil {
Map<String, String> headerMap = HeaderTool.getCommonHead();
Request request = RequestUtils.wrapGet(url, headerMap);
String htmlBody = null;
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("获取分组id时出现错误",e.fillInStackTrace());
Response response = httpBoot.syncCall(request);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("获取分组id时出现错误",cause.fillInStackTrace());
return null;
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null) {
if (htmlBody.contains("tags")) {
......
......@@ -2,10 +2,12 @@ package hotSaerchTest;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.client.MongoCollection;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.config.DBConfig;
......@@ -14,11 +16,11 @@ import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.searchhotcrawler.test.KuaiShouHotSearchCrawlerTest;
import com.zhiwei.searchhotcrawler.test.TaoBaoHotSearchCrawlerTest;
import com.zhiwei.searchhotcrawler.util.TaoBaoUtils;
import com.zhiwei.searchhotcrawler.util.QYWechatUtil;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.bson.Document;
import org.junit.Test;
import org.junit.runner.RunWith;
......@@ -29,7 +31,6 @@ import java.io.IOException;
import java.util.Date;
import java.util.List;
import static com.ibm.icu.util.LocalePriorityList.add;
import static java.util.Objects.nonNull;
/**
......@@ -42,7 +43,7 @@ import static java.util.Objects.nonNull;
{"classpath:applicationContext.xml"})
public class HotSearchTest {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 测试快手热榜采集
......@@ -71,10 +72,12 @@ public class HotSearchTest {
String url = "https://m.weibo.cn/api/container/getIndex?containerid=231522type%3D1%26q%3D%23可口可乐回应C罗拒绝与可乐同框%23";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博热搜详情页面时出现连接失败", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博热搜详情页面时出现连接失败", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("data")) {
JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONObject("cardlistInfo");
......@@ -167,8 +170,18 @@ public class HotSearchTest {
long time = new Date().getTime();
String signs="undefined&1625624820156&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
// https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=1624930984092&sign=acf994dbcee6c0c1d7a8a566a6b8ff0a&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D
String s = TaoBaoUtils.parsJSFunction(signs);
System.out.println(s);
// String s = TaoBaoUtils.parsJSFunction(signs);
// System.out.println(s);
}
private static String key = "a8e26ce3-8aaa-4d3e-bcf6-30b81526050b";
/**
 * Tests sending an alert: pushes the text message "你好" through
 * {@code QYWechatUtil.send} using the class-level {@code key}, with the last
 * two arguments left as {@code null}.
 *
 * <p>NOTE(review): presumably this delivers a real message to the endpoint
 * identified by {@code key} - confirm before running it unattended.
 */
@Test
public void testWarn(){
    QYWechatUtil.send(
            key,
            QYWechatUtil.MSGTYPE_TEXT,
            "你好",
            null,
            null);
}
}
......
......@@ -3,10 +3,12 @@ package weiboTest;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
......@@ -19,7 +21,6 @@ import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
......@@ -50,7 +51,7 @@ public class WeiboHotSearchTest {
static WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao();
//调用weiBoUserDao添加数据
static WeiBoUserDao weiBoUserDao = new WeiBoUserDao();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
@Test
public void test() {
......@@ -122,10 +123,12 @@ public class WeiboHotSearchTest {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 2; count++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博热搜详情页面时出现连接失败", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博热搜详情页面时出现连接失败", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("data")) {
JSONObject dataJson = JSONObject.parseObject(htmlBody).getJSONObject("data");
......@@ -213,10 +216,12 @@ public class WeiboHotSearchTest {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 2; count++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博热搜详情页面时出现连接失败", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博热搜详情页面时出现连接失败", cause);
}else {
htmlBody = response.bodyString();
}
if (htmlBody != null && htmlBody.contains("data")) {
JSONObject dataJson = JSONObject.parseObject(htmlBody).getJSONObject("data");
......@@ -527,10 +532,12 @@ public class WeiboHotSearchTest {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
for (int count = 0; count <= 5; count++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博时热搜时出现连接失败", e);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博时热搜时出现连接失败", cause);
}else {
htmlBody = response.bodyString();
}
List<HotSearchList> result = new ArrayList<HotSearchList>();
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
......
package weiboTest;
import com.alibaba.fastjson.JSONArray;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.util.AESUtils;
import com.zhiwei.searchhotcrawler.util.HttpClientUtils;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.junit.Test;
......@@ -19,6 +21,7 @@ import javax.crypto.spec.SecretKeySpec;
import java.beans.Encoder;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.Proxy;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.Charset;
......@@ -33,7 +36,7 @@ import java.util.Map;
*/
public class WeiboTopInfoTest {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
/**
* 加密测试
......@@ -106,10 +109,11 @@ public class WeiboTopInfoTest {
System.out.println(url);
Request request = RequestUtils.wrapGet(url,getHeaderMap());
//测试使用空代理
try (Response response = httpBoot.syncCall(request, ProxyHolder.NONE_PROXY)) {
System.out.println(response.body().string());
} catch (IOException e) {
e.printStackTrace();
Response response = httpBoot.syncCall(request, ProxySupplier.NONE_PROXY);
if (response.hasCause()){
response.cause().printStackTrace();
}else {
System.out.println(response.bodyString());
}
}
......@@ -135,8 +139,11 @@ public class WeiboTopInfoTest {
System.out.println(url);
Request request = RequestUtils.wrapGet(url,getHeaderMap());
//测试使用空代理
try (Response response = httpBoot.syncCall(request, ProxyHolder.NONE_PROXY)) {
String result = response.body().string();
Response response = httpBoot.syncCall(request, ProxySupplier.NONE_PROXY);
if (response.hasCause()){
response.cause().printStackTrace();
}else {
String result = response.bodyString();
//结果解密
String decodeResult = decodeStr(key,result);
System.out.println(decodeResult);
......@@ -144,11 +151,7 @@ public class WeiboTopInfoTest {
JSONArray jsonArray = JSONArray.parseArray(decodeResult);
for (Object o : jsonArray) {
System.out.println(o);
}
} catch (IOException e) {
e.printStackTrace();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment