Commit 60774fb9 by 马黎滨

Merge branch 'mlbWork' into 'master'

代理ip

See merge request !8
parents d00d9860 02d17aa6
......@@ -43,7 +43,8 @@ public class BaiDuHotSearchCrawler {
String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
//,ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
......
......@@ -44,7 +44,8 @@ public class DouyinHotSearchCrawler {
String url = "https://api.amemv.com/aweme/v1/hot/search/list/";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
}catch (IOException e) {
log.debug("获取抖音热搜榜时出现问题:{}", e);
......
......@@ -48,7 +48,8 @@ public class SougoHotSearchCrawler {
Request request = RequestUtils.wrapGet(url, headMap);
for (int i = 0; i < 3; i++) {
String htmlBody = null;
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
}catch (Exception e) {
log.error("解析搜狗微信时出现解析错误,页面结构有问题", e);
......
......@@ -35,7 +35,8 @@ public class TengXunCrawler {
Request request = RequestUtils.wrapGet(url);
//采集为空最多重试3次
for (int t = 0; t < 3 && dataJson == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
//, ProxyHolder.NAT_HEAVY_PROXY
try (Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
} catch (IOException e) {
e.printStackTrace();
......
......@@ -47,7 +47,8 @@ public class WeiboHotSearchCrawler {
for(int i =0; i<3; i++){
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
//,ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
} catch (Exception e) {
if(i==2){
......
......@@ -64,7 +64,8 @@ public class WeiboSuperTopicCrawler {
String htmlBody = null;
//重试三次
for(int retryTimes = 1; retryTimes<=3; retryTimes++) {
try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
//,ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
}catch (Exception e) {
log.error("获取榜单列表页面时出现错误,错误为:{}", e);
......@@ -141,7 +142,8 @@ public class WeiboSuperTopicCrawler {
String url = "https://m.weibo.cn/api/container/getIndex?containerid="+ id;
Request request = RequestUtils.wrapGet(url);
String htmlBody = null;
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析榜单详情页面时出现错误,错误为:{}", e);
......
......@@ -137,7 +137,8 @@ public class WeiboTopicCrawler {
String htmlBody = null;
// Retry up to five times
for(int retryTimes = 1; retryTimes<=5; retryTimes++) {
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
// log.info("pageUrl::{}", pageUrl);
htmlBody = response.body().string();
} catch (Exception e) {
......
......@@ -41,7 +41,8 @@ public class ZhihuChildHotSearchCrawler {
Request request = RequestUtils.wrapGet(url, headerMap);
//采集为空最多重试3次
for (int t = 0; t < 3 && dataJson == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
//, ProxyHolder.NAT_HEAVY_PROXY
try (Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
} catch (IOException e) {
e.printStackTrace();
......
......@@ -50,7 +50,8 @@ public class ZhihuHotSearchCrawler {
headerMap.put("Referer", rerferer);
Request request = RequestUtils.wrapGet(url, headerMap);
String htmlBody = null;
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
}catch (IOException e) {
log.debug("获取知乎热搜时出现问题:{}", e);
......@@ -94,7 +95,8 @@ public class ZhihuHotSearchCrawler {
headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20");
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.debug("获取知乎热搜时出现问题:{}", e);
......
......@@ -35,7 +35,8 @@ public class ZhihuTopicSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int t = 0; t < 3 && jsonObject == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
//, ProxyHolder.NAT_HEAVY_PROXY
try (Response response = httpBoot.syncCall(request)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("知乎热搜页面连接异常", e);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment