Commit d5a20472 by 马黎滨

Merge branch 'mlbWork' into 'master'

代理ip恢复

See merge request !9
parents 60774fb9 07e166b9
......@@ -43,8 +43,7 @@ public class BaiDuHotSearchCrawler {
String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
//,ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
......
......@@ -44,8 +44,7 @@ public class DouyinHotSearchCrawler {
String url = "https://api.amemv.com/aweme/v1/hot/search/list/";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
}catch (IOException e) {
log.debug("获取抖音热搜榜时出现问题:{}", e);
......
......@@ -48,8 +48,7 @@ public class SougoHotSearchCrawler {
Request request = RequestUtils.wrapGet(url, headMap);
for (int i = 0; i < 3; i++) {
String htmlBody = null;
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
}catch (Exception e) {
log.error("解析搜狗微信时出现解析错误,页面结构有问题", e);
......
......@@ -35,8 +35,7 @@ public class TengXunCrawler {
Request request = RequestUtils.wrapGet(url);
//采集为空最多重试3次
for (int t = 0; t < 3 && dataJson == null; t++) {
//, ProxyHolder.NAT_HEAVY_PROXY
try (Response response = httpBoot.syncCall(request)) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
e.printStackTrace();
......
......@@ -47,8 +47,7 @@ public class WeiboHotSearchCrawler {
for(int i =0; i<3; i++){
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
//,ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
if(i==2){
......
......@@ -64,8 +64,7 @@ public class WeiboSuperTopicCrawler {
String htmlBody = null;
//重试三次
for(int retryTimes = 1; retryTimes<=3; retryTimes++) {
//,ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
}catch (Exception e) {
log.error("获取榜单列表页面时出现错误,错误为:{}", e);
......@@ -142,8 +141,7 @@ public class WeiboSuperTopicCrawler {
String url = "https://m.weibo.cn/api/container/getIndex?containerid="+ id;
Request request = RequestUtils.wrapGet(url);
String htmlBody = null;
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析榜单详情页面时出现错误,错误为:{}", e);
......
......@@ -137,8 +137,7 @@ public class WeiboTopicCrawler {
String htmlBody = null;
//重试三次
for(int retryTimes = 1; retryTimes<=5; retryTimes++) {
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
// log.info("pageUrl::{}", pageUrl);
htmlBody = response.body().string();
} catch (Exception e) {
......
......@@ -41,8 +41,7 @@ public class ZhihuChildHotSearchCrawler {
Request request = RequestUtils.wrapGet(url, headerMap);
//采集为空最多重试3次
for (int t = 0; t < 3 && dataJson == null; t++) {
//, ProxyHolder.NAT_HEAVY_PROXY
try (Response response = httpBoot.syncCall(request)) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
e.printStackTrace();
......
......@@ -50,8 +50,7 @@ public class ZhihuHotSearchCrawler {
headerMap.put("Referer", rerferer);
Request request = RequestUtils.wrapGet(url, headerMap);
String htmlBody = null;
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
}catch (IOException e) {
log.debug("获取知乎热搜时出现问题:{}", e);
......@@ -95,8 +94,7 @@ public class ZhihuHotSearchCrawler {
headerMap.put("authorization", "oauth c3cef7c66a1843f8b3a9e6a1e3160e20");
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
//, ProxyHolder.NAT_HEAVY_PROXY
try(Response response = httpBoot.syncCall(request)) {
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.debug("获取知乎热搜时出现问题:{}", e);
......
......@@ -35,8 +35,7 @@ public class ZhihuTopicSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int t = 0; t < 3 && jsonObject == null; t++) {
//, ProxyHolder.NAT_HEAVY_PROXY
try (Response response = httpBoot.syncCall(request)) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("知乎热搜页面连接异常", e);
......
......@@ -24,9 +24,9 @@ public class TipsUtils {
Date lastTime = hotSearchListDAO.getLastTimeByType(type);
if(time.getTime() - lastTime.getTime() > timeDifference){
//发送预警
String crawlerContent = String.format("%s已经连续%s分钟未采集到数据",type,(time.getTime() - lastTime.getTime())/1000/60);
QYWechatUtil.send(key, QYWechatUtil.MSGTYPE_TEXT, crawlerContent,
null, null);
String crawlerContent = String.format("%s已经采集数据异常",type);
// QYWechatUtil.send(key, QYWechatUtil.MSGTYPE_TEXT, crawlerContent,
// null, null);
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment