Commit f4b4b293 by zhiwei

升级采集核心包版本并默认代理使用晋豪提供的NAT代理

parent f0484148
...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler; ...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData; import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
...@@ -68,8 +69,8 @@ public class BaiduNewsCrawlerParse { ...@@ -68,8 +69,8 @@ public class BaiduNewsCrawlerParse {
more = false; more = false;
} }
page++; page++;
if(DataCrawler.sleepTime==null){ if(DataCrawler.sleepTime != null ){
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(DataCrawler.sleepTime);
} }
} }
return list; return list;
...@@ -201,8 +202,13 @@ public class BaiduNewsCrawlerParse { ...@@ -201,8 +202,13 @@ public class BaiduNewsCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap)); Response response = null;
return response.body().string(); if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
if(i==3){ if(i==3){
...@@ -247,7 +253,12 @@ public class BaiduNewsCrawlerParse { ...@@ -247,7 +253,12 @@ public class BaiduNewsCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
...@@ -271,7 +282,12 @@ public class BaiduNewsCrawlerParse { ...@@ -271,7 +282,12 @@ public class BaiduNewsCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
......
...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler; ...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.TiebaData; import com.zhiwei.media_data_crawler.entity.TiebaData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
...@@ -57,8 +58,8 @@ public class BaiduTiebaCrawlerParse { ...@@ -57,8 +58,8 @@ public class BaiduTiebaCrawlerParse {
more = false; more = false;
} }
page++; page++;
if(DataCrawler.sleepTime==null){ if(DataCrawler.sleepTime!=null){
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(DataCrawler.sleepTime);
} }
} }
return list; return list;
...@@ -217,8 +218,13 @@ public class BaiduTiebaCrawlerParse { ...@@ -217,8 +218,13 @@ public class BaiduTiebaCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
return response.body().toString(); if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
if(i==3){ if(i==3){
...@@ -255,8 +261,12 @@ public class BaiduTiebaCrawlerParse { ...@@ -255,8 +261,12 @@ public class BaiduTiebaCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
return response.body().string(); if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
if(i==3){ if(i==3){
......
...@@ -3,6 +3,7 @@ package com.zhiwei.media_data_crawler.crawler; ...@@ -3,6 +3,7 @@ package com.zhiwei.media_data_crawler.crawler;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.DouBanData; import com.zhiwei.media_data_crawler.entity.DouBanData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
...@@ -94,8 +95,12 @@ public class DoubanCrawlerParse { ...@@ -94,8 +95,12 @@ public class DoubanCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
return response.body().string(); if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
if(i==3){ if(i==3){
...@@ -116,8 +121,12 @@ public class DoubanCrawlerParse { ...@@ -116,8 +121,12 @@ public class DoubanCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
return response.body().toString(); if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
if(i==3){ if(i==3){
...@@ -193,8 +202,8 @@ public class DoubanCrawlerParse { ...@@ -193,8 +202,8 @@ public class DoubanCrawlerParse {
String content = document.select("div.topic-doc").select("div#link-report").select("div.topic-content").text(); String content = document.select("div.topic-doc").select("div#link-report").select("div.topic-content").text();
douban.setContent(content); douban.setContent(content);
} }
if(DataCrawler.sleepTime==null){ if(DataCrawler.sleepTime!=null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(DataCrawler.sleepTime);
} }
return douban; return douban;
} catch (Exception e) { } catch (Exception e) {
...@@ -273,8 +282,8 @@ public class DoubanCrawlerParse { ...@@ -273,8 +282,8 @@ public class DoubanCrawlerParse {
douban.setContent(content); douban.setContent(content);
} }
} }
if(DataCrawler.sleepTime==null){ if(DataCrawler.sleepTime!=null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(DataCrawler.sleepTime);
} }
return douban; return douban;
} catch (Exception e) { } catch (Exception e) {
......
...@@ -3,6 +3,7 @@ package com.zhiwei.media_data_crawler.crawler; ...@@ -3,6 +3,7 @@ package com.zhiwei.media_data_crawler.crawler;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData; import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
...@@ -62,8 +63,8 @@ public class SoCrawlerParse { ...@@ -62,8 +63,8 @@ public class SoCrawlerParse {
more = false; more = false;
} }
page++; page++;
if(DataCrawler.sleepTime==null){ if(DataCrawler.sleepTime!=null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(DataCrawler.sleepTime);
} }
} }
return list; return list;
...@@ -104,7 +105,12 @@ public class SoCrawlerParse { ...@@ -104,7 +105,12 @@ public class SoCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取360新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取360新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
......
...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler; ...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData; import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
...@@ -59,8 +60,8 @@ public class SoNewsCrawlerParse { ...@@ -59,8 +60,8 @@ public class SoNewsCrawlerParse {
more = false; more = false;
} }
page++; page++;
if(DataCrawler.sleepTime==null){ if(DataCrawler.sleepTime!=null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(DataCrawler.sleepTime);
} }
} }
return list; return list;
...@@ -108,8 +109,8 @@ public class SoNewsCrawlerParse { ...@@ -108,8 +109,8 @@ public class SoNewsCrawlerParse {
more = false; more = false;
} }
page++; page++;
if(DataCrawler.sleepTime==null){ if(DataCrawler.sleepTime!=null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(DataCrawler.sleepTime);
} }
} }
return list; return list;
...@@ -136,7 +137,12 @@ public class SoNewsCrawlerParse { ...@@ -136,7 +137,12 @@ public class SoNewsCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取360新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取360新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
......
...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler; ...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData; import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
...@@ -9,6 +10,8 @@ import com.zhiwei.tools.timeparse.TimeParse; ...@@ -9,6 +10,8 @@ import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response; import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
...@@ -63,8 +66,8 @@ public class SougouNewsCrawlerParse { ...@@ -63,8 +66,8 @@ public class SougouNewsCrawlerParse {
} }
page++; page++;
logger.info("采集到 {} 页 采集的数据量为 {}", page, list.size()); logger.info("采集到 {} 页 采集的数据量为 {}", page, list.size());
if(DataCrawler.sleepTime==null){ if(DataCrawler.sleepTime!=null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(DataCrawler.sleepTime);
} }
} }
return list; return list;
...@@ -74,7 +77,7 @@ public class SougouNewsCrawlerParse { ...@@ -74,7 +77,7 @@ public class SougouNewsCrawlerParse {
public static Map<String,Object> getSougouNewsData(String word, Proxy proxy, int page) throws Exception{ public static Map<String,Object> getSougouNewsData(String word, Proxy proxy, int page) throws Exception{
String htmlBody = downloadHtml(word, 1, proxy, page); String htmlBody = downloadHtml(word, 1, proxy, page);
if(htmlBody != null && !htmlBody.equals("")){ if(StringUtils.isBlank(htmlBody)){
return analysisData(htmlBody, proxy, word, "normal"); return analysisData(htmlBody, proxy, word, "normal");
} }
return null; return null;
...@@ -101,8 +104,8 @@ public class SougouNewsCrawlerParse { ...@@ -101,8 +104,8 @@ public class SougouNewsCrawlerParse {
more = false; more = false;
} }
page++; page++;
if(DataCrawler.sleepTime==null){ if(DataCrawler.sleepTime!=null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(DataCrawler.sleepTime);
} }
} }
return list; return list;
...@@ -130,7 +133,12 @@ public class SougouNewsCrawlerParse { ...@@ -130,7 +133,12 @@ public class SougouNewsCrawlerParse {
//下载数据页面 //下载数据页面
for(int i = 1; i<=3; i++){ for(int i = 1; i<=3; i++){
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
...@@ -154,7 +162,12 @@ public class SougouNewsCrawlerParse { ...@@ -154,7 +162,12 @@ public class SougouNewsCrawlerParse {
//下载数据页面 //下载数据页面
for(int i = 1; i<=3; i++){ for(int i = 1; i<=3; i++){
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
......
...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler; ...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.ZhiHuData; import com.zhiwei.media_data_crawler.entity.ZhiHuData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
...@@ -9,6 +10,8 @@ import com.zhiwei.tools.timeparse.TimeParse; ...@@ -9,6 +10,8 @@ import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response; import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
...@@ -59,8 +62,8 @@ public class SougouZhihuCrawlerParse{ ...@@ -59,8 +62,8 @@ public class SougouZhihuCrawlerParse{
}else{ }else{
more = false; more = false;
} }
if(DataCrawler.sleepTime==null){ if(DataCrawler.sleepTime!=null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(DataCrawler.sleepTime);
} }
page++; page++;
} }
...@@ -70,7 +73,7 @@ public class SougouZhihuCrawlerParse{ ...@@ -70,7 +73,7 @@ public class SougouZhihuCrawlerParse{
public static Map<String,Object> getSougouZhihuData(String word, Proxy proxy, int page) throws Exception{ public static Map<String,Object> getSougouZhihuData(String word, Proxy proxy, int page) throws Exception{
String htmlBody = downloadHtml(word, proxy, page); String htmlBody = downloadHtml(word, proxy, page);
if(htmlBody != null && !htmlBody.equals("")){ if(StringUtils.isBlank(htmlBody)){
return analysisData(htmlBody, proxy, word); return analysisData(htmlBody, proxy, word);
} }
return null; return null;
...@@ -97,7 +100,12 @@ public class SougouZhihuCrawlerParse{ ...@@ -97,7 +100,12 @@ public class SougouZhihuCrawlerParse{
//下载数据页面 //下载数据页面
for(int i = 1; i<=3; i++){ for(int i = 1; i<=3; i++){
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
...@@ -124,7 +132,12 @@ public class SougouZhihuCrawlerParse{ ...@@ -124,7 +132,12 @@ public class SougouZhihuCrawlerParse{
//下载数据页面 //下载数据页面
for(int i = 1; i<=3; i++){ for(int i = 1; i<=3; i++){
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取搜狗新闻数据时出现问题,问题为:{}", e.fillInStackTrace());
......
...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler; ...@@ -2,6 +2,7 @@ package com.zhiwei.media_data_crawler.crawler;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.LunTanData; import com.zhiwei.media_data_crawler.entity.LunTanData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
...@@ -55,8 +56,8 @@ public class TianYaCrawlerParse { ...@@ -55,8 +56,8 @@ public class TianYaCrawlerParse {
more = false; more = false;
} }
page++; page++;
if(DataCrawler.sleepTime==null){ if(DataCrawler.sleepTime!=null){
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(DataCrawler.sleepTime);
} }
} }
...@@ -87,7 +88,12 @@ public class TianYaCrawlerParse { ...@@ -87,7 +88,12 @@ public class TianYaCrawlerParse {
// 下载数据页面 // 下载数据页面
for (int i = 1; i <= 3; i++) { for (int i = 1; i <= 3; i++) {
try { try {
Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false); Response response = null;
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
}
return response.body().string(); return response.body().string();
} catch (Exception e) { } catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace()); logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment