Commit 7f0418e6 by zhiwei

升级核心包版本

parent 47654569
......@@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>media_data_crawler</artifactId>
<version>0.0.9-SNAPSHOT</version>
<version>0.1.0-SNAPSHOT</version>
<name>media_data_crawler</name>
<description>网媒数据抓取,包含百度新闻、搜狗新闻、360新闻、知乎回答列表等</description>
......@@ -10,13 +10,13 @@
<dependency>
<groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId>
<version>0.1.2-SNAPSHOT</version>
<version>0.1.3-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.3.0-RELEASE</version>
<version>0.3.6-RELEASE</version>
<scope>provided</scope>
</dependency>
</dependencies>
......
......@@ -31,7 +31,7 @@ import okhttp3.Response;
public class BaiduNewsCrawlerParse {
private static Logger logger = LogManager.getLogger(BaiduNewsCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static final String pt = "百度新闻";
/**
......@@ -208,7 +208,7 @@ public class BaiduNewsCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......@@ -259,7 +259,7 @@ public class BaiduNewsCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......@@ -288,7 +288,7 @@ public class BaiduNewsCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......
......@@ -28,7 +28,7 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
public class BaiduTiebaCrawlerParse {
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class);
/**
......@@ -236,7 +236,7 @@ public class BaiduTiebaCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......@@ -279,7 +279,7 @@ public class BaiduTiebaCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......
......@@ -25,7 +25,7 @@ public class BaiduZhidaoCrawlerParse {
private static Logger logger = LoggerFactory.getLogger(BaiduZhidaoCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot(false,2);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
public static List<Map<String,Object>> getData(String word,ProxyHolder proxy) {
try {
......
......@@ -27,7 +27,8 @@ import okhttp3.Response;
public class DoubanCrawlerParse {
private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
*
* @Title: getDoubanData
......@@ -101,7 +102,7 @@ public class DoubanCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......@@ -128,7 +129,7 @@ public class DoubanCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......
......@@ -33,7 +33,7 @@ import okhttp3.Response;
public class JianshuCrawler {
private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot(false,2);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
public static List<JianshuUser> getUserList(String word,String cookie) {
List<JianshuUser> jsList = new ArrayList<>();
......@@ -46,7 +46,7 @@ public class JianshuCrawler {
headers.put("origin", "https://www.jianshu.com");
headers.put("accept", "application/json");
headers.put("user-agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36");
try (Response response = httpBoot.syncCall(RequestUtils.wrapPost(url,headers,null), ProxyHolder.NAT_PROXY)){
try (Response response = httpBoot.syncCall(RequestUtils.wrapPost(url,headers,null), ProxyHolder.NAT_HEAVY_PROXY)){
String result = response.body().string();
System.out.println(result);
if(result.contains("搜索过于频繁")) {
......
......@@ -20,7 +20,6 @@ import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
......@@ -31,7 +30,8 @@ import okhttp3.Response;
public class SoCrawlerParse {
private static Logger logger = LogManager.getLogger(SoCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static final String pt = "360网页";
/**
......@@ -111,7 +111,7 @@ public class SoCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......@@ -218,7 +218,7 @@ public class SoCrawlerParse {
Map<String,String> headMap = HeaderTool.getCommonHead();
headMap.put("accept-encoding", "deflate, br");
headMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36");
String htmlBody = HttpClientTemplateOK.get(url, proxy, headMap);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy).body().string();
if(htmlBody!=null){
if(htmlBody.contains("question")){
String html = htmlBody.split("var __wenda_data =")[1].split("\"err_tips\":\"\"};")[0]+"\"err_tips\":\"\"}";
......
......@@ -28,7 +28,7 @@ import okhttp3.Response;
public class SoNewsCrawlerParse {
private static Logger logger = LogManager.getLogger(SoNewsCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static final String pt = "360新闻";
/**
......@@ -143,7 +143,7 @@ public class SoNewsCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......
......@@ -30,7 +30,7 @@ import okhttp3.Response;
public class SougouNewsCrawlerParse {
private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static final String pt = "搜狗新闻";
......@@ -138,7 +138,7 @@ public class SougouNewsCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......@@ -167,7 +167,7 @@ public class SougouNewsCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......
......@@ -33,7 +33,7 @@ import okhttp3.Response;
public class SougouZhihuCrawlerParse{
private static Logger logger = LogManager.getLogger(SougouZhihuCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static final String pt = "搜狗知乎";
......@@ -109,7 +109,7 @@ public class SougouZhihuCrawlerParse{
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......@@ -141,7 +141,7 @@ public class SougouZhihuCrawlerParse{
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......
......@@ -29,7 +29,8 @@ import okhttp3.Response;
public class TianYaCrawlerParse {
private static Logger logger = LogManager.getLogger(TianYaCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static final String pt = "天涯论坛";
/**
* @Title: getBaiduTiebaData
......@@ -98,7 +99,7 @@ public class TianYaCrawlerParse {
if(proxy != null) {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy);
}else {
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_PROXY);
response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), ProxyHolder.NAT_HEAVY_PROXY);
}
return response.body().string();
} catch (Exception e) {
......
......@@ -12,7 +12,6 @@ import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.entity.ZhihuAnswerComment;
......@@ -24,7 +23,8 @@ public class ZhihuAnswerCommentParse {
private static Logger logger = LogManager.getLogger(TianYaCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
*
......@@ -121,7 +121,7 @@ public class ZhihuAnswerCommentParse {
private static List<ZhihuAnswerComment> getReplayList(String url,String strRootID) {
List<ZhihuAnswerComment> dataList = new ArrayList<>();
try {
String result = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyFactory.getNatProxy()).body().string();
String result = httpBoot.syncCall(RequestUtils.wrapGet(url),ProxyHolder.NAT_HEAVY_PROXY).body().string();
if (result != null) {
JSONObject dataJson = JSONObject.parseObject(result);
JSONArray dataArray = dataJson.getJSONArray("data");
......
......@@ -27,7 +27,7 @@ import okhttp3.Response;
*/
public class ZhihuAnwserCrawlerParse {
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static Logger logger = LoggerFactory.getLogger(ZhihuAnwserCrawlerParse.class);
......
......@@ -29,8 +29,7 @@ import okhttp3.Response;
public class ZhihuCrawlerParse {
private static Logger logger = LogManager.getLogger(TianYaCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
* @Title: getBaiduTiebaData
......
......@@ -26,7 +26,7 @@ public class ZhihuUserAnswerCrawlerParse {
private static final Logger logger = LoggerFactory.getLogger(ZhihuUserAnswerCrawlerParse.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
public static List<ZhihuAnswer> getData(String userId,ProxyHolder proxy) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment