Commit aacd8761 by yangchen

修改部分代理使用 并升级版本

parent ea9efe8f
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>media_data_crawler</artifactId> <artifactId>media_data_crawler</artifactId>
<version>0.1.1-SNAPSHOT</version> <version>0.1.2-SNAPSHOT</version>
<name>media_data_crawler</name> <name>media_data_crawler</name>
<description>网媒数据抓取,包含百度新闻、搜狗新闻、360新闻、知乎回答列表等</description> <description>网媒数据抓取,包含百度新闻、搜狗新闻、360新闻、知乎回答列表等</description>
......
...@@ -15,6 +15,7 @@ import org.apache.logging.log4j.Logger; ...@@ -15,6 +15,7 @@ import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.ZhiHuData; import com.zhiwei.media_data_crawler.entity.ZhiHuData;
...@@ -43,8 +44,8 @@ public class ZhihuCrawlerParse { ...@@ -43,8 +44,8 @@ public class ZhihuCrawlerParse {
* @return List<TiebaData> 返回类型 * @return List<TiebaData> 返回类型
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<ZhiHuData> getZhihuData(String word, String timeLimit,Proxy proxy, Date endTime) throws Exception { public static List<ZhiHuData> getZhihuData(String word, String timeLimit,ProxyHolder proxy, Date endTime) throws Exception {
List<ZhiHuData> list = new ArrayList<ZhiHuData>(); List<ZhiHuData> list = new ArrayList<>();
int page = 0; int page = 0;
boolean more = true; boolean more = true;
while (more) { while (more) {
...@@ -265,7 +266,7 @@ public class ZhihuCrawlerParse { ...@@ -265,7 +266,7 @@ public class ZhihuCrawlerParse {
* @return * @return
* @throws Exception * @throws Exception
*/ */
private static String downloadHtml(String word, String timeLimit,Proxy proxy, private static String downloadHtml(String word, String timeLimit,ProxyHolder proxy,
int page) throws Exception{ int page) throws Exception{
// 获取通用请求头 // 获取通用请求头
Map<String, String> headerMap = HeaderTool.getCommonHead(); Map<String, String> headerMap = HeaderTool.getCommonHead();
...@@ -300,9 +301,9 @@ public class ZhihuCrawlerParse { ...@@ -300,9 +301,9 @@ public class ZhihuCrawlerParse {
* @param @throws Exception 设定文件 * @param @throws Exception 设定文件
* @return Map<String,Object> 返回类型 * @return Map<String,Object> 返回类型
*/ */
private static Map<String, Object> analysisData(String htmlBody, Proxy proxy, String word, Date endTime) throws Exception{ private static Map<String, Object> analysisData(String htmlBody, ProxyHolder proxy, String word, Date endTime) throws Exception{
Map<String, Object> resultMap = new HashMap<String, Object>(); Map<String, Object> resultMap = new HashMap<>();
List<ZhiHuData> list = new ArrayList<ZhiHuData>(); List<ZhiHuData> list = new ArrayList<>();
boolean more = true; boolean more = true;
try { try {
JSONArray dataJson = JSONObject.parseObject(htmlBody).getJSONArray("data"); JSONArray dataJson = JSONObject.parseObject(htmlBody).getJSONArray("data");
...@@ -351,7 +352,6 @@ public class ZhihuCrawlerParse { ...@@ -351,7 +352,6 @@ public class ZhihuCrawlerParse {
} }
} catch (Exception e) { } catch (Exception e) {
System.out.println("======="+objectJson); System.out.println("======="+objectJson);
continue;
} }
} }
}else{ }else{
...@@ -359,7 +359,6 @@ public class ZhihuCrawlerParse { ...@@ -359,7 +359,6 @@ public class ZhihuCrawlerParse {
} }
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
System.out.println();
more = false; more = false;
} }
......
...@@ -428,7 +428,7 @@ public class DataCrawler { ...@@ -428,7 +428,7 @@ public class DataCrawler {
* @return * @return
* @throws Exception * @throws Exception
*/ */
public static List<ZhiHuData> getZhihuByWord(String word, String timeLimit,Date endDate, Proxy proxy) throws Exception{ public static List<ZhiHuData> getZhihuByWord(String word, String timeLimit,Date endDate, ProxyHolder proxy) throws Exception{
try{ try{
return ZhihuCrawlerParse.getZhihuData(word, timeLimit, proxy, endDate); return ZhihuCrawlerParse.getZhihuData(word, timeLimit, proxy, endDate);
}catch (Exception e){ }catch (Exception e){
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment