Commit af2398c5 by yangchen

百度获取量失败

parent a7d988a7
......@@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>media_data_crawler</artifactId>
<version>0.0.2-SNAPSHOT</version>
<version>0.0.3-SNAPSHOT</version>
<name>media_data_crawler</name>
<description>网媒数据抓取,包含百度新闻、搜狗新闻、360新闻等</description>
......
......@@ -271,7 +271,7 @@ public class BaiduNewsCrawlerParse {
for (int i = 1; i <= 3; i++) {
try {
Response response = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap),proxy,false);
return response.body().toString();
return response.body().string();
} catch (Exception e) {
logger.error("获取数据时出现问题,问题为:{}", e.fillInStackTrace());
if(i==3){
......
......@@ -61,6 +61,7 @@ public class SougouNewsCrawlerParse {
more = false;
}
page++;
logger.info("采集到 {} 页 采集的数据量为 {}", page, list.size());
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000);
}
......
......@@ -154,6 +154,7 @@ public class DataCrawler {
*/
public static List<NewsData> getSougouNewsData(String word, Proxy proxy) {
try {
System.out.println("开始采集sogou");
return SougouNewsCrawlerParse.getSougouNewsData(word, proxy);
} catch (Exception e) {
e.printStackTrace();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment