Commit 1b78ab01 by zhiwei

修改添加休眠时间方式,修改为在DataCrawler中统一设置

parent df8ce8d3
...@@ -8,12 +8,15 @@ import java.util.List; ...@@ -8,12 +8,15 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData; import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool; import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK; import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
...@@ -44,7 +47,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -44,7 +47,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception * @throws Exception
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy,Long sleepTime) throws Exception { public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy) throws Exception {
List<NewsData> list = new ArrayList<NewsData>(); List<NewsData> list = new ArrayList<NewsData>();
int page = 0; int page = 0;
boolean more = true; boolean more = true;
...@@ -63,7 +66,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -63,7 +66,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
if(sleepTime==null){ if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(3000);
} }
} }
...@@ -110,7 +113,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -110,7 +113,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception * @throws Exception
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy,Long sleepTime) throws Exception { public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy) throws Exception {
List<NewsData> list = new ArrayList<NewsData>(); List<NewsData> list = new ArrayList<NewsData>();
int page = 0; int page = 0;
boolean more = true; boolean more = true;
...@@ -129,7 +132,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -129,7 +132,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
if(sleepTime==null){ if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(3000);
} }
} }
......
...@@ -12,6 +12,8 @@ import org.jsoup.nodes.Element; ...@@ -12,6 +12,8 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.TiebaData; import com.zhiwei.media_data_crawler.entity.TiebaData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool; import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK; import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
...@@ -33,7 +35,7 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK { ...@@ -33,7 +35,7 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
* @return List<TiebaData> 返回类型 * @return List<TiebaData> 返回类型
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName,Long sleepTime) throws Exception { public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName) throws Exception {
List<TiebaData> list = new ArrayList<TiebaData>(); List<TiebaData> list = new ArrayList<TiebaData>();
int page = 0; int page = 0;
boolean more = true; boolean more = true;
...@@ -52,7 +54,7 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK { ...@@ -52,7 +54,7 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
if(sleepTime==null){ if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(3000);
} }
} }
......
...@@ -14,6 +14,7 @@ import org.jsoup.select.Elements; ...@@ -14,6 +14,7 @@ import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.DouBanData; import com.zhiwei.media_data_crawler.entity.DouBanData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool; import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK; import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
...@@ -189,7 +190,9 @@ public class DoubanCrawlerParse extends HttpClientTemplateOK { ...@@ -189,7 +190,9 @@ public class DoubanCrawlerParse extends HttpClientTemplateOK {
String content = document.select("div.topic-doc").select("div#link-report").select("div.topic-content").text(); String content = document.select("div.topic-doc").select("div#link-report").select("div.topic-content").text();
douban.setContent(content); douban.setContent(content);
} }
ZhiWeiTools.sleep(1000); if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000);
}
return douban; return douban;
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
...@@ -267,7 +270,9 @@ public class DoubanCrawlerParse extends HttpClientTemplateOK { ...@@ -267,7 +270,9 @@ public class DoubanCrawlerParse extends HttpClientTemplateOK {
douban.setContent(content); douban.setContent(content);
} }
} }
ZhiWeiTools.sleep(1000); if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000);
}
return douban; return douban;
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
......
...@@ -12,6 +12,8 @@ import org.jsoup.nodes.Element; ...@@ -12,6 +12,8 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData; import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool; import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK; import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
...@@ -55,8 +57,10 @@ public class SoNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -55,8 +57,10 @@ public class SoNewsCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(5000);
} }
}
return list; return list;
} }
...@@ -102,8 +106,10 @@ public class SoNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -102,8 +106,10 @@ public class SoNewsCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(5000);
} }
}
return list; return list;
} }
......
...@@ -6,12 +6,15 @@ import java.util.ArrayList; ...@@ -6,12 +6,15 @@ import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData; import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool; import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK; import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
...@@ -38,7 +41,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -38,7 +41,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception * @throws Exception
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<NewsData> getSougouNewsData(String word, Proxy proxy,Long sleepTime) throws Exception{ public static List<NewsData> getSougouNewsData(String word, Proxy proxy) throws Exception{
List<NewsData> list = new ArrayList<NewsData>(); List<NewsData> list = new ArrayList<NewsData>();
int page = 1; int page = 1;
boolean more = true; boolean more = true;
...@@ -57,7 +60,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -57,7 +60,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
if(sleepTime==null){ if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(5000);
} }
} }
...@@ -76,7 +79,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -76,7 +79,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy,Long sleepTime)throws Exception{ public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy)throws Exception{
List<NewsData> list = new ArrayList<NewsData>(); List<NewsData> list = new ArrayList<NewsData>();
int page = 0; int page = 0;
boolean more = true; boolean more = true;
...@@ -95,7 +98,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -95,7 +98,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
if(sleepTime==null){ if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(5000);
} }
} }
......
...@@ -17,6 +17,7 @@ import org.jsoup.select.Elements; ...@@ -17,6 +17,7 @@ import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.ZhiHuData; import com.zhiwei.media_data_crawler.entity.ZhiHuData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool; import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK; import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
...@@ -41,7 +42,7 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK { ...@@ -41,7 +42,7 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK {
* @throws Exception * @throws Exception
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy,Long sleepTime) throws Exception{ public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy) throws Exception{
List<ZhiHuData> list = new ArrayList<ZhiHuData>(); List<ZhiHuData> list = new ArrayList<ZhiHuData>();
int page = 1; int page = 1;
boolean more = true; boolean more = true;
...@@ -60,7 +61,7 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK { ...@@ -60,7 +61,7 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK {
}else{ }else{
more = false; more = false;
} }
if(sleepTime==null){ if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(5000);
} }
page++; page++;
......
...@@ -15,6 +15,7 @@ import org.jsoup.select.Elements; ...@@ -15,6 +15,7 @@ import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.LunTanData; import com.zhiwei.media_data_crawler.entity.LunTanData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool; import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK; import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
...@@ -37,7 +38,7 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK { ...@@ -37,7 +38,7 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK {
* @return List<TiebaData> 返回类型 * @return List<TiebaData> 返回类型
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime,Long sleepTime) throws Exception { public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime) throws Exception {
List<LunTanData> list = new ArrayList<LunTanData>(); List<LunTanData> list = new ArrayList<LunTanData>();
int page = 0; int page = 0;
boolean more = true; boolean more = true;
...@@ -56,7 +57,7 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK { ...@@ -56,7 +57,7 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
if(sleepTime==null){ if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(3000);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment