Commit 1b78ab01 by zhiwei

修改添加休眠时间方式,修改为在DataCrawler中统一设置

parent df8ce8d3
......@@ -8,12 +8,15 @@ import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
......@@ -44,7 +47,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception
*/
@SuppressWarnings("unchecked")
public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy,Long sleepTime) throws Exception {
public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy) throws Exception {
List<NewsData> list = new ArrayList<NewsData>();
int page = 0;
boolean more = true;
......@@ -63,7 +66,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
if(sleepTime==null){
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(3000);
}
}
......@@ -110,7 +113,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception
*/
@SuppressWarnings("unchecked")
public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy,Long sleepTime) throws Exception {
public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy) throws Exception {
List<NewsData> list = new ArrayList<NewsData>();
int page = 0;
boolean more = true;
......@@ -129,7 +132,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
if(sleepTime==null){
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(3000);
}
}
......
......@@ -12,6 +12,8 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.TiebaData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
......@@ -33,7 +35,7 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
* @return List<TiebaData> 返回类型
*/
@SuppressWarnings("unchecked")
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName,Long sleepTime) throws Exception {
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName) throws Exception {
List<TiebaData> list = new ArrayList<TiebaData>();
int page = 0;
boolean more = true;
......@@ -52,7 +54,7 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
if(sleepTime==null){
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(3000);
}
}
......
......@@ -14,6 +14,7 @@ import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.DouBanData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
......@@ -189,7 +190,9 @@ public class DoubanCrawlerParse extends HttpClientTemplateOK {
String content = document.select("div.topic-doc").select("div#link-report").select("div.topic-content").text();
douban.setContent(content);
}
ZhiWeiTools.sleep(1000);
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000);
}
return douban;
} catch (Exception e) {
e.printStackTrace();
......@@ -267,7 +270,9 @@ public class DoubanCrawlerParse extends HttpClientTemplateOK {
douban.setContent(content);
}
}
ZhiWeiTools.sleep(1000);
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000);
}
return douban;
} catch (Exception e) {
e.printStackTrace();
......
......@@ -12,6 +12,8 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
......@@ -55,7 +57,9 @@ public class SoNewsCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
ZhiWeiTools.sleep(5000);
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000);
}
}
return list;
}
......@@ -102,7 +106,9 @@ public class SoNewsCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
ZhiWeiTools.sleep(5000);
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000);
}
}
return list;
}
......
......@@ -6,12 +6,15 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
......@@ -38,7 +41,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception
*/
@SuppressWarnings("unchecked")
public static List<NewsData> getSougouNewsData(String word, Proxy proxy,Long sleepTime) throws Exception{
public static List<NewsData> getSougouNewsData(String word, Proxy proxy) throws Exception{
List<NewsData> list = new ArrayList<NewsData>();
int page = 1;
boolean more = true;
......@@ -57,7 +60,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
if(sleepTime==null){
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000);
}
}
......@@ -76,7 +79,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
@SuppressWarnings("unchecked")
public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy,Long sleepTime)throws Exception{
public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy)throws Exception{
List<NewsData> list = new ArrayList<NewsData>();
int page = 0;
boolean more = true;
......@@ -95,7 +98,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
if(sleepTime==null){
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000);
}
}
......
......@@ -17,6 +17,7 @@ import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.ZhiHuData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
......@@ -41,7 +42,7 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK {
* @throws Exception
*/
@SuppressWarnings("unchecked")
public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy,Long sleepTime) throws Exception{
public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy) throws Exception{
List<ZhiHuData> list = new ArrayList<ZhiHuData>();
int page = 1;
boolean more = true;
......@@ -60,7 +61,7 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK {
}else{
more = false;
}
if(sleepTime==null){
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(5000);
}
page++;
......
......@@ -15,6 +15,7 @@ import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.LunTanData;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
......@@ -37,7 +38,7 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK {
* @return List<TiebaData> 返回类型
*/
@SuppressWarnings("unchecked")
public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime,Long sleepTime) throws Exception {
public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime) throws Exception {
List<LunTanData> list = new ArrayList<LunTanData>();
int page = 0;
boolean more = true;
......@@ -56,7 +57,7 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
if(sleepTime==null){
if(DataCrawler.sleepTime==null){
ZhiWeiTools.sleep(3000);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment