Commit df8ce8d3 by zhiwei

添加休眠时间自配

parent 1325c572
...@@ -25,7 +25,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -25,7 +25,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
private static Logger logger = LoggerFactory.getLogger(BaiduNewsCrawlerParse.class); private static Logger logger = LoggerFactory.getLogger(BaiduNewsCrawlerParse.class);
private static final String pt = "百度新闻"; private static final String pt = "百度新闻";
/** /**
* @Title: getBaiduNewsData * @Title: getBaiduNewsData
* @author hero * @author hero
...@@ -44,7 +44,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -44,7 +44,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception * @throws Exception
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy) throws Exception { public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy,Long sleepTime) throws Exception {
List<NewsData> list = new ArrayList<NewsData>(); List<NewsData> list = new ArrayList<NewsData>();
int page = 0; int page = 0;
boolean more = true; boolean more = true;
...@@ -63,7 +63,9 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -63,7 +63,9 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
ZhiWeiTools.sleep(3000); if(sleepTime==null){
ZhiWeiTools.sleep(3000);
}
} }
return list; return list;
} }
...@@ -108,7 +110,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -108,7 +110,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception * @throws Exception
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy) throws Exception { public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy,Long sleepTime) throws Exception {
List<NewsData> list = new ArrayList<NewsData>(); List<NewsData> list = new ArrayList<NewsData>();
int page = 0; int page = 0;
boolean more = true; boolean more = true;
...@@ -127,7 +129,9 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -127,7 +129,9 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
ZhiWeiTools.sleep(3000); if(sleepTime==null){
ZhiWeiTools.sleep(3000);
}
} }
return list; return list;
} }
...@@ -252,9 +256,10 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -252,9 +256,10 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
time = soureAndtimes[1]; time = soureAndtimes[1];
source = soureAndtimes[0]; source = soureAndtimes[0];
} else { } else {
time = element.select("div.c-row").select("p.c-author").text(); time = element.select("div.c-row").select("p.c-author").text().trim();
} }
/** 文章发布时间处理 **/ /** 文章发布时间处理 **/
time = time.replaceAll(" ", "");
time = TimeParse.dateFormartString(TimeParse.stringFormartDate(time), "yyyy-MM-dd HH:mm:ss"); time = TimeParse.dateFormartString(TimeParse.stringFormartDate(time), "yyyy-MM-dd HH:mm:ss");
// 处理文章简介 // 处理文章简介
if (element.select("div.c-row") != null) { if (element.select("div.c-row") != null) {
......
...@@ -33,7 +33,7 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK { ...@@ -33,7 +33,7 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
* @return List<TiebaData> 返回类型 * @return List<TiebaData> 返回类型
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName) throws Exception { public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName,Long sleepTime) throws Exception {
List<TiebaData> list = new ArrayList<TiebaData>(); List<TiebaData> list = new ArrayList<TiebaData>();
int page = 0; int page = 0;
boolean more = true; boolean more = true;
...@@ -52,7 +52,9 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK { ...@@ -52,7 +52,9 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
ZhiWeiTools.sleep(3000); if(sleepTime==null){
ZhiWeiTools.sleep(3000);
}
} }
return list; return list;
} }
......
...@@ -38,7 +38,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -38,7 +38,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception * @throws Exception
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<NewsData> getSougouNewsData(String word, Proxy proxy) throws Exception{ public static List<NewsData> getSougouNewsData(String word, Proxy proxy,Long sleepTime) throws Exception{
List<NewsData> list = new ArrayList<NewsData>(); List<NewsData> list = new ArrayList<NewsData>();
int page = 1; int page = 1;
boolean more = true; boolean more = true;
...@@ -56,8 +56,10 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -56,8 +56,10 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
}else{ }else{
more = false; more = false;
} }
ZhiWeiTools.sleep(5000);
page++; page++;
if(sleepTime==null){
ZhiWeiTools.sleep(5000);
}
} }
return list; return list;
} }
...@@ -74,7 +76,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -74,7 +76,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy)throws Exception{ public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy,Long sleepTime)throws Exception{
List<NewsData> list = new ArrayList<NewsData>(); List<NewsData> list = new ArrayList<NewsData>();
int page = 0; int page = 0;
boolean more = true; boolean more = true;
...@@ -93,7 +95,9 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK { ...@@ -93,7 +95,9 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
ZhiWeiTools.sleep(5000); if(sleepTime==null){
ZhiWeiTools.sleep(5000);
}
} }
return list; return list;
} }
......
...@@ -41,7 +41,7 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK { ...@@ -41,7 +41,7 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK {
* @throws Exception * @throws Exception
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy) throws Exception{ public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy,Long sleepTime) throws Exception{
List<ZhiHuData> list = new ArrayList<ZhiHuData>(); List<ZhiHuData> list = new ArrayList<ZhiHuData>();
int page = 1; int page = 1;
boolean more = true; boolean more = true;
...@@ -60,7 +60,9 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK { ...@@ -60,7 +60,9 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK {
}else{ }else{
more = false; more = false;
} }
ZhiWeiTools.sleep(5000); if(sleepTime==null){
ZhiWeiTools.sleep(5000);
}
page++; page++;
} }
return list; return list;
......
...@@ -37,7 +37,7 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK { ...@@ -37,7 +37,7 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK {
* @return List<TiebaData> 返回类型 * @return List<TiebaData> 返回类型
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime) throws Exception { public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime,Long sleepTime) throws Exception {
List<LunTanData> list = new ArrayList<LunTanData>(); List<LunTanData> list = new ArrayList<LunTanData>();
int page = 0; int page = 0;
boolean more = true; boolean more = true;
...@@ -56,7 +56,10 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK { ...@@ -56,7 +56,10 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK {
more = false; more = false;
} }
page++; page++;
ZhiWeiTools.sleep(3000); if(sleepTime==null){
ZhiWeiTools.sleep(3000);
}
} }
return list; return list;
} }
......
...@@ -32,9 +32,9 @@ public class DataCrawler { ...@@ -32,9 +32,9 @@ public class DataCrawler {
* @param @return 设定文件 * @param @return 设定文件
* @return List<NewsData> 返回类型 * @return List<NewsData> 返回类型
*/ */
public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy){ public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy,Long sleepTime){
try { try {
return BaiduNewsCrawlerParse.getBaiduNewsData(word, startTime, endTime, proxy); return BaiduNewsCrawlerParse.getBaiduNewsData(word, startTime, endTime, proxy, sleepTime);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
return null; return null;
...@@ -53,9 +53,9 @@ public class DataCrawler { ...@@ -53,9 +53,9 @@ public class DataCrawler {
* @param @return 设定文件 * @param @return 设定文件
* @return List<NewsData> 返回类型 * @return List<NewsData> 返回类型
*/ */
public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy){ public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy,Long sleepTime){
try { try {
return BaiduNewsCrawlerParse.getBaiduNewsDataByTitle(word, startTime, endTime, proxy); return BaiduNewsCrawlerParse.getBaiduNewsDataByTitle(word, startTime, endTime, proxy,sleepTime);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
return null; return null;
...@@ -112,9 +112,9 @@ public class DataCrawler { ...@@ -112,9 +112,9 @@ public class DataCrawler {
* @param @return 设定文件 * @param @return 设定文件
* @return List<NewsData> 返回类型 * @return List<NewsData> 返回类型
*/ */
public static List<NewsData> getSougouNewsData(String word, Proxy proxy){ public static List<NewsData> getSougouNewsData(String word, Proxy proxy,Long sleepTime){
try { try {
return SougouNewsCrawlerParse.getSougouNewsData(word, proxy); return SougouNewsCrawlerParse.getSougouNewsData(word, proxy,sleepTime);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
return null; return null;
...@@ -131,9 +131,9 @@ public class DataCrawler { ...@@ -131,9 +131,9 @@ public class DataCrawler {
* @param @return 设定文件 * @param @return 设定文件
* @return List<NewsData> 返回类型 * @return List<NewsData> 返回类型
*/ */
public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy){ public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy,Long sleepTime){
try { try {
return SougouNewsCrawlerParse.getSougouNewsDataByTitle(word, proxy); return SougouNewsCrawlerParse.getSougouNewsDataByTitle(word, proxy, sleepTime);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
return null; return null;
...@@ -149,9 +149,9 @@ public class DataCrawler { ...@@ -149,9 +149,9 @@ public class DataCrawler {
* @param @return 设定文件 * @param @return 设定文件
* @return List<ZhiHuData> 返回类型 * @return List<ZhiHuData> 返回类型
*/ */
public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy){ public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy,Long sleepTime){
try { try {
return SougouZhihuCrawlerParse.getSougouZhihuData(word, proxy); return SougouZhihuCrawlerParse.getSougouZhihuData(word, proxy, sleepTime);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
return null; return null;
...@@ -167,9 +167,9 @@ public class DataCrawler { ...@@ -167,9 +167,9 @@ public class DataCrawler {
* @param @return 设定文件 * @param @return 设定文件
* @return List<TiebaData> 返回类型 * @return List<TiebaData> 返回类型
*/ */
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy){ public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy,Long sleepTime){
try { try {
return BaiduTiebaCrawlerParse.getBaiduTiebaData(word, proxy, null); return BaiduTiebaCrawlerParse.getBaiduTiebaData(word, proxy, null, sleepTime);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
return null; return null;
...@@ -186,9 +186,9 @@ public class DataCrawler { ...@@ -186,9 +186,9 @@ public class DataCrawler {
* @param @return 设定文件 * @param @return 设定文件
* @return List<TiebaData> 返回类型 * @return List<TiebaData> 返回类型
*/ */
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName){ public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName,Long sleepTime){
try { try {
return BaiduTiebaCrawlerParse.getBaiduTiebaData(word, proxy, tiebaName); return BaiduTiebaCrawlerParse.getBaiduTiebaData(word, proxy, tiebaName,sleepTime);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
return null; return null;
...@@ -205,9 +205,9 @@ public class DataCrawler { ...@@ -205,9 +205,9 @@ public class DataCrawler {
* @param @return 设定文件 * @param @return 设定文件
* @return List<LunTanData> 返回类型 * @return List<LunTanData> 返回类型
*/ */
public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime){ public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime,Long sleepTime){
try { try {
return TianYaCrawlerParse.getLunTanData(word, proxy, endTime); return TianYaCrawlerParse.getLunTanData(word, proxy, endTime,sleepTime);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
return null; return null;
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment