Commit df8ce8d3 by zhiwei

添加休眠时间自定义配置

parent 1325c572
......@@ -44,7 +44,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception
*/
@SuppressWarnings("unchecked")
public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy) throws Exception {
public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy,Long sleepTime) throws Exception {
List<NewsData> list = new ArrayList<NewsData>();
int page = 0;
boolean more = true;
......@@ -63,8 +63,10 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
if(sleepTime==null){
ZhiWeiTools.sleep(3000);
}
}
return list;
}
......@@ -108,7 +110,7 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception
*/
@SuppressWarnings("unchecked")
public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy) throws Exception {
public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy,Long sleepTime) throws Exception {
List<NewsData> list = new ArrayList<NewsData>();
int page = 0;
boolean more = true;
......@@ -127,8 +129,10 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
if(sleepTime==null){
ZhiWeiTools.sleep(3000);
}
}
return list;
}
......@@ -252,9 +256,10 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
time = soureAndtimes[1];
source = soureAndtimes[0];
} else {
time = element.select("div.c-row").select("p.c-author").text();
time = element.select("div.c-row").select("p.c-author").text().trim();
}
/** 文章发布时间处理 **/
time = time.replaceAll(" ", "");
time = TimeParse.dateFormartString(TimeParse.stringFormartDate(time), "yyyy-MM-dd HH:mm:ss");
// 处理文章简介
if (element.select("div.c-row") != null) {
......
......@@ -33,7 +33,7 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
* @return List<TiebaData> 返回类型
*/
@SuppressWarnings("unchecked")
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName) throws Exception {
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName,Long sleepTime) throws Exception {
List<TiebaData> list = new ArrayList<TiebaData>();
int page = 0;
boolean more = true;
......@@ -52,8 +52,10 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
if(sleepTime==null){
ZhiWeiTools.sleep(3000);
}
}
return list;
}
......
......@@ -38,7 +38,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
* @throws Exception
*/
@SuppressWarnings("unchecked")
public static List<NewsData> getSougouNewsData(String word, Proxy proxy) throws Exception{
public static List<NewsData> getSougouNewsData(String word, Proxy proxy,Long sleepTime) throws Exception{
List<NewsData> list = new ArrayList<NewsData>();
int page = 1;
boolean more = true;
......@@ -56,8 +56,10 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
}else{
more = false;
}
ZhiWeiTools.sleep(5000);
page++;
if(sleepTime==null){
ZhiWeiTools.sleep(5000);
}
}
return list;
}
......@@ -74,7 +76,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
@SuppressWarnings("unchecked")
public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy)throws Exception{
public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy,Long sleepTime)throws Exception{
List<NewsData> list = new ArrayList<NewsData>();
int page = 0;
boolean more = true;
......@@ -93,8 +95,10 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
if(sleepTime==null){
ZhiWeiTools.sleep(5000);
}
}
return list;
}
......
......@@ -41,7 +41,7 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK {
* @throws Exception
*/
@SuppressWarnings("unchecked")
public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy) throws Exception{
public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy,Long sleepTime) throws Exception{
List<ZhiHuData> list = new ArrayList<ZhiHuData>();
int page = 1;
boolean more = true;
......@@ -60,7 +60,9 @@ public class SougouZhihuCrawlerParse extends HttpClientTemplateOK {
}else{
more = false;
}
if(sleepTime==null){
ZhiWeiTools.sleep(5000);
}
page++;
}
return list;
......
......@@ -37,7 +37,7 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK {
* @return List<TiebaData> 返回类型
*/
@SuppressWarnings("unchecked")
public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime) throws Exception {
public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime,Long sleepTime) throws Exception {
List<LunTanData> list = new ArrayList<LunTanData>();
int page = 0;
boolean more = true;
......@@ -56,8 +56,11 @@ public class TianYaCrawlerParse extends HttpClientTemplateOK {
more = false;
}
page++;
if(sleepTime==null){
ZhiWeiTools.sleep(3000);
}
}
return list;
}
......
......@@ -32,9 +32,9 @@ public class DataCrawler {
* @param @return 设定文件
* @return List<NewsData> 返回类型
*/
public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy){
public static List<NewsData> getBaiduNewsData(String word, String startTime, String endTime, Proxy proxy,Long sleepTime){
try {
return BaiduNewsCrawlerParse.getBaiduNewsData(word, startTime, endTime, proxy);
return BaiduNewsCrawlerParse.getBaiduNewsData(word, startTime, endTime, proxy, sleepTime);
} catch (Exception e) {
e.printStackTrace();
return null;
......@@ -53,9 +53,9 @@ public class DataCrawler {
* @param @return 设定文件
* @return List<NewsData> 返回类型
*/
public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy){
public static List<NewsData> getBaiduNewsDataByTitle(String word, String startTime, String endTime, Proxy proxy,Long sleepTime){
try {
return BaiduNewsCrawlerParse.getBaiduNewsDataByTitle(word, startTime, endTime, proxy);
return BaiduNewsCrawlerParse.getBaiduNewsDataByTitle(word, startTime, endTime, proxy,sleepTime);
} catch (Exception e) {
e.printStackTrace();
return null;
......@@ -112,9 +112,9 @@ public class DataCrawler {
* @param @return 设定文件
* @return List<NewsData> 返回类型
*/
public static List<NewsData> getSougouNewsData(String word, Proxy proxy){
public static List<NewsData> getSougouNewsData(String word, Proxy proxy,Long sleepTime){
try {
return SougouNewsCrawlerParse.getSougouNewsData(word, proxy);
return SougouNewsCrawlerParse.getSougouNewsData(word, proxy,sleepTime);
} catch (Exception e) {
e.printStackTrace();
return null;
......@@ -131,9 +131,9 @@ public class DataCrawler {
* @param @return 设定文件
* @return List<NewsData> 返回类型
*/
public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy){
public static List<NewsData> getSougouNewsDataByTitle(String word, Proxy proxy,Long sleepTime){
try {
return SougouNewsCrawlerParse.getSougouNewsDataByTitle(word, proxy);
return SougouNewsCrawlerParse.getSougouNewsDataByTitle(word, proxy, sleepTime);
} catch (Exception e) {
e.printStackTrace();
return null;
......@@ -149,9 +149,9 @@ public class DataCrawler {
* @param @return 设定文件
* @return List<ZhiHuData> 返回类型
*/
public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy){
public static List<ZhiHuData> getSougouZhihuData(String word, Proxy proxy,Long sleepTime){
try {
return SougouZhihuCrawlerParse.getSougouZhihuData(word, proxy);
return SougouZhihuCrawlerParse.getSougouZhihuData(word, proxy, sleepTime);
} catch (Exception e) {
e.printStackTrace();
return null;
......@@ -167,9 +167,9 @@ public class DataCrawler {
* @param @return 设定文件
* @return List<TiebaData> 返回类型
*/
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy){
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy,Long sleepTime){
try {
return BaiduTiebaCrawlerParse.getBaiduTiebaData(word, proxy, null);
return BaiduTiebaCrawlerParse.getBaiduTiebaData(word, proxy, null, sleepTime);
} catch (Exception e) {
e.printStackTrace();
return null;
......@@ -186,9 +186,9 @@ public class DataCrawler {
* @param @return 设定文件
* @return List<TiebaData> 返回类型
*/
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName){
public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName,Long sleepTime){
try {
return BaiduTiebaCrawlerParse.getBaiduTiebaData(word, proxy, tiebaName);
return BaiduTiebaCrawlerParse.getBaiduTiebaData(word, proxy, tiebaName,sleepTime);
} catch (Exception e) {
e.printStackTrace();
return null;
......@@ -205,9 +205,9 @@ public class DataCrawler {
* @param @return 设定文件
* @return List<LunTanData> 返回类型
*/
public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime){
public static List<LunTanData> getLunTanData(String word, Proxy proxy, String endTime,Long sleepTime){
try {
return TianYaCrawlerParse.getLunTanData(word, proxy, endTime);
return TianYaCrawlerParse.getLunTanData(word, proxy, endTime,sleepTime);
} catch (Exception e) {
e.printStackTrace();
return null;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment