Commit cd252c2d by yangchen

时间一天内的判断修改

parent fdbe7b0b
......@@ -13,7 +13,7 @@ public class DataCrawlerStart{
private ScheduledExecutorService scheduled;
/**
 * Initializes the {@code scheduled} executor field with a scheduled thread pool.
 *
 * NOTE(review): two pool sizes are assigned back to back, so as written the 4-thread
 * executor is replaced immediately by the 3-thread one. This looks like the removed and
 * added sides of a diff shown together - confirm which line is the current one.
 */
public DataCrawlerStart() {
this.scheduled = Executors.newScheduledThreadPool(4);
this.scheduled = Executors.newScheduledThreadPool(3);
}
public void start() {
......
package com.zhiwei.weibocrawler.crawler;
import java.util.Date;
import java.util.List;
import org.slf4j.Logger;
......@@ -9,6 +10,7 @@ import com.zhiwei.weibobusiness.weibo4j.model.Status;
import com.zhiwei.weibocrawler.crawler.getdata.WeiboCrawlerAnalysis;
import com.zhiwei.weibocrawler.rsidClient.DataQueue;
import com.zhiwei.weibocrawler.rsidClient.UpdateQueue;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
/***
*
......@@ -39,13 +41,18 @@ public class DataUpdate implements Runnable{
while(true) {
try {
ZhiWeiTools.sleep(1000);
if(i > 60 || DataQueue.linkQueue.size() >= 50) {
if(i > 600 || DataQueue.linkQueue.size() >= 50) {
logger.info("此次更新更新队列中数据量" + DataQueue.linkQueue.size());
List<String> midList = DataQueue.get(48);
if(midList != null && midList.size() > 0) {
List<Status> list = WeiboCrawlerAnalysis.getWeiboData(midList, token);
logger.info("更新数据量" + list.size());
UpdateQueue.add(list);
for(Status status : list) {
Date date = new Date(new Date().getTime()-24*60*60*1000);
if(status.getCreatedAt().after(date)){
UpdateQueue.add(status);
}
}
}
i = 1;
}else {
......
package com.zhiwei.weibocrawler.crawler.getdata;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
......@@ -16,9 +19,9 @@ import com.zhiwei.weibobusiness.weibo4j.model.Status;
import com.zhiwei.weibobusiness.weibo4j.model.StatusWapper;
import com.zhiwei.weibobusiness.weibo4j.model.WeiboException;
import com.zhiwei.weibocrawler.httpclient.HttpClientDemo;
import com.zhiwei.weibocrawler.rsidClient.DataQueue;
import com.zhiwei.weibocrawler.rsidClient.RsidClientDAO;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.weibocrawler.rsidClient.DataQueue;
/**
*
* @ClassName WeiboCrawlerAnalysis
......@@ -52,7 +55,6 @@ public class WeiboCrawlerAnalysis {
if(i > 48) {
try {
mids = mids.substring(0,mids.length()-1);
System.out.println(mids);
StatusWapper statusWapper = searchBusiness.showStatusBusniess(mids);
statuses.addAll(statusWapper.getStatuses());
i = 0;
......@@ -127,5 +129,4 @@ public class WeiboCrawlerAnalysis {
}
}
}
......@@ -10,49 +10,75 @@ import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
public class HttpClientDemo {
private static Logger logger = LoggerFactory.getLogger(HttpClientDemo.class);
// public static String executeHttpRequestGet(String url) throws IOException {
// String result = null;
// Map<String, String> headerMap = new HashMap<String, String>();
// headerMap.put("User-Agent",
// "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");
// headerMap.put("Accept","*/*");
// headerMap.put("Accept-Encoding", "gzip, deflate, br");
// headerMap.put("Accept-Language", "zh-CN,zh;q=0.9");
// headerMap.put("Connection", "keep-alive");
// headerMap.put("Content-Type", "application/x-www-form-urlencoded");
// headerMap.put("Host", "weibo.com");
// CloseableHttpClient httpClient = null;
// for(int j = 1;j <= 3;j++) {
// try {
// HttpGet httpGet = new HttpGet(url);
// RequestConfig requestConfig = RequestConfig.custom()
// .setSocketTimeout(8000).setConnectTimeout(8000).build();
// httpClient = HttpClients.custom()
// .setDefaultRequestConfig(requestConfig).build();
// if (headerMap != null) {
// for (Entry<String, String> header : headerMap.entrySet()) {
// httpGet.setHeader(header.getKey(), header.getValue());
// }
// }
// result = EntityUtils
// .toString(httpClient.execute(httpGet).getEntity());
// return result;
// }catch (Exception e) {
// e.printStackTrace();
// continue;
// }finally {
// if (httpClient != null) {
// httpClient.close();
// }
// }
// }
// return result;
//
// }
/**
 * Sends an HTTP GET to {@code url} with a fixed set of browser-style request headers and
 * returns the response body as text.
 *
 * <p>NOTE(review): the body below shows two different fetch strategies back to back (a
 * three-attempt Apache HttpClient loop, then a single HttpClientTemplateOK call) and its
 * braces do not balance as displayed. It reads like the removed and added sides of a diff
 * rendered together - confirm which lines are live before changing anything.
 *
 * @param url address to request
 * @return the response body; {@code null} if nothing was fetched
 * @throws IOException declared by the signature; in the visible code only
 *     {@code httpClient.close()} sits outside a catch-all handler
 */
public static String executeHttpRequestGet(String url) throws IOException {
String result = null;
// Request headers applied to every call; the Host header is pinned to weibo.com.
Map<String, String> headerMap = new HashMap<String, String>();
headerMap.put("User-Agent",
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");
headerMap.put("Accept","*/*");
headerMap.put("Accept-Encoding", "gzip, deflate, br");
headerMap.put("Accept-Language", "zh-CN,zh;q=0.9");
headerMap.put("Connection", "keep-alive");
headerMap.put("Content-Type", "application/x-www-form-urlencoded");
headerMap.put("Host", "weibo.com");
CloseableHttpClient httpClient = null;
// Up to three attempts; a new client is built on each pass.
for(int j = 1;j <= 3;j++) {
try {
HttpGet httpGet = new HttpGet(url);
// Socket and connect timeouts are both set to 8000 (milliseconds per the HttpClient API).
RequestConfig requestConfig = RequestConfig.custom()
.setSocketTimeout(8000).setConnectTimeout(8000).build();
httpClient = HttpClients.custom()
.setDefaultRequestConfig(requestConfig).build();
// headerMap is assigned above and never reassigned, so this null check always passes.
if (headerMap != null) {
for (Entry<String, String> header : headerMap.entrySet()) {
httpGet.setHeader(header.getKey(), header.getValue());
}
}
result = EntityUtils
.toString(httpClient.execute(httpGet).getEntity());
// First successful read ends the method.
return result;
}catch (Exception e) {
// Any failure is printed and the next attempt starts.
e.printStackTrace();
continue;
}finally {
// Close the client created for this attempt, whether it succeeded or not.
if (httpClient != null) {
httpClient.close();
}
}
try {
// Delegates the GET to the shared template helper, passing null as the second argument.
result = HttpClientTemplateOK.get(url, null, headerMap);
} catch (Exception e) {
// Only the exception message is logged; the stack trace is not passed to the logger.
logger.error("httpClient 获取数据出现问题:{}", e.getMessage());
}
return result;
}
}
......@@ -17,7 +17,6 @@ public class DataQueue {
public static ListQueue<String> linkQueue = new ListQueue<String>(); // queue of de-duplicated data
/**
 * Prints the current size of {@code linkQueue} to standard output, then offers
 * {@code mid} to it.
 *
 * NOTE(review): the println may be a line this commit removes (the page shows both sides
 * of the diff without markers) - confirm before relying on the console output.
 *
 * @param mid value to enqueue
 */
public static void offer(String mid) {
// The size is reported before the new element is added.
System.out.println("更新队列中的数据大小===="+linkQueue.size());
linkQueue.offer(mid);
}
......
......@@ -23,7 +23,6 @@ public class UpdateQueue {
* @param status the status to add to the queue
*/
public static void add(Status status){
// Print the size of linkQueue as it is before this status is added.
// NOTE(review): this println may be a line the commit removes - confirm.
System.out.println("更新后队列中的数据大小===="+linkQueue.size());
// Offer the status to linkQueue.
linkQueue.offer(status);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment