Commit fffe3c2d by zhiwei

修改部分解析代码及错误日志

parent c1c96542
......@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>weibohotcrawler</artifactId>
<version>0.0.4-SNAPSHOT</version>
<version>0.0.7-SNAPSHOT</version>
<name>weibohotcrawler</name>
<description>微博热搜1小时榜单,社会、热点采集程序</description>
......@@ -18,23 +18,27 @@
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>weibobusiness</artifactId>
<version>0.0.5-SNAPSHOT</version>
<version>0.0.6-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.zhiwei.middleware</groupId>
<artifactId>cleaner-unified-urlfilterNew</artifactId>
<version>0.0.5-SNAPSHOT</version>
<version>0.0.6-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId>
<version>0.1.4-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.5.2-SNAPSHOT</version>
<version>0.5.2-RELEASE</version>
<scope>provided</scope>
</dependency>
</dependencies>
<!-- 打包管理 -->
......
......@@ -22,8 +22,4 @@ public class DataCrawlerStart{
scheduled.scheduleWithFixedDelay(new WeiboSocietyData(), 3000, 3*60*1000, TimeUnit.MILLISECONDS);
}
}
......@@ -29,9 +29,7 @@ public class WeiboBangdanData implements Runnable{
ZhiWeiTools.sleep(12000);
} catch (Exception e) {
logger.error("出错====榜单的出错了",e);
e.printStackTrace();
ZhiWeiTools.sleep(20);
continue;
}
}
logger.info("微博榜单数据采集结束=========================");
......
......@@ -57,9 +57,7 @@ public class WeiboCrawlerAnalysis {
i = 0;
mids = "";
} catch (WeiboException e) {
logger.error("数据更新出错部分mids=========" + mids);
e.printStackTrace();
continue;
logger.error("数据更新出错部分mids={},数据出错=={}",mids, e.getMessage());
}
}
}
......@@ -68,9 +66,7 @@ public class WeiboCrawlerAnalysis {
StatusWapper statusWapper = searchBusiness.showStatusBusniess(mids);
statuses.addAll(statusWapper.getStatuses());
} catch (WeiboException e) {
logger.error("数据更新出错部分mids=========" + mids);
logger.error("数据出错", e.getMessage());
e.printStackTrace();
logger.error("数据更新出错部分mids={},数据出错=={}",mids, e.getMessage());
return null;
}
return statuses;
......@@ -86,7 +82,7 @@ public class WeiboCrawlerAnalysis {
String result = HttpClientDemo.executeHttpRequestGet(url);
getWeiboData(result);
} catch (Exception e) {
e.printStackTrace();
logger.error("数据出错=={}", e.getMessage());
}
}
......@@ -104,6 +100,7 @@ public class WeiboCrawlerAnalysis {
Elements elements = document.select("div.UG_contents").select("ul.clearfix")
.select("div[action-type=feed_list_item]");
List<String> midsList = new ArrayList<String>();
System.out.println("element size is " + elements.size());
for (Element element : elements) {
try {
String mid = element.attr("mid");
......@@ -113,24 +110,27 @@ public class WeiboCrawlerAnalysis {
if(!midsList.contains(mid)){
midsList.add(mid);
}
System.out.println("midList size " + midsList.size());
} catch (Exception e) {
logger.error("数据解析出错", e.getMessage());
ZhiWeiTools.sleep(200);
e.printStackTrace();
continue;
}
}
if(!midsList.isEmpty()){
for(String mid : midsList){
if (!RsidClientDAO.isWeiboExit(mid)) {
boolean f = RsidClientDAO.isWeiboExit(mid);
System.out.println(mid+"==========="+f);
if (!f) {
DataQueue.offer(mid);
}
}
}else {
System.out.println("++++++++++++++++");
}
} catch (Exception e) {
logger.error("数据解析出错", e.getMessage());
ZhiWeiTools.sleep(200);
e.printStackTrace();
}
}
......
......@@ -28,9 +28,7 @@ public class WeiboHotData implements Runnable{
ZhiWeiTools.sleep(11000);
} catch (Exception e) {
logger.error("======= 微博热门的出错了",e.getMessage());
e.printStackTrace();
ZhiWeiTools.sleep(20);
continue;
}
}
logger.info("微博热门数据采集完成======================");
......
......@@ -30,9 +30,7 @@ public class WeiboSocietyData implements Runnable{
ZhiWeiTools.sleep(11000);
} catch (Exception e) {
logger.error("====== 微博社会的出错了",e.getMessage());
e.printStackTrace();
ZhiWeiTools.sleep(20);
continue;
}
}
logger.info("微博社会数据采集完成======================");
......
package com.zhiwei.weibocrawler.httpclient;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
......@@ -14,11 +15,8 @@ public class HttpClientDemo {
private static Logger logger = LoggerFactory.getLogger(HttpClientDemo.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
public static String executeHttpRequestGet(String url) {
String result = null;
Map<String, String> headerMap = new HashMap<String, String>();
Map<String, String> headerMap = new HashMap<>();
headerMap.put("User-Agent",
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");
headerMap.put("Accept","*/*");
......@@ -28,12 +26,12 @@ public class HttpClientDemo {
headerMap.put("Host", "weibo.com");
try {
return httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap)).body().string();
} catch (Exception e) {
} catch (IOException e) {
logger.info("httpClient 获取数据出现问题:{}", e.getMessage());
}catch (Exception e) {
logger.info("httpClient 获取数据出现问题:{}", e.getMessage());
e.printStackTrace();
}
return result;
return null;
}
}
......@@ -4,7 +4,6 @@ import java.util.ArrayList;
import java.util.List;
import com.zhiwei.weibocrawler.queue.ListQueue;
import com.zhiwei.weibocrawler.rsidClient.RsidClientDAO;
/**
* @ClassName TreatDataCrawler
* @Description 处理采集回来的数据,并验证是否重复
......@@ -26,14 +25,12 @@ public class DataQueue {
* @param mids
*/
public static void add(List<String> mids){
if(mids!=null && mids.size()>0){
if(mids!=null && !mids.isEmpty()){
for(String mid : mids){
if(RsidClientDAO.isWeiboExit(mid)){
linkQueue.offer(mid);
}
}
}
}
/**
......
......@@ -12,7 +12,7 @@ public class HotWeiboTest {
//开启采集
String token = "2.00HUuC3C3_jZ8E36c5026e390AzIOP";
GetData.start(token);
// //获取数据
//获取数据
while(true){
Map<String,Object> data = GetData.getWeiboData(50);
System.out.println(data);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment