Commit fffe3c2d by zhiwei

修改部分解析代码及错误日志

parent c1c96542
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>weibohotcrawler</artifactId> <artifactId>weibohotcrawler</artifactId>
<version>0.0.4-SNAPSHOT</version> <version>0.0.7-SNAPSHOT</version>
<name>weibohotcrawler</name> <name>weibohotcrawler</name>
<description>微博热搜1小时榜单,社会、热点采集程序</description> <description>微博热搜1小时榜单,社会、热点采集程序</description>
...@@ -18,23 +18,27 @@ ...@@ -18,23 +18,27 @@
<dependency> <dependency>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>weibobusiness</artifactId> <artifactId>weibobusiness</artifactId>
<version>0.0.5-SNAPSHOT</version> <version>0.0.6-SNAPSHOT</version>
<scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.zhiwei.middleware</groupId> <groupId>com.zhiwei.middleware</groupId>
<artifactId>cleaner-unified-urlfilterNew</artifactId> <artifactId>cleaner-unified-urlfilterNew</artifactId>
<version>0.0.5-SNAPSHOT</version> <version>0.0.6-SNAPSHOT</version>
<scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.zhiwei.tools</groupId> <groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId> <artifactId>zhiwei-tools</artifactId>
<version>0.1.4-SNAPSHOT</version> <version>0.1.4-SNAPSHOT</version>
<scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.zhiwei.crawler</groupId> <groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId> <artifactId>crawler-core</artifactId>
<version>0.5.2-SNAPSHOT</version> <version>0.5.2-RELEASE</version>
<scope>provided</scope>
</dependency> </dependency>
</dependencies> </dependencies>
<!-- 打包管理 --> <!-- 打包管理 -->
......
...@@ -22,8 +22,4 @@ public class DataCrawlerStart{ ...@@ -22,8 +22,4 @@ public class DataCrawlerStart{
scheduled.scheduleWithFixedDelay(new WeiboSocietyData(), 3000, 3*60*1000, TimeUnit.MILLISECONDS); scheduled.scheduleWithFixedDelay(new WeiboSocietyData(), 3000, 3*60*1000, TimeUnit.MILLISECONDS);
} }
} }
...@@ -29,9 +29,7 @@ public class WeiboBangdanData implements Runnable{ ...@@ -29,9 +29,7 @@ public class WeiboBangdanData implements Runnable{
ZhiWeiTools.sleep(12000); ZhiWeiTools.sleep(12000);
} catch (Exception e) { } catch (Exception e) {
logger.error("出错====榜单的出错了",e); logger.error("出错====榜单的出错了",e);
e.printStackTrace();
ZhiWeiTools.sleep(20); ZhiWeiTools.sleep(20);
continue;
} }
} }
logger.info("微博榜单数据采集结束========================="); logger.info("微博榜单数据采集结束=========================");
......
...@@ -57,9 +57,7 @@ public class WeiboCrawlerAnalysis { ...@@ -57,9 +57,7 @@ public class WeiboCrawlerAnalysis {
i = 0; i = 0;
mids = ""; mids = "";
} catch (WeiboException e) { } catch (WeiboException e) {
logger.error("数据更新出错部分mids=========" + mids); logger.error("数据更新出错部分mids={},数据出错=={}",mids, e.getMessage());
e.printStackTrace();
continue;
} }
} }
} }
...@@ -68,9 +66,7 @@ public class WeiboCrawlerAnalysis { ...@@ -68,9 +66,7 @@ public class WeiboCrawlerAnalysis {
StatusWapper statusWapper = searchBusiness.showStatusBusniess(mids); StatusWapper statusWapper = searchBusiness.showStatusBusniess(mids);
statuses.addAll(statusWapper.getStatuses()); statuses.addAll(statusWapper.getStatuses());
} catch (WeiboException e) { } catch (WeiboException e) {
logger.error("数据更新出错部分mids=========" + mids); logger.error("数据更新出错部分mids={},数据出错=={}",mids, e.getMessage());
logger.error("数据出错", e.getMessage());
e.printStackTrace();
return null; return null;
} }
return statuses; return statuses;
...@@ -86,7 +82,7 @@ public class WeiboCrawlerAnalysis { ...@@ -86,7 +82,7 @@ public class WeiboCrawlerAnalysis {
String result = HttpClientDemo.executeHttpRequestGet(url); String result = HttpClientDemo.executeHttpRequestGet(url);
getWeiboData(result); getWeiboData(result);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); logger.error("数据出错=={}", e.getMessage());
} }
} }
...@@ -104,6 +100,7 @@ public class WeiboCrawlerAnalysis { ...@@ -104,6 +100,7 @@ public class WeiboCrawlerAnalysis {
Elements elements = document.select("div.UG_contents").select("ul.clearfix") Elements elements = document.select("div.UG_contents").select("ul.clearfix")
.select("div[action-type=feed_list_item]"); .select("div[action-type=feed_list_item]");
List<String> midsList = new ArrayList<String>(); List<String> midsList = new ArrayList<String>();
System.out.println("element size is " + elements.size());
for (Element element : elements) { for (Element element : elements) {
try { try {
String mid = element.attr("mid"); String mid = element.attr("mid");
...@@ -113,24 +110,27 @@ public class WeiboCrawlerAnalysis { ...@@ -113,24 +110,27 @@ public class WeiboCrawlerAnalysis {
if(!midsList.contains(mid)){ if(!midsList.contains(mid)){
midsList.add(mid); midsList.add(mid);
} }
System.out.println("midList size " + midsList.size());
} catch (Exception e) { } catch (Exception e) {
logger.error("数据解析出错", e.getMessage()); logger.error("数据解析出错", e.getMessage());
ZhiWeiTools.sleep(200); ZhiWeiTools.sleep(200);
e.printStackTrace();
continue;
} }
} }
if(!midsList.isEmpty()){ if(!midsList.isEmpty()){
for(String mid : midsList){ for(String mid : midsList){
if (!RsidClientDAO.isWeiboExit(mid)) { boolean f = RsidClientDAO.isWeiboExit(mid);
System.out.println(mid+"==========="+f);
if (!f) {
DataQueue.offer(mid); DataQueue.offer(mid);
} }
} }
}else {
System.out.println("++++++++++++++++");
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("数据解析出错", e.getMessage()); logger.error("数据解析出错", e.getMessage());
ZhiWeiTools.sleep(200); ZhiWeiTools.sleep(200);
e.printStackTrace();
} }
} }
......
...@@ -28,9 +28,7 @@ public class WeiboHotData implements Runnable{ ...@@ -28,9 +28,7 @@ public class WeiboHotData implements Runnable{
ZhiWeiTools.sleep(11000); ZhiWeiTools.sleep(11000);
} catch (Exception e) { } catch (Exception e) {
logger.error("======= 微博热门的出错了",e.getMessage()); logger.error("======= 微博热门的出错了",e.getMessage());
e.printStackTrace();
ZhiWeiTools.sleep(20); ZhiWeiTools.sleep(20);
continue;
} }
} }
logger.info("微博热门数据采集完成======================"); logger.info("微博热门数据采集完成======================");
......
...@@ -30,9 +30,7 @@ public class WeiboSocietyData implements Runnable{ ...@@ -30,9 +30,7 @@ public class WeiboSocietyData implements Runnable{
ZhiWeiTools.sleep(11000); ZhiWeiTools.sleep(11000);
} catch (Exception e) { } catch (Exception e) {
logger.error("====== 微博社会的出错了",e.getMessage()); logger.error("====== 微博社会的出错了",e.getMessage());
e.printStackTrace();
ZhiWeiTools.sleep(20); ZhiWeiTools.sleep(20);
continue;
} }
} }
logger.info("微博社会数据采集完成======================"); logger.info("微博社会数据采集完成======================");
......
package com.zhiwei.weibocrawler.httpclient; package com.zhiwei.weibocrawler.httpclient;
import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
...@@ -14,11 +15,8 @@ public class HttpClientDemo { ...@@ -14,11 +15,8 @@ public class HttpClientDemo {
private static Logger logger = LoggerFactory.getLogger(HttpClientDemo.class); private static Logger logger = LoggerFactory.getLogger(HttpClientDemo.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
public static String executeHttpRequestGet(String url) { public static String executeHttpRequestGet(String url) {
String result = null; Map<String, String> headerMap = new HashMap<>();
Map<String, String> headerMap = new HashMap<String, String>();
headerMap.put("User-Agent", headerMap.put("User-Agent",
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"); "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");
headerMap.put("Accept","*/*"); headerMap.put("Accept","*/*");
...@@ -28,12 +26,12 @@ public class HttpClientDemo { ...@@ -28,12 +26,12 @@ public class HttpClientDemo {
headerMap.put("Host", "weibo.com"); headerMap.put("Host", "weibo.com");
try { try {
return httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap)).body().string(); return httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap)).body().string();
} catch (Exception e) { } catch (IOException e) {
logger.info("httpClient 获取数据出现问题:{}", e.getMessage());
}catch (Exception e) {
logger.info("httpClient 获取数据出现问题:{}", e.getMessage()); logger.info("httpClient 获取数据出现问题:{}", e.getMessage());
e.printStackTrace();
} }
return result; return null;
} }
} }
...@@ -4,7 +4,6 @@ import java.util.ArrayList; ...@@ -4,7 +4,6 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import com.zhiwei.weibocrawler.queue.ListQueue; import com.zhiwei.weibocrawler.queue.ListQueue;
import com.zhiwei.weibocrawler.rsidClient.RsidClientDAO;
/** /**
* @ClassName TreatDataCrawler * @ClassName TreatDataCrawler
* @Description 处理采集回来的数据,并验证是否重复 * @Description 处理采集回来的数据,并验证是否重复
...@@ -26,14 +25,12 @@ public class DataQueue { ...@@ -26,14 +25,12 @@ public class DataQueue {
* @param mids * @param mids
*/ */
public static void add(List<String> mids){ public static void add(List<String> mids){
if(mids!=null && mids.size()>0){ if(mids!=null && !mids.isEmpty()){
for(String mid : mids){ for(String mid : mids){
if(RsidClientDAO.isWeiboExit(mid)){
linkQueue.offer(mid); linkQueue.offer(mid);
} }
} }
} }
}
/** /**
......
...@@ -12,7 +12,7 @@ public class HotWeiboTest { ...@@ -12,7 +12,7 @@ public class HotWeiboTest {
//开启采集 //开启采集
String token = "2.00HUuC3C3_jZ8E36c5026e390AzIOP"; String token = "2.00HUuC3C3_jZ8E36c5026e390AzIOP";
GetData.start(token); GetData.start(token);
// //获取数据 //获取数据
while(true){ while(true){
Map<String,Object> data = GetData.getWeiboData(50); Map<String,Object> data = GetData.getWeiboData(50);
System.out.println(data); System.out.println(data);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment