Commit c799bd00 by yangchen

调整 采集问题(未测试)

parent 16314308
...@@ -32,9 +32,14 @@ public class ContentCrawler { ...@@ -32,9 +32,14 @@ public class ContentCrawler {
*/ */
public MultiThreadingCounter submitTask(ContentDataCallback callback, public MultiThreadingCounter submitTask(ContentDataCallback callback,
String... urls) { String... urls) {
try {
MultiThreadingCounter counter = new MultiThreadingCounter(15,TimeUnit.MINUTES,false); MultiThreadingCounter counter = new MultiThreadingCounter(15,TimeUnit.MINUTES,false);
start(counter, callback, urls); start(counter, callback, urls);
return counter; return counter;
} catch (Exception e) {
logger.error(" exception {}",e);
return null;
}
} }
/** /**
......
...@@ -44,10 +44,15 @@ public class MediaSelfSourceCrawler { ...@@ -44,10 +44,15 @@ public class MediaSelfSourceCrawler {
* @return * @return
* @throws Exception * @throws Exception
*/ */
public MultiThreadingCounter submitTask(MediaSelfSourceDataCallBack callback,String... urls) throws Exception { public MultiThreadingCounter submitTask(MediaSelfSourceDataCallBack callback,String... urls) {
try {
MultiThreadingCounter counter = new MultiThreadingCounter("任务======= ", 15,TimeUnit.MINUTES,true); MultiThreadingCounter counter = new MultiThreadingCounter("任务======= ", 15,TimeUnit.MINUTES,true);
start(counter, callback, urls); start(counter, callback, urls);
return counter; return counter;
} catch (Exception e) {
logger.error(" exception {}",e);
return null;
}
} }
/** /**
...@@ -60,16 +65,15 @@ public class MediaSelfSourceCrawler { ...@@ -60,16 +65,15 @@ public class MediaSelfSourceCrawler {
private void start(MultiThreadingCounter counter,MediaSelfSourceDataCallBack callback, String... urls) { private void start(MultiThreadingCounter counter,MediaSelfSourceDataCallBack callback, String... urls) {
if (urls != null && urls.length > 0) { if (urls != null && urls.length > 0) {
for (String url : urls) { for (String url : urls) {
counter.increase();
if (url != null) { if (url != null) {
try { try {
counter.increase();
search(counter, url, Attribution.of(url), callback); search(counter, url, Attribution.of(url), callback);
} catch (Exception e) { } catch (Exception e) {
logger.error("搜索创建出错", e); logger.error("搜索创建出错", e);
} finally {
counter.reduce();
} }
} }
counter.reduce();
} }
} }
} }
......
...@@ -32,25 +32,29 @@ public class SourceForwardCrawler { ...@@ -32,25 +32,29 @@ public class SourceForwardCrawler {
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot();
private static List<String> sourceList = SourceData.getSourceList(); private static List<String> sourceList = SourceData.getSourceList();
public MultiThreadingCounter submitTask(SourceForwardDataCallBack callback,String... urls) throws Exception { public MultiThreadingCounter submitTask(SourceForwardDataCallBack callback,String... urls) {
try {
MultiThreadingCounter counter = new MultiThreadingCounter(15,TimeUnit.MINUTES,false); MultiThreadingCounter counter = new MultiThreadingCounter(15,TimeUnit.MINUTES,false);
start(counter, callback, urls); start(counter, callback, urls);
return counter; return counter;
} catch (Exception e) {
logger.error(" exception ", e);
return null;
}
} }
private void start(MultiThreadingCounter counter,SourceForwardDataCallBack callback, String... urls) { private void start(MultiThreadingCounter counter,SourceForwardDataCallBack callback, String... urls) {
if (urls != null && urls.length > 0) { if (urls != null && urls.length > 0) {
for (String url : urls) { for (String url : urls) {
counter.increase();
if (url != null) { if (url != null) {
try { try {
counter.increase();
search(counter, url, Attribution.of(url), callback); search(counter, url, Attribution.of(url), callback);
} catch (Exception e) { } catch (Exception e) {
logger.error("搜索创建出错", e); logger.error("搜索创建出错", e);
} finally {
counter.reduce();
} }
} }
counter.reduce();
} }
} }
} }
......
package com.zhiwei.source_forward.crawler; package com.zhiwei.source_forward.crawler;
import static java.util.Objects.nonNull;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -37,22 +39,29 @@ public class UrlLiveCrawler { ...@@ -37,22 +39,29 @@ public class UrlLiveCrawler {
private static final Logger logger = LogManager.getLogger(UrlLiveCrawler.class); private static final Logger logger = LogManager.getLogger(UrlLiveCrawler.class);
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot();
public MultiThreadingCounter submitTask(UrlLiveDataCallback callback,String... urls) throws Exception { public MultiThreadingCounter submitTask(UrlLiveDataCallback callback,String... urls) {
try {
MultiThreadingCounter counter = new MultiThreadingCounter(10,TimeUnit.MINUTES,false); MultiThreadingCounter counter = new MultiThreadingCounter(10,TimeUnit.MINUTES,false);
start(counter, callback, urls); start(counter, callback, urls);
return counter; return counter;
} catch (Exception e) {
logger.error(" 判断链接是否删除 {} ",e);
return null;
}
} }
private void start(MultiThreadingCounter counter,UrlLiveDataCallback callback, String... urls) { private void start(MultiThreadingCounter counter,UrlLiveDataCallback callback, String... urls) {
if (urls != null && urls.length > 0) { if (nonNull(urls) && urls.length > 0) {
for (String url : urls) { for (String url : urls) {
if (url != null) { counter.increase();
if (nonNull(url)) {
try { try {
search(counter, url, Attribution.of(url,1), callback); search(counter, url, Attribution.of(url,1), callback);
} catch (Exception e) { } catch (Exception e) {
logger.error("搜索创建出错:", e); logger.error("搜索创建出错:", e);
} }
} }
counter.reduce();
} }
} }
} }
...@@ -83,7 +92,7 @@ public class UrlLiveCrawler { ...@@ -83,7 +92,7 @@ public class UrlLiveCrawler {
} }
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("解析出错", e); logger.error("解析出错 {}", e);
}finally { }finally {
if(response != null) { if(response != null) {
response.close(); response.close();
...@@ -103,7 +112,7 @@ public class UrlLiveCrawler { ...@@ -103,7 +112,7 @@ public class UrlLiveCrawler {
} }
} }
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); logger.error(" 数据是否删除 采集出错 {} ",e);
}finally { }finally {
counter.reduce(); counter.reduce();
} }
......
...@@ -47,7 +47,7 @@ public class MediaSelfSource { ...@@ -47,7 +47,7 @@ public class MediaSelfSource {
}; };
crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await(); crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await();
}catch (Exception e){ }catch (Exception e){
e.printStackTrace(); logger.error(" 网媒自媒体号 判断 {} ",e);
} }
return list; return list;
} }
......
...@@ -62,7 +62,6 @@ public class SourceForward { ...@@ -62,7 +62,6 @@ public class SourceForward {
dataMap.put(url, data); dataMap.put(url, data);
} }
} }
System.out.println("success");
return dataMap; return dataMap;
} }
...@@ -104,7 +103,7 @@ public class SourceForward { ...@@ -104,7 +103,7 @@ public class SourceForward {
}; };
crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await(); crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await();
}catch (Exception e){ }catch (Exception e){
e.printStackTrace(); logger.error(" 来源判断 出错 {} ",e);
} }
return list; return list;
} }
......
...@@ -6,6 +6,9 @@ import java.util.List; ...@@ -6,6 +6,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.zhiwei.source_forward.bean.UrlLiveBean; import com.zhiwei.source_forward.bean.UrlLiveBean;
import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution; import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution;
import com.zhiwei.source_forward.crawler.UrlLiveCrawler; import com.zhiwei.source_forward.crawler.UrlLiveCrawler;
...@@ -19,6 +22,8 @@ import com.zhiwei.source_forward.util.UrlLiveDataCallback; ...@@ -19,6 +22,8 @@ import com.zhiwei.source_forward.util.UrlLiveDataCallback;
*/ */
public class URLLive { public class URLLive {
private static Logger logger = LogManager.getLogger(URLLive.class);
/** /**
* @Title: verificationURLLive * @Title: verificationURLLive
* @author hero * @author hero
...@@ -93,7 +98,7 @@ public class URLLive { ...@@ -93,7 +98,7 @@ public class URLLive {
}; };
crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await(); crawler.submitTask(callback,urlList.toArray(new String[urlList.size()])).await();
}catch (Exception e){ }catch (Exception e){
e.printStackTrace(); logger.error(" 数据采集运行有问题 {} ", e);
} }
return list; return list;
} }
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment