Commit 39b30f08 by yangchen

无效链接传入处理

parent 554dd201
...@@ -22,6 +22,8 @@ import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution; ...@@ -22,6 +22,8 @@ import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution;
import com.zhiwei.source_forward.util.UrlLiveDataCallback; import com.zhiwei.source_forward.util.UrlLiveDataCallback;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import okhttp3.Request;
/** /**
* *
* @ClassName UrlLiveCrawler * @ClassName UrlLiveCrawler
...@@ -65,28 +67,20 @@ public class UrlLiveCrawler { ...@@ -65,28 +67,20 @@ public class UrlLiveCrawler {
if(url.contains("www.toutiao.com")){ if(url.contains("www.toutiao.com")){
headers.put("referer", url); headers.put("referer", url);
} }
try {
Request request = RequestUtils.wrapGet(url, headers);
if(Objects.nonNull(request)) {
counter.add(); counter.add();
httpBoot.asyncCall(RequestUtils.wrapGet(url, headers), ProxyHolder.NAT_PROXY).whenComplete((rs,ex) -> { httpBoot.asyncCall(request, ProxyHolder.NAT_PROXY).whenComplete((rs,ex) -> {
try { try {
if (Objects.isNull(ex)) { if (Objects.isNull(ex)) {
if(rs.code() == 200) { if(rs.code() == 200) {
parseHtml(rs.body().string(), attr, callback,counter); parseHtml(rs.body().string(), attr, callback);
}else { }else {
if(attr.getCount() > 2) {
callBack(callback, attr, 1,String.valueOf(rs.code())); callBack(callback, attr, 1,String.valueOf(rs.code()));
}else {
attr.AddCount();
search(counter, attr.getAttr().toString(), attr, callback);
}
} }
} else { } else {
if(attr.getCount() > 3) { callBack(callback, attr, 1,String.valueOf(rs.code()));
callBack(callback, attr, -1,null);
logger.info("搜索结果访问失败: {}", ex);
}else {
attr.AddCount();
search(counter, attr.getAttr().toString(), attr, callback);
}
} }
} catch (Exception e) { } catch (Exception e) {
logger.error(" 数据是否删除 采集出错 {} ",e); logger.error(" 数据是否删除 采集出错 {} ",e);
...@@ -96,6 +90,11 @@ public class UrlLiveCrawler { ...@@ -96,6 +90,11 @@ public class UrlLiveCrawler {
}); });
return counter; return counter;
} }
} catch (Exception e2) {
logger.error("数据出错 {}" ,e2);
}
return counter;
}
private void callBack(UrlLiveDataCallback callback,Attribution attr,int i,String title) { private void callBack(UrlLiveDataCallback callback,Attribution attr,int i,String title) {
UrlLiveBean ulb = null; UrlLiveBean ulb = null;
...@@ -150,7 +149,7 @@ public class UrlLiveCrawler { ...@@ -150,7 +149,7 @@ public class UrlLiveCrawler {
* @param callback * @param callback
*/ */
private void parseHtml(String html, Attribution attr, private void parseHtml(String html, Attribution attr,
UrlLiveDataCallback callback,GroupSync counter) { UrlLiveDataCallback callback) {
if (callback == null) { if (callback == null) {
logger.warn("DataCallback 对象为 null,无法保存数据"); logger.warn("DataCallback 对象为 null,无法保存数据");
} else { } else {
...@@ -158,12 +157,7 @@ public class UrlLiveCrawler { ...@@ -158,12 +157,7 @@ public class UrlLiveCrawler {
if(Objects.nonNull(ulb)) { if(Objects.nonNull(ulb)) {
callback.onData(ulb, attr); callback.onData(ulb, attr);
}else { }else {
if(attr.getCount() > 3) {
callBack(callback, attr, -1,null); callBack(callback, attr, -1,null);
}else {
attr.AddCount();
search(counter, attr.getAttr().toString(), attr, callback);
}
} }
} }
} }
...@@ -177,6 +171,7 @@ public class UrlLiveCrawler { ...@@ -177,6 +171,7 @@ public class UrlLiveCrawler {
* @return boolean 返回类型 * @return boolean 返回类型
*/ */
public UrlLiveBean matchDel(String result,Attribution attr,String url){ public UrlLiveBean matchDel(String result,Attribution attr,String url){
try {
Document doc = Jsoup.parse(result); Document doc = Jsoup.parse(result);
String title = null; String title = null;
if(url.contains("mp.weixin.qq.com") || url.contains("post.mp.qq.com")){ if(url.contains("mp.weixin.qq.com") || url.contains("post.mp.qq.com")){
...@@ -245,6 +240,9 @@ public class UrlLiveCrawler { ...@@ -245,6 +240,9 @@ public class UrlLiveCrawler {
} else { } else {
return null; return null;
} }
} catch (Exception e) {
return null;
}
} }
/** /**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment