Commit 39b30f08 by yangchen

无效链接传入处理

parent 554dd201
......@@ -22,6 +22,8 @@ import com.zhiwei.source_forward.bean.UrlLiveBean.Attribution;
import com.zhiwei.source_forward.util.UrlLiveDataCallback;
import com.zhiwei.tools.httpclient.HeaderTool;
import okhttp3.Request;
/**
*
* @ClassName UrlLiveCrawler
......@@ -65,28 +67,20 @@ public class UrlLiveCrawler {
if(url.contains("www.toutiao.com")){
headers.put("referer", url);
}
try {
Request request = RequestUtils.wrapGet(url, headers);
if(Objects.nonNull(request)) {
counter.add();
httpBoot.asyncCall(RequestUtils.wrapGet(url, headers), ProxyHolder.NAT_PROXY).whenComplete((rs,ex) -> {
httpBoot.asyncCall(request, ProxyHolder.NAT_PROXY).whenComplete((rs,ex) -> {
try {
if (Objects.isNull(ex)) {
if(rs.code() == 200) {
parseHtml(rs.body().string(), attr, callback,counter);
parseHtml(rs.body().string(), attr, callback);
}else {
if(attr.getCount() > 2) {
callBack(callback, attr, 1,String.valueOf(rs.code()));
}else {
attr.AddCount();
search(counter, attr.getAttr().toString(), attr, callback);
}
}
} else {
if(attr.getCount() > 3) {
callBack(callback, attr, -1,null);
logger.info("搜索结果访问失败: {}", ex);
}else {
attr.AddCount();
search(counter, attr.getAttr().toString(), attr, callback);
}
callBack(callback, attr, 1,String.valueOf(rs.code()));
}
} catch (Exception e) {
logger.error(" 数据是否删除 采集出错 {} ",e);
......@@ -96,6 +90,11 @@ public class UrlLiveCrawler {
});
return counter;
}
} catch (Exception e2) {
logger.error("数据出错 {}" ,e2);
}
return counter;
}
private void callBack(UrlLiveDataCallback callback,Attribution attr,int i,String title) {
UrlLiveBean ulb = null;
......@@ -150,7 +149,7 @@ public class UrlLiveCrawler {
* @param callback
*/
private void parseHtml(String html, Attribution attr,
UrlLiveDataCallback callback,GroupSync counter) {
UrlLiveDataCallback callback) {
if (callback == null) {
logger.warn("DataCallback 对象为 null,无法保存数据");
} else {
......@@ -158,12 +157,7 @@ public class UrlLiveCrawler {
if(Objects.nonNull(ulb)) {
callback.onData(ulb, attr);
}else {
if(attr.getCount() > 3) {
callBack(callback, attr, -1,null);
}else {
attr.AddCount();
search(counter, attr.getAttr().toString(), attr, callback);
}
}
}
}
......@@ -177,6 +171,7 @@ public class UrlLiveCrawler {
* @return boolean 返回类型
*/
public UrlLiveBean matchDel(String result,Attribution attr,String url){
try {
Document doc = Jsoup.parse(result);
String title = null;
if(url.contains("mp.weixin.qq.com") || url.contains("post.mp.qq.com")){
......@@ -245,6 +240,9 @@ public class UrlLiveCrawler {
} else {
return null;
}
} catch (Exception e) {
return null;
}
}
/**
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment