Commit b6fe1572 by yangchen

微信正文获取修改

parent 87e9aaf3
......@@ -60,7 +60,7 @@ public class UrlLiveCrawler {
logger.info("当前处理 URL: {}", url);
Request request = HttpRequestBuilder.newGetRequest(url, null);
counter.increase();
HttpBoot.asyncCall(request, ProxyClientUtil.getNATProxy(), false).addListeners(future -> {
HttpBoot.asyncCall(request, ProxyClientUtil.getNATProxy(), false,false).addListeners(future -> {
if (future.isSuccess()) {
Response response = future.result();
try {
......
......@@ -55,6 +55,15 @@ public class ContentMatch {
return dataList;
}
/**
 * Ad-hoc entry point for exercising {@code getContentMatch} by hand.
 *
 * <p>Builds a one-element list holding a hard-coded {@code mp.weixin.qq.com} article URL,
 * passes it to {@code getContentMatch}, and prints the string form of every returned
 * {@code ContentBean} to standard output, one per line.
 *
 * @param args command-line arguments; not read
 */
public static void main(String[] args) {
    final String sampleUrl = "https://mp.weixin.qq.com/s?src=11&timestamp=1535449515&ver=1088&signature=9kByOydse2KaausR0FP5HoQpSeSXs097LR-akxhJxfCV*onfJuoWkznZ8UEk5OfFox4aVzDqx0n0xwbtTm6KUzPpNz2desfNiQ4Uevp4LaTSyoH3OKysG2qxy2jisojb&new=1";

    List<String> urlList = new ArrayList<>();
    urlList.add(sampleUrl);

    List<ContentBean> beans = getContentMatch(urlList);
    for (ContentBean bean : beans) {
        String rendered = bean.toString();
        System.out.println(rendered);
    }
}
static class ContentMatchCrawlerThread extends Thread{
private static List<ContentBean> getContentMatch(List<String> urlList){
......
......@@ -29,8 +29,12 @@ public class MatchContent {
public static String matchContent(String url,String html) {
String content = null;
try {
Document document = Jsoup.parse(html);
content = mathchContent(html, document);
Document document = Jsoup.parse(html);
if(url.contains("weixin.qq.com")) {
content = matchContentWeixin(document);
}else {
content = mathchContent(html, document);
}
return content;
} catch (Exception e) {
logger.debug("获取全文失败",e.fillInStackTrace());
......@@ -39,8 +43,18 @@ public class MatchContent {
return content;
}
/**
 * Extracts the body text of a WeChat article page.
 *
 * <p>Selects the {@code div.rich_media_content} element(s) of the parsed page and
 * returns the text produced by calling {@code text()} on that selection.
 *
 * @Description WeChat article text extraction
 * @param document the parsed HTML document to read the article body from
 * @return the text of the selected element(s); presumably an empty string when
 *         nothing matches the selector (TODO confirm against the Jsoup version in use)
 */
private static String matchContentWeixin(Document document) {
return document.select("div.rich_media_content").text();
}
/**
* @Title: mathchContent
* @author hero
* @Description: 匹配正文数据
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment