Commit fd678cf3 by yangchen

知乎回答数获取增加

parent 040405fc
package com.zhiwei.media_data_crawler.crawler;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.media_data_crawler.entity.ZhihuAnswer;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.net.Proxy;
import java.util.*;
import okhttp3.Response;
/**
* 知乎评论采集
......@@ -21,7 +28,27 @@ import java.util.*;
public class ZhihuAnwserCrawlerParse {
// HTTP client shared by the static crawler methods; getAnswerCount issues its synchronous GET through it.
private static HttpBoot httpBoot = new HttpBoot();
// SLF4J logger bound to this class.
private static Logger logger = LoggerFactory.getLogger(ZhihuAnwserCrawlerParse.class);
/**
 * Fetches a Zhihu question page and extracts its answer count.
 *
 * <p>The count is read from the {@code content} attribute value that directly
 * follows the {@code itemProp="answerCount"} marker in the raw response body.
 *
 * @param url   address of the question page to request
 * @param proxy proxy handed to the HTTP client together with the request
 * @return the parsed answer count, or {@code -1} when the request fails, the
 *         marker (or its closing quote) is absent, or the value is not an integer
 */
public static int getAnswerCount(String url,Proxy proxy) {
    final String marker = "itemProp=\"answerCount\" content=\"";
    try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){
        if (response == null || response.body() == null) {
            logger.error("数据解析错误, url: {}, empty response", url);
            return -1;
        }
        String html = response.body().string();
        // Locate the attribute value explicitly instead of relying on an
        // ArrayIndexOutOfBoundsException from chained split() calls.
        int markerAt = html.indexOf(marker);
        int valueStart = markerAt + marker.length();
        int valueEnd = markerAt < 0 ? -1 : html.indexOf('"', valueStart);
        if (valueEnd < 0) {
            logger.error("数据解析错误, url: {}, answerCount not found", url);
            return -1;
        }
        return Integer.parseInt(html.substring(valueStart, valueEnd));
    } catch (Exception e) {
        // Best-effort contract is kept (-1 on any failure), but the cause and
        // the offending URL are now recorded instead of being dropped.
        logger.error("数据解析错误, url: {}", url, e);
    }
    return -1;
}
/**
* 知乎回答采集
* @param url
......@@ -216,13 +243,14 @@ public class ZhihuAnwserCrawlerParse {
/**
 * Manual smoke run: crawls the answers of one sample question up to a fixed
 * end date, then fetches and logs the answer count of a second sample question.
 *
 * @param args unused
 */
public static void main(String[] args){
    String url = "https://www.zhihu.com/question/288128510";
    Date endDate = TimeParse.stringFormartDate("2018-09-20 08:00:00");
    try{
        getAnswerList(url,endDate, null);
    }catch (Exception e){
        // fillInStackTrace() only reset the trace and silently dropped the
        // failure; log it so a broken run is visible.
        logger.error("getAnswerList failed, url: {}", url, e);
    }
    // Log the result so the smoke run actually shows what was parsed.
    int answerCount = getAnswerCount("https://www.zhihu.com/question/41539825", null);
    logger.info("answerCount: {}", answerCount);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment