Commit fd678cf3 by yangchen

知乎回答数获取增加

parent 040405fc
package com.zhiwei.media_data_crawler.crawler; package com.zhiwei.media_data_crawler.crawler;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.media_data_crawler.entity.ZhihuAnswer; import com.zhiwei.media_data_crawler.entity.ZhihuAnswer;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.net.Proxy; import okhttp3.Response;
import java.util.*;
/** /**
* 知乎评论采集 * 知乎评论采集
...@@ -21,7 +28,27 @@ import java.util.*; ...@@ -21,7 +28,27 @@ import java.util.*;
public class ZhihuAnwserCrawlerParse { public class ZhihuAnwserCrawlerParse {
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot();
/** Class-wide SLF4J logger; final so the shared reference cannot be reassigned. */
private static final Logger logger = LoggerFactory.getLogger(ZhihuAnwserCrawlerParse.class);
/**
 * Fetches a Zhihu question page and extracts its answer count.
 *
 * <p>The count is read from the raw response body: it is the text between
 * {@code itemProp="answerCount" content="} and the next double quote.
 *
 * @param url   address of the question page to request
 * @param proxy proxy handed to {@code httpBoot.syncCall} together with the request
 * @return the parsed answer count, or {@code -1} when the request fails, the
 *         marker is not found in the body, or the value is not an integer
 */
public static int getAnswerCount(String url,Proxy proxy) {
    final String marker = "itemProp=\"answerCount\" content=\"";
    try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){
        if (response == null || response.body() == null) {
            logger.error("数据解析错误: empty response, url={}", url);
            return -1;
        }
        String html = response.body().string();

        // Locate the attribute explicitly instead of relying on an
        // ArrayIndexOutOfBoundsException from split() when the page layout changes.
        int markerAt = html.indexOf(marker);
        if (markerAt < 0) {
            logger.error("数据解析错误: answerCount marker not found, url={}", url);
            return -1;
        }
        int valueStart = markerAt + marker.length();
        int valueEnd = html.indexOf('"', valueStart);
        if (valueEnd < 0) {
            logger.error("数据解析错误: unterminated answerCount attribute, url={}", url);
            return -1;
        }
        return Integer.parseInt(html.substring(valueStart, valueEnd));
    } catch (Exception e) {
        // Broad catch kept on purpose: callers rely on -1 rather than an exception.
        // The throwable is passed as the last argument so the stack trace is logged.
        logger.error("数据解析错误, url={}", url, e);
    }
    return -1;
}
/** /**
* 知乎回答采集 * 知乎回答采集
* @param url * @param url
...@@ -216,13 +243,14 @@ public class ZhihuAnwserCrawlerParse { ...@@ -216,13 +243,14 @@ public class ZhihuAnwserCrawlerParse {
/**
 * Manual entry point for ad-hoc runs of this crawler.
 *
 * NOTE(review): this span is a side-by-side diff rendering, so each line shows the
 * pre-commit text followed by the post-commit text. The pre-commit body calls
 * getAnswerList(url, endDate, null) for question 288128510 and, on failure, only calls
 * e.fillInStackTrace(), which does not log or print anything. The post-commit body has
 * that code commented out and makes a single getAnswerCount call for question 41539825
 * with a null proxy; the returned count is not used.
 */
public static void main(String[] args){ public static void main(String[] args){
String url = "https://www.zhihu.com/question/288128510"; // String url = "https://www.zhihu.com/question/288128510";
Date endDate = TimeParse.stringFormartDate("2018-09-20 08:00:00"); // Date endDate = TimeParse.stringFormartDate("2018-09-20 08:00:00");
try{ // try{
getAnswerList(url,endDate, null); // getAnswerList(url,endDate, null);
}catch (Exception e){ // }catch (Exception e){
e.fillInStackTrace(); // e.fillInStackTrace();
} // }
getAnswerCount("https://www.zhihu.com/question/41539825", null);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment