Commit 5988fd8b by cwy

知乎采集 代理 使用方式修改 升级版本

parent aacd8761
......@@ -16,8 +16,10 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.entity.ZhihuAnswer;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
......@@ -57,7 +59,7 @@ public class ZhihuAnwserCrawlerParse {
* @return
* @throws Exception
*/
public static List<ZhihuAnswer> getAnswerList(String url, Date endDate, Proxy proxy) throws Exception{
public static List<ZhihuAnswer> getAnswerList(String url, Date endDate, ProxyHolder proxy) throws Exception{
try{
List<ZhihuAnswer> answerList = new ArrayList<>();
String questionId = getQuestionId(url);
......@@ -100,7 +102,7 @@ public class ZhihuAnwserCrawlerParse {
* @return
* @throws Exception
*/
private static String getNumberBoard(String url, Proxy proxy) throws Exception{
private static String getNumberBoard(String url, ProxyHolder proxy) throws Exception{
try{
String body = download(url, proxy);
Document document = Jsoup.parse(body);
......@@ -130,7 +132,7 @@ public class ZhihuAnwserCrawlerParse {
* @return
* @throws Exception
*/
public static Map<String,Object> getAnswerList(String url, int page, Date endDate, Proxy proxy) throws Exception{
public static Map<String,Object> getAnswerList(String url, int page, Date endDate, ProxyHolder proxy) throws Exception{
try{
String questionId = getQuestionId(url);
String bord = getNumberBoard(url, proxy);
......@@ -150,7 +152,7 @@ public class ZhihuAnwserCrawlerParse {
* @return
* @throws Exception
*/
private static Map<String,Object> analsis(String questionId, Date endDate, int page, String bord, Proxy proxy) throws Exception{
private static Map<String,Object> analsis(String questionId, Date endDate, int page, String bord, ProxyHolder proxy) throws Exception{
try{
boolean more = true;
List<ZhihuAnswer> answerList = new ArrayList<>();
......@@ -199,7 +201,7 @@ public class ZhihuAnwserCrawlerParse {
* @return
* @throws Exception
*/
private static String download(String url, Proxy proxy) throws Exception{
private static String download(String url, ProxyHolder proxy) throws Exception{
try(Response response = httpBoot.syncCall(RequestUtils.wrapGet(url),proxy)){
return response.body().string();
}catch (Exception e){
......@@ -243,14 +245,15 @@ public class ZhihuAnwserCrawlerParse {
/**
 * Manual smoke test for the Zhihu answer crawler.
 *
 * <p>Calls {@code getAnswerCount} and {@code getAnswerList} against fixed question URLs,
 * passing {@code null} as the last argument of each call, and prints the answer count
 * of the second question to standard output.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args){
    // First sample question: the call is made but its return value is not used.
    getAnswerCount("https://www.zhihu.com/question/41539825", null);

    String url = "https://www.zhihu.com/question/67992264";
    Date endDate = TimeParse.stringFormartDate("2010-09-20 08:00:00");
    try{
        getAnswerList(url,endDate, null);
    }catch (Exception e){
        // fillInStackTrace() only resets the recorded stack trace and reports nothing,
        // which silently swallowed the failure. Print the trace so a failed crawl is
        // visible when this harness is run by hand.
        e.printStackTrace();
    }

    int count = getAnswerCount(url, null);
    System.out.println(count);
}
......
......@@ -391,7 +391,7 @@ public class DataCrawler {
* @return
* @throws Exception
*/
public static List<ZhihuAnswer> getAnswerList(String url, Date endDate, Proxy proxy) throws Exception{
public static List<ZhihuAnswer> getAnswerList(String url, Date endDate, ProxyHolder proxy) throws Exception{
try{
return ZhihuAnwserCrawlerParse.getAnswerList(url,endDate, proxy);
}catch (Exception e){
......@@ -408,7 +408,7 @@ public class DataCrawler {
* @return
* @throws Exception
*/
public static Map<String,Object> getAnswerList(String url, int page, Date endDate, Proxy proxy) throws Exception{
public static Map<String,Object> getAnswerList(String url, int page, Date endDate, ProxyHolder proxy) throws Exception{
try{
return ZhihuAnwserCrawlerParse.getAnswerList(url,page,endDate, proxy);
}catch (Exception e){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment