Commit a3b1bf35 by chenweiyang

爬虫核心包升级,版本升级至 0.2.5

parent d7dce3fc
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>articlenewscrawler</artifactId> <artifactId>articlenewscrawler</artifactId>
<version>0.2.4-SNAPSHOT</version> <version>0.2.5-SNAPSHOT</version>
<name>articlenewscrawler</name> <name>articlenewscrawler</name>
<description>采集凤凰,一点资讯,搜狐历时文章和文章评论</description> <description>采集凤凰,一点资讯,搜狐历时文章和文章评论</description>
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
<dependency> <dependency>
<groupId>com.zhiwei.crawler</groupId> <groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId> <artifactId>crawler-core</artifactId>
<version>0.6.0.1-SNAPSHOT</version> <version>0.6.6.3-SNAPSHOT</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
</dependencies> </dependencies>
......
...@@ -9,8 +9,8 @@ import org.slf4j.Logger; ...@@ -9,8 +9,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import okhttp3.FormBody; import okhttp3.FormBody;
import okhttp3.Headers; import okhttp3.Headers;
......
...@@ -11,8 +11,8 @@ import org.slf4j.Logger; ...@@ -11,8 +11,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.parse.analysis.AikaCommentAnalysis; import com.zhiwei.parse.analysis.AikaCommentAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -11,8 +11,8 @@ import org.slf4j.LoggerFactory; ...@@ -11,8 +11,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.AiqiyiByWordAnalysis; import com.zhiwei.parse.analysis.AiqiyiByWordAnalysis;
......
...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory; ...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -14,12 +14,11 @@ import org.slf4j.LoggerFactory; ...@@ -14,12 +14,11 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.BaijiaAccountAnalysis; import com.zhiwei.parse.analysis.BaijiaAccountAnalysis;
import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Baijia { public class Baijia {
......
...@@ -10,8 +10,7 @@ import java.util.Map; ...@@ -10,8 +10,7 @@ import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.BilibilikeyWordAnalysis; import com.zhiwei.parse.analysis.BilibilikeyWordAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -28,6 +27,7 @@ public class BiliBili { ...@@ -28,6 +27,7 @@ public class BiliBili {
try { try {
// //
String url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&single_column=1&order=" + type +"&duration=0&tids_1=0"; String url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&single_column=1&order=" + type +"&duration=0&tids_1=0";
// url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&order=pubdate&duration=0&tids_1=181";
System.out.println(url); System.out.println(url);
Headers header = Headers.of("cookie",cookie,"Referer","https://www.bilibili.com/","Host","search.bilibili.com"); Headers header = Headers.of("cookie",cookie,"Referer","https://www.bilibili.com/","Host","search.bilibili.com");
String result = HttpClient.executeHttpRequestGet(url, ProxyHolder.NAT_HEAVY_PROXY, header); String result = HttpClient.executeHttpRequestGet(url, ProxyHolder.NAT_HEAVY_PROXY, header);
......
...@@ -15,8 +15,8 @@ import org.slf4j.LoggerFactory; ...@@ -15,8 +15,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -11,7 +11,7 @@ import org.slf4j.Logger; ...@@ -11,7 +11,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.DayuAccountAnalysis; import com.zhiwei.parse.analysis.DayuAccountAnalysis;
......
...@@ -15,8 +15,8 @@ import org.slf4j.Logger; ...@@ -15,8 +15,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.parse.analysis.DoubanCommentAnalysis; import com.zhiwei.parse.analysis.DoubanCommentAnalysis;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -87,7 +87,9 @@ public class Douban { ...@@ -87,7 +87,9 @@ public class Douban {
} }
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(3000);
page++; page++;
if(page > 40) {
break;
}
} catch (Exception e) { } catch (Exception e) {
more = false; more = false;
logger.error("豆瓣 topic 采集出错 {}",e); logger.error("豆瓣 topic 采集出错 {}",e);
......
...@@ -10,7 +10,7 @@ import java.util.Map; ...@@ -10,7 +10,7 @@ import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.FenghuangAccountAnalysis; import com.zhiwei.parse.analysis.FenghuangAccountAnalysis;
......
...@@ -10,7 +10,7 @@ import org.slf4j.Logger; ...@@ -10,7 +10,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.parse.analysis.GftaiAnalysis; import com.zhiwei.parse.analysis.GftaiAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -15,8 +15,8 @@ import org.slf4j.Logger; ...@@ -15,8 +15,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory; ...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -11,7 +11,7 @@ import org.slf4j.Logger; ...@@ -11,7 +11,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.parse.analysis.KuaiTousuAnalysis; import com.zhiwei.parse.analysis.KuaiTousuAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -17,8 +17,8 @@ import org.slf4j.LoggerFactory; ...@@ -17,8 +17,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.MaimaiBywordAnalysis; import com.zhiwei.parse.analysis.MaimaiBywordAnalysis;
......
...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory; ...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory; ...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.parse.analysis.PcautoCommentAnalysis; import com.zhiwei.parse.analysis.PcautoCommentAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory; ...@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.bean.QQkbUser; import com.zhiwei.bean.QQkbUser;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.QQKBAccountAnalysis; import com.zhiwei.parse.analysis.QQKBAccountAnalysis;
......
...@@ -7,8 +7,8 @@ import org.slf4j.LoggerFactory; ...@@ -7,8 +7,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -9,8 +9,8 @@ import org.slf4j.Logger; ...@@ -9,8 +9,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.parse.analysis.QicheHomeKwyWordAnalysis; import com.zhiwei.parse.analysis.QicheHomeKwyWordAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory; ...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.parse.analysis.SinaKejiCommentAnalysis; import com.zhiwei.parse.analysis.SinaKejiCommentAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -11,8 +11,8 @@ import org.slf4j.Logger; ...@@ -11,8 +11,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.parse.analysis.SinaTousuAnalysis; import com.zhiwei.parse.analysis.SinaTousuAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -10,7 +10,7 @@ import org.slf4j.LoggerFactory; ...@@ -10,7 +10,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.SouhuAccountAnalysis; import com.zhiwei.parse.analysis.SouhuAccountAnalysis;
......
...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory; ...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.TXNewsByWordAnalysis; import com.zhiwei.parse.analysis.TXNewsByWordAnalysis;
......
...@@ -12,8 +12,8 @@ import org.slf4j.Logger; ...@@ -12,8 +12,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.parse.analysis.TechTxCommentAnalysis; import com.zhiwei.parse.analysis.TechTxCommentAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -16,7 +16,7 @@ import org.slf4j.LoggerFactory; ...@@ -16,7 +16,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Travel315 { public class Travel315 {
......
...@@ -12,8 +12,8 @@ import org.slf4j.LoggerFactory; ...@@ -12,8 +12,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.parse.analysis.Ts21cnAnalysis; import com.zhiwei.parse.analysis.Ts21cnAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -6,6 +6,7 @@ import java.util.Collections; ...@@ -6,6 +6,7 @@ import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -14,8 +15,8 @@ import org.slf4j.LoggerFactory; ...@@ -14,8 +15,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.WangyiCommentAnalysis; import com.zhiwei.parse.analysis.WangyiCommentAnalysis;
...@@ -151,42 +152,51 @@ public class Wangyi { ...@@ -151,42 +152,51 @@ public class Wangyi {
while(true) { while(true) {
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet("https://c.m.163.com/nc/subscribe/list/"+id+"/all/"+page+"-20.html"), proxy)){ try (Response response = httpBoot.syncCall(RequestUtils.wrapGet("https://c.m.163.com/nc/subscribe/list/"+id+"/all/"+page+"-20.html"), proxy)){
String result = response.body().string(); String result = response.body().string();
JSONObject json = JSONObject.parseObject(result); if(Objects.nonNull(result) && result.contains("tab_list")) {
JSONArray jsonArray = json.getJSONArray("tab_list"); JSONObject json = JSONObject.parseObject(result);
for(int i = 0,j = jsonArray.size();i < j;i++) { JSONArray jsonArray = json.getJSONArray("tab_list");
JSONObject data = jsonArray.getJSONObject(i); for(int i = 0,j = jsonArray.size();i < j;i++) {
Map<String,Object> map = new HashMap<>(); JSONObject data = jsonArray.getJSONObject(i);
String time = data.getString("ptime"); Map<String,Object> map = new HashMap<>();
if(endTime != null && endTime.length() > 1) { String time = data.getString("ptime");
System.out.println(time); if(endTime != null && endTime.length() > 1) {
if(time.compareTo(endTime) <= 0) { if(time.compareTo(endTime) <= 0) {
logger.info("超时时间采集范围 跳出采集"); logger.info("超时时间采集范围 跳出采集");
return dataList; return dataList;
}
}
map.put("title", data.getString("title"));
map.put("content", data.getString("aheadBody"));
map.put("time", time);
map.put("source", data.getString("source"));
if("video".equals(data.getString("skipType"))) {
map.put("url", "https://c.m.163.com/news/v/" + data.getString("skipID") + ".html");
}else {
map.put("url", "https://c.m.163.com/news/a/" + data.getString("postid") + ".html");
} }
errorNum = 1;
dataList.add(map);
} }
map.put("title", data.getString("title")); logger.info("id = {} , cralwer count = {}",id,dataList.size() );
map.put("content", data.getString("aheadBody")); page += 20;
map.put("time", time); if(jsonArray.size() < 10) {
map.put("source", data.getString("source")); errorNum ++;
if("video".equals(data.getString("skipType"))) { page -= 20;
map.put("url", "https://c.m.163.com/news/v/" + data.getString("skipID") + ".html");
}else {
map.put("url", "https://c.m.163.com/news/a/" + data.getString("postid") + ".html");
} }
// System.out.println(map.toString()); errorNum ++;
dataList.add(map); ZhiWeiTools.sleep(1000);
} }else {
logger.info("id = {} , cralwer count = {}",id,dataList.size() ); errorNum ++;
page += 20; ZhiWeiTools.sleep(2000);
if(jsonArray.size() < 10) {
break;
} }
} catch (Exception e) { } catch (Exception e) {
logger.info("采集数据出错 {}",e); logger.info("采集数据出错 {}",e);
ZhiWeiTools.sleep(1000);
errorNum++; errorNum++;
if(errorNum > 3) { }
break; System.out.println(errorNum);
} if(errorNum > 10) {
break;
} }
} }
return dataList; return dataList;
......
...@@ -12,8 +12,8 @@ import org.slf4j.LoggerFactory; ...@@ -12,8 +12,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -16,8 +16,8 @@ import org.slf4j.LoggerFactory; ...@@ -16,8 +16,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.parse.analysis.XueqiuKeyWordAnalysis; import com.zhiwei.parse.analysis.XueqiuKeyWordAnalysis;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -3,7 +3,6 @@ package com.zhiwei.parse; ...@@ -3,7 +3,6 @@ package com.zhiwei.parse;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
......
...@@ -14,8 +14,8 @@ import org.slf4j.LoggerFactory; ...@@ -14,8 +14,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -16,8 +16,8 @@ import org.slf4j.LoggerFactory; ...@@ -16,8 +16,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.YidianzixunAccountAnalysis; import com.zhiwei.parse.analysis.YidianzixunAccountAnalysis;
......
...@@ -14,9 +14,8 @@ import org.slf4j.LoggerFactory; ...@@ -14,9 +14,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory; ...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -15,7 +15,7 @@ import org.slf4j.LoggerFactory; ...@@ -15,7 +15,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -15,7 +15,7 @@ import org.slf4j.LoggerFactory; ...@@ -15,7 +15,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -11,8 +11,8 @@ import org.slf4j.LoggerFactory; ...@@ -11,8 +11,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory; ...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
......
...@@ -10,8 +10,8 @@ import org.slf4j.LoggerFactory; ...@@ -10,8 +10,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
......
...@@ -17,8 +17,8 @@ import org.slf4j.LoggerFactory; ...@@ -17,8 +17,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -15,7 +15,7 @@ import org.slf4j.Logger; ...@@ -15,7 +15,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
package com.zhiwei; package com.zhiwei;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import okhttp3.Response; import okhttp3.Response;
......
...@@ -6,18 +6,18 @@ ...@@ -6,18 +6,18 @@
// //
//import org.junit.Test; //import org.junit.Test;
// //
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory; //import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.crawler.proxy.ProxyHolder; //import com.zhiwei.crawler.proxy.ProxyHolder;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil; //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Baijia; //import com.zhiwei.parse.Baijia;
//import com.zhiwei.proxy.config.SimpleConfig;
// //
//public class BaijiaAccountExample { //public class BaijiaAccountExample {
// //
// @Test // @Test
// public void test3() { // public void test3() {
// // , "local", GroupType.PROVIDER , 10000002L // // , "local", GroupType.PROVIDER , 10000002L
// ProxyFactory.init(SimpleConfig.builder().registry("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181").appName("actool").appId(10000002).group("local").build()); // ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);
// String path = "D://crawlerdata//自媒体/百家号采集.xlsx"; // String path = "D://crawlerdata//自媒体/百家号采集.xlsx";
// PoiExcelUtil poi = PoiExcelUtil.getInstance(); // PoiExcelUtil poi = PoiExcelUtil.getInstance();
// String startTime = "2018-05-01 00:00:00"; // String startTime = "2018-05-01 00:00:00";
...@@ -27,8 +27,8 @@ ...@@ -27,8 +27,8 @@
//// for(Map<String,Object> m : list) { //// for(Map<String,Object> m : list) {
//// try { //// try {
//// String app_id = m.get("id").toString(); //// String app_id = m.get("id").toString();
// String app_id = "1565848819560927"; // String app_id = "1600247961135097";
// String cookie = "BAIDUID=A46414BD701A3738E17E0212A6C2FEEA:FG=1; Hmery-Time=2269711404; BIDUPSID=A46414BD701A3738E17E0212A6C2FEEA; PSTM=1583375258; delPer=0; H_PS_PSSID=30972_1439_21095_30839_30998_30823; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598"; // String cookie = "Hmery-Time=2853223123; BAIDUID=1BB712AF278D7B04E2A29BEAD50F9731:FG=1; BIDUPSID=1BB712AF278D7B04E2A29BEAD50F9731; PSTM=1589798969; H_PS_PSSID=31622_1431_31670_21083_31592_31270_31661_31463_31322_30823_26350; delPer=0; PSINO=5; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598";
// System.out.println(app_id); // System.out.println(app_id);
// List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id, "聚富财经", startTime, cookie, ProxyHolder.NAT_HEAVY_PROXY); // List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id, "聚富财经", startTime, cookie, ProxyHolder.NAT_HEAVY_PROXY);
// if(lists != null) { // if(lists != null) {
......
//package com.zhiwei.keyword; //package com.zhiwei.keyword;
// //
//import org.testng.annotations.Test; //import java.util.Arrays;
//import java.util.List;
//import java.util.Map;
// //
//import org.junit.Test;
//
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Douban; //import com.zhiwei.parse.Douban;
// //
//public class DoubanTopicTest { //public class DoubanTopicTest {
//
// @Test // @Test
// public void f() { // public void f() {
// String word = "胡歌"; // String word = "龙岭迷窟";
// String cookie = "bid=rymxzs5aojg; ps=y; ll=\"118173\"; __utmc=30149280; dbcl2=\"188038058:9IHyVcSobVc\"; ck=_RvF; push_noty_num=0; push_doumail_num=0; __utmv=30149280.18803; douban-fav-remind=1; __yadk_uid=qLflXyj3R14ro9e0cLoZOQlJoMGVN32j; douban-profile-remind=1; _vwo_uuid_v2=D85F60C118B0AF465035D9CC7BBFDA7A6|4bf255e1e3a2e9aeede3708192f5f1bc; __utmz=30149280.1543564973.3.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.8cb4=%5B%22%22%2C%22%22%2C1543908324%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DPk19bxnhsVWwfnrcnwT0PquON7D1JpLsbfSu9rRowalyi4pOeM3rMHKFaJo9jJF7%26wd%3D%26eqid%3De7f262650001ef98000000045c00e64f%22%5D; _pk_ses.100001.8cb4=*; ap_v=0,6.0; __utma=30149280.824403997.1543559458.1543885946.1543908324.10; __utmt=1; _pk_id.100001.8cb4=6828fef49f6bcf34.1543559455.9.1543908331.1543885945.; __utmb=30149280.9.7.1543908324"; // String cookie = "bid=aBxxYnlXock; douban-fav-remind=1; __yadk_uid=EESpKyPIvsPyVjjZ0DHerpLWEN1MLWPy; __gads=ID=9649e00ec2851223:T=1581324126:S=ALNI_MYZ0PbvzjuNKmREEpTjdvrO3x0v7w; __utmv=30149280.18803; gr_user_id=e12baa3e-7aac-44db-94a1-02c271013afd; _vwo_uuid_v2=DB1A0279AE19A1334CC41E68E320575C2|6b1aa2a313e7198665f2fdbffbd75e5c; ll=\"118173\"; _pk_ref.100001.8cb4=%5B%22%22%2C%22%22%2C1591783934%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3Dgjo37j0pVkJmnvxzwAnyq_YEYm3AtwuzNd1W6b0NUckARLYf_cNyMNEgrID_RRaC%26wd%3D%26eqid%3Dcaecc1b00009cacb000000045ee0b1fa%22%5D; _pk_ses.100001.8cb4=*; __utma=30149280.617345388.1581324126.1590475537.1591783934.12; __utmc=30149280; __utmz=30149280.1591783934.12.6.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; ap_v=0,6.0; dbcl2=\"188038058:bRn4JD+366s\"; ck=CqCI; push_noty_num=0; push_doumail_num=0; __utmt=1; _pk_id.100001.8cb4=4ef748b665aff851.1581324124.11.1591785082.1590475562.; __utmb=30149280.19.10.1591783934; ct=y";
// String time = "2018-11-16 00:00:00"; // String time = "2010-11-16 00:00:00";
// String path = "E:\\crawlerdata\\豆瓣.xlsx";
// //
// Douban.doubanTopicGetByWord(word, null, cookie,time); // List<Map<String, Object>> bodyList = Douban.doubanTopicGetByWord(word, null, cookie,time);
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel(path, "result", Arrays.asList("_id", "title", "group", "time", "reply_count"), bodyList);
// //
// } // }
//} //}
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
// for (String word : wordList) { // for (String word : wordList) {
//// pubdate 时间 totalrank 综合 //// pubdate 时间 totalrank 综合
// List<Map<String, Object>> dataList = BiliBili.getData(word, null, "2001-01-14 00:00:00", // List<Map<String, Object>> dataList = BiliBili.getData(word, null, "2001-01-14 00:00:00",
// cookie, "pubdate"); // cookie, "click");
// if (dataList != null) { // if (dataList != null) {
// System.out.println(word + " ----- " + dataList.size()); // System.out.println(word + " ----- " + dataList.size());
// bodyList.addAll(dataList); // bodyList.addAll(dataList);
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
// headlist.add("url"); // headlist.add("url");
// headlist.add("word"); // headlist.add("word");
// PoiExcelUtil poi = PoiExcelUtil.getInstance(); // PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("E://crawlerdata//视频//bilibili关键词采集数据-腾讯会议-time-20200218.xlsx", "B站数据", headlist, bodyList); // poi.exportExcel("E://crawlerdata//视频//bilibili关键词采集数据-20200515.xlsx", "B站数据", headlist, bodyList);
// //
// } // }
//} //}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment