Commit d7dce3fc by cwy

本地提交

parent 3fdd0d2c
......@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>articlenewscrawler</artifactId>
<version>0.2.3-SNAPSHOT</version>
<version>0.2.4-SNAPSHOT</version>
<name>articlenewscrawler</name>
<description>采集凤凰,一点资讯,搜狐历史文章和文章评论</description>
......@@ -21,7 +21,7 @@
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.5.5.6-SNAPSHOT</version>
<version>0.6.0.1-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
</dependencies>
......
......@@ -77,9 +77,10 @@ public class Baijia {
@SuppressWarnings("unchecked")
public static List<Map<String,Object>> getBaijiaAccountByBaiduData(String appId, String name, String startTime,
String cookie, ProxyHolder proxy) {
Map<String,String> headerMap = HeaderTool.getCommonHead();
Map<String,String> headerMap = new HashMap<>();
List<Map<String,Object>> dataList = new ArrayList<>();
headerMap.put("cookie",cookie);
headerMap.put("Host", "mbd.baidu.com");
String uk = getUkData(appId,proxy,cookie);
if(Objects.isNull(uk)) {
return Collections.emptyList();
......@@ -90,13 +91,14 @@ public class Baijia {
for(int i = 1;i < 3;i++) {
try {
String url = "https://mbd.baidu.com/webpage?tab=article&num=10&uk="+uk+"&ctime="+ctime+"&type=newhome&action=dynamic&format=json";
logger.info("ctime = {} url === {}", ctime, url);
String result = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
Map<String,Object> dMap = baijiaAccountAnalysis.getBaijiaAccountData3(result,name, startTime);
List<Map<String,Object>> dList = (List<Map<String, Object>>) dMap.get("data");
if(Objects.nonNull(dList))
dataList.addAll(dList);
logger.info("{} 数据采集结果 {}",appId, dataList.size());
if(!(boolean) dMap.get("more")) {
if(!(boolean) dMap.get("more") || ctime.equals(String.valueOf(String.valueOf(dMap.get("ctime"))))) {
f = false;
}
ctime = String.valueOf(dMap.get("ctime"));
......
......@@ -23,11 +23,11 @@ public class BiliBili {
private static final Logger logger = LoggerFactory.getLogger(BiliBili.class);
@SuppressWarnings("unchecked")
public static List<Map<String,Object>> getData(String word,Proxy proxy,String endTime,String cookie) {
public static List<Map<String,Object>> getData(String word,Proxy proxy,String endTime,String cookie, String type) {
List<Map<String,Object>> bodyList = new ArrayList<>();
try {
//
String url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&single_column=1&order=click&duration=0&tids_1=0";
String url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&single_column=1&order=" + type +"&duration=0&tids_1=0";
System.out.println(url);
Headers header = Headers.of("cookie",cookie,"Referer","https://www.bilibili.com/","Host","search.bilibili.com");
String result = HttpClient.executeHttpRequestGet(url, ProxyHolder.NAT_HEAVY_PROXY, header);
......
......@@ -16,9 +16,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.httpclient.HeadGet;
......@@ -141,11 +139,11 @@ public class Maimai {
return Collections.emptyMap();
}
/**
 * Ad-hoc entry point: initialises the proxy factory and prints whatever
 * {@code getMaiaiCount2} returns for one hard-coded Maimai feed-detail URL.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER, 10000002L);
    final String feedDetailUrl = "https://maimai.cn/web/feed_detail?fid=1353566056&efid=QTa45Y1e-oQzyn1dZ5ozlQ";
    System.out.println(getMaiaiCount2(feedDetailUrl, ProxyHolder.NAT_HEAVY_PROXY));
}
// public static void main(String[] args) {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER, 10000002L);
// String url = "https://maimai.cn/web/feed_detail?fid=1353566056&efid=QTa45Y1e-oQzyn1dZ5ozlQ";
// System.out.println(getMaiaiCount2(url, ProxyHolder.NAT_HEAVY_PROXY));
// }
/**
* https://maimai.cn/web/feed_detail?fid=1304191535&efid=0CQbJXhoYLXdC87NFIkRMA
......
......@@ -165,12 +165,12 @@ public class Xueqiu {
List<Map<String, Object>> resultList = new ArrayList<>();
int page = 1;
boolean next = true;
int errCount = 1;
while(next) {
for(int j = 0; j < 4; j++) {
try {
String url = "https://xueqiu.com/v4/statuses/user_timeline.json?page=" + page + "&user_id=" + uid + "&type=0";
logger.info("重试次数:{},第{}页,JSON地址为:{}", j, page, url);
logger.info("第{}页,JSON地址为:{}", page, url);
JSONObject json = getJson(url, cookie);//获取json数据
JSONArray jsonArray = json.getJSONArray("statuses");
......@@ -219,16 +219,16 @@ public class Xueqiu {
boolean flag = page < maxPag;//当前页数小于最大页数时,翻页
if(flag) {
page ++;//页数+1
} else {
next = false;
errCount = 1;
}
}
break;
errCount ++;
} catch (Exception e) {
logger.error("解析JSON出错 ", e);
errCount++;
}
}
if(errCount > 4)
break;
}
return resultList;
......
......@@ -63,14 +63,12 @@ public class BaijiaAccountAnalysis {
more = true;
rmap.put("ctime", json.getJSONObject("data").getJSONObject("query").getString("ctime"));
}
// String name = json.getJSONObject("data").getJSONObject("author").getString("display_name");
for(int i = 0;i < jsonArry.size();i++) {
Map<String,Object> map = new HashMap<>();
JSONObject data = jsonArry.getJSONObject(i).getJSONObject("itemData");
String id = data.getString("article_id");
int t = data.getInteger("updated_at");
String time = TimeParse.dateFormartString(new Date(t*1000L), "yyyy-MM-dd HH:mm:ss");
System.out.println(time);
if(startTime != null && startTime.length() > 1 && time.compareTo(startTime) < 1) {
more = false;
continue;
......
......@@ -40,6 +40,7 @@ public class QQTV {
List<Map<String,Object>> dataList = new ArrayList<>();
try {
String url = "https://v.qq.com/x/search/?ses=qid%3DdKzxiFfC7NqpC6z2jq4m-KGeQjb_Th556Yrz24cQaZo1MUTw2PK4XA%26last_query%3D%E7%BE%8E%E5%9B%A2%26tabid_list%3D0%7C1%7C5%7C13%7C11%7C7%7C2%7C3%7C4%7C6%7C12%7C21%7C14%7C17%7C8%7C15%7C20%26tabname_list%3D%E5%85%A8%E9%83%A8%7C%E7%94%B5%E5%BD%B1%7C%E9%9F%B3%E4%B9%90%7C%E8%B4%A2%E7%BB%8F%7C%E6%96%B0%E9%97%BB%7C%E5%85%B6%E4%BB%96%7C%E7%94%B5%E8%A7%86%E5%89%A7%7C%E7%BB%BC%E8%89%BA%7C%E5%8A%A8%E6%BC%AB%7C%E7%BA%AA%E5%BD%95%E7%89%87%7C%E5%A8%B1%E4%B9%90%7C%E6%B1%BD%E8%BD%A6%7C%E4%BD%93%E8%82%B2%7C%E6%B8%B8%E6%88%8F%7C%E5%8E%9F%E5%88%9B%7C%E6%95%99%E8%82%B2%7C%E6%AF%8D%E5%A9%B4%26resolution_tabid_list%3D0%7C1%7C2%7C3%7C4%7C5%26resolution_tabname_list%3D%E5%85%A8%E9%83%A8%7C%E6%A0%87%E6%B8%85%7C%E9%AB%98%E6%B8%85%7C%E8%B6%85%E6%B8%85%7C%E8%93%9D%E5%85%89%7CVR&q="+URLEncoder.encode(word, "UTF-8")+"&stag=4&filter=sort%3D1%26pubfilter%3D0%26duration%3D0%26tabid%3D0%26resolution%3D0&cur=";
System.out.println(url);
int page = 1;
while(true) {
int count = dataList.size();
......
package com.zhiwei;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.parse.TechTx;
/**
 * Manual runner that initialises the proxy factory and invokes
 * {@code TechTx.getTechTxComment} once with a hard-coded identifier.
 *
 * <p>NOTE(review): presumably requires the ZooKeeper registry below to be
 * reachable from the machine running it - confirm before relying on it.
 */
public class CrawlerTest {

    /**
     * Runs the single hard-coded request; the result of the call is not inspected.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER, 10000002L);
        // The token is not a URL; presumably it is an article/document id - verify against TechTx.
        String articleId = "E2S95LEA0008856R";
        // The second argument is passed as null, exactly as before.
        TechTx.getTechTxComment(articleId, null);
    }
}
//package com.zhiwei;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.parse.TechTx;
//
//public class CrawlerTest {
//
// public static void main(String[] args) {
// ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);
// String cookie = "bid=rymxzs5aojg; ps=y; ll=\"118173\"; __utmc=30149280; dbcl2=\"188038058:9IHyVcSobVc\"; ck=_RvF; push_noty_num=0; push_doumail_num=0; __utmv=30149280.18803; douban-fav-remind=1; __yadk_uid=qLflXyj3R14ro9e0cLoZOQlJoMGVN32j; douban-profile-remind=1; _vwo_uuid_v2=D85F60C118B0AF465035D9CC7BBFDA7A6|4bf255e1e3a2e9aeede3708192f5f1bc; __utmz=30149280.1543564973.3.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; ap_v=0,6.0; _pk_ref.100001.8cb4=%5B%22%22%2C%22%22%2C1543823236%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DPk19bxnhsVWwfnrcnwT0PquON7D1JpLsbfSu9rRowalyi4pOeM3rMHKFaJo9jJF7%26wd%3D%26eqid%3De7f262650001ef98000000045c00e64f%22%5D; _pk_id.100001.8cb4=6828fef49f6bcf34.1543559455.5.1543823236.1543820463.; _pk_ses.100001.8cb4=*; __utma=30149280.824403997.1543559458.1543818802.1543823236.6; __utmt=1; __utmb=30149280.5.7.1543823236";
// String url = "E2S95LEA0008856R";
// TechTx.getTechTxComment(url, null);
// }
//
//}
package com.zhiwei.hsitory;
import java.util.List;
import java.util.Map;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.parse.BTime;
/**
 * Manual runner that initialises the proxy factory and calls
 * {@code BTime.getHistoryData} once with hard-coded arguments.
 */
public class BTimeHistoryExample {

    /** Registry address handed to {@code ProxyFactory.init}. */
    private static final String REGISTRY = "zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181";

    /** Second argument of {@code ProxyFactory.init}. */
    private static final String GROUP = "local";

    /** Numeric id passed as the last argument of {@code ProxyFactory.init}. */
    private static final long APP_ID = 10000002L;

    /**
     * Fetches the rows and discards them; enable the dump below to inspect the output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ProxyFactory.init(REGISTRY, GROUP, GroupType.PROVIDER, APP_ID);
        List<Map<String, Object>> historyRows = BTime.getHistoryData("1608238", 0L);
        // Uncomment to print every returned row:
        // historyRows.forEach(System.out::println);
    }
}
//package com.zhiwei.hsitory;
//
//import java.util.List;
//import java.util.Map;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.parse.BTime;
//
//public class BTimeHistoryExample {
//
// public static void main(String[] args) {
// ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);
// List<Map<String, Object>> dataList = BTime.getHistoryData("1608238", 0L);
//// dataList.forEach(System.out::println);
// }
//
//}
......@@ -6,37 +6,38 @@
//
//import org.junit.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.crawler.proxy.ProxyHolder;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Baijia;
//import com.zhiwei.proxy.config.SimpleConfig;
//
//public class BaijiaAccountExample {
//
// @Test
// public void test3() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
// // , "local", GroupType.PROVIDER , 10000002L
// ProxyFactory.init(SimpleConfig.builder().registry("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181").appName("actool").appId(10000002).group("local").build());
// String path = "D://crawlerdata//自媒体/百家号采集.xlsx";
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// String startTime = "2018-05-01 00:00:00";
// Map<String,Object> map = poi.importExcel(path, 0);
// List<Map<String,Object>> list = (List<Map<String, Object>>) map.get("body");
//// Map<String,Object> map = poi.importExcel(path, 0);
//// List<Map<String,Object>> list = (List<Map<String, Object>>) map.get("body");
// List<Map<String,Object>> bodyList = new ArrayList<>();
// for(Map<String,Object> m : list) {
// try {
// String app_id = m.get("id").toString();
// app_id = "1602674438508810";
// String cookie = "BAIDUID=7D453C932433A93F7AD1F3B8ABC8B0E1:FG=1; BIDUPSID=7D453C932433A93F7AD1F3B8ABC8B0E1; PSTM=1570766401; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDSFRCVID=eH-OJeCmH6VwoRJwCdmehrB7leKK0gOTHllvCh8hmwLadLIVJeC6EG0Ptf8g0KubFTPRogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tJkD_I_hJKt3qn7I5KToh4Athxob2bbXHDo-LIvHWT6cOR5JhfA-3R-e046f3-3L5CbH5D3s5lvvhb3O3M7ShbKdMa732RbrKCnraxQF5l8-sq0x0bOte-bQypoa0q3TLDOMahkM5h7xOKQoQlPK5JkgMx6MqpQJQeQ-5KQN3KJmfbL9bT3YjjISKx-_J5LJJxK; H_PS_PSSID=1442_21103_29567_29699_29220_22158; delPer=0; PSINO=5; ZD_ENTRY=baidu; yjs_js_security_passport=9687699d4b0965c0be1e6e312fc59ff5cf3d03a2_1571106914_js; Hmery-Time=1215393878";
//// for(Map<String,Object> m : list) {
//// try {
//// String app_id = m.get("id").toString();
// String app_id = "1565848819560927";
// String cookie = "BAIDUID=A46414BD701A3738E17E0212A6C2FEEA:FG=1; Hmery-Time=2269711404; BIDUPSID=A46414BD701A3738E17E0212A6C2FEEA; PSTM=1583375258; delPer=0; H_PS_PSSID=30972_1439_21095_30839_30998_30823; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598";
// System.out.println(app_id);
// List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id,"聚富财经", startTime,cookie, ProxyHolder.NAT_HEAVY_PROXY);
// List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id, "聚富财经", startTime, cookie, ProxyHolder.NAT_HEAVY_PROXY);
// if(lists != null) {
// bodyList.addAll(lists);
// }
// break;
// } catch (Exception e) {
// }
// }
//// break;
//// } catch (Exception e) {
//// }
//// }
// List<String> headList = new ArrayList<String>();
// headList.add("title");
// headList.add("time");
......@@ -44,7 +45,7 @@
// headList.add("url");
// headList.add("content");
// headList.add("read_amount");
// poi.exportExcel("D://crawlerdata//历史文章采集/百家号-lxj-2.xlsx", "娱乐资本论", headList, bodyList);
// poi.exportExcel("E://crawlerdata//历史百家号-lxj-2.xlsx", "娱乐资本论", headList, bodyList);
// }
//
//}
package com.zhiwei.hsitory;
import java.util.List;
import java.util.Map;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.parse.KuaiData;
/**
 * Manual runner that initialises the proxy factory and calls
 * {@code KuaiData.getArticleHistory} once with hard-coded arguments.
 */
public class KuaiDataHistoryExample {

    /** Registry address handed to {@code ProxyFactory.init}. */
    private static final String REGISTRY = "zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181";

    /** Second argument of {@code ProxyFactory.init}. */
    private static final String GROUP = "local";

    /** Numeric id passed as the last argument of {@code ProxyFactory.init}. */
    private static final long APP_ID = 10000002L;

    /**
     * Fetches the rows and discards them; enable the dump below to inspect the output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ProxyFactory.init(REGISTRY, GROUP, GroupType.PROVIDER, APP_ID);
        List<Map<String, Object>> articleRows =
                KuaiData.getArticleHistory("5c19954ccb14fabc153971e3f924bf36", "2686798288", 0L);
        // Uncomment to print every returned row:
        // articleRows.forEach(System.out::println);
    }
}
//package com.zhiwei.hsitory;
//
//import java.util.List;
//import java.util.Map;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.parse.KuaiData;
//
//public class KuaiDataHistoryExample {
//
// public static void main(String[] args) {
// ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);
// List<Map<String, Object>> dataList = KuaiData.getArticleHistory("5c19954ccb14fabc153971e3f924bf36", "2686798288", 0L);
//// dataList.forEach(System.out::println);
// }
//
//}
package com.zhiwei.hsitory;
import java.util.List;
import java.util.Map;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.parse.BTime;
import com.zhiwei.parse.MyZaker;
/**
 * Manual runner that initialises the proxy factory and calls
 * {@code MyZaker.getHistoryData} once with hard-coded arguments.
 */
public class ZakerHistoryExample {

    /** Registry address handed to {@code ProxyFactory.init}. */
    private static final String REGISTRY = "zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181";

    /** Second argument of {@code ProxyFactory.init}. */
    private static final String GROUP = "local";

    /** Numeric id passed as the last argument of {@code ProxyFactory.init}. */
    private static final long APP_ID = 10000002L;

    /**
     * Fetches the rows and discards them; enable the dump below to inspect the output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ProxyFactory.init(REGISTRY, GROUP, GroupType.PROVIDER, APP_ID);
        List<Map<String, Object>> historyRows = MyZaker.getHistoryData("13584", 0L);
        // Uncomment to print every returned row:
        // historyRows.forEach(System.out::println);
    }
}
//package com.zhiwei.hsitory;
//
//import java.util.List;
//import java.util.Map;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.parse.BTime;
//import com.zhiwei.parse.MyZaker;
//
//public class ZakerHistoryExample {
//
//
// public static void main(String[] args) {
// ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);
// List<Map<String, Object>> dataList = MyZaker.getHistoryData("13584", 0L);
//// dataList.forEach(System.out::println);
// }
//
//}
package com.zhiwei.hsitory;
import java.util.List;
import java.util.Map;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.parse.Zhihu;
/**
 * Manual runner that initialises the proxy factory and calls
 * {@code Zhihu.getArticleHistory} once with hard-coded arguments.
 */
public class ZhihuArticleHistoryExample {

    /** Registry address handed to {@code ProxyFactory.init}. */
    private static final String REGISTRY = "zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181";

    /** Second argument of {@code ProxyFactory.init}. */
    private static final String GROUP = "local";

    /** Numeric id passed as the last argument of {@code ProxyFactory.init}. */
    private static final long APP_ID = 10000002L;

    /**
     * Fetches the rows and discards them; enable the dump below to inspect the output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ProxyFactory.init(REGISTRY, GROUP, GroupType.PROVIDER, APP_ID);
        List<Map<String, Object>> articleRows = Zhihu.getArticleHistory("da-bai-xin-wen-27", 0L);
        // Uncomment to print every returned row:
        // articleRows.forEach(System.out::println);
    }
}
//package com.zhiwei.hsitory;
//
//import java.util.List;
//import java.util.Map;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.parse.Zhihu;
//
//public class ZhihuArticleHistoryExample {
//
// public static void main(String[] args) {
// ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);
// List<Map<String, Object>> dataList = Zhihu.getArticleHistory("da-bai-xin-wen-27", 0L);
//// dataList.forEach(System.out::println);
// }
//
//}
package com.zhiwei.shipin;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.junit.Test;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.BiliBili;
import com.zhiwei.util.WordReadFile;
/**
 * Manual keyword-collection run against {@code BiliBili.getData}: reads keywords from a
 * local text file, queries each one, and exports the merged rows to an Excel workbook.
 *
 * <p>NOTE(review): depends on local Windows paths and the internal ZooKeeper registry,
 * so it is presumably meant to be run by hand rather than in CI - confirm.
 */
public class BilibiliTest {

    /**
     * Value for the {@code type} argument of {@code BiliBili.getData}, which is spliced into
     * the search URL as {@code order=<type>}. "click" is the value the URL used to hard-code.
     */
    private static final String ORDER_TYPE = "click";

    /** Collects rows for every keyword and writes them to the export workbook. */
    @Test
    public void f() {
        ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER, 10000002L);
        List<String> wordList = WordReadFile.getWords("E://crawlerdata//关键词.txt");
        List<Map<String, Object>> bodyList = new ArrayList<>();
        String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274";
        for (String word : wordList) {
            // getData takes the sort order as a fifth argument; the proxy argument stays null as before.
            List<Map<String, Object>> dataList =
                    BiliBili.getData(word, null, "2001-01-14 00:00:00", cookie, ORDER_TYPE);
            if (dataList != null) {
                System.out.println(word + " ----- " + dataList.size());
                bodyList.addAll(dataList);
            }
        }

        // Column keys handed to the exporter, in output order.
        List<String> headlist = new ArrayList<>();
        headlist.add("submitcount");
        headlist.add("playcount");
        headlist.add("time");
        headlist.add("source");
        headlist.add("title");
        headlist.add("url");
        headlist.add("word");

        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        poi.exportExcel("E://crawlerdata//视频//bilibili关键词采集数据-dnf手游-点击-20200204.xlsx", "B站数据", headlist, bodyList);
    }
}
//package com.zhiwei.shipin;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//
//import org.junit.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.BiliBili;
//import com.zhiwei.util.WordReadFile;
//
//public class BilibiliTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);
// List<String> wordList = WordReadFile.getWords("E://crawlerdata//关键词.txt");
// List<Map<String, Object>> bodyList = new ArrayList<>();
// String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274";
// for (String word : wordList) {
//// pubdate 时间 totalrank 综合
// List<Map<String, Object>> dataList = BiliBili.getData(word, null, "2001-01-14 00:00:00",
// cookie, "pubdate");
// if (dataList != null) {
// System.out.println(word + " ----- " + dataList.size());
// bodyList.addAll(dataList);
// }
// }
// List<String> headlist = new ArrayList<>();
// headlist.add("submitcount");
// headlist.add("playcount");
// headlist.add("time");
// headlist.add("source");
// headlist.add("title");
// headlist.add("url");
// headlist.add("word");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("E://crawlerdata//视频//bilibili关键词采集数据-腾讯会议-time-20200218.xlsx", "B站数据", headlist, bodyList);
//
// }
//}
......@@ -17,9 +17,9 @@
//public class QQTVTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",GroupType.PROVIDER, 10000002);
// ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local",GroupType.PROVIDER, 10000002);
// String time = "2019-01-11 00:00:00";
// List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
// List<String> wordList = WordReadFile.getWords("E://crawlerdata//关键词.txt");
// List<Map<String, Object>> bodyList = new ArrayList<>();
// for (String word : wordList) {
// List<Map<String, Object>> dataList = QQTV.getData(word,time, ProxyHolder.NAT_HEAVY_PROXY);
......@@ -37,7 +37,7 @@
// headlist.add("url");
// headlist.add("word");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("D://crawlerdata//视频//腾讯视频关键词采集数据-精装修.xlsx", "腾讯视频数据", headlist, bodyList);
// poi.exportExcel("E://crawlerdata//视频//腾讯视频关键词采集数据-精装修.xlsx", "腾讯视频数据", headlist, bodyList);
//
//
//
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment