Commit 831957e0 by cwy

本地 修改提交

parent ef206a93
...@@ -27,7 +27,7 @@ public class BiliBili { ...@@ -27,7 +27,7 @@ public class BiliBili {
List<Map<String,Object>> bodyList = new ArrayList<>(); List<Map<String,Object>> bodyList = new ArrayList<>();
try { try {
// //
String url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&single_column=1&order=pubdate&duration=0&tids_1=0"; String url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&single_column=1&order=click&duration=0&tids_1=0";
System.out.println(url); System.out.println(url);
Headers header = Headers.of("cookie",cookie,"Referer","https://www.bilibili.com/","Host","search.bilibili.com"); Headers header = Headers.of("cookie",cookie,"Referer","https://www.bilibili.com/","Host","search.bilibili.com");
String result = HttpClient.executeHttpRequestGet(url, ProxyHolder.NAT_HEAVY_PROXY, header); String result = HttpClient.executeHttpRequestGet(url, ProxyHolder.NAT_HEAVY_PROXY, header);
......
...@@ -80,6 +80,7 @@ public class Chejia { ...@@ -80,6 +80,7 @@ public class Chejia {
map.put("like", data.get("RUp")); map.put("like", data.get("RUp"));
map.put("id", data.getString("ReplyId")); map.put("id", data.getString("ReplyId"));
bodyList.add(map); bodyList.add(map);
System.out.println(map.toString());
} }
int total = json.getInteger("commentcount"); int total = json.getInteger("commentcount");
logger.info(" 一共采集 了 {} 条 采集到 {} 页 一共有 {} 条",bodyList.size(),page,total); logger.info(" 一共采集 了 {} 条 采集到 {} 页 一共有 {} 条",bodyList.size(),page,total);
......
...@@ -205,6 +205,7 @@ public class Maimai { ...@@ -205,6 +205,7 @@ public class Maimai {
map.put("title", data.getString("text")); map.put("title", data.getString("text"));
map.put("author", data.getString("author")); map.put("author", data.getString("author"));
map.put("userId", data.getString("mmid")); map.put("userId", data.getString("mmid"));
map.put("egid", data.getString("egid"));
return map; return map;
} catch (Exception e) { } catch (Exception e) {
logger.error(" 脉脉 转评攒 获取失败 {}",e); logger.error(" 脉脉 转评攒 获取失败 {}",e);
...@@ -229,11 +230,12 @@ public class Maimai { ...@@ -229,11 +230,12 @@ public class Maimai {
} }
if(mmid!=null) { if(mmid!=null) {
String gid = String.valueOf(mmid.get("gid")); String gid = String.valueOf(mmid.get("gid"));
String egid = String.valueOf(mmid.get("egid"));
boolean more = true; boolean more = true;
int page = 0; int page = 0;
while(more) { while(more) {
try { try {
String link = "https://maimai.cn/sdk/web/gossip/getcmts?gid="+gid+"&page="+page+"&count=50&hotcmts_limit_count=100"; String link = "https://maimai.cn/sdk/web/gossip/getcmts?gid="+gid+"&page="+page+"&count=50&hotcmts_limit_count=100&egid=" + egid;
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(link,headers),proxy).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(link,headers),proxy).body().string();
if(Objects.nonNull(cookie) && !cookie.isEmpty()) { if(Objects.nonNull(cookie) && !cookie.isEmpty()) {
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
...@@ -247,6 +249,7 @@ public class Maimai { ...@@ -247,6 +249,7 @@ public class Maimai {
Map<String,Object> dataMap = toJavaObject(json, Map.class); Map<String,Object> dataMap = toJavaObject(json, Map.class);
dataMap.put("fromUrl", url); dataMap.put("fromUrl", url);
dataMap.putAll(mmid); dataMap.putAll(mmid);
System.out.println(dataMap.toString());
dataList.add(dataMap); dataList.add(dataMap);
} }
page++; page++;
......
...@@ -29,6 +29,7 @@ public class QicheHome { ...@@ -29,6 +29,7 @@ public class QicheHome {
while(true) { while(true) {
try { try {
String url = "https://reply.autohome.com.cn/api/comments/show.json?count=50&page="+page+"&id="+articleid+"&appid=1&datatype=json&order=0&replyid=0"; String url = "https://reply.autohome.com.cn/api/comments/show.json?count=50&page="+page+"&id="+articleid+"&appid=1&datatype=json&order=0&replyid=0";
System.out.println(url);
String result = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy).body().string(); String result = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy).body().string();
if(page == 1) { if(page == 1) {
count = qicheHomeKwyWordAnalysis.getCount(result); count = qicheHomeKwyWordAnalysis.getCount(result);
......
...@@ -28,6 +28,7 @@ public class QicheHomeKwyWordAnalysis { ...@@ -28,6 +28,7 @@ public class QicheHomeKwyWordAnalysis {
map.put("time", TimeParse.dateFormartString(TimeParse.stringFormartDate(time.split("Date\\(")[1].split("\\+")[0]),"yyyy-MM-dd HH:mm:ss")); map.put("time", TimeParse.dateFormartString(TimeParse.stringFormartDate(time.split("Date\\(")[1].split("\\+")[0]),"yyyy-MM-dd HH:mm:ss"));
map.put("like", data.getInteger("RUp")); map.put("like", data.getInteger("RUp"));
map.put("id", data.getString("RMemberId")); map.put("id", data.getString("RMemberId"));
System.out.println(map.toString());
bodyList.add(map); bodyList.add(map);
} }
return bodyList; return bodyList;
......
...@@ -39,6 +39,7 @@ public class TechTxCommentAnalysis { ...@@ -39,6 +39,7 @@ public class TechTxCommentAnalysis {
if(json.getBoolean("hasnext")) { if(json.getBoolean("hasnext")) {
next = data.getString("id"); next = data.getString("id");
} }
System.out.println(map.toString());
} }
rMap.put("data", dataList); rMap.put("data", dataList);
rMap.put("next", next); rMap.put("next", next);
......
...@@ -18,13 +18,13 @@ ...@@ -18,13 +18,13 @@
// String path = "C:\\Users\\byte-zbs\\Documents\\WXWork\\1688854025129101\\Cache\\File\\2019-03\\爱奇艺.xlsx"; // String path = "C:\\Users\\byte-zbs\\Documents\\WXWork\\1688854025129101\\Cache\\File\\2019-03\\爱奇艺.xlsx";
// Map<String,Object> map = poi.importExcel(path, 0); // Map<String,Object> map = poi.importExcel(path, 0);
// List<Map<String,Object>> dataList = (List<Map<String, Object>>) map.get("body"); // List<Map<String,Object>> dataList = (List<Map<String, Object>>) map.get("body");
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER); // ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER, 100000001);
// List<String> headList = (List<String>) map.get("head"); // List<String> headList = (List<String>) map.get("head");
// headList.add("count"); // headList.add("count");
// dataList.forEach(m -> { // dataList.forEach(m -> {
// String url = String.valueOf(m.get("链接")); // String url = String.valueOf(m.get("链接"));
// //
// int i = Aiqiyi.aiqiyiHotCount(url, ProxyHolder.NAT_PROXY); // int i = Aiqiyi.aiqiyiHotCount(url, ProxyHolder.NAT_HEAVY_PROXY);
// System.out.println(url + " -- " + i); // System.out.println(url + " -- " + i);
// m.put("count", i); // m.put("count", i);
// }); // });
......
package com.zhiwei.Comment; //package com.zhiwei.Comment;
//
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import org.junit.Test; //import org.junit.Test;
//
import com.zhiwei.common.config.GroupType; //import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory; //import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder; //import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Maimai; //import com.zhiwei.parse.Maimai;
import com.zhiwei.tools.tools.ZhiWeiTools; //import com.zhiwei.tools.tools.ZhiWeiTools;
//
/**
 * Ad-hoc JUnit test that fetches the comment list for one hard-coded Maimai
 * gossip URL and prints it.
 *
 * <p>The single test method initialises {@code ProxyFactory}, calls
 * {@code Maimai.getMaimaiCommentList} with a {@code null} cookie and
 * {@code ProxyHolder.NAT_HEAVY_PROXY}, then prints the returned list and the
 * URL. It makes no assertions.
 *
 * <p>NOTE(review): the trailing {@code //} text on each line repeats the same
 * statement in commented-out form. This looks like the second column of a
 * side-by-side diff rather than hand-written comments; confirm before relying
 * on either column.
 */
public class MaimaiCommentCountTest { //public class MaimaiCommentCountTest {
@Test // @Test
public void f() { // public void f() {
// Proxy layer is initialised first, before the crawl call below.
ProxyFactory.init("zookeeper://192.168.0.11:2181", "local", // ProxyFactory.init("zookeeper://192.168.0.11:2181", "local",
GroupType.PROVIDER, 10000001); // GroupType.PROVIDER, 10000001);
// PoiExcelUtil poi = PoiExcelUtil.getInstance(); //// PoiExcelUtil poi = PoiExcelUtil.getInstance();
//
// Map<String, Object> map = poi //// Map<String, Object> map = poi
// .importExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx", 0); //// .importExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx", 0);
// List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body"); //// List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body");
// List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>(); //// List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>();
// List<String> headList = (List<String>) map.get("head"); //// List<String> headList = (List<String>) map.get("head");
// for (Map<String, Object> map1 : list) { //// for (Map<String, Object> map1 : list) {
// String url = map1.get("地址") + ""; //// String url = map1.get("地址") + "";
// The cookie is explicitly null for this run.
String cookie = null; // String cookie = null;
// Single hard-coded target; the Excel-driven loop that used to supply URLs is commented out above.
String url = "https://maimai.cn/web/gossip_detail?src=app&webid=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJlZ2lkIjoiZmY4YTMyMTA0M2M3MTFlYThmZmUyNDZlOTZiNDgwODgiLCJ1IjoxOTY2MDEyMTksImlkIjoyNDk1MTU0OH0.GxFOiRpMB3q3KIL-5QpUC6L-TM_qQwOLS7hEgLy2rxE"; // String url = "https://maimai.cn/web/gossip_detail?src=app&webid=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJlZ2lkIjoiZmY4YTMyMTA0M2M3MTFlYThmZmUyNDZlOTZiNDgwODgiLCJ1IjoxOTY2MDEyMTksImlkIjoyNDk1MTU0OH0.GxFOiRpMB3q3KIL-5QpUC6L-TM_qQwOLS7hEgLy2rxE";
List<Map<String, Object>> map3 = Maimai.getMaimaiCommentList(url, cookie, ProxyHolder.NAT_HEAVY_PROXY); // List<Map<String, Object>> map3 = Maimai.getMaimaiCommentList(url, cookie, ProxyHolder.NAT_HEAVY_PROXY);
// Output is inspected manually on stdout; nothing is asserted.
System.out.println(map3.toString()); // System.out.println(map3.toString());
System.out.println(url); // System.out.println(url);
// map1.putAll(map3); //// map1.putAll(map3);
// ZhiWeiTools.sleep(500); //// ZhiWeiTools.sleep(500);
// System.out.println("--------------------------"); //// System.out.println("--------------------------");
//// }
//// headList.add("like");
//// headList.add("spreads");
//// headList.add("cmts");
//// poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx", "评论采集", headList,
//// list);
// } // }
// headList.add("like"); //}
// headList.add("spreads");
// headList.add("cmts");
// poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx", "评论采集", headList,
// list);
}
}
package com.zhiwei;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.parse.TechTx;
/**
 * Manual entry point that initialises the crawler proxy layer and issues a
 * single {@code TechTx.getTechTxComment} call.
 *
 * <p>This is a hand-run driver, not an automated test: nothing is asserted and
 * any result of the call is not inspected here.
 */
public class CrawlerTest {
    /**
     * Initialises {@code ProxyFactory} and runs one comment fetch.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // The proxy layer is initialised before the crawl call.
        ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);
        // NOTE(review): named "url", but the value looks like an identifier rather than a URL - confirm
        // against TechTx.getTechTxComment's first parameter.
        String url = "E2S95LEA0008856R";
        // No cookie is supplied. Keep session cookies out of source control; if one is ever needed,
        // load it from the environment or local configuration instead of hard-coding it.
        TechTx.getTechTxComment(url, null);
    }
}
//package com.zhiwei.shipin; package com.zhiwei.shipin;
//
//import java.util.ArrayList; import java.util.ArrayList;
//import java.util.List; import java.util.List;
//import java.util.Map; import java.util.Map;
//
//import org.junit.Test; import org.junit.Test;
//
//import com.zhiwei.common.config.GroupType; import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.BiliBili; import com.zhiwei.parse.BiliBili;
//import com.zhiwei.util.WordReadFile; import com.zhiwei.util.WordReadFile;
//
//public class BilibiliTest { public class BilibiliTest {
// @Test @Test
// public void f() { public void f() {
// ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L); ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);
// List<String> wordList = WordReadFile.getWords("E://crawlerdata//关键词.txt"); List<String> wordList = WordReadFile.getWords("E://crawlerdata//关键词.txt");
// List<Map<String, Object>> bodyList = new ArrayList<>(); List<Map<String, Object>> bodyList = new ArrayList<>();
// String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274"; String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274";
// for (String word : wordList) { for (String word : wordList) {
// List<Map<String, Object>> dataList = BiliBili.getData(word, null, "2001-01-14 00:00:00", List<Map<String, Object>> dataList = BiliBili.getData(word, null, "2001-01-14 00:00:00",
// cookie); cookie);
// if (dataList != null) { if (dataList != null) {
// System.out.println(word + " ----- " + dataList.size()); System.out.println(word + " ----- " + dataList.size());
// bodyList.addAll(dataList); bodyList.addAll(dataList);
// } }
// } }
// List<String> headlist = new ArrayList<>(); List<String> headlist = new ArrayList<>();
// headlist.add("submitcount"); headlist.add("submitcount");
// headlist.add("playcount"); headlist.add("playcount");
// headlist.add("time"); headlist.add("time");
// headlist.add("source"); headlist.add("source");
// headlist.add("title"); headlist.add("title");
// headlist.add("url"); headlist.add("url");
// headlist.add("word"); headlist.add("word");
// PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("E://crawlerdata//视频//bilibili关键词采集数据-发布.xlsx", "B站数据", headlist, bodyList); poi.exportExcel("E://crawlerdata//视频//bilibili关键词采集数据-dnf手游-点击-20200204.xlsx", "B站数据", headlist, bodyList);
//
// } }
//} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment