Commit 831957e0 by cwy

本地 修改提交

parent ef206a93
......@@ -27,7 +27,7 @@ public class BiliBili {
List<Map<String,Object>> bodyList = new ArrayList<>();
try {
//
String url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&single_column=1&order=pubdate&duration=0&tids_1=0";
String url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&single_column=1&order=click&duration=0&tids_1=0";
System.out.println(url);
Headers header = Headers.of("cookie",cookie,"Referer","https://www.bilibili.com/","Host","search.bilibili.com");
String result = HttpClient.executeHttpRequestGet(url, ProxyHolder.NAT_HEAVY_PROXY, header);
......
......@@ -80,6 +80,7 @@ public class Chejia {
map.put("like", data.get("RUp"));
map.put("id", data.getString("ReplyId"));
bodyList.add(map);
System.out.println(map.toString());
}
int total = json.getInteger("commentcount");
logger.info(" 一共采集 了 {} 条 采集到 {} 页 一共有 {} 条",bodyList.size(),page,total);
......
......@@ -205,6 +205,7 @@ public class Maimai {
map.put("title", data.getString("text"));
map.put("author", data.getString("author"));
map.put("userId", data.getString("mmid"));
map.put("egid", data.getString("egid"));
return map;
} catch (Exception e) {
logger.error(" 脉脉 转评攒 获取失败 {}",e);
......@@ -229,11 +230,12 @@ public class Maimai {
}
if(mmid!=null) {
String gid = String.valueOf(mmid.get("gid"));
String egid = String.valueOf(mmid.get("egid"));
boolean more = true;
int page = 0;
while(more) {
try {
String link = "https://maimai.cn/sdk/web/gossip/getcmts?gid="+gid+"&page="+page+"&count=50&hotcmts_limit_count=100";
String link = "https://maimai.cn/sdk/web/gossip/getcmts?gid="+gid+"&page="+page+"&count=50&hotcmts_limit_count=100&egid=" + egid;
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(link,headers),proxy).body().string();
if(Objects.nonNull(cookie) && !cookie.isEmpty()) {
ZhiWeiTools.sleep(2000);
......@@ -247,6 +249,7 @@ public class Maimai {
Map<String,Object> dataMap = toJavaObject(json, Map.class);
dataMap.put("fromUrl", url);
dataMap.putAll(mmid);
System.out.println(dataMap.toString());
dataList.add(dataMap);
}
page++;
......
......@@ -29,6 +29,7 @@ public class QicheHome {
while(true) {
try {
String url = "https://reply.autohome.com.cn/api/comments/show.json?count=50&page="+page+"&id="+articleid+"&appid=1&datatype=json&order=0&replyid=0";
System.out.println(url);
String result = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy).body().string();
if(page == 1) {
count = qicheHomeKwyWordAnalysis.getCount(result);
......
......@@ -28,6 +28,7 @@ public class QicheHomeKwyWordAnalysis {
map.put("time", TimeParse.dateFormartString(TimeParse.stringFormartDate(time.split("Date\\(")[1].split("\\+")[0]),"yyyy-MM-dd HH:mm:ss"));
map.put("like", data.getInteger("RUp"));
map.put("id", data.getString("RMemberId"));
System.out.println(map.toString());
bodyList.add(map);
}
return bodyList;
......
......@@ -39,6 +39,7 @@ public class TechTxCommentAnalysis {
if(json.getBoolean("hasnext")) {
next = data.getString("id");
}
System.out.println(map.toString());
}
rMap.put("data", dataList);
rMap.put("next", next);
......
......@@ -18,13 +18,13 @@
// String path = "C:\\Users\\byte-zbs\\Documents\\WXWork\\1688854025129101\\Cache\\File\\2019-03\\爱奇艺.xlsx";
// Map<String,Object> map = poi.importExcel(path, 0);
// List<Map<String,Object>> dataList = (List<Map<String, Object>>) map.get("body");
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER, 100000001);
// List<String> headList = (List<String>) map.get("head");
// headList.add("count");
// dataList.forEach(m -> {
// String url = String.valueOf(m.get("链接"));
//
// int i = Aiqiyi.aiqiyiHotCount(url, ProxyHolder.NAT_PROXY);
// int i = Aiqiyi.aiqiyiHotCount(url, ProxyHolder.NAT_HEAVY_PROXY);
// System.out.println(url + " -- " + i);
// m.put("count", i);
// });
......
package com.zhiwei.Comment;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.junit.Test;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Maimai;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Manual smoke test that fetches the comment list of a single Maimai gossip
 * page and dumps the result to standard output.
 */
public class MaimaiCommentCountTest {

    /**
     * Initialises the proxy factory, calls {@code Maimai.getMaimaiCommentList}
     * for one hard-coded gossip detail address with a {@code null} cookie and
     * {@code ProxyHolder.NAT_HEAVY_PROXY}, then prints the returned rows
     * followed by the address itself.
     *
     * <p>An earlier, commented-out variant of this test read the addresses
     * from an Excel sheet and exported the results through
     * {@code PoiExcelUtil}; only the single-address run is active.
     */
    @Test
    public void f() {
        ProxyFactory.init("zookeeper://192.168.0.11:2181", "local", GroupType.PROVIDER, 10000001);

        final String gossipUrl = "https://maimai.cn/web/gossip_detail?src=app&webid=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJlZ2lkIjoiZmY4YTMyMTA0M2M3MTFlYThmZmUyNDZlOTZiNDgwODgiLCJ1IjoxOTY2MDEyMTksImlkIjoyNDk1MTU0OH0.GxFOiRpMB3q3KIL-5QpUC6L-TM_qQwOLS7hEgLy2rxE";
        // No login cookie is supplied for this run.
        final String cookie = null;

        List<Map<String, Object>> comments =
                Maimai.getMaimaiCommentList(gossipUrl, cookie, ProxyHolder.NAT_HEAVY_PROXY);

        System.out.println(comments.toString());
        System.out.println(gossipUrl);
    }
}
//}
package com.zhiwei;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.parse.TechTx;
/**
 * Manual driver that triggers one {@code TechTx.getTechTxComment} call after
 * initialising the crawler proxy factory.
 */
public class CrawlerTest {

    /**
     * Initialises {@code ProxyFactory} against the ZooKeeper ensemble and
     * requests comments for a single hard-coded target.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);

        // NOTE(review): the value looks like a content id rather than a URL — confirm
        // against TechTx.getTechTxComment. The second argument is deliberately null.
        String targetId = "E2S95LEA0008856R";
        TechTx.getTechTxComment(targetId, null);
    }
}
//package com.zhiwei.shipin;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//
//import org.junit.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.BiliBili;
//import com.zhiwei.util.WordReadFile;
//
//public class BilibiliTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);
// List<String> wordList = WordReadFile.getWords("E://crawlerdata//关键词.txt");
// List<Map<String, Object>> bodyList = new ArrayList<>();
// String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274";
// for (String word : wordList) {
// List<Map<String, Object>> dataList = BiliBili.getData(word, null, "2001-01-14 00:00:00",
// cookie);
// if (dataList != null) {
// System.out.println(word + " ----- " + dataList.size());
// bodyList.addAll(dataList);
// }
// }
// List<String> headlist = new ArrayList<>();
// headlist.add("submitcount");
// headlist.add("playcount");
// headlist.add("time");
// headlist.add("source");
// headlist.add("title");
// headlist.add("url");
// headlist.add("word");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("E://crawlerdata//视频//bilibili关键词采集数据-发布.xlsx", "B站数据", headlist, bodyList);
//
// }
//}
package com.zhiwei.shipin;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.junit.Test;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.BiliBili;
import com.zhiwei.util.WordReadFile;
/**
 * Manual keyword-collection run against {@code BiliBili} that writes the
 * gathered rows to an Excel workbook.
 */
public class BilibiliTest {

    /**
     * Reads keywords from a local text file, calls {@code BiliBili.getData}
     * for each keyword, accumulates every non-null result and exports the
     * combined rows through {@code PoiExcelUtil}.
     */
    @Test
    public void f() {
        ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER , 10000002L);

        List<String> keywords = WordReadFile.getWords("E://crawlerdata//关键词.txt");
        List<Map<String, Object>> collected = new ArrayList<>();
        final String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274";

        for (String keyword : keywords) {
            List<Map<String, Object>> hits =
                    BiliBili.getData(keyword, null, "2001-01-14 00:00:00", cookie);
            // Keywords whose lookup returned null contribute nothing.
            if (hits == null) {
                continue;
            }
            System.out.println(keyword + " ----- " + hits.size());
            collected.addAll(hits);
        }

        // Column keys for the exported sheet, in output order.
        String[] columnKeys = {
            "submitcount", "playcount", "time", "source", "title", "url", "word"
        };
        List<String> columns = new ArrayList<>();
        for (String key : columnKeys) {
            columns.add(key);
        }

        PoiExcelUtil.getInstance()
                .exportExcel("E://crawlerdata//视频//bilibili关键词采集数据-dnf手游-点击-20200204.xlsx", "B站数据", columns, collected);
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment