Commit 17c44df3 by yangchen

凤凰采集修改

parent c514570f
......@@ -119,7 +119,7 @@ public class HeadGet {
public static Map<String,String> getFenghuangAccountHeaderMap(String cookie) {
Map<String,String> headerMap = new HashMap<String, String>();
headerMap.put("User-Agent",
"%E5%87%A4%E5%87%B0%E6%96%B0%E9%97%BB/5.7.4.0 CFNetwork/811.5.4 Darwin/16.7.0");
"IfengNews/6.1.8 (iPhone; iOS 11.2.1; Scale/2.00)");
headerMap.put("Accept",
"*/*");
headerMap.put("Accept-Language", "zh-cn");
......@@ -878,10 +878,10 @@ public class HeadGet {
}
public static void main(String[] args) throws UnsupportedEncodingException {
String url = "https://aweme.snssdk.com/aweme/v1/comment/list/?iid=36454376501&device_id=47835193298&os_api=18&app_name=aweme&channel=App%20Store&idfa=FE659B7E-5104-44C2-8A31-F88DEE7A2747&device_platform=iphone&build_number=19007&vid=E66B8A7B-F8E3-4ED2-BA42-D6D8EFAD0A3F&openudid=fa9701d8e1a8892e777693ba287551c226006542&device_type=iPhone8,1&app_version=1.9.0&version_code=1.9.0&os_version=11.2.1&screen_width=750&aid=1128&ac=WIFI&aweme_id=6570655003491437837&comment_style=2&count=20&cursor=0&digged_cid=&mas=006ecc6faa02e1374a12bc4c9a0368283d6a1f2412878507cf4eb8";
String url = "http://api.3g.ifeng.com/api_wemedia_index?followid=weMedia_6452&page=1&pagesize=20&tag=article&uid=fe659b7e510444c28a31f88dee7a2747";
System.out.println(url);
String cookie = "__utma=51854390.454838676.1510118174.1528502507.1529542643.327; __utmb=51854390.0.10.1529542643; __utmc=51854390; __utmv=51854390.010--; __utmz=51854390.1510118174.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _zap=4e09de9f-d212-48b9-af42-9173e1092406; d_c0=ACACkXJlZQxLBTcU1Z70bp9TpD_qDHF6sAY=|1529542618; q_c0=2|1:0|10:1528360082|4:q_c0|80:MS4xX0RmNkJRQUFBQUFMQUFBQVlBSlZUWkY1UUZzOXFnWVlQOWRXTVkxNG5kcl80WjJkdzhiTDN3PT0=|c6f0a98f0d30bdfda124c276e9e88ce945dcf50103663cedeab1be9415a42101; q_c1=2d80752a333f4fcd99b1362dfce3e7eb|1529542724000|1506556052000; z_c0=2|1:0|10:1528360082|4:z_c0|80:MS4xX0RmNkJRQUFBQUFMQUFBQVlBSlZUWkY1UUZzOXFnWVlQOWRXTVkxNG5kcl80WjJkdzhiTDN3PT0=|021aae122258a6476003fd206423140b8c2ee162e406b628051471c64021b211; zst_82=1.0ANCm7PYOyA0LAAAASwUAADEuMDv9KlsAAAAAYiqmK0gPDsX0FVknWJlXcKc07NA=; __DAYU_PP=VFZF3fmJavniAMQf2fnf23a6dd5221ec; q_c1=2d80752a333f4fcd99b1362dfce3e7eb|1529542724000|1506556052000";
Map<String,String> headerMap = HeadGet.getDouyinHotHeaderMap(null);
Map<String,String> headerMap = HeadGet.getFenghuangAccountHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url,null, headerMap);
System.out.println(result);
System.out.println(result.length());
......
......@@ -35,9 +35,10 @@ public class Fenghuang {
boolean f = true;
while(f){
try {
for(int j = 0;j< 4;j++){
for(int j = 0;j< 3;j++){
f = true;
String url = "http://api.3g.ifeng.com/api_wemedia_index?followid=weMedia_"+id+"&page="+i+"&pagesize=20&tag=article";
String url = "http://api.3g.ifeng.com/api_wemedia_index?followid=weMedia_"+id+"&page="+i+"&pagesize=20&tag=article&uid=fe659b7e510444c28a31f88dee7a2747";
System.out.println(url);
List<Map<String,Object>> list = fenghuangAccountAnalysis.getArticleData(url, startTime,proxy);
if(list != null && list.size() > 0) {
dataList.addAll(list);
......
......@@ -11,10 +11,10 @@ import com.zhiwei.parse.Baijia;
public class BaijiaAccountExample {
@Test
// @Test
public void baijiaAccountTest() {
String app_id = "1536766390576806";
String startTime = "2016-01-01 00:00:00";
String app_id = "1536766276004443";
String startTime = "2015-01-01 00:00:00";
//2017-11-30 17:48:17
List<Map<String,Object>> lists = Baijia.getBaijiaAccountData(app_id,startTime,null);
PoiExcelUtil poi = PoiExcelUtil.getInstance();
......@@ -26,13 +26,13 @@ public class BaijiaAccountExample {
headList.add("source");
headList.add("url");
headList.add("content");
poi.exportExcel("D://crawlerdata/百家号-太保.xlsx", "太保", headList, lists);
poi.exportExcel("D://crawlerdata/百家号-马继华.xlsx", "马继华", headList, lists);
}
// @Test
@Test
public void baijiaAccount2Test() {
String app_id = "b_1536766390576806";
String startTime = "2016-01-01 00:00:00";
String app_id = "b_1536766622381605";
String startTime = "2017-01-01 00:00:00";
//2017-11-30 17:48:17
List<Map<String,Object>> lists = Baijia.getBaijiaAccount2Data(app_id,startTime,null);
PoiExcelUtil poi = PoiExcelUtil.getInstance();
......@@ -42,7 +42,7 @@ public class BaijiaAccountExample {
headList.add("source");
headList.add("url");
headList.add("content");
poi.exportExcel("D://crawlerdata/百家号-俊世太保.xlsx", "俊世太保", headList, lists);
poi.exportExcel("D://crawlerdata//自媒体/百家号-科学的fan.xlsx", "科学的fan", headList, lists);
}
}
......@@ -19,7 +19,7 @@ public class DayuAccountExample {
// String mid = "d7300311c1504d24a229c3da345785c6";
// String name = "大鱼海棠雨";
String startTime = "2018-03-16 00:00:00";
String startTime = "2017-01-01 00:00:00";
String path = "D:\\crawlerdata\\自媒体\\大鱼历史文章.xlsx";
PoiExcelUtil poi = PoiExcelUtil.getInstance();
Map<String,Object> map = poi.importExcel(path, 0);
......
......@@ -18,7 +18,7 @@ public class DayuCommentExample {
//14180961224021425316 这个为此参数
PoiExcelUtil poi = PoiExcelUtil.getInstance();
Map<String,Object> map = poi.importExcel("D://crawlerdata/UC评论采集-1.xlsx", 0);
Map<String,Object> map = poi.importExcel("D://crawlerdata//自媒体//UC评论采集-1.xlsx", 0);
List<Map<String,Object>> list = (List<Map<String,Object>>)map.get("body");
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
List<String> urlList = new ArrayList<String>();
......@@ -27,6 +27,7 @@ public class DayuCommentExample {
try {
url = map1.get("url")+"";
String articleId = "";
url = "16848608935470442496";
if(url.contains("aid")) {
articleId = url.split("aid=")[1].split("&")[0];
}else {
......
......@@ -14,12 +14,12 @@ public class FenghuangAccountExample {
@Test
public void fenghuangAccountTest() {
//所用时间长 1s1篇文章吧
//https://api.3g.ifeng.com/client_search_subscribe?k=(凤凰号名称拿id)
String id = "724";
//https://api.3g.ifeng.com/client_search_subscribe?k=号外财经
String id = "6452";
String[] ids = id.split(",");
for(int i = 0;i < ids.length;i++) {
try {
String startTime = "2017-01-01 00:00:00"; //可为空
String startTime = "2010-05-01 00:00:00"; //可为空
List<Map<String,Object>> dataList = Fenghuang.getFenghuangAccountData(ids[i], startTime,null);
PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<String> headList = new ArrayList<String>();
......@@ -29,7 +29,7 @@ public class FenghuangAccountExample {
headList.add("source");
headList.add("url");
headList.add("id");
poi.exportExcel("D://crawlerdata/凤凰-电商报.xlsx", ids[i], headList, dataList);
poi.exportExcel("D://crawlerdata/凤凰-6452.xlsx", ids[i], headList, dataList);
} catch (Exception e) {
continue;
}
......
......@@ -16,7 +16,7 @@ public class SouhuAccountExample {
@Test
public void souhuAccountTest() {
List<Map<String,Object>> lists = Souhu.getSouHuAccountData("MjI5MzAyOTMyMEBzaW5hLnNvaHUuY29t","2018-01-01 00:00:00",false,null);
List<Map<String,Object>> lists = Souhu.getSouHuAccountData("b1NlSFRzM1JaeURMR252VFY0VGpjVkJGckpyWUB3ZWNoYXQuc29odS5jb20=","2015-01-01 00:00:00",false,null);
System.out.println(lists.size());
List<String> headList = new ArrayList<String>();
headList.add("title");
......@@ -29,7 +29,7 @@ public class SouhuAccountExample {
headList.add("source");
headList.add("newsPv");
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D:\\crawlerdata\\搜狐号历史文章-太保乱谈.xlsx", "太保乱谈", headList, lists);
poi.exportExcel("D:\\crawlerdata\\搜狐号历史文章-乔.xlsx", "乔", headList, lists);
}
}
......@@ -16,8 +16,8 @@ public class WangyiCommentExample {
@Test
public void wangyiCommentTest() {
PoiExcelUtil poi = PoiExcelUtil.getInstance();
Map<String,Object> map = poi.importExcel("D:\\crawlerdata\\自媒体\\网易评论采集.xlsx", 0);
String path = "D:\\crawlerdata\\自媒体\\网易评论采集-1.xlsx";
Map<String,Object> map = poi.importExcel(path, 0);
List<Map<String,Object>> list = (List<Map<String, Object>>) map.get("body");
List<String> urlList = new ArrayList<String>();
......@@ -49,7 +49,7 @@ public class WangyiCommentExample {
headList.add("unlike");
headList.add("from_url");
poi.exportExcel("D:\\crawlerdata\\自媒体\\网易评论采集.xlsx", "评论数据", headList, bodyList);
poi.exportExcel(path, "评论数据", headList, bodyList);
}
......
package com.zhiwei.crawler;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Wangyi;
/**
 * Manual runner that pulls the article history for one Wangyi (dy.163.com) account,
 * starting from a sample article URL, and exports the rows to an Excel workbook.
 *
 * <p>All inputs (article URL, start time, output path, sheet name) are hard-coded;
 * edit them before running.
 */
public class WangyiHistoryExample {

    /**
     * Fetches the history data and writes it to a spreadsheet on the local disk.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Only one declaration of url is kept: the earlier rendering declared it twice
        // (a compile error) and also issued a fetch whose result was thrown away.
        String url = "http://dy.163.com/v2/article/detail/DJK1G41H0519A6FP.html";

        // NOTE(review): the second argument is passed as null and the third looks like a
        // start-time cutoff; confirm both against the Wangyi.getHistoryData signature.
        List<Map<String,Object>> list = Wangyi.getHistoryData(url, null, "2017-01-01 00:00:00");

        // Keys handed to the exporter together with the rows; presumably they select
        // and order the exported columns (verify against PoiExcelUtil.exportExcel).
        List<String> headList = new ArrayList<String>();
        headList.add("title");
        headList.add("time");
        headList.add("content");
        headList.add("source");
        headList.add("url");

        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        poi.exportExcel("D://crawlerdata//自媒体/网易-号外财经-1.xlsx", "asdasd", headList, list);
    }
}
......@@ -14,9 +14,10 @@ public class YidianzixunAccountExample {
@Test
public void yidianzixunAccountTest() {
String channelid = "m143901";
String channelid = "m133695";
String startTime = "2017-01-01 00:00:00";
List<Map<String,Object>> dataList = Yidianzixun.getYidianzixunAccountData(channelid, startTime,null);
String cookie = "wuid=257912989774746; wuid_createAt=2018-04-21 12:26:54; UM_distinctid=162e674783dc4e-030ed894a4953b-4446042d-1fa400-162e674783e34a; JSESSIONID=8ee0cee7a49e812492917a669074974b9a004e7b28ed41bc99e96793df734961; weather_auth=2; Hm_lvt_15fafbae2b9b11d280c79eff3b840e45=1527148836,1527213305,1527752112; CNZZDATA1255169715=542587606-1524284730-null%7C1527749514; sptoken=Ug%3B99%3C3%3FU%3AU%3B%3AU48261efeced332cc9f20413132c69381bcc921bb210c93b90058b318eec23117; captcha=s%3A7c9d6bca395d270e3a4774968531f470.e1IzHNmf94UVpZlGYHYmDUnUk6sA1s7sPYj7RA932lo; Hm_lpvt_15fafbae2b9b11d280c79eff3b840e45=1527752125; cn_1255169715_dplus=%7B%22distinct_id%22%3A%20%22162e674783dc4e-030ed894a4953b-4446042d-1fa400-162e674783e34a%22%2C%22sp%22%3A%20%7B%22%24_sessionid%22%3A%200%2C%22%24_sessionTime%22%3A%201527752148%2C%22%24dp%22%3A%200%2C%22%24_sessionPVTime%22%3A%201527752148%7D%7D";
List<Map<String,Object>> dataList = Yidianzixun.getYidianzixunAccountData(channelid, startTime,null,cookie);
PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<String> headList = new ArrayList<String>();
headList.add("title");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment