Commit 7bf0e1d7 by yangchen

增加部分自媒体采集

parent f09aa1c9
......@@ -453,39 +453,145 @@ public class HeadGet {
return headerMap;
}
public static Map<String,String> getJikeComment39HeaderMap(String cookie) {
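/**
 * Builds the HTTP request headers used for Dayu keyword-search collection.
 *
 * @param cookie value for the Cookie header; the header is omitted when null
 * @return map of header names to header values
 */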
public static Map<String,String> getDayuByWordHeaderMap(String cookie) {
Map<String,String> headerMap = new HashMap<String, String>();
headerMap.put("Host", "app.jike.ruguoapp.com");
headerMap.put("Accept-Language", "zh-cn");
headerMap.put("Accept", "*/*");
headerMap.put("User-Agent", "%E5%8D%B3%E5%88%BB/989 CFNetwork/811.5.4 Darwin/16.7.0");
headerMap.put("App-BuildNo", "989");
headerMap.put("App-Version", "3.9.1");
headerMap.put("Content-Type", "application/json");
headerMap.put("Manufacturer", "Apple");
headerMap.put("Content-Length", "39");
headerMap.put("User-Agent",
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");
headerMap.put("Accept",
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
headerMap.put("Accept-Language", "zh-CN,zh;q=0.9");
headerMap.put("Connection", "keep-alive");
headerMap.put("OS-Version", "Version 10.3.3 (Build 14G60)");
headerMap.put("Host", "zzd.sm.cn");
if(cookie != null) {
headerMap.put("Cookie", cookie);
}
return headerMap;
}
public static Map<String,String> getJikeComment94HeaderMap(String cookie) {
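/**
 * Builds the HTTP request headers used for Tiantian Kuaibao comment collection.
 *
 * @param cookie value for the Cookie header; the header is omitted when null
 * @return map of header names to header values
 */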
public static Map<String,String> getQQKBCommentHeaderMap(String cookie) {
Map<String,String> headerMap = new HashMap<String, String>();
headerMap.put("Host", "app.jike.ruguoapp.com");
headerMap.put("Accept-Language", "zh-cn");
headerMap.put("Accept", "*/*");
headerMap.put("User-Agent", "%E5%8D%B3%E5%88%BB/989 CFNetwork/811.5.4 Darwin/16.7.0");
headerMap.put("App-BuildNo", "989");
headerMap.put("App-Version", "3.9.1");
// headerMap.put("Content-Type", "application/json");
headerMap.put("Manufacturer", "Apple");
headerMap.put("Content-Length", "94");
headerMap.put("User-Agent",
"天天快报 4.6.2 qnreading (iPhone8,1; iOS 11.2.1; zh_CN; 4.6.2.89)");
headerMap.put("Accept",
"*/*");
headerMap.put("Accept-Language", "zh-Hans-CN;q=1");
headerMap.put("Connection", "keep-alive");
headerMap.put("OS-Version", "Version 10.3.3 (Build 14G60)");
headerMap.put("Host", "r.cnews.qq.com");
headerMap.put("Referer", "http://r.cnews.qq.com/inews/iphone/");
if(cookie != null) {
headerMap.put("Cookie", cookie);
}
return headerMap;
}
/**
 * Builds the POST parameters for the first page of Tiantian Kuaibao comments.
 *
 * <p>Besides the two ids, the map carries three fixed entries:
 * "chlid" = "daily_timeline", "page" = 1 and "showType" = "orig".
 *
 * @param comment_id value stored under the "comment_id" key
 * @param article_id value stored under the "article_id" key
 * @return a newly created, mutable parameter map with five entries
 */
public static Map<String,Object> getQQKBCommentParamMap(String comment_id,String article_id){
    final Map<String,Object> firstPageParams = new HashMap<>();

    firstPageParams.put("chlid", "daily_timeline");
    firstPageParams.put("comment_id", comment_id);
    // The first page is always requested explicitly as page 1.
    firstPageParams.put("page", 1);
    firstPageParams.put("article_id", article_id);
    firstPageParams.put("showType", "orig");

    return firstPageParams;
}
/**
 * Builds the POST parameters for fetching a later page of Tiantian Kuaibao
 * comments.
 *
 * <p>Fixed entries: "chlid" = "daily_timeline", "showType" = "orig",
 * "sortType" = "hot" and "c_type" = "comment".
 *
 * @param comment_id   value stored under the "comment_id" key
 * @param page         page number stored under the "page" key
 * @param coral_scorem value stored under the "coral_score" key
 * @param article_id   value stored under the "article_id" key
 * @param reply_id     value stored under the "reply_id" key
 * @return a newly created, mutable parameter map with nine entries
 */
public static Map<String,Object> getQQKBCommentParamMap2(String comment_id,int page,String coral_scorem,String article_id,String reply_id){
    final Map<String,Object> nextPageParams = new HashMap<>();

    // Constant request options.
    nextPageParams.put("chlid", "daily_timeline");
    nextPageParams.put("showType", "orig");
    nextPageParams.put("sortType", "hot");

    // Caller-supplied paging cursor.
    nextPageParams.put("comment_id", comment_id);
    nextPageParams.put("page", page);
    nextPageParams.put("coral_score", coral_scorem);
    nextPageParams.put("article_id", article_id);
    nextPageParams.put("reply_id", reply_id);

    nextPageParams.put("c_type", "comment");
    return nextPageParams;
}
/**
 * Builds the POST parameters for fetching the next page of replies to a
 * Tiantian Kuaibao comment.
 *
 * <p>"pageflag" is always "old". Note that {@code reply_id} is stored under
 * the "orig_id" key, not under "reply_id".
 *
 * @param old_reply_id value stored under "old_reply_id"; the entry is left out when null
 * @param comment_id   value stored under the "comment_id" key
 * @param article_id   value stored under the "article_id" key
 * @param reply_id     value stored under the "orig_id" key
 * @return a newly created, mutable parameter map with four or five entries
 */
public static Map<String,Object> getQQKBCommentReplyParamMap(String old_reply_id,String comment_id,String article_id,String reply_id) {
    final Map<String,Object> replyParams = new HashMap<>();

    final boolean hasPreviousReply = old_reply_id != null;
    if (hasPreviousReply) {
        replyParams.put("old_reply_id", old_reply_id);
    }

    replyParams.put("pageflag", "old");
    replyParams.put("comment_id", comment_id);
    replyParams.put("article_id", article_id);
    replyParams.put("orig_id", reply_id);

    return replyParams;
}
/**
 * Builds the request parameters for a Tiantian Kuaibao (QQKB) keyword query.
 *
 * <p>The "sid", "queryid" and "page" entries are written only when both
 * {@code sid} and {@code queryid} are non-null; otherwise {@code page} is
 * ignored. The "query" entry is always written.
 *
 * @param word    keyword stored under the "query" key
 * @param sid     value stored under "sid" when paging context is present
 * @param queryid value stored under "queryid" when paging context is present
 * @param page    page number stored under "page" when paging context is present
 * @return a newly created, mutable parameter map with one or four entries
 */
public static Map<String,Object> getQQKBByWordParamMap(String word,String sid,String queryid,int page) {
    final Map<String,Object> searchParams = new HashMap<>();

    final boolean hasPagingContext = !(sid == null || queryid == null);
    if (hasPagingContext) {
        searchParams.put("sid", sid);
        searchParams.put("queryid", queryid);
        searchParams.put("page", page);
    }

    searchParams.put("query", word);
    return searchParams;
}
public static Map<String,String> getQQKBByWordHeaderMap(String cookie) {
Map<String,String> headerMap = new HashMap<String, String>();
headerMap.put("User-Agent",
"天天快报 4.6.2 qnreading (iPhone8,1; iOS 11.2.1; zh_CN; 4.6.2.89)");
headerMap.put("Accept",
"*/*");
headerMap.put("Accept-Language", "zh-Hans-CN;q=1");
headerMap.put("Connection", "keep-alive");
headerMap.put("Host", "r.cnews.qq.com");
headerMap.put("Referer", "http://r.cnews.qq.com/inews/iphone/");
headerMap.put("deviceToken", "<585bee8d f6739b65 1248b40d 7be9dc4a 126bbf27 85ad470e ce6b7923 bbcb7c1c>");
headerMap.put("qn-rid", "9343AF22-FE03-4DFF-BC91-1D41997AA9B4");
headerMap.put("qn-sig", "8D2B15DA2D55970187106A58C1966986");
headerMap.put("omgbizid", "5144dee3f39a8d4dad994e5391fcebd1a0d50090112b14");
headerMap.put("omgid", "0f63f8e68f041746372b9ceecc8e97f028e90010112b14");
headerMap.put("idfa", "FE659B7E-5104-44C2-8A31-F88DEE7A2747");
headerMap.put("appver", "11.2.1_qnreading_4.6.2");
headerMap.put("devid", "6D33F35F-880D-42A6-A23F-881BEC6960EC");
if(cookie != null) {
headerMap.put("Cookie", cookie);
......@@ -493,23 +599,26 @@ public class HeadGet {
return headerMap;
}
public static Map<String,Object> getJikeCommentParamMap(String targetId,String time) {
Map<String,Object> paramMap = new HashMap<String,Object>();
JSONObject json = new JSONObject();
if(time != null) {
json.put("createdAt", time);
paramMap.put("loadMoreKey", json);
public static Map<String,String> getWangyiCommentHeaderMap(String cookie) {
Map<String,String> headerMap = new HashMap<String, String>();
headerMap.put("User-Agent",
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");
headerMap.put("Accept",
"*/*");
headerMap.put("Accept-Language", "zh-CN,zh;q=0.9");
headerMap.put("Connection", "keep-alive");
headerMap.put("Host", "comment.dy.163.com");
if(cookie != null) {
headerMap.put("Cookie", cookie);
}
paramMap.put("targetId", targetId);
return paramMap;
return headerMap;
}
public static void main(String[] args) {
String url = "https://app.jike.ruguoapp.com/1.0/messageComments/listPrimary";
String cookie = "jike:config:searchPlaceholderLastInfo=1514465731446#0; jike:sess=eyJfdWlkIjoiNWE0NGRmMTlmOWM4NWYwMDExODJhMjkwIiwiX3Nlc3Npb25Ub2tlbiI6InQ5cExKaEpiTFdVeDFsbUxKZW9vMUlKMEsifQ==; jike:sess.sig=HBuRKsTsMIIR9aMDUdkNV_mGH1E";
Map<String, String> headerMap = HeadGet.getJikeComment94HeaderMap(cookie);
Map<String,Object> paramMap = HeadGet.getJikeCommentParamMap("5a449a3d580d23001148412e","2017-12-28T10:17:50.601Z");
String result = HttpClient.executeHttpRequestPost(url, headerMap, paramMap);
String url = "http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/D75MDLL10524H5KD/comments/newList?offset=0&limit=30&showLevelThreshold=72&headLimit=1&tailLimit=2&callback=getData&ibc=newspc&_=1514966469573";
String cookie = "phone_id=;%20phone_token=;%20luin=o0497332654;%20lskey=00030000fafc45b92e51a92d1a2d1c0536594402729a928137fe205f823d71e18c3e786e6f368baff37f7edc;%20uin=o0497332654;%20skey=MSF4MCe62n;%20sigA2=75E9AE34BD844F7CD19AC30353DE6116A767F02C50C78ABA2FB11B5B1D74324CCEDA1C9D13B6B3719AAA7875B14DBE4C560CB5FB99A5D63390B8041F6C83A48401EA8D5DA7B04E7A;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwvJbQ-Gsn52dfcob8V66AgcW1SAGy8xloQk1nVWfjVvR0b637c-qcRWE7M2QtFLKLsZP8o6dBVABpDhbzRQ92tw;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0";
Map<String,String> headerMap = HeadGet.getWangyiCommentHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url, headerMap);
System.out.println(result);
System.out.println(result.length());
}
......
......@@ -16,6 +16,13 @@ public class Baijia {
private static Logger logger = LoggerFactory.getLogger(Baijia.class);
private static BaijiaAccountAnalysis baijiaAccountAnalysis = new BaijiaAccountAnalysis();
/**
*
* @Description 百家号历史文章采集
* @param app_id
* @param startTime
* @return
*/
public static List<Map<String,Object>> getBaijiaAccountData(String app_id,String startTime) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int i = 0;
......@@ -23,7 +30,7 @@ public class Baijia {
while(true) {
try {
String url = "https://baijia.baidu.com/writerlistarticle?ajax=json&app_id="+app_id+"&_limit=20&_skip=";
System.out.println(url+i);
logger.info(url+i);
Map<String,String> headerMap = HeadGet.getBaijiaAccountHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url + i, headerMap);
List<Map<String,Object>> list = baijiaAccountAnalysis.getBaijiaAccountData(result, startTime);
......
package com.zhiwei.parse;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
......@@ -12,6 +13,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.DayuAccountAnalysis;
import com.zhiwei.parse.analysis.DayuByWordAnalysis;
import com.zhiwei.parse.analysis.DayuCommentAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
......@@ -19,6 +21,7 @@ public class Dayu {
private static Logger logger = LoggerFactory.getLogger(Dayu.class);
private static DayuAccountAnalysis dayuAccountAnalysis = new DayuAccountAnalysis();
private static DayuCommentAnalysis dayuCommentAnalysis = new DayuCommentAnalysis();
private static DayuByWordAnalysis dayuByWordAnalysis = new DayuByWordAnalysis();
/**
*
......@@ -45,7 +48,7 @@ public class Dayu {
dataList.addAll(lists);
System.out.println("================解析第"+i+"页====此时有数据=="+dataList.size());
i++;
ZhiWeiTools.sleep(8000);
ZhiWeiTools.sleep(7000);
}
return dataList;
} catch (Exception e) {
......@@ -94,6 +97,12 @@ public class Dayu {
}
/**
*
* @Description 获取文章评论数
* @param articleId
* @return
*/
public static int getDayuCommentCount(String articleId) {
String url = "http://m.uczzd.cn/iflow/api/v2/cmt/article/"+articleId+"/comments/byhot";
Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null);
......@@ -102,4 +111,38 @@ public class Dayu {
return json.getJSONObject("data").getInteger("comment_cnt");
}
/**
 * Collects Dayu articles matching a keyword by requesting the zzd.sm.cn
 * search endpoint page by page (20 per page, starting at page 1) until a
 * page parses to no entries.
 *
 * <p>Best effort: if a request or parse step throws, the failure is logged
 * with its stack trace and whatever was gathered so far is returned.
 *
 * @param word search keyword; URL-encoded as UTF-8 before being put in the query string
 * @return the accumulated entries; possibly empty, never null
 */
public static List<Map<String,Object>> getDayuByWordData(String word) {
    Map<String,String> headerMap = HeadGet.getDayuByWordHeaderMap(null);
    List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
    try {
        // The keyword is the same for every page, so encode it once.
        String encodedWord = URLEncoder.encode(word, "UTF-8");
        for (int page = 1; ; page++) {
            String url = "http://zzd.sm.cn/iflow/api/v1/article/fsearch?page=" + page
                    + "&size=20&sid=&q=" + encodedWord + "&scene=0";
            logger.info(url);
            String result = HttpClient.executeHttpRequestGet(url, headerMap);
            List<Map<String,Object>> lists = dayuByWordAnalysis.getDayuByWordData(result);
            // An empty (or null) page marks the end of the result set.
            if (lists == null || lists.isEmpty()) {
                break;
            }
            dataList.addAll(lists);
            // Pause between pages to throttle the crawl.
            ZhiWeiTools.sleep(5000);
        }
    } catch (Exception e) {
        // Pass the throwable itself: with no "{}" placeholder SLF4J would
        // silently drop e.getMessage() and the stack trace would be lost.
        logger.error("关键词获取大鱼信息出错", e);
    }
    return dataList;
}
}
package com.zhiwei.parse;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
......@@ -93,6 +92,12 @@ public class Fenghuang {
return map;
}
/**
*
* @Description 凤凰关键词采集
* @param word
* @return
*/
public static List<Map<String,Object>> getFenghuangByWord(String word) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int i = 1;
......@@ -120,7 +125,7 @@ public class Fenghuang {
logger.error("依据关键词获取凤凰文章出错",e.getMessage());
e.printStackTrace();
return dataList;
} catch (IOException e) {
} catch (Exception e) {
e.printStackTrace();
logger.error("链接获取凤凰信息出错",e.getMessage());
return dataList;
......
package com.zhiwei.parse;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
......@@ -18,6 +17,12 @@ public class Meipai {
private static Logger logger = LoggerFactory.getLogger(Meipai.class);
private static MeipaiByWordAnalysis meipaiByWordAnalysis = new MeipaiByWordAnalysis();
/**
*
* @Description 美拍关键词获取视频数据
* @param word
* @return
*/
public static List<Map<String,Object>> getMeipaiByWordData(String word) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try {
......@@ -44,6 +49,7 @@ public class Meipai {
return dataList;
} catch (Exception e) {
logger.error("获取数据出错",e.getMessage());
e.printStackTrace();
return dataList;
}
......
package com.zhiwei.parse;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
......@@ -15,7 +14,12 @@ import com.zhiwei.httpclient.HttpClient;
public class Miaopai {
private static Logger logger = LoggerFactory.getLogger(Miaopai.class);
/**
*
* @Description 秒拍依据链接获取数据
* @param url
* @return
*/
public static Map<String,Object> getMiaopaiDataByURL(String url) {
Map<String,String> headerMap = HeadGet.getMiaoPaiByURlHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url, headerMap);
......
......@@ -7,15 +7,25 @@ import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.QQAccountAnalysis;
import com.zhiwei.parse.analysis.QQKBAccountAnalysis;
import com.zhiwei.parse.analysis.QQKBCommentAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class QQ {
private static Logger logger = LoggerFactory.getLogger(QQ.class);
private static QQAccountAnalysis qqAccountAnalysis = new QQAccountAnalysis();
public class QQKB {
private static Logger logger = LoggerFactory.getLogger(QQKB.class);
private static QQKBAccountAnalysis qqAccountAnalysis = new QQKBAccountAnalysis();
private static QQKBCommentAnalysis qqkbCommentAnalysis = new QQKBCommentAnalysis();
/**
*
* @Description 采集天天快报历史文章
* @param child
* @param cookie
* @return
*/
public static List<Map<String,Object>> getQQAccountData(String child,String cookie) {
String url = "http://r.cnews.qq.com/getSubNewsIndex";
Map<String,String> headerMap = HeadGet.getQQAccountHeaderMap(cookie);
......@@ -35,7 +45,7 @@ public class QQ {
try {
ids = ids.substring(0,ids.length()-1);
System.out.println(ids);
ZhiWeiTools.sleep(8000);
ZhiWeiTools.sleep(7000);
paramMap.clear();
paramMap = HeadGet.getQQAccountOtherParamMap(ids);
result = HttpClient.executeHttpRequestPost(url, headerMap, paramMap);
......@@ -68,5 +78,87 @@ public class QQ {
}
/**
 * Collects Tiantian Kuaibao comments for an article by repeatedly POSTing to
 * the getQQNewsComment endpoint until a response parses to no comments.
 *
 * <p>The first request uses the first-page parameters; the parameters for
 * each following request are derived from the previous response by
 * {@code qqkbCommentAnalysis.getParamMap}.
 *
 * <p>Best effort: if a request or parse step throws, the failure is logged
 * with its stack trace and whatever was gathered so far is returned.
 *
 * @param cookie     Cookie header value; also handed to the comment parser
 * @param comment_id comment thread id sent with every request
 * @param article_id article id sent with every request
 * @return the accumulated comment entries; possibly empty, never null
 */
public static List<Map<String,Object>> getQQKBCommentData(String cookie,String comment_id,String article_id) {
    List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
    Map<String,String> headerMap = HeadGet.getQQKBCommentHeaderMap(cookie);
    try {
        Map<String,Object> paramMap = HeadGet.getQQKBCommentParamMap(comment_id, article_id);
        int i = 1;
        while (true) {
            String result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/getQQNewsComment", headerMap, paramMap);
            logger.debug("getQQNewsComment response: {}", result);
            List<Map<String,Object>> lists = qqkbCommentAnalysis.getCommentData(result,cookie,comment_id, article_id);
            // An empty (or null) page marks the end of the comment list.
            if (lists == null || lists.isEmpty()) {
                break;
            }
            dataList.addAll(lists);
            paramMap = qqkbCommentAnalysis.getParamMap(result,i,comment_id,article_id);
            // NOTE(review): getParamMap's contract is not visible here. A null
            // result is treated as "no further page" instead of relying on a
            // later NullPointerException to end the loop.
            if (paramMap == null) {
                break;
            }
            i++;
            // Pause between pages to throttle the crawl.
            ZhiWeiTools.sleep(5000);
        }
    } catch (Exception e) {
        // Pass the throwable itself: with no "{}" placeholder SLF4J would
        // silently drop e.getMessage() and the stack trace would be lost.
        logger.error("解析天天快报评论出错", e);
    }
    return dataList;
}
/**
 * Fetches the comment count of a Tiantian Kuaibao article.
 *
 * <p>POSTs the first-page comment request and reads {@code comments.count}
 * from the JSON response.
 *
 * @param cookie     Cookie header value
 * @param comment_id comment thread id sent with the request
 * @param article_id article id sent with the request
 * @return the reported count, or 0 when the count is missing from the
 *         response or the request/parse fails
 */
public static int getCommentCount(String cookie,String comment_id,String article_id) {
    Map<String,String> headerMap = HeadGet.getQQKBCommentHeaderMap(cookie);
    try {
        Map<String,Object> paramMap = HeadGet.getQQKBCommentParamMap(comment_id, article_id);
        String result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/getQQNewsComment", headerMap, paramMap);
        JSONObject json = JSONObject.parseObject(result);
        // Walk the response defensively so a missing field yields 0 directly
        // rather than through a NullPointerException on unboxing.
        JSONObject comments = json == null ? null : json.getJSONObject("comments");
        Integer count = comments == null ? null : comments.getInteger("count");
        if (count == null) {
            logger.warn("comment count missing in getQQNewsComment response: {}", result);
            return 0;
        }
        return count;
    } catch (Exception e) {
        // Pass the throwable itself: with no "{}" placeholder SLF4J would
        // silently drop e.getMessage() and the stack trace would be lost.
        logger.error("解析天天快报评论出错", e);
        return 0;
    }
}
// public static List<Map<String,Object>> getQQKBByWordData(String word,String cookie) {
// List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
// Map<String,String> headerMap = HeadGet.getQQKBByWordHeaderMap(cookie);
// Map<String,Object> paramMap = HeadGet.getQQKBByWordParamMap(word,null,null,0);
// int i = 1;
// try {
// String result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/search", headerMap, paramMap);
// System.out.println(result);
// JSONObject json = JSONObject.parseObject(result);
// String sid = json.getString("sid");
// String queryid = json.getString("queryid");
// System.out.println(sid + "================" + queryid);
// while(true) {
// ZhiWeiTools.sleep(5000);
// i++;
// paramMap.clear();
// paramMap = HeadGet.getQQKBByWordParamMap(word, sid, queryid, i);
// result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/searchMore", headerMap, paramMap);
// System.out.println(result);
// }
// } catch (Exception e) {
// logger.error("天天快报关键词采集出错",e.getMessage());
// return dataList;
// }
// }
}
package com.zhiwei.parse;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
......@@ -23,6 +22,12 @@ public class Souhu {
private static SouhuAccountAnalysis souhuAccountAnalysis = new SouhuAccountAnalysis();
private static SouhuCommentAnalysis souhuCommentAnalysis = new SouhuCommentAnalysis();
/**
*
* @Description 获取链接评论数
* @param url
* @return
*/
public static int getSouhuCommentCount(String url) {
String newurl = souhuCommentAnalysis.getSouhuURL(url);
int i;
......@@ -70,7 +75,9 @@ public class Souhu {
if(jsonArray.size() < 1) {
break;
}
dataList.addAll(dataList1);
if(startTime == null) {
dataList.addAll(dataList1);
}
//判断时间
if(startTime != null) {
for(Map<String,Object> map : dataList1) {
......@@ -82,6 +89,7 @@ public class Souhu {
dataList.add(map);
}
}
logger.info("=============获取到的数据数目{}",dataList.size());
i++;
ZhiWeiTools.sleep(3000);
} catch (Exception e) {
......
package com.zhiwei.parse;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
......
......@@ -11,8 +11,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
public class QQAccountAnalysis {
private static Logger logger = LoggerFactory.getLogger(QQAccountAnalysis.class);
public class QQKBAccountAnalysis {
private static Logger logger = LoggerFactory.getLogger(QQKBAccountAnalysis.class);
/**
*
......
......@@ -33,6 +33,7 @@ public class SouhuCommentAnalysis {
source_id = s.split("_")[0];
} catch (Exception e) {
logger.error("链接解析错误",e.getMessage());
return null;
}
String newurl = "http://apiv2.sohu.com/api/comment/list?page_size=10&topic_id="+topic_id+"&source_id=mp_"+source_id;
return newurl;
......
......@@ -7,7 +7,7 @@ import java.util.Map;
import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQ;
import com.zhiwei.parse.QQKB;
public class QQAccountExample {
......@@ -16,7 +16,7 @@ public class QQAccountExample {
String child = "5002744";
String cookie = "phone_id=;%20phone_token=;%20luin=o0497332654;%20lskey=00030000db3c2ec2393ea968f523f50144db7ab5aec60e79d2509c271bdacdf784e88ac1f58b7493c23ceb15;%20uin=o0497332654;%20skey=M67MOgvFQJ;%20sigA2=D3046D543D9BA50CFE749D63B1F05AF28A281C29B4F1353374AB7A19D9527497A67E507C6829AE44F67C1EA032C2A3728301D2ABC864DA32BCA7D4C7A61609F9F3BC9AE0A7243003;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwmUT_jxJCnY5yVwhmL3e2K5FOTRth6jz8SKVHGseA3v9s8UIZxw00LpF1uC9l7W5WL2trdb69LlCvE1s7twReOw;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0";
List<Map<String,Object>> dataList = QQ.getQQAccountData(child, cookie);
List<Map<String,Object>> dataList = QQKB.getQQAccountData(child, cookie);
System.out.println(dataList.size());
PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<String> headList = new ArrayList<String>();
......
package com.zhiwei.crawler;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Souhu;
......@@ -12,13 +14,19 @@ public class SouhuAccountExample {
@Test
public void souhuAccountTest() {
List<Map<String,Object>> lists = Souhu.getSouHuAccountData("MjQ4MDQ5Nzg2MEBzaW5hLnNvaHUuY29t",null,true);
List<Map<String,Object>> lists = Souhu.getSouHuAccountData("c29odXptdGh5YXRieUBzb2h1LmNvbQ==","2017-01-01 00:00:00",false);
System.out.println(lists.size());
int i = 0;
for(Map<String,Object> map : lists) {
System.out.println(map.toString());
System.out.println(i++);
}
List<String> headList = new ArrayList<String>();
headList.add("title");
headList.add("time");
headList.add("content");
headList.add("url");
headList.add("comment");
headList.add("tags");
headList.add("newsid");
headList.add("newsPv");
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D:\\crawlerdata\\搜狐号历史文章.xlsx", "sasd", headList, lists);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment