Commit a731c54c by yangchen

修改 腾讯新闻关键词采集

parent d555bdda
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
<dependency> <dependency>
<groupId>com.zhiwei.tools</groupId> <groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId> <artifactId>zhiwei-tools</artifactId>
<version>0.0.3-SNAPSHOT</version> <version>0.0.8-SNAPSHOT</version>
</dependency> </dependency>
</dependencies> </dependencies>
......
...@@ -12,6 +12,8 @@ import java.util.Map; ...@@ -12,6 +12,8 @@ import java.util.Map;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import com.zhiwei.tools.httpclient.HeaderTool;
public class HeadGet { public class HeadGet {
/** /**
...@@ -931,12 +933,23 @@ public class HeadGet { ...@@ -931,12 +933,23 @@ public class HeadGet {
return paramMap; return paramMap;
} }
/**
 * Builds the fixed set of request parameters for the Kuaishou request.
 *
 * <p>Every call returns a new, mutable {@link HashMap} holding six entries:
 * {@code count} (an {@code Integer}) and the string-valued {@code user_id},
 * {@code client_key}, {@code token}, {@code sig} and {@code __NStokensig}.
 *
 * <p>NOTE(review): {@code token}, {@code sig} and {@code __NStokensig} look like
 * captured session credentials/signatures hard-coded in source — confirm, and
 * consider loading them from configuration instead.
 *
 * @return a freshly allocated parameter map; never {@code null}
 */
public static Map<String,Object> getKuaishouParamMap() {
    // Key/value table; each row is {name, value}.
    final Object[][] entries = {
        {"count", 20},
        {"user_id", "475195458"},
        {"client_key", "56c3713c"},
        {"token", "10e4f33e55c0488e99ae750c5f3d46ff-1032060898"},
        {"sig", "ebd688038026858f30cdde57045996f9"},
        {"__NStokensig", "f768b1f8d0ad8f0491be35c102742b278194faaa41f4ecd25a8f3ae44c7daa0a"},
    };
    final Map<String, Object> params = new HashMap<>();
    for (Object[] entry : entries) {
        params.put((String) entry[0], entry[1]);
    }
    return params;
}
public static void main(String[] args) throws UnsupportedEncodingException { public static void main(String[] args) throws UnsupportedEncodingException {
String url = "https://r.cnews.qq.com/searchByType"; String url = "http://180.186.38.200/rest/n/feed/profile2";
System.out.println(url); System.out.println(url);
String cookie = "luin=o0497332654;%20lskey=00030000d63ffaf7eba88c86106eac5f2910d45515222334b91c75a66b449c990c2be43cd202ba39b35bef60;%20uin=o0497332654;%20skey=MH3wukytS4;%20sigA2=7AB4D8DEDF73E313801FD348FD77EC3B05C06DBC4D9DA669B20CA04A8D6B80F300A69567FBD11A7B799E419BB796F22D47D3AE5FA95E708A0ABC66161061131B0B21A0031AA0807C;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwmSQ3EqlyzseC9-qGEFY7Tkr0Ypk5vsnSwOaMC-IGxsBeY2K7knHrYstj_5dZpisJd5nihvLNQvCdsFhFwZQcT8;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0"; String cookie = "";
Map<String,String> headerMap = HeadGet.getQQkbUserHeaderMap(cookie); Map<String,String> headerMap = HeaderTool.getCommonHead();
Map<String,Object> paramMap = HeadGet.getQQkbUserParamMap("虎嗅"); Map<String,Object> paramMap = HeadGet.getKuaishouParamMap();
String result = HttpClient.executeHttpRequestPost(url, null, headerMap, paramMap); String result = HttpClient.executeHttpRequestPost(url, null, headerMap, paramMap);
System.out.println(result); System.out.println(result);
System.out.println(result.length()); System.out.println(result.length());
......
...@@ -10,15 +10,20 @@ import org.slf4j.LoggerFactory; ...@@ -10,15 +10,20 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.BaijiaAccountAnalysis; import com.zhiwei.parse.analysis.BaijiaAccountAnalysis;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Request;
public class Baijia { public class Baijia {
private static Logger logger = LoggerFactory.getLogger(Baijia.class); private static Logger logger = LoggerFactory.getLogger(Baijia.class);
private static BaijiaAccountAnalysis baijiaAccountAnalysis = new BaijiaAccountAnalysis(); private static BaijiaAccountAnalysis baijiaAccountAnalysis = new BaijiaAccountAnalysis();
private static HttpBoot httpBoot = new HttpBoot();
/** /**
* *
...@@ -77,7 +82,8 @@ public class Baijia { ...@@ -77,7 +82,8 @@ public class Baijia {
for(int i = 1;i < 3;i++) { for(int i = 1;i < 3;i++) {
try { try {
String url = "https://author.baidu.com/list?type=article&context={%22offset%22:%22-1_"+n+"%22,%22app_id%22:%22"+app_id+"%22,%22pageSize%22:20}"; String url = "https://author.baidu.com/list?type=article&context={%22offset%22:%22-1_"+n+"%22,%22app_id%22:%22"+app_id+"%22,%22pageSize%22:20}";
String result = HttpClient.executeHttpRequestGet(url, proxy, headerMap); Request request = RequestUtils.wrapGet(url, headerMap);
String result = httpBoot.syncCall(request, proxy, false).body().string();
Map<String,Object> dMap = baijiaAccountAnalysis.getBaijiaAccountData3(result,name, startTime); Map<String,Object> dMap = baijiaAccountAnalysis.getBaijiaAccountData3(result,name, startTime);
List<Map<String,Object>> dList = (List<Map<String, Object>>) dMap.get("data"); List<Map<String,Object>> dList = (List<Map<String, Object>>) dMap.get("data");
dataList.addAll(dList); dataList.addAll(dList);
......
...@@ -11,10 +11,11 @@ import java.util.Map; ...@@ -11,10 +11,11 @@ import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.HttpClientBuilder;
import com.zhiwei.crawler.core.HttpRequestBuilder;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.analysis.BilibilikeyWordAnalysis; import com.zhiwei.parse.analysis.BilibilikeyWordAnalysis;
import com.zhiwei.tools.httpclient.HttpClientBuilder;
import com.zhiwei.tools.httpclient.HttpRequestBuilder;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.util.WordReadFile; import com.zhiwei.util.WordReadFile;
...@@ -25,17 +26,16 @@ import okhttp3.Request; ...@@ -25,17 +26,16 @@ import okhttp3.Request;
public class BiliBili { public class BiliBili {
private static Logger logger = LoggerFactory.getLogger(BiliBili.class); private static Logger logger = LoggerFactory.getLogger(BiliBili.class);
private static HttpBoot httpBoot = new HttpBoot();
public static List<Map<String,Object>> getData(String word,Proxy proxy,String cookie) { public static List<Map<String,Object>> getData(String word,Proxy proxy,String cookie) {
OkHttpClient client = HttpClientBuilder.newInstance();
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
try { try {
String url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&from_source=banner_search"; String url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&order=pubdate&duration=0&tids_1=0";
Headers header = Headers.of("cookie",cookie,"Referer","https://www.bilibili.com/","Host","search.bilibili.com"); Headers header = Headers.of("cookie",cookie,"Referer","https://www.bilibili.com/","Host","search.bilibili.com");
System.out.println(url);
Request request = HttpRequestBuilder.newGetRequest(url, header); Request request = HttpRequestBuilder.newGetRequest(url, header);
client = client.newBuilder().proxy(proxy).build(); String result = httpBoot.syncCall(request, proxy).body().string();
String result = client.newCall(request).execute().body().string(); // System.out.println(result);
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(3000);
Map<String,Object> map = BilibilikeyWordAnalysis.getData(result); Map<String,Object> map = BilibilikeyWordAnalysis.getData(result);
boolean more = (boolean) map.get("more"); boolean more = (boolean) map.get("more");
...@@ -48,7 +48,7 @@ public class BiliBili { ...@@ -48,7 +48,7 @@ public class BiliBili {
map.clear(); map.clear();
String ur = url + "&page=" + n; String ur = url + "&page=" + n;
request = HttpRequestBuilder.newGetRequest(ur, header); request = HttpRequestBuilder.newGetRequest(ur, header);
String result2 = client.newCall(request).execute().body().string(); String result2 = httpBoot.syncCall(request, proxy).body().string();
map = BilibilikeyWordAnalysis.getData(result2); map = BilibilikeyWordAnalysis.getData(result2);
List<Map<String,Object>> dataList2 = (List<Map<String, Object>>) map.get("data"); List<Map<String,Object>> dataList2 = (List<Map<String, Object>>) map.get("data");
if(dataList2 != null) { if(dataList2 != null) {
...@@ -89,7 +89,7 @@ public class BiliBili { ...@@ -89,7 +89,7 @@ public class BiliBili {
headlist.add("title"); headlist.add("title");
headlist.add("url"); headlist.add("url");
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D://crawlerdata//bilibili关键词采集数据.xlsx", "B站数据", headlist, bodyList); poi.exportExcel("D://crawlerdata//bilibili关键词采集数据-竹鼠.xlsx", "B站数据", headlist, bodyList);
} }
......
...@@ -48,4 +48,32 @@ public class Maimai { ...@@ -48,4 +48,32 @@ public class Maimai {
return dataList; return dataList;
} }
/**
 * Collects Maimai "gossip" search results for a keyword, page by page.
 *
 * <p>Requests {@code https://maimai.cn/search/gossips} with {@code limit=20} and an
 * {@code offset} that advances by 20 per page. Each response body is handed to
 * {@code maimaiBywordAnalysis.getDataByNoName(result, time)}, and the rows under its
 * {@code "data"} key are appended to the result. Paging stops when the parser
 * reports {@code "hasMore"} as anything other than {@code true}, or when a page
 * yields no rows.
 *
 * <p>Best-effort: any exception ends the collection early and whatever was gathered
 * so far is returned. The failure is logged rather than silently dropped.
 *
 * @param key    search keyword; URL-encoded as UTF-8 before being sent
 * @param cookie cookie string used to build the request headers
 * @param time   passed unchanged to the parser (presumably a cutoff timestamp —
 *               confirm against {@code MaimaiBywordAnalysis.getDataByNoName})
 * @param proxy  proxy forwarded to the HTTP call
 * @return the rows collected so far; never {@code null}, possibly empty
 */
public static List<Map<String,Object>> getDataByNoName(String key,String cookie,String time,Proxy proxy) {
    Map<String,String> headerMap = HeadGet.getMaimaiKeywordHeaderMap(cookie);
    List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
    try {
        // Encode once; the keyword does not change between pages.
        String encodedKey = URLEncoder.encode(key, "utf-8");
        int offset = 0;
        boolean hasMore = true;
        while (hasMore) {
            // Single URL template for every page. The original follow-up URL was
            // missing the '&' between the offset value and "highlight".
            String url = "https://maimai.cn/search/gossips?query=" + encodedKey
                    + "&limit=20&offset=" + offset
                    + "&highlight=true&sortby=time&jsononly=1";
            String result = HttpClient.executeHttpRequestGet(url, proxy, headerMap);
            Map<String,Object> map = maimaiBywordAnalysis.getDataByNoName(result, time);
            // Null-safe: a missing "hasMore" entry means "stop" instead of an NPE.
            hasMore = Boolean.TRUE.equals(map.get("hasMore"));
            @SuppressWarnings("unchecked")
            List<Map<String,Object>> daList = (List<Map<String, Object>>) map.get("data");
            if (daList == null || daList.isEmpty()) {
                break;
            }
            dataList.addAll(daList);
            offset += 20;
            logger.info("{}==采集到的数据量=={}", key, dataList.size());
            ZhiWeiTools.sleep(2000);
        }
    } catch (Exception e) {
        // Keep the partial result, but make the failure visible.
        logger.error("maimai gossip search failed for key={}, collected so far={}", key, dataList.size(), e);
    }
    return dataList;
}
} }
...@@ -20,8 +20,8 @@ import com.alibaba.fastjson.JSONArray; ...@@ -20,8 +20,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.bean.HistortyBean; import com.zhiwei.bean.HistortyBean;
import com.zhiwei.bean.QQKandianUser; import com.zhiwei.bean.QQKandianUser;
import com.zhiwei.tools.httpclient.HttpClientBuilder; import com.zhiwei.crawler.core.HttpClientBuilder;
import com.zhiwei.tools.httpclient.HttpRequestBuilder; import com.zhiwei.crawler.core.HttpRequestBuilder;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -17,23 +17,26 @@ public class TXNews { ...@@ -17,23 +17,26 @@ public class TXNews {
private static Logger logger = LoggerFactory.getLogger(TXNews.class); private static Logger logger = LoggerFactory.getLogger(TXNews.class);
private static TXNewsByWordAnalysis txNewsByWordAnalysis = new TXNewsByWordAnalysis(); private static TXNewsByWordAnalysis txNewsByWordAnalysis = new TXNewsByWordAnalysis();
public static boolean hasMore = true; public static boolean txNewshasMoreData = true;
public static List<Map<String,Object>> getData(String word,Proxy proxy) { public static List<Map<String,Object>> getData(String word,String devid,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
Map<String,String> headerMap = HeadGet.getTxNewspage1HeaderMap(null); Map<String,String> headerMap = HeadGet.getTxNewspage1HeaderMap(null);
Map<String,Object> paramMap = HeadGet.getTxNewspage1ParamMap(word); Map<String,Object> paramMap = HeadGet.getTxNewspage1ParamMap(word);
String result = HttpClient.executeHttpRequestPost("http://r.inews.qq.com/search?appver=11.2.1_qqnews_5.5.60&devid=6D33F35F-880D-42A6-A23F-881BEC6960EC",proxy, headerMap, paramMap); // b3dd1e7d-9d3c-4e75-bf3e-3a76f326ee34
String result = HttpClient.executeHttpRequestPost("http://r.inews.qq.com/search?appver=11.2.1_qqnews_5.5.60&devid="+devid,proxy, headerMap, paramMap);
List<Map<String,Object>> dList = txNewsByWordAnalysis.getData(result); List<Map<String,Object>> dList = txNewsByWordAnalysis.getData(result);
dataList.addAll(dList); dataList.addAll(dList);
int page = 2; int page = 2;
int count = 0; int count = 0;
Map<String,String> header2Map = HeadGet.getTxNewspage2HeaderMap(null); Map<String,String> header2Map = HeadGet.getTxNewspage2HeaderMap(null);
while(hasMore) { while(txNewshasMoreData) {
try { try {
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(5000);
//
Map<String,Object> param2Map = HeadGet.getTxNewspagemoreParamMap(word, page); Map<String,Object> param2Map = HeadGet.getTxNewspagemoreParamMap(word, page);
String result2 = HttpClient.executeHttpRequestPost("http://r.inews.qq.com/searchMore?appver=11.2.1_qqnews_5.5.60&devid=6D33F35F-880D-42A6-A23F-881BEC6960EC",proxy, header2Map, param2Map); //6D33F35F-880D-42A6-A23F-881BEC6960EC
String result2 = HttpClient.executeHttpRequestPost("http://r.inews.qq.com/searchMore?appver=11.2.1_qqnews_5.5.60&devid=496d3626-9684-45ef-8d22-7a71fbfd22da",proxy, header2Map, param2Map);
page++; page++;
List<Map<String,Object>> dList2 = txNewsByWordAnalysis.getData(result2); List<Map<String,Object>> dList2 = txNewsByWordAnalysis.getData(result2);
dataList.addAll(dList2); dataList.addAll(dList2);
......
...@@ -126,6 +126,7 @@ public class Yidianzixun { ...@@ -126,6 +126,7 @@ public class Yidianzixun {
int i = 0; int i = 0;
while(true) { while(true) {
String url = "http://www.yidianzixun.com/home/q/news_list_for_keyword?display="+URLEncoder.encode(word, "UTF-8")+"&cstart="+i+"&cend="+(i+10)+"&word_type=token"; String url = "http://www.yidianzixun.com/home/q/news_list_for_keyword?display="+URLEncoder.encode(word, "UTF-8")+"&cstart="+i+"&cend="+(i+10)+"&word_type=token";
System.out.println(url);
Map<String,String> headerMap = HeadGet.getYidianzixunWordHeaderMap(null); Map<String,String> headerMap = HeadGet.getYidianzixunWordHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
List<Map<String,Object>> list = yidianzixunByWordAnalysis.getOnePageData(result); List<Map<String,Object>> list = yidianzixunByWordAnalysis.getOnePageData(result);
......
...@@ -54,11 +54,16 @@ public class BaijiaAccountAnalysis { ...@@ -54,11 +54,16 @@ public class BaijiaAccountAnalysis {
try { try {
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("data").getJSONArray("items"); JSONArray jsonArry = json.getJSONObject("data").getJSONArray("items");
if(json.getJSONObject("data") != null && json.getJSONObject("data").getBoolean("has_more") != null) {
if(json.getJSONObject("data").getBoolean("has_more")) {
more = true;
}
}
for(int i = 0;i < jsonArry.size();i++) { for(int i = 0;i < jsonArry.size();i++) {
Map<String,Object> map = new HashMap<>(); Map<String,Object> map = new HashMap<>();
JSONObject data = jsonArry.getJSONObject(i); JSONObject data = jsonArry.getJSONObject(i);
String id = data.getString("article_id"); String id = data.getString("article_id");
int t = data.getInteger("created_at"); int t = data.getInteger("updated_at");
String time = TimeParse.dateFormartString(new Date(t*1000L), "yyyy-MM-dd HH:mm:ss"); String time = TimeParse.dateFormartString(new Date(t*1000L), "yyyy-MM-dd HH:mm:ss");
System.out.println(time); System.out.println(time);
if(startTime != null && startTime.length() > 1) { if(startTime != null && startTime.length() > 1) {
...@@ -80,9 +85,6 @@ public class BaijiaAccountAnalysis { ...@@ -80,9 +85,6 @@ public class BaijiaAccountAnalysis {
map.put("source", name); map.put("source", name);
dataList.add(map); dataList.add(map);
} }
if(json.getJSONObject("data").getBoolean("has_more")) {
more = true;
}
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
......
...@@ -44,7 +44,7 @@ public class BilibilikeyWordAnalysis { ...@@ -44,7 +44,7 @@ public class BilibilikeyWordAnalysis {
map.put("source", source); map.put("source", source);
map.put("submitcount", submitcount); map.put("submitcount", submitcount);
dataList.add(map); dataList.add(map);
// System.out.println(map.toString()); System.out.println(map.toString());
} }
Map<String,Object> rmap = new HashMap<String,Object>(); Map<String,Object> rmap = new HashMap<String,Object>();
rmap.put("more", more); rmap.put("more", more);
......
...@@ -38,6 +38,38 @@ public class MaimaiBywordAnalysis { ...@@ -38,6 +38,38 @@ public class MaimaiBywordAnalysis {
map.put("like", data.getJSONObject("feed").getInteger("likes")); map.put("like", data.getJSONObject("feed").getInteger("likes"));
map.put("comment_count", data.getJSONObject("feed").getInteger("total_cnt")); map.put("comment_count", data.getJSONObject("feed").getInteger("total_cnt"));
map.put("spreads", data.getJSONObject("feed").getInteger("spreads")); //传播数 map.put("spreads", data.getJSONObject("feed").getInteger("spreads")); //传播数
System.out.println(map.toString());
dataList.add(map);
}
map1.put("data", dataList);
map1.put("hasMore", f);
return map1;
}
public Map<String,Object> getDataByNoName(String result,String time) {
Map<String,Object> map1 = new HashMap<String,Object>();
JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("data").getJSONArray("gossips");
boolean f = true;
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
f = json.getJSONObject("data").getInteger("more")==1?true:false;
for(int i = 0;i < jsonArry.size();i++) {
JSONObject data = jsonArry.getJSONObject(i);
Map<String,Object> map = new HashMap<String,Object>();
String url = "https://maimai.cn/web/gossip_detail?encode_id="+data.getJSONObject("gossip").getString("encode_id");
String atime = data.getJSONObject("gossip").getString("crtime_string");
if(time.compareTo(atime) > -1) {
f = false;
continue;
}
map.put("time", atime);
map.put("url", url);
map.put("text", data.getJSONObject("gossip").getString("text"));
map.put("name", data.getJSONObject("gossip").getString("username"));
map.put("like", data.getJSONObject("gossip").getInteger("likes"));
map.put("comment_count", data.getJSONObject("gossip").getInteger("total_cnt"));
map.put("spreads", data.getJSONObject("gossip").getInteger("search_order")); //传播数
System.out.println(map.toString());
dataList.add(map); dataList.add(map);
} }
map1.put("data", dataList); map1.put("data", dataList);
......
...@@ -21,9 +21,9 @@ public class TXNewsByWordAnalysis { ...@@ -21,9 +21,9 @@ public class TXNewsByWordAnalysis {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
JSONArray jsonArry = json.getJSONArray("secList"); JSONArray jsonArry = json.getJSONArray("secList");
if(json.getInteger("hasMore") == 1) { if(json.getInteger("hasMore") == 1) {
TXNews.hasMore = true; TXNews.txNewshasMoreData = true;
}else { }else {
TXNews.hasMore = false; TXNews.txNewshasMoreData = false;
} }
for(int i = 0; i < jsonArry.size();i++) { for(int i = 0; i < jsonArry.size();i++) {
JSONObject js = jsonArry.getJSONObject(i); JSONObject js = jsonArry.getJSONObject(i);
...@@ -40,7 +40,7 @@ public class TXNewsByWordAnalysis { ...@@ -40,7 +40,7 @@ public class TXNewsByWordAnalysis {
map.put("id", js2.getString("id")); map.put("id", js2.getString("id"));
map.put("url", js2.getString("url")); map.put("url", js2.getString("url"));
dataList.add(map); dataList.add(map);
// System.out.println(map.toString()); System.out.println(map.toString());
} catch (Exception e) { } catch (Exception e) {
logger.error("采集出错:{}",e.getMessage()); logger.error("采集出错:{}",e.getMessage());
System.out.println(js2.toString()); System.out.println(js2.toString());
......
...@@ -60,15 +60,16 @@ public class BaijiaAccountExample { ...@@ -60,15 +60,16 @@ public class BaijiaAccountExample {
public void test3() { public void test3() {
String path = "D://crawlerdata//自媒体/百家号采集.xlsx"; String path = "D://crawlerdata//自媒体/百家号采集.xlsx";
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
String startTime = "2018-01-01 00:00:00"; String startTime = "2018-05-01 00:00:00";
Map<String,Object> map = poi.importExcel(path, 0); Map<String,Object> map = poi.importExcel(path, 0);
List<Map<String,Object>> list = (List<Map<String, Object>>) map.get("body"); List<Map<String,Object>> list = (List<Map<String, Object>>) map.get("body");
List<Map<String,Object>> bodyList = new ArrayList<>(); List<Map<String,Object>> bodyList = new ArrayList<>();
for(Map<String,Object> m : list) { for(Map<String,Object> m : list) {
try { try {
String app_id = m.get("id").toString(); String app_id = m.get("id").toString();
app_id = "1563725611969509";
String name = m.get("name").toString(); String name = m.get("name").toString();
String cookie = "BAIDUID=BA1090A5857735165A2A419CBA37957A:FG=1"; String cookie = "__cfduid=d847baca85b97d1967b3da02ebb345b831535524251; BAIDUID=C0F0F81EF770C5219AB9C178654135EC:FG=1; PSTM=1536376257; BIDUPSID=250CCE0442BEBCB3568D8EC515953434; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; locale=zh; delPer=0; H_PS_PSSID=1447_21117_20930; PSINO=5";
List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id,name, startTime,cookie, null); List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id,name, startTime,cookie, null);
if(lists != null) { if(lists != null) {
bodyList.addAll(lists); bodyList.addAll(lists);
...@@ -83,7 +84,7 @@ public class BaijiaAccountExample { ...@@ -83,7 +84,7 @@ public class BaijiaAccountExample {
headList.add("url"); headList.add("url");
headList.add("content"); headList.add("content");
headList.add("read_amount"); headList.add("read_amount");
poi.exportExcel("D://crawlerdata//自媒体/百家号-lxj.xlsx", "娱乐资本论", headList, bodyList); poi.exportExcel("D://crawlerdata//自媒体/百家号-lxj-2.xlsx", "娱乐资本论", headList, bodyList);
} }
} }
...@@ -12,7 +12,7 @@ public class DayuByWordExample { ...@@ -12,7 +12,7 @@ public class DayuByWordExample {
@Test @Test
public void dayuByWordTest() { public void dayuByWordTest() {
String word = "沃尔玛"; String word = "11";
List<Map<String,Object>> dataList = Dayu.getDayuByWordData(word,null); List<Map<String,Object>> dataList = Dayu.getDayuByWordData(word,null);
......
...@@ -10,13 +10,14 @@ import com.zhiwei.parse.Maimai; ...@@ -10,13 +10,14 @@ import com.zhiwei.parse.Maimai;
public class MaimaiBywordExample { public class MaimaiBywordExample {
public static void main(String[] args) { public static void main(String[] args) {
String word = "小米 上市|小米 IPO|雷军 IPO|小米 招股书|雷军 上市"; String word = "美团 晋升";
String cookie = "sessionid=njbswswdrvwf4vpg0836xu6m7ve4ziso; guid=GxsfBBgZGwQYGx4EGBkeVgcYGx4bGRIdEx4bVhwZBB0ZHwVDWEtMS3kKExkbBBMfGRkEGgQcHAVPR0VYQmkKA0VBSU9tCk9BQ0YKBmZnfmJhAgocGQQdGR8FXkNhSE99T0ZaWmsKAx4cfWV9ChEZBBwKfmQKWV1FTkRDfQIKGgQfBUtGRkNQRWc=; seid=s1526952692556; token=\"nv0ZM3AICKHOmB1sdBi2QrvA0fFDgtRwdZJV+DzF3KsZdPIsvD1I2HOdRVyurjQi8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0Ijoid2s0MWRLbDBtWlFwTlJoWmdwc1JUZHR2IiwiX2V4cGlyZSI6MTUyNzAzOTEwMzE5MiwiX21heEFnZSI6ODY0MDAwMDB9; session.sig=ssvF7IeeQYlwCjdh8GaY3mhr0SY"; String cookie = "sessionid=y87knknqrc3fi6xto2zv0s4kugmleepk; guid=GxsfBBgZGwQYGx4EGBkeVgcYGx4fHhwcGhgbVhwZBB0ZHwVDWEtMS3kKGhobBB0THhkEGgQTHAVPR0VYQmkKA0VBSU9tCk9BQ0YKBmZnfmJhAgocGQQdGR8FXkNhSE99T0ZaWmsKAx4cfWV9ChEZBBwKfmQKWV1FTkRDfQIKGgQfBUtGRkNQRWc=; seid=s1539933372113; token=\"ZTjnEij9jsL4ZCdnKF2CaUAwcJHgcem/zHvAbXp3MXdY+uSPva8scjbe2zHl2gE98CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiSFVMLVhKb2g5TkJGNHRJanljUW5Qa1V5IiwiX2V4cGlyZSI6MTU0MDAxOTc5MTUwNSwiX21heEFnZSI6ODY0MDAwMDB9; session.sig=dJmy52LHX-stqroAbm66u2zJaZA";
String time = "2018-05-01 00:00:00"; String time = "2018-10-15 00:00:00";
String[] words = word.split("\\|"); String[] words = word.split("\\|");
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(String w : words) { for(String w : words) {
List<Map<String,Object>> c = Maimai.getData(w, cookie, time, null); List<Map<String,Object>> c = Maimai.getData(w, cookie, time, null);
// List<Map<String,Object>> c = Maimai.getDataByNoName(w, cookie, time, null);
bodyList.addAll(c); bodyList.addAll(c);
} }
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
...@@ -28,7 +29,7 @@ public class MaimaiBywordExample { ...@@ -28,7 +29,7 @@ public class MaimaiBywordExample {
headList.add("comment_count"); headList.add("comment_count");
headList.add("spreads"); headList.add("spreads");
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D:\\crawlerdata\\自媒体\\脉脉关键词采集-1.xlsx", "脉脉关键词", headList, bodyList); poi.exportExcel("D:\\crawlerdata\\自媒体\\脉脉关键词采集-美团 晋升-1015.xlsx", "脉脉关键词", headList, bodyList);
} }
} }
...@@ -11,7 +11,8 @@ public class TXNewsByWordExample { ...@@ -11,7 +11,8 @@ public class TXNewsByWordExample {
public static void main(String[] args) { public static void main(String[] args) {
String word = "唐嫣"; String word = "唐嫣";
List<Map<String,Object>> dataList = TXNews.getData(word,null); String devid = "6D33F35F-880D-42A6-A23F-881BEC6960EC";
List<Map<String,Object>> dataList = TXNews.getData(word,devid,null);
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("title"); headList.add("title");
...@@ -20,7 +21,7 @@ public class TXNewsByWordExample { ...@@ -20,7 +21,7 @@ public class TXNewsByWordExample {
headList.add("url"); headList.add("url");
headList.add("id"); headList.add("id");
headList.add("source"); headList.add("source");
poi.exportExcel("D://crawlerdata/腾讯新闻-唐嫣.xlsx", "腾讯新闻数据", headList, dataList); poi.exportExcel("D://crawlerdata/腾讯新闻-唐嫣-1.xlsx", "腾讯新闻数据", headList, dataList);
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment