Commit d555bdda by yangchen

qq看点 百家号采集等更新

parent 17c44df3
......@@ -3,12 +3,17 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>articlenewscrawler</artifactId>
<version>0.0.1-SNAPSHOT</version>
<version>0.0.2-SNAPSHOT</version>
<name>articlenewscrawler</name>
<description>采集凤凰,一点资讯,搜狐历史文章和文章评论</description>
<dependencies>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.14.3</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
......@@ -19,11 +24,6 @@
<version>1.2.29</version>
</dependency>
<dependency>
<groupId>com.zhiwei</groupId>
<artifactId>zhiweiTools</artifactId>
<version>0.0.6-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
......@@ -33,6 +33,11 @@
<artifactId>excelpoi</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId>
<version>0.0.3-SNAPSHOT</version>
</dependency>
</dependencies>
<!-- 打包管理 -->
......
package com.zhiwei.bean;
import java.util.Date;
/**
 * Simple mutable bean holding the url, title, time, content and source of one record.
 */
public class HistortyBean {

    private String url;
    private String title;
    private Date time;
    private String content;
    private String source;

    /** @return the stored url */
    public String getUrl() {
        return this.url;
    }

    /** @param url the url to store */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the stored title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the stored time */
    public Date getTime() {
        return this.time;
    }

    /** @param time the time to store */
    public void setTime(Date time) {
        this.time = time;
    }

    /** @return the stored content */
    public String getContent() {
        return this.content;
    }

    /** @param content the content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the stored source */
    public String getSource() {
        return this.source;
    }

    /** @param source the source to store */
    public void setSource(String source) {
        this.source = source;
    }

    /** Renders every field in declaration order, in the form {@code HistortyBean [url=..., ...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("HistortyBean [");
        text.append("url=").append(this.url);
        text.append(", title=").append(this.title);
        text.append(", time=").append(this.time);
        text.append(", content=").append(this.content);
        text.append(", source=").append(this.source);
        return text.append("]").toString();
    }
}
package com.zhiwei.bean;
/**
 * Simple mutable bean holding the url, name, id, description and verify flag of one user record.
 */
public class QQKandianUser {

    private String url;
    private String name;
    private String id;
    private String desc;
    private boolean verify;

    /** @return the stored url */
    public String getUrl() {
        return this.url;
    }

    /** @param url the url to store */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the stored name */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored id */
    public String getId() {
        return this.id;
    }

    /** @param id the id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the stored description */
    public String getDesc() {
        return this.desc;
    }

    /** @param desc the description to store */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    /** @return the stored verify flag */
    public boolean isVerify() {
        return this.verify;
    }

    /** @param verify the verify flag to store */
    public void setVerify(boolean verify) {
        this.verify = verify;
    }

    /** Renders every field in the form {@code QQKandianUser [url=..., name=..., id=..., desc=..., verify=...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("QQKandianUser [");
        text.append("url=").append(this.url);
        text.append(", name=").append(this.name);
        text.append(", id=").append(this.id);
        text.append(", desc=").append(this.desc);
        text.append(", verify=").append(this.verify);
        return text.append("]").toString();
    }
}
package com.zhiwei.bean;
/**
 * Simple mutable bean holding the id, name, description, guanzhu count, url, image url and
 * vip value of one user record.
 */
public class QQkbUser {

    private String id;
    private String name;
    private String desc;
    private Integer guanzhu;
    private String url;
    private String img_url;
    private Integer vip;

    /** @return the stored vip value, may be {@code null} */
    public Integer getVip() {
        return vip;
    }

    /** @param vip the vip value to store */
    public void setVip(Integer vip) {
        this.vip = vip;
    }

    /** @return the stored id */
    public String getId() {
        return id;
    }

    /** @param id the id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the stored name */
    public String getName() {
        return name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored description */
    public String getDesc() {
        return desc;
    }

    /** @param desc the description to store */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    /** @return the stored guanzhu count, may be {@code null} */
    public Integer getGuanzhu() {
        return guanzhu;
    }

    /** @param guanzhu the guanzhu count to store */
    public void setGuanzhu(Integer guanzhu) {
        this.guanzhu = guanzhu;
    }

    /** @return the stored url */
    public String getUrl() {
        return url;
    }

    /** @param url the url to store */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the stored image url */
    public String getImg_url() {
        return img_url;
    }

    /** @param img_url the image url to store */
    public void setImg_url(String img_url) {
        this.img_url = img_url;
    }

    /**
     * Renders every field so instances are readable in logs, using the same
     * {@code ClassName [field=value, ...]} layout as the other beans in this package.
     */
    @Override
    public String toString() {
        return "QQkbUser [id=" + id + ", name=" + name + ", desc=" + desc
                + ", guanzhu=" + guanzhu + ", url=" + url + ", img_url=" + img_url
                + ", vip=" + vip + "]";
    }
}
......@@ -877,12 +877,67 @@ public class HeadGet {
return headerMap;
}
/**
 * Builds the request headers used for the is.snssdk.com (Toutiao) endpoint.
 *
 * @param cookie value for the Cookie header; when {@code null} no Cookie header is added
 * @return a new mutable header map
 */
public static Map<String,String> getToutiaoHeaderMap(String cookie) {
    final String userAgent =
            "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36";
    final String accept =
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";

    Map<String, String> headers = new HashMap<String, String>();
    headers.put("User-Agent", userAgent);
    headers.put("Accept-Language", "zh-cn");
    headers.put("Connection", "keep-alive");
    headers.put("Host", "is.snssdk.com");
    headers.put("Accept", accept);
    if (null == cookie) {
        return headers;
    }
    headers.put("Cookie", cookie);
    return headers;
}
/**
 * Builds the request headers used for the r.cnews.qq.com search endpoint.
 * All device and signature values are fixed literals.
 *
 * @param cookie value for the Cookie header; when {@code null} no Cookie header is added
 * @return a new mutable header map
 */
public static Map<String,String> getQQkbUserHeaderMap(String cookie) {
    Map<String,String> headerMap = new HashMap<String, String>();
    headerMap.put("User-Agent",
            "天天快报 4.6.2 qnreading (iPhone8,1; iOS 11.2.1; zh_CN; 4.6.2.89)");
    headerMap.put("Accept", "*/*");
    headerMap.put("Accept-Language", "zh-Hans-CN;q=1");
    headerMap.put("Connection", "keep-alive");
    headerMap.put("Host", "r.cnews.qq.com");
    headerMap.put("Referer", "http://r.cnews.qq.com/inews/iphone/");
    // devid was previously put twice with the same value; one entry is enough.
    headerMap.put("devid", "6D33F35F-880D-42A6-A23F-881BEC6960EC");
    headerMap.put("sngn", "rl3bDHe6ylqkTnZgjMNwyqay3cvYlIEt38n%2Fd3Wssv%2Bmik9D%2Bt06p7slposTw7oT%0D%0ArkVovLykCt4CIur9VZ8nAj91G7%2FH%2BZArRk3MSQrW0HFPGRu39PLQy27HWirnn2aF");
    headerMap.put("idfv", "6D33F35F-880D-42A6-A23F-881BEC6960EC");
    headerMap.put("deviceToken", "<585bee8d f6739b65 1248b40d 7be9dc4a 126bbf27 85ad470e ce6b7923 bbcb7c1c>");
    headerMap.put("qn-rid", "7E351889-6E94-4246-89BF-219458B7964C");
    headerMap.put("qn-sig", "5CB3DEE05F8CA7C76755D1AB53599587");
    headerMap.put("omgbizid", "5144dee3f39a8d4dad994e5391fcebd1a0d50090112b14");
    headerMap.put("omgid", "0f63f8e68f041746372b9ceecc8e97f028e90010112b14");
    headerMap.put("idfa", "FE659B7E-5104-44C2-8A31-F88DEE7A2747");
    headerMap.put("appver", "11.2.1_qnreading_4.8.30");
    if(cookie != null) {
        headerMap.put("Cookie", cookie);
    }
    return headerMap;
}
/**
 * Builds the form parameters sent with the QQ Kuaibao search request.
 *
 * @param word value stored under the {@code query} key (stored as given, may be {@code null})
 * @return a new mutable parameter map with the query plus four fixed entries
 */
public static Map<String,Object> getQQkbUserParamMap(String word) {
    Map<String,Object> params = new HashMap<String, Object>();
    // Fixed entries first; the caller-supplied query goes in last (map order is irrelevant).
    params.put("type", "media");
    params.put("source", "search_history");
    params.put("curChannel", "kb_news_young1");
    params.put("curTab", "kuaibao");
    params.put("query", word);
    return params;
}
/**
 * Ad-hoc manual driver: prints the target URL, issues one HTTP request with the
 * headers built by this class, then prints the response body and its length.
 *
 * NOTE(review): this span appears to interleave the removed and the added lines of a
 * diff - url, cookie, headerMap and result are each declared twice, which cannot
 * compile as-is. Confirm which variant is current before editing.
 * NOTE(review): the cookie literals below look like real session credentials committed
 * to source; consider loading them from configuration instead.
 */
public static void main(String[] args) throws UnsupportedEncodingException {
// Variant 1: GET against the ifeng we-media index endpoint.
String url = "http://api.3g.ifeng.com/api_wemedia_index?followid=weMedia_6452&page=1&pagesize=20&tag=article&uid=fe659b7e510444c28a31f88dee7a2747";
// Variant 2: POST against the QQ searchByType endpoint.
String url = "https://r.cnews.qq.com/searchByType";
System.out.println(url);
// Cookie declared next to variant 1; it is not passed to any call in this method.
String cookie = "__utma=51854390.454838676.1510118174.1528502507.1529542643.327; __utmb=51854390.0.10.1529542643; __utmc=51854390; __utmv=51854390.010--; __utmz=51854390.1510118174.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _zap=4e09de9f-d212-48b9-af42-9173e1092406; d_c0=ACACkXJlZQxLBTcU1Z70bp9TpD_qDHF6sAY=|1529542618; q_c0=2|1:0|10:1528360082|4:q_c0|80:MS4xX0RmNkJRQUFBQUFMQUFBQVlBSlZUWkY1UUZzOXFnWVlQOWRXTVkxNG5kcl80WjJkdzhiTDN3PT0=|c6f0a98f0d30bdfda124c276e9e88ce945dcf50103663cedeab1be9415a42101; q_c1=2d80752a333f4fcd99b1362dfce3e7eb|1529542724000|1506556052000; z_c0=2|1:0|10:1528360082|4:z_c0|80:MS4xX0RmNkJRQUFBQUFMQUFBQVlBSlZUWkY1UUZzOXFnWVlQOWRXTVkxNG5kcl80WjJkdzhiTDN3PT0=|021aae122258a6476003fd206423140b8c2ee162e406b628051471c64021b211; zst_82=1.0ANCm7PYOyA0LAAAASwUAADEuMDv9KlsAAAAAYiqmK0gPDsX0FVknWJlXcKc07NA=; __DAYU_PP=VFZF3fmJavniAMQf2fnf23a6dd5221ec; q_c1=2d80752a333f4fcd99b1362dfce3e7eb|1529542724000|1506556052000";
Map<String,String> headerMap = HeadGet.getFenghuangAccountHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url,null, headerMap);
// Cookie passed to getQQkbUserHeaderMap for variant 2.
String cookie = "luin=o0497332654;%20lskey=00030000d63ffaf7eba88c86106eac5f2910d45515222334b91c75a66b449c990c2be43cd202ba39b35bef60;%20uin=o0497332654;%20skey=MH3wukytS4;%20sigA2=7AB4D8DEDF73E313801FD348FD77EC3B05C06DBC4D9DA669B20CA04A8D6B80F300A69567FBD11A7B799E419BB796F22D47D3AE5FA95E708A0ABC66161061131B0B21A0031AA0807C;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwmSQ3EqlyzseC9-qGEFY7Tkr0Ypk5vsnSwOaMC-IGxsBeY2K7knHrYstj_5dZpisJd5nihvLNQvCdsFhFwZQcT8;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0";
Map<String,String> headerMap = HeadGet.getQQkbUserHeaderMap(cookie);
// Search parameters for the keyword "虎嗅".
Map<String,Object> paramMap = HeadGet.getQQkbUserParamMap("虎嗅");
String result = HttpClient.executeHttpRequestPost(url, null, headerMap, paramMap);
System.out.println(result);
System.out.println(result.length());
}
......
......@@ -7,7 +7,7 @@ import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
public class HttpClient {
private static Logger logger = LoggerFactory.getLogger(HttpClient.class);
......
......@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.AiqiyiByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Aiqiyi {
private static Logger logger = LoggerFactory.getLogger(Aiqiyi.class);
......
......@@ -13,7 +13,8 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.BaijiaAccountAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Baijia {
private static Logger logger = LoggerFactory.getLogger(Baijia.class);
......@@ -60,6 +61,46 @@ public class Baijia {
/**
 * Third method of collecting a Baijiahao account's articles: pages through
 * author.baidu.com/list in steps of 20 and accumulates the rows returned by
 * {@code baijiaAccountAnalysis.getBaijiaAccountData3}.
 *
 * Each page is attempted at most twice. If every attempt for a page fails, paging
 * stops and whatever has been collected so far is returned (previously the loop
 * would retry the same page forever).
 *
 * @param app_id    account app id placed into the request context
 * @param name      account name, passed to the analysis step and used in log output
 * @param startTime passed through to the analysis step
 * @param cookie    value sent as the {@code cookie} request header
 * @param proxy     proxy handed to the HTTP helper
 * @return the rows collected from all successfully fetched pages, never {@code null}
 */
public static List<Map<String,Object>> getBaijiaAccountByBaiduData(String app_id,String name,String startTime,String cookie,Proxy proxy) {
    Map<String,String> headerMap = HeaderTool.getCommonHead();
    List<Map<String,Object>> dataList = new ArrayList<>();
    headerMap.put("cookie",cookie);
    final int maxAttempts = 2;
    boolean more = true;
    int offset = 0;
    while(more) {
        boolean pageFetched = false;
        for(int attempt = 1; attempt <= maxAttempts && !pageFetched; attempt++) {
            try {
                String url = "https://author.baidu.com/list?type=article&context={%22offset%22:%22-1_"+offset+"%22,%22app_id%22:%22"+app_id+"%22,%22pageSize%22:20}";
                String result = HttpClient.executeHttpRequestGet(url, proxy, headerMap);
                Map<String,Object> dMap = baijiaAccountAnalysis.getBaijiaAccountData3(result,name, startTime);
                // Read both values before mutating dataList so a failed attempt
                // cannot leave half-applied data that a retry would duplicate.
                @SuppressWarnings("unchecked")
                List<Map<String,Object>> dList = (List<Map<String, Object>>) dMap.get("data");
                boolean hasMore = (boolean) dMap.get("more");
                dataList.addAll(dList);
                logger.info("{} 数据采集结果 {}",name, dataList.size());
                more = hasMore;
                pageFetched = true;
                offset += 20;
                ZhiWeiTools.sleep(3000);
            } catch (Exception e) {
                logger.error("Baijia page fetch failed, name={} offset={} attempt={}", name, offset, attempt, e);
                ZhiWeiTools.sleep(3000);
            }
        }
        if(!pageFetched) {
            // All attempts for this page failed: give up instead of looping forever.
            logger.error("Baijia paging aborted for {} at offset {} after {} attempts", name, offset, maxAttempts);
            break;
        }
    }
    return dataList;
}
/**
*
* @Description 百家号历史文章采集
* @param app_id
* @param startTime
......
package com.zhiwei.parse;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.Proxy;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.analysis.BilibilikeyWordAnalysis;
import com.zhiwei.tools.httpclient.HttpClientBuilder;
import com.zhiwei.tools.httpclient.HttpRequestBuilder;
import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.util.WordReadFile;
import okhttp3.Headers;
import okhttp3.OkHttpClient;
import okhttp3.Request;
/**
 * Keyword search against search.bilibili.com, plus a command-line driver that exports
 * the collected rows to an Excel file.
 */
public class BiliBili {

    private static Logger logger = LoggerFactory.getLogger(BiliBili.class);

    /**
     * Fetches every result page for one keyword and returns all parsed rows.
     * Paging continues while the parsed page reports {@code more == true}.
     *
     * @param word   search keyword (URL-encoded as utf-8 before use)
     * @param proxy  proxy applied to the HTTP client
     * @param cookie value sent as the {@code cookie} request header
     * @return the accumulated rows, or {@code null} when an encoding or I/O error occurs
     */
    public static List<Map<String,Object>> getData(String word,Proxy proxy,String cookie) {
        OkHttpClient client = HttpClientBuilder.newInstance();
        List<Map<String,Object>> collected = new ArrayList<Map<String,Object>>();
        try {
            final String firstPageUrl = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&from_source=banner_search";
            final Headers headers = Headers.of("cookie",cookie,"Referer","https://www.bilibili.com/","Host","search.bilibili.com");
            System.out.println(firstPageUrl);

            // First page: fetch, pause, then parse.
            Request request = HttpRequestBuilder.newGetRequest(firstPageUrl, headers);
            client = client.newBuilder().proxy(proxy).build();
            String firstBody = client.newCall(request).execute().body().string();
            ZhiWeiTools.sleep(3000);
            Map<String,Object> parsed = BilibilikeyWordAnalysis.getData(firstBody);
            boolean hasMore = (boolean) parsed.get("more");
            appendRows(collected, parsed);

            // Following pages start at page=2.
            for(int page = 2; hasMore; page++) {
                parsed.clear();
                request = HttpRequestBuilder.newGetRequest(firstPageUrl + "&page=" + page, headers);
                String body = client.newCall(request).execute().body().string();
                parsed = BilibilikeyWordAnalysis.getData(body);
                appendRows(collected, parsed);
                System.out.println(page + "页,数据总量为 -- " + collected.size() );
                hasMore = (boolean) parsed.get("more");
                ZhiWeiTools.sleep(3000);
            }
            return collected;
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /** Adds the rows stored under "data" in a parsed page to the target list, if present. */
    private static void appendRows(List<Map<String,Object>> target, Map<String,Object> parsedPage) {
        List<Map<String,Object>> rows = (List<Map<String, Object>>) parsedPage.get("data");
        if(rows == null) {
            return;
        }
        target.addAll(rows);
    }

    /**
     * Reads keywords from a fixed file, collects the rows for each keyword and writes
     * them all to a fixed Excel file.
     */
    public static void main(String[] args) {
        List<String> keywords = WordReadFile.getWords("D://crawlerdata//关键词.txt");
        List<Map<String,Object>> allRows = new ArrayList<Map<String,Object>>();
        String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274";
        for(String keyword : keywords) {
            List<Map<String,Object>> rows = BiliBili.getData(keyword, null,cookie);
            if(rows == null) {
                continue;
            }
            System.out.println(keyword + " ----- " + rows.size());
            allRows.addAll(rows);
        }

        // Column keys of the exported sheet, in output order.
        List<String> columns = new ArrayList<String>();
        for(String column : new String[] {"submitcount", "playcount", "time", "source", "title", "url"}) {
            columns.add(column);
        }
        PoiExcelUtil exporter = PoiExcelUtil.getInstance();
        exporter.exportExcel("D://crawlerdata//bilibili关键词采集数据.xlsx", "B站数据", columns, allRows);
    }
}
......@@ -16,7 +16,7 @@ import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.DayuAccountAnalysis;
import com.zhiwei.parse.analysis.DayuByWordAnalysis;
import com.zhiwei.parse.analysis.DayuCommentAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Dayu {
private static Logger logger = LoggerFactory.getLogger(Dayu.class);
......
......@@ -2,7 +2,6 @@ package com.zhiwei.parse;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
......@@ -12,7 +11,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.DouyinHotDataAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Douyin {
......
......@@ -14,7 +14,7 @@ import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.FenghuangAccountAnalysis;
import com.zhiwei.parse.analysis.FenghuangByWordAnalysis;
import com.zhiwei.parse.analysis.FenghuangCommentAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Fenghuang {
private static Logger logger = LoggerFactory.getLogger(Fenghuang.class);
......
......@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.MaimaiBywordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Maimai {
......
......@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.MeipaiByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Meipai {
private static Logger logger = LoggerFactory.getLogger(Meipai.class);
......
......@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.PearVideoByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class PearVideo {
private static Logger logger = LoggerFactory.getLogger(PearVideo.class);
......
......@@ -8,12 +8,14 @@ import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.bean.QQkbUser;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.QQKBAccountAnalysis;
import com.zhiwei.parse.analysis.QQKBCommentAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class QQKB {
private static Logger logger = LoggerFactory.getLogger(QQKB.class);
......@@ -131,6 +133,44 @@ public class QQKB {
}
}
/**
 * Looks up a QQ Kuaibao media account by exact name via the searchByType endpoint.
 *
 * The first media entry whose {@code chlname} equals {@code name} is converted into a
 * {@link QQkbUser}. Any failure (request error, unexpected JSON shape, {@code null} name)
 * is logged and reported as "not found" instead of being silently swallowed.
 *
 * NOTE(review): the cookie below is a hard-coded literal that looks like a real session
 * credential; consider moving it to configuration.
 *
 * @param name exact account name to match
 * @return the matching user, or {@code null} when nothing matches or the lookup fails
 */
public static QQkbUser getUserData(String name) {
    try {
        String url = "https://r.cnews.qq.com/searchByType";
        String cookie = "luin=o0497332654;%20lskey=00030000d63ffaf7eba88c86106eac5f2910d45515222334b91c75a66b449c990c2be43cd202ba39b35bef60;%20uin=o0497332654;%20skey=MH3wukytS4;%20sigA2=7AB4D8DEDF73E313801FD348FD77EC3B05C06DBC4D9DA669B20CA04A8D6B80F300A69567FBD11A7B799E419BB796F22D47D3AE5FA95E708A0ABC66161061131B0B21A0031AA0807C;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwmSQ3EqlyzseC9-qGEFY7Tkr0Ypk5vsnSwOaMC-IGxsBeY2K7knHrYstj_5dZpisJd5nihvLNQvCdsFhFwZQcT8;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0";
        Map<String,String> headerMap = HeadGet.getQQkbUserHeaderMap(cookie);
        Map<String,Object> paramMap = HeadGet.getQQkbUserParamMap(name);
        String result = HttpClient.executeHttpRequestPost(url, null, headerMap, paramMap);
        // Response path: new_list -> data[0] -> channellist -> media[]
        JSONObject json = JSONObject.parseObject(result);
        JSONObject json1 = json.getJSONObject("new_list");
        JSONObject json2 = json1.getJSONArray("data").getJSONObject(0);
        JSONObject json3 = json2.getJSONObject("channellist");
        JSONArray jsonArry = json3.getJSONArray("media");
        for(int i = 0;i < jsonArry.size();i++) {
            JSONObject data = jsonArry.getJSONObject(i);
            String na = data.getString("chlname");
            if(name.equals(na)) {
                QQkbUser qqQkbUser = new QQkbUser();
                String id = data.getString("chlid");
                String desc = data.getString("desc");
                String img_url = data.getString("sicon");
                int guanzhu = data.getIntValue("subCount");
                int vip = data.getIntValue("vip");
                qqQkbUser.setDesc(desc);
                qqQkbUser.setGuanzhu(guanzhu);
                qqQkbUser.setId(id);
                qqQkbUser.setImg_url(img_url);
                qqQkbUser.setName(name);
                qqQkbUser.setUrl("https://kuaibao.qq.com/s/MEDIANEWSLIST?chlid="+id);
                qqQkbUser.setVip(vip);
                return qqQkbUser;
            }
        }
    } catch (Exception e) {
        // Best-effort lookup: keep returning null, but leave a trace of why it failed.
        logger.warn("QQ Kuaibao user lookup failed for {}", name, e);
    }
    return null;
}
/**
*
* @Description 获取cid
......
package com.zhiwei.parse;
import java.io.IOException;
import java.net.Proxy;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.bean.HistortyBean;
import com.zhiwei.bean.QQKandianUser;
import com.zhiwei.tools.httpclient.HttpClientBuilder;
import com.zhiwei.tools.httpclient.HttpRequestBuilder;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Headers;
import okhttp3.OkHttpClient;
import okhttp3.Request;
/**
 * Client for three QQ Kandian operations: account search by name, paging through an
 * account's article history, and article search by keyword.
 *
 * NOTE(review): the Cookie header values in this class are hard-coded literals that look
 * like real session credentials; consider moving them to configuration.
 */
public class QQKandian {

    private static Logger logger = LoggerFactory.getLogger(QQKandian.class);

    /** Matches a {@code data-timestamp="..."} attribute; group 1 is the attribute value. */
    private static final Pattern TIMESTAMP_PATTERN = Pattern.compile("data-timestamp=\"(.*?)\"");

    /** Matches an {@code ata-author="..."} attribute; group 1 is the attribute value. */
    private static final Pattern AUTHOR_PATTERN = Pattern.compile("ata-author=\"(.*?)\"");

    /**
     * Searches accounts by name and keeps only the entries whose name equals {@code name}.
     *
     * @param name  exact account name to look for
     * @param proxy proxy applied to the HTTP client
     * @return the matching accounts (possibly empty), or {@code null} when {@code name} is
     *         null/empty or the request/parsing fails
     */
    public List<QQKandianUser> getUser(String name,Proxy proxy) {
        if(name == null || name.length() == 0) {
            return null;
        }
        List<QQKandianUser> dataList = new ArrayList<QQKandianUser>();
        OkHttpClient okhttp = HttpClientBuilder.newInstance();
        Map<String,String> map = new HashMap<String,String>();
        map.put("Host", "sou.qq.com");
        map.put("Referer", "https://sou.qq.com/kandian/kd.html?_bid=3216&_wv=3&_wwv=1293&_wvSb=0&hotword=%E7%9F%A5%E5%90%8D%E5%A4%A7V%E7%AB%A0%E6%96%87%E6%B6%89%E6%80%A7%E4%BE%B5");
        map.put("Cookie", "skey=MUzU7gdtRz; uin=o0497332654; RK=rNiJH0RBav; pgv_pvid=8990378504; pt2gguin=o0497332654; ptcz=062d936df33011f468637ee72be262a020a8df79977df7e7bde9c105b2b2ddf6");
        try {
            // Example: https://sou.qq.com/cgi-bin/kandian/tab_search?key_word=%E9%98%BF%E9%87%8C&business=64&page_size=20&cookie=&Group_masks=1003&bkn=1215238072
            String url = "https://sou.qq.com/cgi-bin/kandian/tab_search?key_word="+URLEncoder.encode(name, "utf-8")+"&business=64&page_size=20&cookie=&Group_masks=1003&bkn=1215238072";
            Request request = HttpRequestBuilder.newGetRequest(url, Headers.of(map));
            okhttp = okhttp.newBuilder().proxy(proxy).build();
            String result = okhttp.newCall(request).execute().body().string();
            logger.debug("QQ Kandian user search response: {}", result);
            JSONObject json = JSONObject.parseObject(result);
            JSONObject json2 = json.getJSONObject("result").getJSONArray("item_groups").getJSONObject(0);
            JSONArray jsonArray = json2.getJSONArray("result_items");
            for(int i = 0;i < jsonArray.size();i++) {
                JSONObject data = jsonArray.getJSONObject(i);
                if(!name.equals(data.getString("name"))) {
                    continue;
                }
                QQKandianUser kandianUser = new QQKandianUser();
                String layoutContent = data.getString("layout_content");
                if(layoutContent != null) {
                    JSONObject layout = JSONObject.parseObject(layoutContent);
                    if(layout != null) {
                        // getBooleanValue yields false for a missing field; the boxed
                        // getBoolean would be null and blow up when unboxed.
                        kandianUser.setVerify(layout.getBooleanValue("verify"));
                        kandianUser.setDesc(layout.getString("secondLineText"));
                    }
                }
                logger.debug("QQ Kandian user entry: {}", data);
                kandianUser.setId(data.getString("result_id"));
                kandianUser.setName(data.getString("name"));
                kandianUser.setUrl(data.getString("jmp_url"));
                dataList.add(kandianUser);
            }
            return dataList;
        } catch (Exception e) {
            logger.warn("QQ Kandian user search failed for {}", name, e);
            return null;
        }
    }

    /**
     * Pages through an account's article list and loads each article page.
     * Paging stops when a page contains no articles or when a request/parsing error occurs;
     * articles collected up to that point are returned.
     *
     * @param uid   account id placed into the {@code uin} query parameter
     * @param proxy proxy applied to the list requests
     * @return the collected articles, or {@code null} if the HTTP client could not be set up
     */
    public List<HistortyBean> getHistoryData(String uid,Proxy proxy) {
        String url = "https://kandian.qq.com/cgi-bin/social/getHomePage?uin="+uid+"&pageNo=1&pageSize=10&pageCookies=&is715=1&isInQQ=1&g_tk=1066845421&bkn=1066845421&_="+new Date().getTime();
        List<HistortyBean> dataList = new ArrayList<HistortyBean>();
        OkHttpClient okhttp = HttpClientBuilder.newInstance();
        Map<String,String> map = new HashMap<String,String>();
        map.put("Host", "kandian.qq.com");
        map.put("Referer", "https://kandian.qq.com/mqq/vue/main?_wv=10145&_bid=2378&adfrom=search&x5PreFetch=1&accountId=MjY2MTY0MjM4Ng%3D%3D");
        map.put("Cookie", "skey=MQmBo5A1N7; uin=o0497332654; pgv_pvid=8990378504");
        try {
            okhttp = okhttp.newBuilder().proxy(proxy).build();
            while(true) {
                try {
                    Request request = HttpRequestBuilder.newGetRequest(url, Headers.of(map));
                    String result = okhttp.newCall(request).execute().body().string();
                    JSONObject json = JSONObject.parseObject(result).getJSONObject("result");
                    JSONArray jsonArray = json.getJSONArray("articleinfos");
                    if(jsonArray == null || jsonArray.isEmpty()) {
                        // An empty page means there is nothing left; without this check
                        // the loop would keep requesting further pages indefinitely.
                        break;
                    }
                    for(int i = 0;i < jsonArray.size();i++) {
                        JSONObject data = jsonArray.getJSONObject(i);
                        String ur = data.getString("articleurl");
                        HistortyBean history = getOnhistoryData(ur);
                        if(history != null) {
                            dataList.add(history);
                        }
                        ZhiWeiTools.sleep(1500);
                    }
                    // Build the next page URL: bump pageNo and carry over the returned pageCookies.
                    String pageCookies = json.getString("pageCookies");
                    String pacs = request.url().queryParameter("pageCookies");
                    int pageno = Integer.parseInt(request.url().queryParameter("pageNo"));
                    url = request.url().toString().replace("pageNo="+pageno, "pageNo="+(pageno+1)).replace("&pageCookies="+pacs, "&pageCookies="+pageCookies);
                    ZhiWeiTools.sleep(5000);
                } catch (Exception e) {
                    logger.warn("QQ Kandian history paging stopped at {}", url, e);
                    break;
                }
            }
            return dataList;
        } catch (Exception e) {
            logger.error("QQ Kandian history collection failed for uid {}", uid, e);
        }
        return null;
    }

    /**
     * Loads one article page and extracts its time, source, title and content.
     *
     * @param url article URL
     * @return the populated bean, or {@code null} when the page has no timestamp/author
     *         attribute or the request fails
     */
    private static HistortyBean getOnhistoryData(String url) {
        OkHttpClient okhttp = HttpClientBuilder.newInstance();
        Request request = HttpRequestBuilder.newGetRequest(url, Headers.of("Host","post.mp.qq.com"));
        try {
            String result = okhttp.newCall(request).execute().body().string();
            Date date = getTime(result);
            String source = getSource(result);
            if(date != null && source != null) {
                Document doc = Jsoup.parse(result);
                String content = doc.select("div#main-content").select("section").text();
                String title = doc.select("meta[itemprop=name]").attr("content");
                HistortyBean histortyBean = new HistortyBean();
                histortyBean.setSource(source);
                histortyBean.setTime(date);
                histortyBean.setTitle(title);
                histortyBean.setContent(content);
                histortyBean.setUrl(url);
                return histortyBean;
            }
        } catch (IOException e) {
            logger.warn("QQ Kandian article fetch failed for {}", url, e);
        }
        return null;
    }

    /**
     * Searches articles by keyword. From every result group that is not a video,
     * short-video, related-search or topic group, the first item is taken; items are
     * de-duplicated by their URL without query string. Paging follows the {@code cookie}
     * value returned by the server and stops when a page yields no new URL or an error occurs.
     *
     * @param word  search keyword (URL-encoded as utf-8 before use)
     * @param proxy proxy applied to the HTTP client
     * @return the collected articles, or {@code null} when the request cannot be prepared
     */
    public List<HistortyBean> getDataByword(String word,Proxy proxy) {
        List<HistortyBean> dataList = new ArrayList<HistortyBean>();
        OkHttpClient okhttp = HttpClientBuilder.newInstance();
        Map<String,String> map = new HashMap<String,String>();
        map.put("Host", "sou.qq.com");
        map.put("Referer", "https://sou.qq.com/kandian/kd.html?_bid=3216&_wv=3&_wwv=1293&_wvSb=0&hotword=%E8%BF%AA%E4%B8%BD%E7%83%AD%E5%B7%B4%E9%82%93%E4%BC%A6%E7%94%B5%E6%A2%AF%E5%90%BB");
        map.put("Cookie", "skey=MU7zbaRXu8; uin=o0497332654; RK=rNiJH0RBav; pgv_pvid=8990378504; pt2gguin=o0497332654; ptcz=062d936df33011f468637ee72be262a020a8df79977df7e7bde9c105b2b2ddf6");
        try {
            String url = "https://sou.qq.com/cgi-bin/kandian/unite_search?key_word="+URLEncoder.encode(word, "utf-8")+"&business=64&page_size=20&cookie=&bkn=2000031506";
            // Example: https://sou.qq.com/cgi-bin/kandian/unite_search?key_word=%E5%94%90%E5%AB%A3&business=64&page_size=20&cookie=&bkn=2000031506
            List<String> urlList = new ArrayList<String>();
            // The proxy-configured client does not change between pages, so build it once.
            okhttp = okhttp.newBuilder().proxy(proxy).build();
            while(true) {
                try {
                    Request request = HttpRequestBuilder.newGetRequest(url, Headers.of(map));
                    String result = okhttp.newCall(request).execute().body().string();
                    JSONObject json = JSONObject.parseObject(result).getJSONObject("result");
                    JSONArray jsonArray = json.getJSONArray("item_groups");
                    int count = urlList.size();
                    for(int i = 0;i < jsonArray.size();i++) {
                        JSONObject data = jsonArray.getJSONObject(i);
                        String type = data.getString("group_name");
                        if("视频".equals(type) || "小视频".equals(type) || "相关搜索".equals(type) || "话题".equals(type)) {
                            continue;
                        }
                        JSONObject da = data.getJSONArray("result_items").getJSONObject(0);
                        String title = da.getString("name");
                        String ur = da.getString("jmp_url");
                        String pageUrl = ur.split("\\?")[0];
                        if(urlList.contains(pageUrl)) {
                            continue;
                        }
                        urlList.add(pageUrl);
                        JSONObject obj = JSONObject.parseObject(da.getString("extension"));
                        String time = obj.getString("create_time");
                        String content = obj.getString("content");
                        if(content == null) {
                            content = obj.getString("brief");
                        }
                        String source = obj.getString("from");
                        logger.debug("{} -- {}", title, pageUrl);
                        HistortyBean histortyBean = new HistortyBean();
                        // presumably create_time is epoch seconds; "000" is appended before parsing - TODO confirm
                        histortyBean.setTime(TimeParse.stringFormartDate(time+"000"));
                        histortyBean.setContent(content);
                        histortyBean.setTitle(title);
                        histortyBean.setSource(source);
                        histortyBean.setUrl(ur);
                        dataList.add(histortyBean);
                    }
                    if(count == urlList.size()) {
                        // No new URL on this page: stop paging.
                        break;
                    }
                    String pageCookies = json.getString("cookie");
                    String pacs = request.url().queryParameter("cookie");
                    url = request.url().toString().replace("&cookie="+pacs, "&cookie="+pageCookies);
                    ZhiWeiTools.sleep(3000);
                } catch (Exception e) {
                    logger.warn("QQ Kandian keyword paging stopped at {}", url, e);
                    break;
                }
            }
            return dataList;
        } catch (Exception e) {
            logger.error("QQ Kandian keyword search failed for {}", word, e);
        }
        return null;
    }

    /**
     * Extracts the time from the first {@code data-timestamp} attribute of a page.
     *
     * @param result page HTML
     * @return the parsed date, or {@code null} when the attribute is absent or empty
     */
    private static Date getTime(String result) {
        Matcher ma = TIMESTAMP_PATTERN.matcher(result);
        if(!ma.find()) {
            return null;
        }
        String value = ma.group(1);
        if(value.isEmpty()) {
            // An empty attribute used to raise ArrayIndexOutOfBoundsException and abort the crawl.
            return null;
        }
        return TimeParse.stringFormartDate(value+"000");
    }

    /**
     * Extracts the source from the first {@code ata-author} attribute match of a page.
     *
     * @param result page HTML
     * @return the attribute value (possibly empty), or {@code null} when the attribute is absent
     */
    private static String getSource(String result) {
        Matcher ma = AUTHOR_PATTERN.matcher(result);
        return ma.find() ? ma.group(1) : null;
    }
}
......@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.SoKuByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Soku {
private static Logger logger = LoggerFactory.getLogger(Soku.class);
......
......@@ -19,7 +19,7 @@ import org.jsoup.select.Elements;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class SouBao {
......@@ -30,7 +30,7 @@ public class SouBao {
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(String word : wordList) {
ZhiWeiTools.sleep(2000);
List<Map<String,Object>> dataList = getData(word, cookie,"2018-06-11","2018-06-12");
List<Map<String,Object>> dataList = getData(word, cookie,"2017-12-27","2018-06-27");
for(Map<String,Object> map : dataList) {
boolean f = false;
String time = (String) map.get("time");
......
......@@ -17,8 +17,8 @@ import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.SouhuAccountAnalysis;
import com.zhiwei.parse.analysis.SouhuCommentAnalysis;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Souhu {
private static Logger logger = LoggerFactory.getLogger(Souhu.class);
......@@ -61,6 +61,8 @@ public class Souhu {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
Map<String,String> headerMap = HeadGet.getSouhuAccountHeaderMap(null);
boolean f = true;
int j = 0;
while(f) {
try {
String url = "http://mp.sohu.com/apiV2/profile/newsListAjax?xpt="+xpt+"&pageNumber="+i+"&pageSize=10";
......@@ -99,7 +101,12 @@ public class Souhu {
i++;
ZhiWeiTools.sleep(3000);
} catch (Exception e) {
ZhiWeiTools.sleep(3000);
logger.error("出错了",e.getMessage());
j++;
if(j > 5) {
f = false;
}
continue;
}
}
......
......@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.TXNewsByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class TXNews {
......
package com.zhiwei.parse;
import java.io.UnsupportedEncodingException;
import java.net.Proxy;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.ToutiaoKeyWordAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Keyword search against the is.snssdk.com search_content endpoint.
 */
public class Toutiao {

    private static Logger logger = LoggerFactory.getLogger(Toutiao.class);
    private static ToutiaoKeyWordAnalysis toutiaoKeyWordAnalysis = new ToutiaoKeyWordAnalysis();

    /**
     * Requests the offsets 0, 10 and 20 for one keyword and accumulates the parsed rows.
     * Stops early as soon as a page parses to an empty list.
     *
     * @param word   search keyword (URL-encoded as utf-8 before use)
     * @param proxy  proxy handed to the HTTP helper
     * @param devoid value placed into the {@code device_id} query parameter
     * @return the accumulated rows, never {@code null}
     */
    public static List<Map<String,Object>> getKeyWordData(String word,Proxy proxy,String devoid) {
        List<Map<String,Object>> collected = new ArrayList<Map<String,Object>>();
        Map<String,String> headers = HeadGet.getToutiaoHeaderMap(null);
        String baseUrl = null;
        try {
            String encodedWord = URLEncoder.encode(word, "utf-8");
            baseUrl = "http://is.snssdk.com/api/2/wap/search_content/?from=news&keyword="+encodedWord+"&cur_tab_title=search_tab&device_id="+devoid+"&format=json&offset=";
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        int offset = 0;
        while(offset < 30) {
            String pageUrl = baseUrl + offset;
            System.out.println(pageUrl);
            String response = HttpClient.executeHttpRequestGet(pageUrl, proxy, headers);
            List<Map<String,Object>> page = toutiaoKeyWordAnalysis.getData(response);
            if(page != null && page.size() < 1) {
                // Empty page: nothing further to fetch.
                break;
            }
            if(page != null) {
                collected.addAll(page);
            }
            ZhiWeiTools.sleep(5000);
            offset += 10;
        }
        return collected;
    }
}
......@@ -9,13 +9,12 @@ import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.WangyiCommentAnalysis;
import com.zhiwei.parse.analysis.WangyiHistoryAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Wangyi {
private static Logger logger = LoggerFactory.getLogger(Wangyi.class);
......@@ -79,8 +78,9 @@ public class Wangyi {
String source = Jsoup.parse(result).select("body > div.colum_wrap.fl > div > div.colum_des > div.normal > div.colum_info > h4").text();
boolean f = true;
url = "http://dy.163.com/v2/article/list.do?wemediaId="+wemediaid+"&size=20&pageNo=";
int i = 20;
int i = 1;
ZhiWeiTools.sleep(1000);
int j = 0;
while(f) {
try {
result = "";
......@@ -100,6 +100,11 @@ public class Wangyi {
i++;
} catch (Exception e) {
ZhiWeiTools.sleep(1000);
logger.info("出错 {}",e.getMessage());
j++;
if(j > 5) {
f = false;
}
continue;
}
}
......
......@@ -15,8 +15,8 @@ import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.XiguaAccountAnalysis;
import com.zhiwei.parse.analysis.XiguaByWordAnalysis;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class XiGua {
private static Logger logger = LoggerFactory.getLogger(XiGua.class);
......
......@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.XiaomiShequByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Xiaomi {
private static Logger logger = LoggerFactory.getLogger(Xiaomi.class);
......
......@@ -16,7 +16,7 @@ import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.YidianzixunAccountAnalysis;
import com.zhiwei.parse.analysis.YidianzixunByWordAnalysis;
import com.zhiwei.parse.analysis.YidianzixunCommentAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Yidianzixun {
private static Logger logger = LoggerFactory.getLogger(Yidianzixun.class);
......
......@@ -15,7 +15,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class AiqiyiByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(AiqiyiByWordAnalysis.class);
......
......@@ -2,6 +2,7 @@ package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
......@@ -15,8 +16,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class BaijiaAccountAnalysis {
private static Logger logger = LoggerFactory.getLogger(BaijiaAccountAnalysis.class);
......@@ -41,13 +42,82 @@ public class BaijiaAccountAnalysis {
/**
 * Third parsing variant: reads one page of an account's article list from
 * {@code data.items} of the JSON response.
 *
 * @param result    raw JSON response text
 * @param name      account name, stored under the "source" key of every row
 * @param startTime lower time bound formatted as "yyyy-MM-dd HH:mm:ss"; articles
 *                  created at or before it are skipped. {@code null} or a string
 *                  of length &lt;= 1 disables the filter
 * @return a map with "data" (the list of parsed article rows) and "more"
 *         ({@code true} only when the response reports {@code has_more} and no
 *         article on this page fell at or before {@code startTime})
 */
public Map<String,Object> getBaijiaAccountData3(String result,String name,String startTime) {
    List<Map<String,Object>> dataList = new ArrayList<>();
    Map<String,Object> rmap = new HashMap<>();
    boolean more = false;
    try {
        JSONObject json = JSONObject.parseObject(result);
        JSONObject payload = json.getJSONObject("data");
        JSONArray jsonArry = payload.getJSONArray("items");
        boolean filterByTime = startTime != null && startTime.length() > 1;
        // Set once an article at or before startTime is seen. The original
        // wrote "more = false" here, but has_more overwrote it after the loop,
        // so the cut-off never stopped pagination.
        boolean reachedStartTime = false;
        for(int i = 0;i < jsonArry.size();i++) {
            JSONObject data = jsonArry.getJSONObject(i);
            String id = data.getString("article_id");
            // created_at is multiplied by 1000 below, i.e. treated as epoch seconds.
            int t = data.getInteger("created_at");
            String time = TimeParse.dateFormartString(new Date(t*1000L), "yyyy-MM-dd HH:mm:ss");
            if(filterByTime && time.compareTo(startTime) < 1) {
                reachedStartTime = true;
                continue;
            }
            Map<String,Object> map = new HashMap<>();
            map.put("title", data.getString("title"));
            String url = data.getString("url");
            if(url == null) {
                // Fall back to a link built from the article id.
                url = "https://baijia.baidu.com/s?old_id=" + id;
            }
            // delHTMLTag presumably strips markup from the text - confirm in ZhiWeiTools.
            map.put("content", ZhiWeiTools.delHTMLTag(getContent3(data)));
            map.put("read_amount", data.getString("read_amount")==null?0:data.getString("read_amount"));
            map.put("app_id", data.getString("app_id"));
            map.put("time", time);
            map.put("url", url);
            map.put("source", name);
            dataList.add(map);
        }
        // Boolean.TRUE.equals avoids an unboxing NPE when has_more is absent.
        more = !reachedStartTime && Boolean.TRUE.equals(payload.getBoolean("has_more"));
    } catch (Exception e) {
        // Best-effort: rows parsed before the failure are still returned and "more" stays false.
        e.printStackTrace();
    }
    rmap.put("data", dataList);
    rmap.put("more", more);
    return rmap;
}
/**
 * Joins the text segments of an article's structured content.
 *
 * Reads {@code content.items} from the given article object and concatenates
 * the "data" value of every item whose "type" is "text".
 *
 * @param data one article object from the list response
 * @return the concatenated text, or an empty string when the content cannot be read
 */
private String getContent3(JSONObject data) {
    try {
        JSONArray segments = data.getJSONObject("content").getJSONArray("items");
        StringBuilder text = new StringBuilder();
        for(int i = 0;i < segments.size();i++) {
            JSONObject segment = segments.getJSONObject(i);
            if(!"text".equals(segment.getString("type"))) {
                continue;
            }
            String piece = segment.getString("data");
            // append(null) would write the literal "null" into the article text.
            if(piece != null) {
                text.append(piece);
            }
        }
        return text.toString();
    } catch (Exception e) {
        // Missing or malformed content is treated as "no text".
        return "";
    }
}
/**
*
* @Description 解析一页历史文章
* @param result
* @param startTime
* @return
*/
public List<Map<String,Object>> getBaijiaAccountData(String result,String startTime) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
List<Map<String,Object>> dataList = new ArrayList<>();
try {
JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("data").getJSONObject("WriterArticleList").getJSONArray("items");
......@@ -67,7 +137,6 @@ public class BaijiaAccountAnalysis {
if(url == null) {
url = "https://baijia.baidu.com/s?old_id=" + id;
}
// map.put("content", getBaijiaContent(url));
map.put("content", data.getString("abstract"));
map.put("read_amount", data.getString("read_amount")==null?0:data.getString("read_amount"));
map.put("app_id", data.getString("app_id"));
......
package com.zhiwei.parse.analysis;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Parser for a Bilibili keyword-search result page.
 */
public class BilibilikeyWordAnalysis {

    /**
     * Extracts the video entries of one search result page.
     *
     * @param result HTML of the search result page
     * @return a map with "more" (whether the next-page button text contains
     *         "下一页") and "data" (one row per list item with the keys title,
     *         url, playcount, time, source and submitcount); {@code null} when
     *         parsing throws
     */
    public static Map<String,Object> getData(String result) {
        Map<String,Object> page = null;
        try {
            Document html = Jsoup.parse(result);
            String nextButtonText = html.select("#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.next > button").text();
            boolean hasNext = nextButtonText.contains("下一页");

            Elements items = html.select("ul.video-contain.clearfix").select("li");
            System.out.println(items.size() + " --- " + hasNext);

            List<Map<String,Object>> videos = new ArrayList<Map<String,Object>>();
            for (Element item : items) {
                Map<String,Object> row = new HashMap<String,Object>();
                row.put("title", item.select("a").attr("title"));
                row.put("url", item.select("a").attr("href"));
                row.put("playcount", item.select("div.tags").select("span.watch-num").text());
                row.put("time", item.select("div.tags").select("span.time").text());
                row.put("source", item.select("div.tags").select("a.up-name").text());
                row.put("submitcount", item.select("div.tags").select("span.hide").text());
                videos.add(row);
            }

            Map<String,Object> parsed = new HashMap<String,Object>();
            parsed.put("more", hasNext);
            parsed.put("data", videos);
            page = parsed;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return page;
    }
}
......@@ -16,8 +16,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class DayuByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(DayuByWordAnalysis.class);
......
......@@ -14,8 +14,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class DayuCommentAnalysis {
private static Logger logger = LoggerFactory.getLogger(DayuCommentAnalysis.class);
......
......@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.tools.timeparse.TimeParse;
public class DouyinHotDataAnalysis {
......
......@@ -13,7 +13,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class FenghuangAccountAnalysis {
private static Logger logger = LoggerFactory.getLogger(FenghuangAccountAnalysis.class);
......
......@@ -14,7 +14,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.tools.timeparse.TimeParse;
public class FenghuangCommentAnalysis {
private static Logger logger = LoggerFactory.getLogger(FenghuangCommentAnalysis.class);
......
......@@ -14,8 +14,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class QQKBCommentAnalysis {
private static Logger logger = LoggerFactory.getLogger(QQKBCommentAnalysis.class);
......
......@@ -13,8 +13,8 @@ import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.timeParse.TimeUtil;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.timeparse.TimeUtil;
public class SoKuByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(SoKuByWordAnalysis.class);
......
package com.zhiwei.parse.analysis;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
/**
 * Parser for the JSON returned by the Toutiao keyword-search API.
 */
public class ToutiaoKeyWordAnalysis {

    /** Maximum number of characters taken from the content when it stands in for a title. */
    private static final int TITLE_LENGTH = 16;

    /**
     * Converts one search response into rows with the keys title, content,
     * source, time and url.
     *
     * Entries carrying a "merge_weitoutiao" array are expanded into one row per
     * nested post; every other entry yields a single row.
     *
     * @param result raw JSON response text
     * @return the parsed rows; empty when the response is null/empty or has no
     *         "data" array (previously this case threw a NullPointerException)
     */
    public List<Map<String,Object>> getData(String result) {
        List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
        JSONObject json = JSONObject.parseObject(result);
        JSONArray jsonArry = json == null ? null : json.getJSONArray("data");
        if (jsonArry == null) {
            // Nothing to parse; the caller treats an empty list as "no more pages".
            return bodyList;
        }
        for (int i = 0; i < jsonArry.size(); i++) {
            JSONObject data = jsonArry.getJSONObject(i);
            JSONArray merged = data.getJSONArray("merge_weitoutiao");
            if (merged != null) {
                for (int j = 0; j < merged.size(); j++) {
                    JSONObject post = merged.getJSONObject(j);
                    String postContent = post.getString("content");
                    // These posts have no title of their own; use the start of the content.
                    String postTitle = postContent == null ? "" : abbreviate(postContent);
                    Map<String,Object> postRow = buildRow(postTitle, postContent,
                            post.getString("media_name"), post.getString("datetime"), post.getString("id"));
                    System.out.println(postRow.toString());
                    bodyList.add(postRow);
                }
                continue;
            }
            String title = data.getString("title");
            String content = "";
            String abstract1 = data.getString("abstract");
            String summary = data.getString("summary");
            String con = data.getString("content");
            if (title == null) {
                if (con != null) {
                    title = abbreviate(con);
                    content = con;
                    if (summary != null) {
                        content = content + summary;
                    }
                }
            } else {
                if (summary != null) {
                    content = summary;
                }
                if (abstract1 != null) {
                    content = content + abstract1;
                }
            }
            Map<String,Object> map = buildRow(title, content,
                    data.getString("source"), data.getString("datetime"), data.getString("id"));
            System.out.println(map.toString());
            bodyList.add(map);
        }
        System.out.println("====================");
        return bodyList;
    }

    /** Returns the text itself, or its first {@link #TITLE_LENGTH} characters when it is longer. */
    private static String abbreviate(String text) {
        return text.length() > TITLE_LENGTH ? text.substring(0, TITLE_LENGTH) : text;
    }

    /** Builds one output row; the url is derived from the item id. */
    private static Map<String,Object> buildRow(String title, String content, String source, String time, String id) {
        Map<String,Object> row = new HashMap<String,Object>();
        row.put("title", title);
        row.put("content", content);
        row.put("source", source);
        row.put("time", time);
        row.put("url", "https://www.toutiao.com/a"+id+"/");
        return row;
    }
}
......@@ -15,7 +15,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class WangyiHistoryAnalysis {
......@@ -41,7 +41,7 @@ public class WangyiHistoryAnalysis {
String url = "http://dy.163.com/v2/article/detail/"+data.getString("docid")+".html";
String reuslt = HttpClient.executeHttpRequestGet(url, proxy, headerMap);
Document doc = Jsoup.parse(reuslt);
map.put("content", doc.select("div.content").text());
map.put("content", doc.select("div.content").text().replaceAll("<.*?>", ""));
map.put("url", url);
map.put("source", source);
dataList.add(map);
......
......@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.tools.timeparse.TimeParse;
public class XiguaAccountAnalysis {
private static Logger logger = LoggerFactory.getLogger(XiguaByWordAnalysis.class);
......
......@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.tools.timeparse.TimeParse;
public class XiguaByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(XiguaByWordAnalysis.class);
......
......@@ -8,6 +8,7 @@ import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Baijia;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class BaijiaAccountExample {
......@@ -29,12 +30,22 @@ public class BaijiaAccountExample {
poi.exportExcel("D://crawlerdata/百家号-马继华.xlsx", "马继华", headList, lists);
}
@Test
// @Test
public void baijiaAccount2Test() {
String app_id = "b_1536766622381605";
String startTime = "2017-01-01 00:00:00";
String app_id = "b_1548519002063358";
String startTime = "2018-01-01 00:00:00";
//2017-11-30 17:48:17
List<Map<String,Object>> lists = Baijia.getBaijiaAccount2Data(app_id,startTime,null);
List<String> idList = new ArrayList<>();
idList.add("b_1548519002063358");
idList.add("b_1536766292852334");
idList.add("b_1536766781763274");
idList.add("b_1536766200338498");
List<Map<String,Object>> bodyList = new ArrayList<>();
for(String id : idList) {
ZhiWeiTools.sleep(5000);
List<Map<String,Object>> lists = Baijia.getBaijiaAccount2Data(id,startTime,null);
bodyList.addAll(lists);
}
PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<String> headList = new ArrayList<String>();
headList.add("title");
......@@ -42,7 +53,37 @@ public class BaijiaAccountExample {
headList.add("source");
headList.add("url");
headList.add("content");
poi.exportExcel("D://crawlerdata//自媒体/百家号-科学的fan.xlsx", "科学的fan", headList, lists);
poi.exportExcel("D://crawlerdata//自媒体/百家号-all.xlsx", "科学的fan", headList, bodyList);
}
/**
 * Reads account rows (columns "id" and "name") from the first sheet of the
 * input workbook, crawls each account's articles published after
 * {@code startTime}, and exports all collected rows to a single workbook.
 */
@Test
public void test3() {
    String path = "D://crawlerdata//自媒体/百家号采集.xlsx";
    PoiExcelUtil poi = PoiExcelUtil.getInstance();
    String startTime = "2018-01-01 00:00:00";
    Map<String,Object> map = poi.importExcel(path, 0);
    List<Map<String,Object>> list = (List<Map<String, Object>>) map.get("body");
    List<Map<String,Object>> bodyList = new ArrayList<>();
    for(Map<String,Object> m : list) {
        try {
            String app_id = m.get("id").toString();
            String name = m.get("name").toString();
            String cookie = "BAIDUID=BA1090A5857735165A2A419CBA37957A:FG=1";
            List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id,name, startTime,cookie, null);
            if(lists != null) {
                bodyList.addAll(lists);
            }
        } catch (Exception e) {
            // Best-effort: one bad row must not abort the whole run, but report
            // it so that missing accounts are visible instead of silently dropped.
            System.err.println("Baijiahao crawl failed, skipping row " + m + ": " + e);
        }
    }
    List<String> headList = new ArrayList<String>();
    headList.add("title");
    headList.add("time");
    headList.add("source");
    headList.add("url");
    headList.add("content");
    headList.add("read_amount");
    poi.exportExcel("D://crawlerdata//自媒体/百家号-lxj.xlsx", "娱乐资本论", headList, bodyList);
}
}
......@@ -8,7 +8,7 @@ import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Fenghuang;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class FenghuangCommentExample {
......
package com.zhiwei.crawler;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Maimai;
/**
 * Example runner: searches Maimai for several keyword combinations and writes
 * every hit into one Excel workbook.
 */
public class MaimaiBywordExample {

    /**
     * Splits the keyword expression on "|", queries Maimai once per keyword
     * and exports the combined rows.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // NOTE(review): a session cookie is hard-coded below; consider loading it from configuration.
        String cookie = "sessionid=njbswswdrvwf4vpg0836xu6m7ve4ziso; guid=GxsfBBgZGwQYGx4EGBkeVgcYGx4bGRIdEx4bVhwZBB0ZHwVDWEtMS3kKExkbBBMfGRkEGgQcHAVPR0VYQmkKA0VBSU9tCk9BQ0YKBmZnfmJhAgocGQQdGR8FXkNhSE99T0ZaWmsKAx4cfWV9ChEZBBwKfmQKWV1FTkRDfQIKGgQfBUtGRkNQRWc=; seid=s1526952692556; token=\"nv0ZM3AICKHOmB1sdBi2QrvA0fFDgtRwdZJV+DzF3KsZdPIsvD1I2HOdRVyurjQi8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0Ijoid2s0MWRLbDBtWlFwTlJoWmdwc1JUZHR2IiwiX2V4cGlyZSI6MTUyNzAzOTEwMzE5MiwiX21heEFnZSI6ODY0MDAwMDB9; session.sig=ssvF7IeeQYlwCjdh8GaY3mhr0SY";
        String since = "2018-05-01 00:00:00";
        String keywordExpression = "小米 上市|小米 IPO|雷军 IPO|小米 招股书|雷军 上市";
        String[] keywords = keywordExpression.split("\\|");

        List<Map<String,Object>> rows = new ArrayList<Map<String,Object>>();
        for (int k = 0; k < keywords.length; k++) {
            List<Map<String,Object>> hits = Maimai.getData(keywords[k], cookie, since, null);
            rows.addAll(hits);
        }

        // Columns of the exported sheet, in output order.
        List<String> columns = new ArrayList<String>();
        for (String column : new String[] {"time", "url", "text", "name", "like", "comment_count", "spreads"}) {
            columns.add(column);
        }

        PoiExcelUtil excel = PoiExcelUtil.getInstance();
        excel.exportExcel("D:\\crawlerdata\\自媒体\\脉脉关键词采集-1.xlsx", "脉脉关键词", columns, rows);
    }
}
......@@ -7,9 +7,8 @@ import java.util.Map;
import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Fenghuang;
import com.zhiwei.parse.Miaopai;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class MiaopaiByUrlExample {
......
......@@ -8,7 +8,7 @@ import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKB;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class QQAccountExample {
......
......@@ -9,9 +9,9 @@ import java.util.Map;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import com.zhiwei.tools.timeparse.TimeParse;
/**
* @ClassName: QQNewsCommentListTest
......@@ -128,6 +128,8 @@ public class QQNewsCommentListTest {
}
} catch (IOException e) {
return null;
} catch (Exception e) {
e.printStackTrace();
}
return cmt_id;
}
......
......@@ -8,8 +8,8 @@ import java.util.Map;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
/**
* @ClassName: SinaCommentListTest
......@@ -86,6 +86,8 @@ public class SinaCommentListTest {
}
} catch (IOException e) {
return null;
} catch (Exception e) {
e.printStackTrace();
}
return newsid;
}
......
......@@ -16,7 +16,7 @@ public class SouhuAccountExample {
@Test
public void souhuAccountTest() {
List<Map<String,Object>> lists = Souhu.getSouHuAccountData("b1NlSFRzM1JaeURMR252VFY0VGpjVkJGckpyWUB3ZWNoYXQuc29odS5jb20=","2015-01-01 00:00:00",false,null);
List<Map<String,Object>> lists = Souhu.getSouHuAccountData("c29odXptdHNmbjZ0cnRAc29odS5jb20=","2018-05-01 00:00:00",false,null);
System.out.println(lists.size());
List<String> headList = new ArrayList<String>();
headList.add("title");
......
package com.zhiwei.crawler;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.junit.Test;
/**
 * Scratch class demonstrating how the article id is cut out of a QQ news URL.
 */
public class Test1 {

    /**
     * Prints the fifth "/"-separated segment of a sample article URL.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String articleUrl = "https://view.inews.qq.com/a/NEW2018021000440002";
        // Segments: "https:", "", host, "a", article id.
        String[] segments = articleUrl.split("/");
        String articleId = segments[4];
        System.out.println(articleId);
    }
}
......@@ -8,7 +8,7 @@ import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Wangyi;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class WangyiCommentExample {
......
......@@ -11,9 +11,9 @@ public class WangyiHistoryExample {
public static void main(String[] args) {
String url = "http://dy.163.com/v2/article/detail/DJK1G41H0519A6FP.html";
String url = "http://dy.163.com/v2/article/detail/DPLAOP1605198CJN.html";
List<Map<String,Object>> list = Wangyi.getHistoryData(url, null, "2017-01-01 00:00:00");
List<Map<String,Object>> list = Wangyi.getHistoryData(url, null, "2018-05-01 00:00:00");
List<String> headList = new ArrayList<String>();
headList.add("title");
......@@ -22,7 +22,7 @@ public class WangyiHistoryExample {
headList.add("source");
headList.add("url");
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D://crawlerdata//自媒体/网易-号外财经-1.xlsx", "asdasd", headList, list);
poi.exportExcel("D://crawlerdata//自媒体/网易-财联社.xlsx", "财联社", headList, list);
}
......
......@@ -8,7 +8,7 @@ import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.XiGua;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class XiguaByWordExample {
......
package com.zhiwei.hsitory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.testng.annotations.Test;
import com.zhiwei.bean.HistortyBean;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKandian;
/**
 * Example: exports the article history of one QQ Kandian account to Excel.
 */
public class QQkandianHistoryExample {

    /**
     * Fetches the history of a fixed account id and writes it to a workbook
     * with the columns 标题, 来源, 链接, 正文 and 时间.
     */
    @Test
    public void f() {
        String accountId = "2661642386";
        QQKandian crawler = new QQKandian();
        List<HistortyBean> articles = crawler.getHistoryData(accountId, null);
        PoiExcelUtil excel = PoiExcelUtil.getInstance();

        List<Map<String,Object>> rows = new ArrayList<Map<String,Object>>();
        for (HistortyBean article : articles) {
            rows.add(toRow(article));
        }

        List<String> columns = new ArrayList<String>();
        for (String column : new String[] {"标题", "来源", "链接", "正文", "时间"}) {
            columns.add(column);
        }
        excel.exportExcel("D:\\crawlerdata\\自媒体\\qq看点-数据-2661642386.xlsx", "数据", columns, rows);
    }

    /** Copies the bean's fields into a map keyed by the export column names. */
    private static Map<String,Object> toRow(HistortyBean article) {
        Map<String, Object> row = new HashMap<String,Object>();
        row.put("标题", article.getTitle());
        row.put("时间", article.getTime());
        row.put("来源", article.getSource());
        row.put("正文", article.getContent());
        row.put("链接", article.getUrl());
        return row;
    }
}
package com.zhiwei.keyword;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.testng.annotations.Test;
import com.zhiwei.bean.HistortyBean;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKandian;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Example: runs QQ Kandian keyword searches and exports all hits to Excel.
 */
public class QQKandianKeyWordExample {

    /**
     * Splits the keyword expression on "|", searches each keyword in turn
     * (pausing between searches) and writes every result row to one workbook.
     */
    @Test
    public void f() {
        String keywordExpression = "今日头条 算法|今日头条 侵权|今日头条 起诉|字节跳动|张一鸣|抖音 涉黄|抖音 未成年|抖音";
        String[] keywords = keywordExpression.split("\\|");
        PoiExcelUtil excel = PoiExcelUtil.getInstance();
        QQKandian crawler = new QQKandian();

        List<Map<String,Object>> rows = new ArrayList<Map<String,Object>>();
        for (int k = 0; k < keywords.length; k++) {
            String keyword = keywords[k];
            System.out.println(keyword);
            List<HistortyBean> hits = crawler.getDataByword(keyword, null);
            System.out.println(keyword + " ---- " + hits.size());
            for (HistortyBean hit : hits) {
                rows.add(toRow(hit));
            }
            ZhiWeiTools.sleep(3000);
        }

        List<String> columns = new ArrayList<String>();
        for (String column : new String[] {"标题", "来源", "链接", "正文", "时间"}) {
            columns.add(column);
        }
        excel.exportExcel("D:\\crawlerdata\\自媒体\\qq看点-今日头条 算法.xlsx", "马化腾", columns, rows);
    }

    /** Copies the bean's fields into a map keyed by the export column names. */
    private static Map<String,Object> toRow(HistortyBean hit) {
        Map<String, Object> row = new HashMap<String,Object>();
        row.put("标题", hit.getTitle());
        row.put("时间", hit.getTime());
        row.put("来源", hit.getSource());
        row.put("正文", hit.getContent());
        row.put("链接", hit.getUrl());
        return row;
    }
}
package com.zhiwei.keyword;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Toutiao;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Example: reads keywords from an Excel sheet, searches Toutiao for each one
 * and exports the hits that fall inside a fixed one-day window.
 */
public class ToutiaoKeyWordExample {

    /**
     * For every row of the input sheet, searches the value of the "关键词"
     * column and keeps the results whose time lies within the window and whose
     * title or content contains the keyword.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        PoiExcelUtil excel = PoiExcelUtil.getInstance();
        String workbookPath = "D:\\crawlerdata\\关键词.xlsx";
        Map<String,Object> sheet = excel.importExcel(workbookPath, 0);
        List<Map<String,Object>> keywordRows = (List<Map<String, Object>>) sheet.get("body");
        List<Map<String,Object>> matched = new ArrayList<Map<String,Object>>();
        String windowStart = "2018-06-28 00:00:00";
        String windowEnd = "2018-06-28 23:59:59";
        String deviceId = "54381805805";

        for (Map<String,Object> keywordRow : keywordRows) {
            String keyword = keywordRow.get("关键词")+"";
            List<Map<String,Object>> hits = Toutiao.getKeyWordData(keyword, null,deviceId);
            if (hits != null) {
                collectMatches(hits, keyword, windowStart, windowEnd, matched);
            }
            ZhiWeiTools.sleep(2000);
        }

        List<String> columns = new ArrayList<String>();
        for (String column : new String[] {"title", "time", "content", "source", "url", "word"}) {
            columns.add(column);
        }
        excel.exportExcel(workbookPath, "雅培", columns, matched);
    }

    /**
     * Tags every hit with its keyword and appends to {@code out} those that lie
     * inside [start, end] (string comparison) and mention the keyword.
     */
    private static void collectMatches(List<Map<String,Object>> hits, String keyword,
            String start, String end, List<Map<String,Object>> out) {
        for (Map<String,Object> hit : hits) {
            String time = hit.get("time")+"";
            System.out.println(time);
            hit.put("word", keyword);
            boolean insideWindow = time.compareTo(start) >= 0 && time.compareTo(end) <= 0;
            if (!insideWindow) {
                continue;
            }
            System.out.println(1);
            String searchable = hit.get("title") + "--" + hit.get("content");
            if (searchable.contains(keyword)) {
                out.add(hit);
            }
        }
    }
}
package com.zhiwei.qqkb;
import java.util.List;
import java.util.Map;
import com.zhiwei.bean.QQkbUser;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKB;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Example: enriches a sheet of channel names with QQ Kuaibao user details.
 */
public class QQkbUserExample {

    /**
     * Looks up every row's "渠道" value, adds the user's guanzhu, desc, url and
     * vip fields to the row when a user is found, and exports the rows with
     * the four extra columns appended to the original header.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        PoiExcelUtil excel = PoiExcelUtil.getInstance();
        String workbookPath = "D:\\crawlerdata\\用户采集\\qq看点用户.xlsx";
        Map<String,Object> sheet = excel.importExcel(workbookPath, 0);
        List<Map<String,Object>> rows = (List<Map<String, Object>>) sheet.get("body");

        for (Map<String,Object> row : rows) {
            enrich(row);
            ZhiWeiTools.sleep(2000);
        }

        List<String> columns = (List<String>) sheet.get("head");
        for (String extra : new String[] {"guanzhu", "desc", "url", "vip"}) {
            columns.add(extra);
        }
        excel.exportExcel(workbookPath, sheet.get("sheetName")+"-更新后", columns, rows);
    }

    /** Adds the looked-up user fields to the row; leaves it untouched when no user is returned. */
    private static void enrich(Map<String,Object> row) {
        String channel = row.get("渠道")+"";
        QQkbUser user = QQKB.getUserData(channel);
        if (user == null) {
            return;
        }
        row.put("guanzhu", user.getGuanzhu());
        row.put("desc", user.getDesc());
        row.put("url", user.getUrl());
        row.put("vip", user.getVip());
        System.out.println(row.toString());
    }
}
package com.zhiwei.shipin;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Douyin;
/**
 * Example: exports the videos of one Douyin challenge page to Excel.
 */
public class DouyinHotExample {

    /**
     * Fetches the data for a fixed challenge share link and writes it to a
     * workbook with the columns text, url, time, author, comment_count,
     * like_count and share_count.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String shareLink = "https://www.iesdouyin.com/share/challenge/1604239741363223?utm_campaign=client_share&app=aweme&utm_medium=ios&tt_from=qq&utm_source=qq&iid=36454376501";
        List<Map<String,Object>> rows = Douyin.getDouyinHotData(shareLink,null);

        List<String> columns = new ArrayList<String>();
        for (String column : new String[] {"text", "url", "time", "author", "comment_count", "like_count", "share_count"}) {
            columns.add(column);
        }

        PoiExcelUtil excel = PoiExcelUtil.getInstance();
        excel.exportExcel("D:\\crawlerdata\\抖音热门采集测试.xlsx", "asd", columns, rows);
    }
}
package com.zhiwei.user;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.testng.annotations.Test;
import com.zhiwei.bean.QQKandianUser;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKandian;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Example: looks up QQ Kandian users for every channel name in a sheet and
 * exports the users that were found.
 */
public class QQkandianExample {

    /**
     * Reads the "渠道" column of the input sheet, queries the users for each
     * name (pausing between queries) and writes name, url, verity and desc of
     * every returned user to the workbook.
     */
    @Test
    public void f() {
        QQKandian crawler = new QQKandian();
        String workbookPath = "D:\\crawlerdata\\用户采集\\qq看点用户.xlsx";
        PoiExcelUtil excel = PoiExcelUtil.getInstance();
        Map<String,Object> sheet = excel.importExcel(workbookPath, 0);
        List<Map<String,Object>> channelRows = (List<Map<String, Object>>) sheet.get("body");

        List<QQKandianUser> found = new ArrayList<QQKandianUser>();
        for (Map<String,Object> channelRow : channelRows) {
            String channel = channelRow.get("渠道")+"";
            System.out.println(channel);
            List<QQKandianUser> users = crawler.getUser(channel, null);
            if (users == null) {
                System.out.println( channel + "--- null");
            } else {
                System.out.println(users.size());
                found.addAll(users);
            }
            ZhiWeiTools.sleep(3000);
        }

        List<String> columns = new ArrayList<String>();
        for (String column : new String[] {"name", "url", "verity", "desc"}) {
            columns.add(column);
        }

        List<Map<String,Object>> rows = new ArrayList<Map<String,Object>>();
        for (QQKandianUser user : found) {
            rows.add(toRow(user));
        }
        excel.exportExcel(workbookPath, "数据完成后", columns, rows);
    }

    /** Copies the user's fields into a map keyed by the export column names. */
    private static Map<String,Object> toRow(QQKandianUser user) {
        Map<String,Object> row = new HashMap<String,Object>();
        row.put("name", user.getName());
        row.put("url", user.getUrl());
        row.put("verity", user.isVerify());
        row.put("desc", user.getDesc());
        return row;
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment