Commit d555bdda by yangchen

qq看点 百家号采集等更新

parent 17c44df3
...@@ -3,12 +3,17 @@ ...@@ -3,12 +3,17 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>articlenewscrawler</artifactId> <artifactId>articlenewscrawler</artifactId>
<version>0.0.1-SNAPSHOT</version> <version>0.0.2-SNAPSHOT</version>
<name>articlenewscrawler</name> <name>articlenewscrawler</name>
<description>采集凤凰,一点资讯,搜狐历时文章和文章评论</description> <description>采集凤凰,一点资讯,搜狐历时文章和文章评论</description>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.14.3</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId> <groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId> <artifactId>jsoup</artifactId>
<version>1.8.3</version> <version>1.8.3</version>
...@@ -19,11 +24,6 @@ ...@@ -19,11 +24,6 @@
<version>1.2.29</version> <version>1.2.29</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.zhiwei</groupId>
<artifactId>zhiweiTools</artifactId>
<version>0.0.6-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>junit</groupId> <groupId>junit</groupId>
<artifactId>junit</artifactId> <artifactId>junit</artifactId>
<version>4.11</version> <version>4.11</version>
...@@ -33,6 +33,11 @@ ...@@ -33,6 +33,11 @@
<artifactId>excelpoi</artifactId> <artifactId>excelpoi</artifactId>
<version>0.0.1-SNAPSHOT</version> <version>0.0.1-SNAPSHOT</version>
</dependency> </dependency>
<dependency>
<groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId>
<version>0.0.3-SNAPSHOT</version>
</dependency>
</dependencies> </dependencies>
<!-- 打包管理 --> <!-- 打包管理 -->
......
package com.zhiwei.bean;
import java.util.Date;
/**
 * Mutable bean holding the url, title, time, content and source of one record.
 * All properties start out as {@code null} until set.
 */
public class HistortyBean {

    private String url;
    private String title;
    private Date time;
    private String content;
    private String source;

    /** @return the stored url, or {@code null} if none was set */
    public String getUrl() {
        return this.url;
    }

    /** @param url the url to store */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the stored title, or {@code null} if none was set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the stored time (the same instance that was set), or {@code null} */
    public Date getTime() {
        return this.time;
    }

    /** @param time the time to store; the reference is kept as-is */
    public void setTime(Date time) {
        this.time = time;
    }

    /** @return the stored content, or {@code null} if none was set */
    public String getContent() {
        return this.content;
    }

    /** @param content the content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the stored source, or {@code null} if none was set */
    public String getSource() {
        return this.source;
    }

    /** @param source the source to store */
    public void setSource(String source) {
        this.source = source;
    }

    /** Renders every property in the form {@code HistortyBean [url=..., ...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("HistortyBean [");
        text.append("url=").append(url);
        text.append(", title=").append(title);
        text.append(", time=").append(time);
        text.append(", content=").append(content);
        text.append(", source=").append(source);
        return text.append("]").toString();
    }
}
package com.zhiwei.bean;
/**
 * Mutable bean describing a user through url, name, id, desc and a verify flag.
 * String properties default to {@code null}; {@code verify} defaults to {@code false}.
 */
public class QQKandianUser {

    private String id;
    private String name;
    private String url;
    private String desc;
    private boolean verify;

    /** @return the stored id, or {@code null} if none was set */
    public String getId() {
        return this.id;
    }

    /** @param id the id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the stored name, or {@code null} if none was set */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored url, or {@code null} if none was set */
    public String getUrl() {
        return this.url;
    }

    /** @param url the url to store */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the stored description, or {@code null} if none was set */
    public String getDesc() {
        return this.desc;
    }

    /** @param desc the description to store */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    /** @return the current value of the verify flag */
    public boolean isVerify() {
        return this.verify;
    }

    /** @param verify the new value of the verify flag */
    public void setVerify(boolean verify) {
        this.verify = verify;
    }

    /** Renders every property in the form {@code QQKandianUser [url=..., ...]}. */
    @Override
    public String toString() {
        return String.format("QQKandianUser [url=%s, name=%s, id=%s, desc=%s, verify=%s]",
                url, name, id, desc, verify);
    }
}
package com.zhiwei.bean;
/**
 * Mutable bean describing a user through id, name, desc, guanzhu, url,
 * img_url and vip. Every property starts out as {@code null}.
 */
public class QQkbUser {

    private String id;
    private String name;
    private String desc;
    private Integer guanzhu;
    private String url;
    private String img_url;
    private Integer vip;

    /** @return the stored vip value, or {@code null} if none was set */
    public Integer getVip() {
        return vip;
    }

    /** @param vip the vip value to store */
    public void setVip(Integer vip) {
        this.vip = vip;
    }

    /** @return the stored id, or {@code null} if none was set */
    public String getId() {
        return id;
    }

    /** @param id the id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the stored name, or {@code null} if none was set */
    public String getName() {
        return name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored description, or {@code null} if none was set */
    public String getDesc() {
        return desc;
    }

    /** @param desc the description to store */
    public void setDesc(String desc) {
        this.desc = desc;
    }

    /** @return the stored guanzhu count, or {@code null} if none was set */
    public Integer getGuanzhu() {
        return guanzhu;
    }

    /** @param guanzhu the guanzhu count to store */
    public void setGuanzhu(Integer guanzhu) {
        this.guanzhu = guanzhu;
    }

    /** @return the stored url, or {@code null} if none was set */
    public String getUrl() {
        return url;
    }

    /** @param url the url to store */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the stored image url, or {@code null} if none was set */
    public String getImg_url() {
        return img_url;
    }

    /** @param img_url the image url to store */
    public void setImg_url(String img_url) {
        this.img_url = img_url;
    }

    /**
     * Renders every property in the form {@code QQkbUser [id=..., ...]} so the
     * bean is readable in logs, in the same layout the other beans of this
     * package use.
     */
    @Override
    public String toString() {
        return "QQkbUser [id=" + id + ", name=" + name + ", desc=" + desc
                + ", guanzhu=" + guanzhu + ", url=" + url
                + ", img_url=" + img_url + ", vip=" + vip + "]";
    }
}
...@@ -877,12 +877,67 @@ public class HeadGet { ...@@ -877,12 +877,67 @@ public class HeadGet {
return headerMap; return headerMap;
} }
/**
 * Creates the header set for requests to host {@code is.snssdk.com}.
 *
 * @param cookie value for the {@code Cookie} header; the header is left out when {@code null}
 * @return a new, mutable map from header name to header value
 */
public static Map<String,String> getToutiaoHeaderMap(String cookie) {
    final Map<String, String> headers = new HashMap<>();
    final String userAgent =
            "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36";
    final String accept =
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";

    headers.put("User-Agent", userAgent);
    headers.put("Accept-Language", "zh-cn");
    headers.put("Connection", "keep-alive");
    headers.put("Host", "is.snssdk.com");
    headers.put("Accept", accept);
    // The cookie is optional: callers pass null to send the request without one.
    if (null != cookie) {
        headers.put("Cookie", cookie);
    }
    return headers;
}
/**
 * Creates the header set for requests to host {@code r.cnews.qq.com}.
 *
 * NOTE(review): the device identifiers, signature and token values below are
 * hard-coded captures; presumably they need refreshing when the server starts
 * rejecting them - confirm and consider moving them to configuration.
 *
 * @param cookie value for the {@code Cookie} header; the header is left out when {@code null}
 * @return a new, mutable map from header name to header value
 */
public static Map<String,String> getQQkbUserHeaderMap(String cookie) {
    // "devid" and "idfv" carry the same identifier, so it is declared once.
    final String deviceId = "6D33F35F-880D-42A6-A23F-881BEC6960EC";
    Map<String,String> headerMap = new HashMap<String, String>();
    headerMap.put("User-Agent",
            "天天快报 4.6.2 qnreading (iPhone8,1; iOS 11.2.1; zh_CN; 4.6.2.89)");
    headerMap.put("Accept", "*/*");
    headerMap.put("Accept-Language", "zh-Hans-CN;q=1");
    headerMap.put("Connection", "keep-alive");
    headerMap.put("Host", "r.cnews.qq.com");
    headerMap.put("Referer", "http://r.cnews.qq.com/inews/iphone/");
    // Previously "devid" was put twice with the same value; one put is enough.
    headerMap.put("devid", deviceId);
    headerMap.put("sngn", "rl3bDHe6ylqkTnZgjMNwyqay3cvYlIEt38n%2Fd3Wssv%2Bmik9D%2Bt06p7slposTw7oT%0D%0ArkVovLykCt4CIur9VZ8nAj91G7%2FH%2BZArRk3MSQrW0HFPGRu39PLQy27HWirnn2aF");
    headerMap.put("idfv", deviceId);
    headerMap.put("deviceToken", "<585bee8d f6739b65 1248b40d 7be9dc4a 126bbf27 85ad470e ce6b7923 bbcb7c1c>");
    headerMap.put("qn-rid", "7E351889-6E94-4246-89BF-219458B7964C");
    headerMap.put("qn-sig", "5CB3DEE05F8CA7C76755D1AB53599587");
    headerMap.put("omgbizid", "5144dee3f39a8d4dad994e5391fcebd1a0d50090112b14");
    headerMap.put("omgid", "0f63f8e68f041746372b9ceecc8e97f028e90010112b14");
    headerMap.put("idfa", "FE659B7E-5104-44C2-8A31-F88DEE7A2747");
    headerMap.put("appver", "11.2.1_qnreading_4.8.30");
    if (cookie != null) {
        headerMap.put("Cookie", cookie);
    }
    return headerMap;
}
/**
 * Creates the form parameters sent with a media-type search request.
 *
 * @param word the search term, stored under the {@code query} key as given
 * @return a new, mutable map holding {@code query} plus four fixed parameters
 */
public static Map<String,Object> getQQkbUserParamMap(String word) {
    final Map<String, Object> params = new HashMap<>();
    // Only "query" varies per call; the remaining entries are constants.
    params.put("query", word);
    params.put("curTab", "kuaibao");
    params.put("curChannel", "kb_news_young1");
    params.put("source", "search_history");
    params.put("type", "media");
    return params;
}
public static void main(String[] args) throws UnsupportedEncodingException { public static void main(String[] args) throws UnsupportedEncodingException {
String url = "http://api.3g.ifeng.com/api_wemedia_index?followid=weMedia_6452&page=1&pagesize=20&tag=article&uid=fe659b7e510444c28a31f88dee7a2747"; String url = "https://r.cnews.qq.com/searchByType";
System.out.println(url); System.out.println(url);
String cookie = "__utma=51854390.454838676.1510118174.1528502507.1529542643.327; __utmb=51854390.0.10.1529542643; __utmc=51854390; __utmv=51854390.010--; __utmz=51854390.1510118174.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _zap=4e09de9f-d212-48b9-af42-9173e1092406; d_c0=ACACkXJlZQxLBTcU1Z70bp9TpD_qDHF6sAY=|1529542618; q_c0=2|1:0|10:1528360082|4:q_c0|80:MS4xX0RmNkJRQUFBQUFMQUFBQVlBSlZUWkY1UUZzOXFnWVlQOWRXTVkxNG5kcl80WjJkdzhiTDN3PT0=|c6f0a98f0d30bdfda124c276e9e88ce945dcf50103663cedeab1be9415a42101; q_c1=2d80752a333f4fcd99b1362dfce3e7eb|1529542724000|1506556052000; z_c0=2|1:0|10:1528360082|4:z_c0|80:MS4xX0RmNkJRQUFBQUFMQUFBQVlBSlZUWkY1UUZzOXFnWVlQOWRXTVkxNG5kcl80WjJkdzhiTDN3PT0=|021aae122258a6476003fd206423140b8c2ee162e406b628051471c64021b211; zst_82=1.0ANCm7PYOyA0LAAAASwUAADEuMDv9KlsAAAAAYiqmK0gPDsX0FVknWJlXcKc07NA=; __DAYU_PP=VFZF3fmJavniAMQf2fnf23a6dd5221ec; q_c1=2d80752a333f4fcd99b1362dfce3e7eb|1529542724000|1506556052000"; String cookie = "luin=o0497332654;%20lskey=00030000d63ffaf7eba88c86106eac5f2910d45515222334b91c75a66b449c990c2be43cd202ba39b35bef60;%20uin=o0497332654;%20skey=MH3wukytS4;%20sigA2=7AB4D8DEDF73E313801FD348FD77EC3B05C06DBC4D9DA669B20CA04A8D6B80F300A69567FBD11A7B799E419BB796F22D47D3AE5FA95E708A0ABC66161061131B0B21A0031AA0807C;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwmSQ3EqlyzseC9-qGEFY7Tkr0Ypk5vsnSwOaMC-IGxsBeY2K7knHrYstj_5dZpisJd5nihvLNQvCdsFhFwZQcT8;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0";
Map<String,String> headerMap = HeadGet.getFenghuangAccountHeaderMap(null); Map<String,String> headerMap = HeadGet.getQQkbUserHeaderMap(cookie);
String result = HttpClient.executeHttpRequestGet(url,null, headerMap); Map<String,Object> paramMap = HeadGet.getQQkbUserParamMap("虎嗅");
String result = HttpClient.executeHttpRequestPost(url, null, headerMap, paramMap);
System.out.println(result); System.out.println(result);
System.out.println(result.length()); System.out.println(result.length());
} }
......
...@@ -7,7 +7,7 @@ import java.util.Map; ...@@ -7,7 +7,7 @@ import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK; import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
public class HttpClient { public class HttpClient {
private static Logger logger = LoggerFactory.getLogger(HttpClient.class); private static Logger logger = LoggerFactory.getLogger(HttpClient.class);
......
...@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory; ...@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.AiqiyiByWordAnalysis; import com.zhiwei.parse.analysis.AiqiyiByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Aiqiyi { public class Aiqiyi {
private static Logger logger = LoggerFactory.getLogger(Aiqiyi.class); private static Logger logger = LoggerFactory.getLogger(Aiqiyi.class);
......
...@@ -13,7 +13,8 @@ import com.alibaba.fastjson.JSONObject; ...@@ -13,7 +13,8 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.BaijiaAccountAnalysis; import com.zhiwei.parse.analysis.BaijiaAccountAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class Baijia { public class Baijia {
private static Logger logger = LoggerFactory.getLogger(Baijia.class); private static Logger logger = LoggerFactory.getLogger(Baijia.class);
...@@ -60,6 +61,46 @@ public class Baijia { ...@@ -60,6 +61,46 @@ public class Baijia {
/** /**
* *
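* @Description Third way of collecting Baijiahao account data (pages through the author.baidu.com list endpoint)
* @param app_id account app_id placed in the request context
* @param name account name handed to the parser and used in log output
* @param startTime start time handed to the parser
* @param cookie value sent as the cookie request header
* @param proxy proxy used for the HTTP requests
* @return the records collected from all fetched pages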
*/
public static List<Map<String,Object>> getBaijiaAccountByBaiduData(String app_id,String name,String startTime,String cookie,Proxy proxy) {
    // Pages through https://author.baidu.com/list 20 records at a time and
    // returns everything parsed so far. Each page gets at most maxAttempts
    // tries; when a page cannot be fetched or parsed at all, paging stops and
    // the records collected up to that point are returned. Previously a page
    // that kept failing was retried forever, because neither the offset nor
    // the loop flag changed after the retries were used up.
    final int maxAttempts = 2;
    final int pageSize = 20;
    final int pauseMillis = 3000;

    Map<String,String> headerMap = HeaderTool.getCommonHead();
    List<Map<String,Object>> dataList = new ArrayList<>();
    // Only send a cookie header when the caller supplied one.
    if (cookie != null) {
        headerMap.put("cookie", cookie);
    }

    boolean hasMore = true;
    int offset = 0;
    while (hasMore) {
        String url = "https://author.baidu.com/list?type=article&context={%22offset%22:%22-1_"+offset+"%22,%22app_id%22:%22"+app_id+"%22,%22pageSize%22:20}";
        boolean pageDone = false;
        for (int attempt = 1; attempt <= maxAttempts && !pageDone; attempt++) {
            try {
                String result = HttpClient.executeHttpRequestGet(url, proxy, headerMap);
                Map<String,Object> dMap = baijiaAccountAnalysis.getBaijiaAccountData3(result,name, startTime);
                @SuppressWarnings("unchecked")
                List<Map<String,Object>> dList = (List<Map<String, Object>>) dMap.get("data");
                // A page without a "data" entry contributes nothing instead of failing.
                if (dList != null) {
                    dataList.addAll(dList);
                }
                logger.info("{} 数据采集结果 {}",name, dataList.size());
                // A missing "more" flag is treated as "no further pages".
                hasMore = Boolean.TRUE.equals(dMap.get("more"));
                pageDone = true;
            } catch (Exception e) {
                logger.error("{} page at offset {} failed (attempt {}/{})", name, offset, attempt, maxAttempts, e);
            }
            // Pause after every request, successful or not.
            ZhiWeiTools.sleep(pauseMillis);
        }
        if (!pageDone) {
            logger.error("{} giving up at offset {} after {} attempts; returning {} records", name, offset, maxAttempts, dataList.size());
            break;
        }
        offset += pageSize;
    }
    return dataList;
}
/**
*
* @Description 百家号历史文章采集 * @Description 百家号历史文章采集
* @param app_id * @param app_id
* @param startTime * @param startTime
......
package com.zhiwei.parse;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.Proxy;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.analysis.BilibilikeyWordAnalysis;
import com.zhiwei.tools.httpclient.HttpClientBuilder;
import com.zhiwei.tools.httpclient.HttpRequestBuilder;
import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.util.WordReadFile;
import okhttp3.Headers;
import okhttp3.OkHttpClient;
import okhttp3.Request;
/**
 * Keyword search against search.bilibili.com, plus a command-line entry point
 * that exports the results for a list of keywords to an Excel file.
 */
public class BiliBili {

    private static Logger logger = LoggerFactory.getLogger(BiliBili.class);

    /** Pause after each page request, in milliseconds. */
    private static final int PAGE_DELAY_MS = 3000;

    /**
     * Fetches every result page for one keyword and returns the parsed rows.
     * Paging continues for as long as the parser reports a {@code more} flag
     * of {@code true}; a missing flag ends the paging.
     *
     * @param word keyword to search for; it is URL-encoded as UTF-8
     * @param proxy proxy for the requests, may be {@code null}
     * @param cookie value of the cookie header; the header is left out when {@code null}
     * @return the rows of all fetched pages. If a request fails after at least
     *         one row was collected, the rows collected so far are returned;
     *         {@code null} is returned only when a failure occurs before any
     *         row was collected.
     */
    public static List<Map<String,Object>> getData(String word,Proxy proxy,String cookie) {
        List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
        try {
            String url = "https://search.bilibili.com/all?keyword="+URLEncoder.encode(word, "utf-8")+"&from_source=banner_search";
            Headers header = buildHeaders(cookie);
            logger.info("{}", url);
            OkHttpClient client = HttpClientBuilder.newInstance().newBuilder().proxy(proxy).build();

            boolean more = true;
            for (int page = 1; more; page++) {
                // The first page is requested without an explicit page parameter.
                String pageUrl = (page == 1) ? url : url + "&page=" + page;
                Request request = HttpRequestBuilder.newGetRequest(pageUrl, header);
                String result = client.newCall(request).execute().body().string();
                more = appendPage(BilibilikeyWordAnalysis.getData(result), bodyList);
                logger.info("{}页,数据总量为 -- {}", page, bodyList.size());
                ZhiWeiTools.sleep(PAGE_DELAY_MS);
            }
            return bodyList;
        } catch (UnsupportedEncodingException e) {
            logger.error("Could not encode keyword {}", word, e);
        } catch (IOException e) {
            logger.error("Request failed for keyword {} after {} rows", word, bodyList.size(), e);
        }
        // Keep pages that were already fetched instead of discarding them.
        return bodyList.isEmpty() ? null : bodyList;
    }

    /** Builds the request headers, adding the cookie header only when a cookie is given. */
    private static Headers buildHeaders(String cookie) {
        if (cookie == null) {
            return Headers.of("Referer","https://www.bilibili.com/","Host","search.bilibili.com");
        }
        return Headers.of("cookie",cookie,"Referer","https://www.bilibili.com/","Host","search.bilibili.com");
    }

    /** Adds the rows of one parsed page to {@code sink} and reports whether another page follows. */
    @SuppressWarnings("unchecked")
    private static boolean appendPage(Map<String,Object> page, List<Map<String,Object>> sink) {
        if (page == null) {
            return false;
        }
        List<Map<String,Object>> rows = (List<Map<String, Object>>) page.get("data");
        if (rows != null) {
            sink.addAll(rows);
        }
        return Boolean.TRUE.equals(page.get("more"));
    }

    /**
     * Reads keywords from a fixed file, collects the search results for each
     * keyword and exports them to a fixed Excel file.
     *
     * NOTE(review): the cookie and both file paths are hard-coded; consider
     * taking them from arguments or configuration.
     */
    public static void main(String[] args) {
        List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
        List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
        String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274";
        for (String word : wordList) {
            List<Map<String,Object>> dataList = BiliBili.getData(word, null,cookie);
            if (dataList != null) {
                logger.info("{} ----- {}", word, dataList.size());
                bodyList.addAll(dataList);
            }
        }
        // Column keys of the exported sheet.
        List<String> headlist = new ArrayList<String>();
        headlist.add("submitcount");
        headlist.add("playcount");
        headlist.add("time");
        headlist.add("source");
        headlist.add("title");
        headlist.add("url");
        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        poi.exportExcel("D://crawlerdata//bilibili关键词采集数据.xlsx", "B站数据", headlist, bodyList);
    }
}
...@@ -16,7 +16,7 @@ import com.zhiwei.httpclient.HttpClient; ...@@ -16,7 +16,7 @@ import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.DayuAccountAnalysis; import com.zhiwei.parse.analysis.DayuAccountAnalysis;
import com.zhiwei.parse.analysis.DayuByWordAnalysis; import com.zhiwei.parse.analysis.DayuByWordAnalysis;
import com.zhiwei.parse.analysis.DayuCommentAnalysis; import com.zhiwei.parse.analysis.DayuCommentAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Dayu { public class Dayu {
private static Logger logger = LoggerFactory.getLogger(Dayu.class); private static Logger logger = LoggerFactory.getLogger(Dayu.class);
......
...@@ -2,7 +2,6 @@ package com.zhiwei.parse; ...@@ -2,7 +2,6 @@ package com.zhiwei.parse;
import java.net.Proxy; import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -12,7 +11,7 @@ import org.slf4j.LoggerFactory; ...@@ -12,7 +11,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.DouyinHotDataAnalysis; import com.zhiwei.parse.analysis.DouyinHotDataAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Douyin { public class Douyin {
......
...@@ -14,7 +14,7 @@ import com.zhiwei.httpclient.HttpClient; ...@@ -14,7 +14,7 @@ import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.FenghuangAccountAnalysis; import com.zhiwei.parse.analysis.FenghuangAccountAnalysis;
import com.zhiwei.parse.analysis.FenghuangByWordAnalysis; import com.zhiwei.parse.analysis.FenghuangByWordAnalysis;
import com.zhiwei.parse.analysis.FenghuangCommentAnalysis; import com.zhiwei.parse.analysis.FenghuangCommentAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Fenghuang { public class Fenghuang {
private static Logger logger = LoggerFactory.getLogger(Fenghuang.class); private static Logger logger = LoggerFactory.getLogger(Fenghuang.class);
......
...@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory; ...@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.MaimaiBywordAnalysis; import com.zhiwei.parse.analysis.MaimaiBywordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Maimai { public class Maimai {
......
...@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory; ...@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.MeipaiByWordAnalysis; import com.zhiwei.parse.analysis.MeipaiByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Meipai { public class Meipai {
private static Logger logger = LoggerFactory.getLogger(Meipai.class); private static Logger logger = LoggerFactory.getLogger(Meipai.class);
......
...@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory; ...@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.PearVideoByWordAnalysis; import com.zhiwei.parse.analysis.PearVideoByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class PearVideo { public class PearVideo {
private static Logger logger = LoggerFactory.getLogger(PearVideo.class); private static Logger logger = LoggerFactory.getLogger(PearVideo.class);
......
...@@ -8,12 +8,14 @@ import java.util.Map; ...@@ -8,12 +8,14 @@ import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.bean.QQkbUser;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.QQKBAccountAnalysis; import com.zhiwei.parse.analysis.QQKBAccountAnalysis;
import com.zhiwei.parse.analysis.QQKBCommentAnalysis; import com.zhiwei.parse.analysis.QQKBCommentAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class QQKB { public class QQKB {
private static Logger logger = LoggerFactory.getLogger(QQKB.class); private static Logger logger = LoggerFactory.getLogger(QQKB.class);
...@@ -131,6 +133,44 @@ public class QQKB { ...@@ -131,6 +133,44 @@ public class QQKB {
} }
} }
/**
 * Searches r.cnews.qq.com for media accounts matching {@code name} and
 * returns the first entry whose {@code chlname} equals it exactly.
 *
 * NOTE(review): the cookie below is a hard-coded session capture committed to
 * source; it should be supplied by the caller or configuration.
 *
 * @param name account name to look up
 * @return the matching account, or {@code null} when {@code name} is
 *         {@code null}, nothing matches, or the request/parsing fails
 *         (failures are logged)
 */
public static QQkbUser getUserData(String name) {
    if (name == null) {
        return null;
    }
    try {
        String url = "https://r.cnews.qq.com/searchByType";
        String cookie = "luin=o0497332654;%20lskey=00030000d63ffaf7eba88c86106eac5f2910d45515222334b91c75a66b449c990c2be43cd202ba39b35bef60;%20uin=o0497332654;%20skey=MH3wukytS4;%20sigA2=7AB4D8DEDF73E313801FD348FD77EC3B05C06DBC4D9DA669B20CA04A8D6B80F300A69567FBD11A7B799E419BB796F22D47D3AE5FA95E708A0ABC66161061131B0B21A0031AA0807C;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwmSQ3EqlyzseC9-qGEFY7Tkr0Ypk5vsnSwOaMC-IGxsBeY2K7knHrYstj_5dZpisJd5nihvLNQvCdsFhFwZQcT8;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0";
        Map<String,String> headerMap = HeadGet.getQQkbUserHeaderMap(cookie);
        Map<String,Object> paramMap = HeadGet.getQQkbUserParamMap(name);
        String result = HttpClient.executeHttpRequestPost(url, null, headerMap, paramMap);

        // Walk new_list -> data[0] -> channellist -> media, stopping quietly
        // when a level is absent (e.g. an empty search result).
        JSONObject json = JSONObject.parseObject(result);
        JSONObject newList = json == null ? null : json.getJSONObject("new_list");
        JSONArray dataArray = newList == null ? null : newList.getJSONArray("data");
        if (dataArray == null || dataArray.isEmpty()) {
            logger.warn("No search data returned for {}", name);
            return null;
        }
        JSONObject channelList = dataArray.getJSONObject(0).getJSONObject("channellist");
        JSONArray mediaArray = channelList == null ? null : channelList.getJSONArray("media");
        if (mediaArray == null) {
            logger.warn("No media entries returned for {}", name);
            return null;
        }

        for (int i = 0; i < mediaArray.size(); i++) {
            JSONObject data = mediaArray.getJSONObject(i);
            if (!name.equals(data.getString("chlname"))) {
                continue;
            }
            String id = data.getString("chlid");
            QQkbUser qqQkbUser = new QQkbUser();
            qqQkbUser.setDesc(data.getString("desc"));
            qqQkbUser.setGuanzhu(data.getIntValue("subCount"));
            qqQkbUser.setId(id);
            qqQkbUser.setImg_url(data.getString("sicon"));
            qqQkbUser.setName(name);
            qqQkbUser.setUrl("https://kuaibao.qq.com/s/MEDIANEWSLIST?chlid="+id);
            qqQkbUser.setVip(data.getIntValue("vip"));
            return qqQkbUser;
        }
    } catch (Exception e) {
        // Previously swallowed silently; the lookup stays best-effort but is now traceable.
        logger.error("User lookup failed for {}", name, e);
    }
    return null;
}
/** /**
* *
* @Description 获取cid * @Description 获取cid
......
...@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory; ...@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.SoKuByWordAnalysis; import com.zhiwei.parse.analysis.SoKuByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Soku { public class Soku {
private static Logger logger = LoggerFactory.getLogger(Soku.class); private static Logger logger = LoggerFactory.getLogger(Soku.class);
......
...@@ -19,7 +19,7 @@ import org.jsoup.select.Elements; ...@@ -19,7 +19,7 @@ import org.jsoup.select.Elements;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class SouBao { public class SouBao {
...@@ -30,7 +30,7 @@ public class SouBao { ...@@ -30,7 +30,7 @@ public class SouBao {
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(String word : wordList) { for(String word : wordList) {
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
List<Map<String,Object>> dataList = getData(word, cookie,"2018-06-11","2018-06-12"); List<Map<String,Object>> dataList = getData(word, cookie,"2017-12-27","2018-06-27");
for(Map<String,Object> map : dataList) { for(Map<String,Object> map : dataList) {
boolean f = false; boolean f = false;
String time = (String) map.get("time"); String time = (String) map.get("time");
......
...@@ -17,8 +17,8 @@ import com.zhiwei.httpclient.HeadGet; ...@@ -17,8 +17,8 @@ import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.SouhuAccountAnalysis; import com.zhiwei.parse.analysis.SouhuAccountAnalysis;
import com.zhiwei.parse.analysis.SouhuCommentAnalysis; import com.zhiwei.parse.analysis.SouhuCommentAnalysis;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Souhu { public class Souhu {
private static Logger logger = LoggerFactory.getLogger(Souhu.class); private static Logger logger = LoggerFactory.getLogger(Souhu.class);
...@@ -61,6 +61,8 @@ public class Souhu { ...@@ -61,6 +61,8 @@ public class Souhu {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
Map<String,String> headerMap = HeadGet.getSouhuAccountHeaderMap(null); Map<String,String> headerMap = HeadGet.getSouhuAccountHeaderMap(null);
boolean f = true; boolean f = true;
int j = 0;
while(f) { while(f) {
try { try {
String url = "http://mp.sohu.com/apiV2/profile/newsListAjax?xpt="+xpt+"&pageNumber="+i+"&pageSize=10"; String url = "http://mp.sohu.com/apiV2/profile/newsListAjax?xpt="+xpt+"&pageNumber="+i+"&pageSize=10";
...@@ -99,7 +101,12 @@ public class Souhu { ...@@ -99,7 +101,12 @@ public class Souhu {
i++; i++;
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(3000);
} catch (Exception e) { } catch (Exception e) {
ZhiWeiTools.sleep(3000);
logger.error("出错了",e.getMessage()); logger.error("出错了",e.getMessage());
j++;
if(j > 5) {
f = false;
}
continue; continue;
} }
} }
......
...@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory; ...@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.TXNewsByWordAnalysis; import com.zhiwei.parse.analysis.TXNewsByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class TXNews { public class TXNews {
......
package com.zhiwei.parse;
import java.io.UnsupportedEncodingException;
import java.net.Proxy;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.ToutiaoKeyWordAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Keyword search against the is.snssdk.com search_content endpoint.
 */
public class Toutiao {

    private static Logger logger = LoggerFactory.getLogger(Toutiao.class);

    private static ToutiaoKeyWordAnalysis toutiaoKeyWordAnalysis = new ToutiaoKeyWordAnalysis();

    /** Offsets requested per keyword: 0, 10 and 20. */
    private static final int OFFSET_LIMIT = 30;
    private static final int OFFSET_STEP = 10;

    /**
     * Requests up to three result pages for one keyword and returns the
     * parsed rows. Paging stops early when a page parses to an empty list; a
     * page that parses to {@code null} is skipped.
     *
     * @param word keyword to search for; it is URL-encoded as UTF-8
     * @param proxy proxy handed to the HTTP request
     * @param devoid value sent as the {@code device_id} query parameter
     * @return the collected rows; empty when nothing was found or the keyword
     *         could not be encoded
     */
    public static List<Map<String,Object>> getKeyWordData(String word,Proxy proxy,String devoid) {
        List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
        Map<String,String> headerMap = HeadGet.getToutiaoHeaderMap(null);
        String url;
        try {
            url = "http://is.snssdk.com/api/2/wap/search_content/?from=news&keyword="+URLEncoder.encode(word, "utf-8")+"&cur_tab_title=search_tab&device_id="+devoid+"&format=json&offset=";
        } catch (UnsupportedEncodingException e) {
            // Without a URL there is nothing to request; previously the loop
            // went on to request "null0", "null10" and "null20".
            logger.error("Could not encode keyword {}", word, e);
            return bodyList;
        }
        for (int offset = 0; offset < OFFSET_LIMIT; offset += OFFSET_STEP) {
            String pageUrl = url + offset;
            logger.info("{}", pageUrl);
            String result = HttpClient.executeHttpRequestGet(pageUrl, proxy, headerMap);
            List<Map<String,Object>> list = toutiaoKeyWordAnalysis.getData(result);
            if (list != null) {
                if (list.isEmpty()) {
                    break;
                }
                bodyList.addAll(list);
            }
            ZhiWeiTools.sleep(5000);
        }
        return bodyList;
    }
}
...@@ -9,13 +9,12 @@ import org.jsoup.Jsoup; ...@@ -9,13 +9,12 @@ import org.jsoup.Jsoup;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.WangyiCommentAnalysis; import com.zhiwei.parse.analysis.WangyiCommentAnalysis;
import com.zhiwei.parse.analysis.WangyiHistoryAnalysis; import com.zhiwei.parse.analysis.WangyiHistoryAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Wangyi { public class Wangyi {
private static Logger logger = LoggerFactory.getLogger(Wangyi.class); private static Logger logger = LoggerFactory.getLogger(Wangyi.class);
...@@ -79,8 +78,9 @@ public class Wangyi { ...@@ -79,8 +78,9 @@ public class Wangyi {
String source = Jsoup.parse(result).select("body > div.colum_wrap.fl > div > div.colum_des > div.normal > div.colum_info > h4").text(); String source = Jsoup.parse(result).select("body > div.colum_wrap.fl > div > div.colum_des > div.normal > div.colum_info > h4").text();
boolean f = true; boolean f = true;
url = "http://dy.163.com/v2/article/list.do?wemediaId="+wemediaid+"&size=20&pageNo="; url = "http://dy.163.com/v2/article/list.do?wemediaId="+wemediaid+"&size=20&pageNo=";
int i = 20; int i = 1;
ZhiWeiTools.sleep(1000); ZhiWeiTools.sleep(1000);
int j = 0;
while(f) { while(f) {
try { try {
result = ""; result = "";
...@@ -100,6 +100,11 @@ public class Wangyi { ...@@ -100,6 +100,11 @@ public class Wangyi {
i++; i++;
} catch (Exception e) { } catch (Exception e) {
ZhiWeiTools.sleep(1000); ZhiWeiTools.sleep(1000);
logger.info("出错 {}",e.getMessage());
j++;
if(j > 5) {
f = false;
}
continue; continue;
} }
} }
......
...@@ -15,8 +15,8 @@ import com.zhiwei.httpclient.HeadGet; ...@@ -15,8 +15,8 @@ import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.XiguaAccountAnalysis; import com.zhiwei.parse.analysis.XiguaAccountAnalysis;
import com.zhiwei.parse.analysis.XiguaByWordAnalysis; import com.zhiwei.parse.analysis.XiguaByWordAnalysis;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class XiGua { public class XiGua {
private static Logger logger = LoggerFactory.getLogger(XiGua.class); private static Logger logger = LoggerFactory.getLogger(XiGua.class);
......
...@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory; ...@@ -13,7 +13,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.XiaomiShequByWordAnalysis; import com.zhiwei.parse.analysis.XiaomiShequByWordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Xiaomi { public class Xiaomi {
private static Logger logger = LoggerFactory.getLogger(Xiaomi.class); private static Logger logger = LoggerFactory.getLogger(Xiaomi.class);
......
...@@ -16,7 +16,7 @@ import com.zhiwei.httpclient.HttpClient; ...@@ -16,7 +16,7 @@ import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.YidianzixunAccountAnalysis; import com.zhiwei.parse.analysis.YidianzixunAccountAnalysis;
import com.zhiwei.parse.analysis.YidianzixunByWordAnalysis; import com.zhiwei.parse.analysis.YidianzixunByWordAnalysis;
import com.zhiwei.parse.analysis.YidianzixunCommentAnalysis; import com.zhiwei.parse.analysis.YidianzixunCommentAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class Yidianzixun { public class Yidianzixun {
private static Logger logger = LoggerFactory.getLogger(Yidianzixun.class); private static Logger logger = LoggerFactory.getLogger(Yidianzixun.class);
......
...@@ -15,7 +15,7 @@ import org.slf4j.LoggerFactory; ...@@ -15,7 +15,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class AiqiyiByWordAnalysis { public class AiqiyiByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(AiqiyiByWordAnalysis.class); private static Logger logger = LoggerFactory.getLogger(AiqiyiByWordAnalysis.class);
......
...@@ -2,6 +2,7 @@ package com.zhiwei.parse.analysis; ...@@ -2,6 +2,7 @@ package com.zhiwei.parse.analysis;
import java.net.Proxy; import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -15,8 +16,8 @@ import com.alibaba.fastjson.JSONArray; ...@@ -15,8 +16,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class BaijiaAccountAnalysis { public class BaijiaAccountAnalysis {
private static Logger logger = LoggerFactory.getLogger(BaijiaAccountAnalysis.class); private static Logger logger = LoggerFactory.getLogger(BaijiaAccountAnalysis.class);
...@@ -41,13 +42,82 @@ public class BaijiaAccountAnalysis { ...@@ -41,13 +42,82 @@ public class BaijiaAccountAnalysis {
/**
 * Parses one page of Baijia account articles (third response layout).
 *
 * @param result    raw JSON response; rows are read from {@code data.items}
 * @param name      value stored under {@code source} in every returned row
 * @param startTime lower time bound formatted as {@code yyyy-MM-dd HH:mm:ss};
 *                  rows whose time is not later than it are skipped. A null
 *                  value or one shorter than two characters disables the filter.
 * @return a map holding {@code data} (the parsed rows) and {@code more}
 *         (true when the caller should request the next page)
 */
public Map<String,Object> getBaijiaAccountData3(String result,String name,String startTime) {
    List<Map<String,Object>> dataList = new ArrayList<>();
    Map<String,Object> rmap = new HashMap<>();
    boolean more = false;
    try {
        JSONObject json = JSONObject.parseObject(result);
        JSONObject payload = json.getJSONObject("data");
        JSONArray jsonArry = payload.getJSONArray("items");
        boolean filterByTime = startTime != null && startTime.length() > 1;
        // Set once any row falls at or before startTime; paging must stop then.
        boolean reachedStart = false;
        for(int i = 0;i < jsonArry.size();i++) {
            JSONObject data = jsonArry.getJSONObject(i);
            String id = data.getString("article_id");
            // created_at is treated as epoch seconds and widened to milliseconds.
            int t = data.getInteger("created_at");
            String time = TimeParse.dateFormartString(new Date(t*1000L), "yyyy-MM-dd HH:mm:ss");
            if(filterByTime && time.compareTo(startTime) < 1) {
                reachedStart = true;
                continue;
            }
            String url = data.getString("url");
            if(url == null) {
                // No direct link in the payload: fall back to the id-based address.
                url = "https://baijia.baidu.com/s?old_id=" + id;
            }
            String readAmount = data.getString("read_amount");
            Map<String,Object> map = new HashMap<>();
            map.put("title", data.getString("title"));
            map.put("content", ZhiWeiTools.delHTMLTag(getContent3(data)));
            map.put("read_amount", readAmount == null ? 0 : readAmount);
            map.put("app_id", data.getString("app_id"));
            map.put("time", time);
            map.put("url", url);
            map.put("source", name);
            dataList.add(map);
        }
        // Previously has_more unconditionally overwrote the "stop" decision made
        // inside the loop, so crawling never ended at startTime. A missing
        // has_more flag is treated as "no more pages".
        more = !reachedStart && Boolean.TRUE.equals(payload.getBoolean("has_more"));
    } catch (Exception e) {
        // Rows parsed before the failure are still returned; more stays false.
        logger.error("Failed to parse Baijia account page", e);
    }
    rmap.put("data", dataList);
    rmap.put("more", more);
    return rmap;
}
/**
 * Concatenates the text fragments of an article's structured content.
 *
 * Reads {@code content.items} from the given article object and joins the
 * {@code data} value of every item whose {@code type} is {@code "text"}.
 *
 * @param data one article object from the account listing
 * @return the joined text, or an empty string when the content is missing or
 *         cannot be read
 */
private String getContent3(JSONObject data) {
    try {
        JSONObject cdata = data.getJSONObject("content");
        JSONArray jsonArry = cdata.getJSONArray("items");
        // Local, single-threaded accumulation: StringBuilder is sufficient.
        StringBuilder sb = new StringBuilder();
        for(int i = 0;i < jsonArry.size();i++) {
            JSONObject da = jsonArry.getJSONObject(i);
            if(!"text".equals(da.getString("type"))) {
                continue;
            }
            String fragment = da.getString("data");
            // Appending a null String would insert the literal text "null".
            if(fragment != null) {
                sb.append(fragment);
            }
        }
        return sb.toString();
    } catch (Exception e) {
        // Best effort: an article without readable content yields empty text.
        logger.debug("Unable to extract Baijia article content", e);
        return "";
    }
}
/**
*
* @Description 解析一页历史文章 * @Description 解析一页历史文章
* @param result * @param result
* @param startTime * @param startTime
* @return * @return
*/ */
public List<Map<String,Object>> getBaijiaAccountData(String result,String startTime) { public List<Map<String,Object>> getBaijiaAccountData(String result,String startTime) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<>();
try { try {
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("data").getJSONObject("WriterArticleList").getJSONArray("items"); JSONArray jsonArry = json.getJSONObject("data").getJSONObject("WriterArticleList").getJSONArray("items");
...@@ -67,7 +137,6 @@ public class BaijiaAccountAnalysis { ...@@ -67,7 +137,6 @@ public class BaijiaAccountAnalysis {
if(url == null) { if(url == null) {
url = "https://baijia.baidu.com/s?old_id=" + id; url = "https://baijia.baidu.com/s?old_id=" + id;
} }
// map.put("content", getBaijiaContent(url));
map.put("content", data.getString("abstract")); map.put("content", data.getString("abstract"));
map.put("read_amount", data.getString("read_amount")==null?0:data.getString("read_amount")); map.put("read_amount", data.getString("read_amount")==null?0:data.getString("read_amount"));
map.put("app_id", data.getString("app_id")); map.put("app_id", data.getString("app_id"));
......
package com.zhiwei.parse.analysis;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class BilibilikeyWordAnalysis {

    /**
     * Parses one Bilibili keyword-search result page.
     *
     * @param result HTML of the search result page
     * @return a map with {@code more} (whether the pager shows a next-page
     *         button) and {@code data} (one row per video list item with
     *         title, url, playcount, time, source and submitcount), or
     *         {@code null} when parsing throws
     */
    public static Map<String,Object> getData(String result) {
        try {
            Document page = Jsoup.parse(result);
            String nextButtonText = page.select("#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.next > button").text();
            boolean more = nextButtonText.contains("下一页");

            Elements videoItems = page.select("ul.video-contain.clearfix").select("li");
            System.out.println(videoItems.size() + " --- " + more);

            List<Map<String,Object>> rows = new ArrayList<Map<String,Object>>();
            for(Element item : videoItems) {
                Elements link = item.select("a");
                Elements tags = item.select("div.tags");

                Map<String,Object> row = new HashMap<String,Object>();
                row.put("title", link.attr("title"));
                row.put("url", link.attr("href"));
                row.put("playcount", tags.select("span.watch-num").text());
                row.put("time", tags.select("span.time").text());
                row.put("source", tags.select("a.up-name").text());
                row.put("submitcount", tags.select("span.hide").text());
                rows.add(row);
            }

            Map<String,Object> parsed = new HashMap<String,Object>();
            parsed.put("more", more);
            parsed.put("data", rows);
            return parsed;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}
...@@ -16,8 +16,8 @@ import com.alibaba.fastjson.JSONArray; ...@@ -16,8 +16,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class DayuByWordAnalysis { public class DayuByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(DayuByWordAnalysis.class); private static Logger logger = LoggerFactory.getLogger(DayuByWordAnalysis.class);
......
...@@ -14,8 +14,8 @@ import com.alibaba.fastjson.JSONArray; ...@@ -14,8 +14,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class DayuCommentAnalysis { public class DayuCommentAnalysis {
private static Logger logger = LoggerFactory.getLogger(DayuCommentAnalysis.class); private static Logger logger = LoggerFactory.getLogger(DayuCommentAnalysis.class);
......
...@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory; ...@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
public class DouyinHotDataAnalysis { public class DouyinHotDataAnalysis {
......
...@@ -13,7 +13,7 @@ import com.alibaba.fastjson.JSONArray; ...@@ -13,7 +13,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class FenghuangAccountAnalysis { public class FenghuangAccountAnalysis {
private static Logger logger = LoggerFactory.getLogger(FenghuangAccountAnalysis.class); private static Logger logger = LoggerFactory.getLogger(FenghuangAccountAnalysis.class);
......
...@@ -14,7 +14,7 @@ import com.alibaba.fastjson.JSONArray; ...@@ -14,7 +14,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
public class FenghuangCommentAnalysis { public class FenghuangCommentAnalysis {
private static Logger logger = LoggerFactory.getLogger(FenghuangCommentAnalysis.class); private static Logger logger = LoggerFactory.getLogger(FenghuangCommentAnalysis.class);
......
...@@ -14,8 +14,8 @@ import com.alibaba.fastjson.JSONArray; ...@@ -14,8 +14,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class QQKBCommentAnalysis { public class QQKBCommentAnalysis {
private static Logger logger = LoggerFactory.getLogger(QQKBCommentAnalysis.class); private static Logger logger = LoggerFactory.getLogger(QQKBCommentAnalysis.class);
......
...@@ -13,8 +13,8 @@ import org.jsoup.select.Elements; ...@@ -13,8 +13,8 @@ import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.zhiweiTools.timeParse.TimeUtil; import com.zhiwei.tools.timeparse.TimeUtil;
public class SoKuByWordAnalysis { public class SoKuByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(SoKuByWordAnalysis.class); private static Logger logger = LoggerFactory.getLogger(SoKuByWordAnalysis.class);
......
package com.zhiwei.parse.analysis;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
public class ToutiaoKeyWordAnalysis {

    /** Number of leading content characters used as a title when none is given. */
    private static final int TITLE_LENGTH = 16;

    /**
     * Parses a Toutiao keyword-search response into flat rows.
     *
     * Each entry of the {@code data} array becomes one row, except entries
     * carrying a {@code merge_weitoutiao} array, which contribute one row per
     * merged item instead. Every row has title, content, source, time and url.
     *
     * @param result raw JSON response
     * @return the parsed rows; empty when the response is empty or has no
     *         {@code data} array
     */
    public List<Map<String,Object>> getData(String result) {
        List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
        JSONObject json = JSONObject.parseObject(result);
        JSONArray jsonArry = json == null ? null : json.getJSONArray("data");
        if(jsonArry == null) {
            // Empty or unexpected response: nothing to parse (used to throw NPE).
            return bodyList;
        }
        for(int i = 0; i < jsonArry.size();i++) {
            JSONObject data = jsonArry.getJSONObject(i);
            JSONArray merged = data.getJSONArray("merge_weitoutiao");
            if(merged != null) {
                for(int j = 0; j < merged.size() ;j++) {
                    Map<String,Object> row = toWeitoutiaoRow(merged.getJSONObject(j));
                    System.out.println(row.toString());
                    bodyList.add(row);
                }
                continue;
            }
            Map<String,Object> row = toArticleRow(data);
            System.out.println(row.toString());
            bodyList.add(row);
        }
        System.out.println("====================");
        return bodyList;
    }

    /** Builds the row for one merged weitoutiao item; the title is derived from its content. */
    private static Map<String,Object> toWeitoutiaoRow(JSONObject item) {
        String content = item.getString("content");
        String title = content == null ? "" : abbreviate(content);
        return buildRow(title, content, item.getString("media_name"),
                item.getString("datetime"), item.getString("id"));
    }

    /** Builds the row for one regular search entry. */
    private static Map<String,Object> toArticleRow(JSONObject data) {
        String title = data.getString("title");
        String abstract1 = data.getString("abstract");
        String summary = data.getString("summary");
        String con = data.getString("content");
        String content = "";
        if(title == null) {
            // Untitled entry: borrow the title from the content, when there is any.
            if(con != null) {
                title = abbreviate(con);
                content = con;
                if(summary != null) {
                    content = content + summary;
                }
            }
        }else {
            if(summary != null) {
                content = summary;
            }
            if(abstract1 != null) {
                content = content + abstract1;
            }
        }
        return buildRow(title, content, data.getString("source"),
                data.getString("datetime"), data.getString("id"));
    }

    /** Assembles the common row layout shared by both entry kinds. */
    private static Map<String,Object> buildRow(String title, String content, String source, String time, String id) {
        Map<String,Object> row = new HashMap<String,Object>();
        row.put("title", title);
        row.put("content", content);
        row.put("source", source);
        row.put("time", time);
        row.put("url", "https://www.toutiao.com/a"+id+"/");
        return row;
    }

    /** Returns at most the first {@link #TITLE_LENGTH} characters of the text. */
    private static String abbreviate(String text) {
        return text.length() > TITLE_LENGTH ? text.substring(0, TITLE_LENGTH) : text;
    }
}
...@@ -15,7 +15,7 @@ import com.alibaba.fastjson.JSONArray; ...@@ -15,7 +15,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class WangyiHistoryAnalysis { public class WangyiHistoryAnalysis {
...@@ -41,7 +41,7 @@ public class WangyiHistoryAnalysis { ...@@ -41,7 +41,7 @@ public class WangyiHistoryAnalysis {
String url = "http://dy.163.com/v2/article/detail/"+data.getString("docid")+".html"; String url = "http://dy.163.com/v2/article/detail/"+data.getString("docid")+".html";
String reuslt = HttpClient.executeHttpRequestGet(url, proxy, headerMap); String reuslt = HttpClient.executeHttpRequestGet(url, proxy, headerMap);
Document doc = Jsoup.parse(reuslt); Document doc = Jsoup.parse(reuslt);
map.put("content", doc.select("div.content").text()); map.put("content", doc.select("div.content").text().replaceAll("<.*?>", ""));
map.put("url", url); map.put("url", url);
map.put("source", source); map.put("source", source);
dataList.add(map); dataList.add(map);
......
...@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory; ...@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
public class XiguaAccountAnalysis { public class XiguaAccountAnalysis {
private static Logger logger = LoggerFactory.getLogger(XiguaByWordAnalysis.class); private static Logger logger = LoggerFactory.getLogger(XiguaByWordAnalysis.class);
......
...@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory; ...@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
public class XiguaByWordAnalysis { public class XiguaByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(XiguaByWordAnalysis.class); private static Logger logger = LoggerFactory.getLogger(XiguaByWordAnalysis.class);
......
...@@ -8,6 +8,7 @@ import org.junit.Test; ...@@ -8,6 +8,7 @@ import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Baijia; import com.zhiwei.parse.Baijia;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class BaijiaAccountExample { public class BaijiaAccountExample {
...@@ -29,12 +30,22 @@ public class BaijiaAccountExample { ...@@ -29,12 +30,22 @@ public class BaijiaAccountExample {
poi.exportExcel("D://crawlerdata/百家号-马继华.xlsx", "马继华", headList, lists); poi.exportExcel("D://crawlerdata/百家号-马继华.xlsx", "马继华", headList, lists);
} }
@Test // @Test
public void baijiaAccount2Test() { public void baijiaAccount2Test() {
String app_id = "b_1536766622381605"; String app_id = "b_1548519002063358";
String startTime = "2017-01-01 00:00:00"; String startTime = "2018-01-01 00:00:00";
//2017-11-30 17:48:17 //2017-11-30 17:48:17
List<Map<String,Object>> lists = Baijia.getBaijiaAccount2Data(app_id,startTime,null); List<String> idList = new ArrayList<>();
idList.add("b_1548519002063358");
idList.add("b_1536766292852334");
idList.add("b_1536766781763274");
idList.add("b_1536766200338498");
List<Map<String,Object>> bodyList = new ArrayList<>();
for(String id : idList) {
ZhiWeiTools.sleep(5000);
List<Map<String,Object>> lists = Baijia.getBaijiaAccount2Data(id,startTime,null);
bodyList.addAll(lists);
}
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("title"); headList.add("title");
...@@ -42,7 +53,37 @@ public class BaijiaAccountExample { ...@@ -42,7 +53,37 @@ public class BaijiaAccountExample {
headList.add("source"); headList.add("source");
headList.add("url"); headList.add("url");
headList.add("content"); headList.add("content");
poi.exportExcel("D://crawlerdata//自媒体/百家号-科学的fan.xlsx", "科学的fan", headList, lists); poi.exportExcel("D://crawlerdata//自媒体/百家号-all.xlsx", "科学的fan", headList, bodyList);
}
@Test
public void test3() {
String path = "D://crawlerdata//自媒体/百家号采集.xlsx";
PoiExcelUtil poi = PoiExcelUtil.getInstance();
String startTime = "2018-01-01 00:00:00";
Map<String,Object> map = poi.importExcel(path, 0);
List<Map<String,Object>> list = (List<Map<String, Object>>) map.get("body");
List<Map<String,Object>> bodyList = new ArrayList<>();
for(Map<String,Object> m : list) {
try {
String app_id = m.get("id").toString();
String name = m.get("name").toString();
String cookie = "BAIDUID=BA1090A5857735165A2A419CBA37957A:FG=1";
List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id,name, startTime,cookie, null);
if(lists != null) {
bodyList.addAll(lists);
}
} catch (Exception e) {
}
}
List<String> headList = new ArrayList<String>();
headList.add("title");
headList.add("time");
headList.add("source");
headList.add("url");
headList.add("content");
headList.add("read_amount");
poi.exportExcel("D://crawlerdata//自媒体/百家号-lxj.xlsx", "娱乐资本论", headList, bodyList);
} }
} }
...@@ -8,7 +8,7 @@ import org.junit.Test; ...@@ -8,7 +8,7 @@ import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Fenghuang; import com.zhiwei.parse.Fenghuang;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class FenghuangCommentExample { public class FenghuangCommentExample {
......
package com.zhiwei.crawler;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Maimai;
public class MaimaiBywordExample {

    /**
     * Crawls Maimai posts for each keyword in a pipe-separated list and
     * exports all rows to one Excel sheet.
     *
     * NOTE(review): the session cookie below is a hard-coded credential;
     * consider supplying it from outside the source tree.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String word = "小米 上市|小米 IPO|雷军 IPO|小米 招股书|雷军 上市";
        String cookie = "sessionid=njbswswdrvwf4vpg0836xu6m7ve4ziso; guid=GxsfBBgZGwQYGx4EGBkeVgcYGx4bGRIdEx4bVhwZBB0ZHwVDWEtMS3kKExkbBBMfGRkEGgQcHAVPR0VYQmkKA0VBSU9tCk9BQ0YKBmZnfmJhAgocGQQdGR8FXkNhSE99T0ZaWmsKAx4cfWV9ChEZBBwKfmQKWV1FTkRDfQIKGgQfBUtGRkNQRWc=; seid=s1526952692556; token=\"nv0ZM3AICKHOmB1sdBi2QrvA0fFDgtRwdZJV+DzF3KsZdPIsvD1I2HOdRVyurjQi8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0Ijoid2s0MWRLbDBtWlFwTlJoWmdwc1JUZHR2IiwiX2V4cGlyZSI6MTUyNzAzOTEwMzE5MiwiX21heEFnZSI6ODY0MDAwMDB9; session.sig=ssvF7IeeQYlwCjdh8GaY3mhr0SY";
        String time = "2018-05-01 00:00:00";
        String[] words = word.split("\\|");
        List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
        for(String w : words) {
            List<Map<String,Object>> c = Maimai.getData(w, cookie, time, null);
            // One failed keyword must not discard the rows already collected.
            if(c != null) {
                bodyList.addAll(c);
            }
        }
        List<String> headList = new ArrayList<String>();
        headList.add("time");
        headList.add("url");
        headList.add("text");
        headList.add("name");
        headList.add("like");
        headList.add("comment_count");
        headList.add("spreads");
        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        poi.exportExcel("D:\\crawlerdata\\自媒体\\脉脉关键词采集-1.xlsx", "脉脉关键词", headList, bodyList);
    }
}
...@@ -7,9 +7,8 @@ import java.util.Map; ...@@ -7,9 +7,8 @@ import java.util.Map;
import org.junit.Test; import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Fenghuang;
import com.zhiwei.parse.Miaopai; import com.zhiwei.parse.Miaopai;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class MiaopaiByUrlExample { public class MiaopaiByUrlExample {
......
...@@ -8,7 +8,7 @@ import org.junit.Test; ...@@ -8,7 +8,7 @@ import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKB; import com.zhiwei.parse.QQKB;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class QQAccountExample { public class QQAccountExample {
......
...@@ -9,9 +9,9 @@ import java.util.Map; ...@@ -9,9 +9,9 @@ import java.util.Map;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK; import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
/** /**
* @ClassName: QQNewsCommentListTest * @ClassName: QQNewsCommentListTest
...@@ -128,6 +128,8 @@ public class QQNewsCommentListTest { ...@@ -128,6 +128,8 @@ public class QQNewsCommentListTest {
} }
} catch (IOException e) { } catch (IOException e) {
return null; return null;
} catch (Exception e) {
e.printStackTrace();
} }
return cmt_id; return cmt_id;
} }
......
...@@ -8,8 +8,8 @@ import java.util.Map; ...@@ -8,8 +8,8 @@ import java.util.Map;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.zhiweiTools.httpClient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK; import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
/** /**
* @ClassName: SinaCommentListTest * @ClassName: SinaCommentListTest
...@@ -86,6 +86,8 @@ public class SinaCommentListTest { ...@@ -86,6 +86,8 @@ public class SinaCommentListTest {
} }
} catch (IOException e) { } catch (IOException e) {
return null; return null;
} catch (Exception e) {
e.printStackTrace();
} }
return newsid; return newsid;
} }
......
...@@ -16,7 +16,7 @@ public class SouhuAccountExample { ...@@ -16,7 +16,7 @@ public class SouhuAccountExample {
@Test @Test
public void souhuAccountTest() { public void souhuAccountTest() {
List<Map<String,Object>> lists = Souhu.getSouHuAccountData("b1NlSFRzM1JaeURMR252VFY0VGpjVkJGckpyWUB3ZWNoYXQuc29odS5jb20=","2015-01-01 00:00:00",false,null); List<Map<String,Object>> lists = Souhu.getSouHuAccountData("c29odXptdHNmbjZ0cnRAc29odS5jb20=","2018-05-01 00:00:00",false,null);
System.out.println(lists.size()); System.out.println(lists.size());
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("title"); headList.add("title");
......
package com.zhiwei.crawler;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.junit.Test;
public class Test1 {

    /**
     * Prints the fifth slash-separated segment of a sample article URL,
     * which is the article id at the end of the path.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String articleUrl = "https://view.inews.qq.com/a/NEW2018021000440002";
        String[] segments = articleUrl.split("/");
        System.out.println(segments[4]);
    }
}
...@@ -8,7 +8,7 @@ import org.junit.Test; ...@@ -8,7 +8,7 @@ import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Wangyi; import com.zhiwei.parse.Wangyi;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class WangyiCommentExample { public class WangyiCommentExample {
......
...@@ -11,9 +11,9 @@ public class WangyiHistoryExample { ...@@ -11,9 +11,9 @@ public class WangyiHistoryExample {
public static void main(String[] args) { public static void main(String[] args) {
String url = "http://dy.163.com/v2/article/detail/DJK1G41H0519A6FP.html"; String url = "http://dy.163.com/v2/article/detail/DPLAOP1605198CJN.html";
List<Map<String,Object>> list = Wangyi.getHistoryData(url, null, "2017-01-01 00:00:00"); List<Map<String,Object>> list = Wangyi.getHistoryData(url, null, "2018-05-01 00:00:00");
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("title"); headList.add("title");
...@@ -22,7 +22,7 @@ public class WangyiHistoryExample { ...@@ -22,7 +22,7 @@ public class WangyiHistoryExample {
headList.add("source"); headList.add("source");
headList.add("url"); headList.add("url");
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D://crawlerdata//自媒体/网易-号外财经-1.xlsx", "asdasd", headList, list); poi.exportExcel("D://crawlerdata//自媒体/网易-财联社.xlsx", "财联社", headList, list);
} }
......
...@@ -8,7 +8,7 @@ import org.junit.Test; ...@@ -8,7 +8,7 @@ import org.junit.Test;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.XiGua; import com.zhiwei.parse.XiGua;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
public class XiguaByWordExample { public class XiguaByWordExample {
......
package com.zhiwei.hsitory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.testng.annotations.Test;
import com.zhiwei.bean.HistortyBean;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKandian;
public class QQkandianHistoryExample {

    /**
     * Fetches the history articles of one QQ Kandian account and exports
     * them to an Excel sheet, one row per article.
     */
    @Test
    public void f() {
        String uid = "2661642386";
        QQKandian crawler = new QQKandian();
        List<HistortyBean> articles = crawler.getHistoryData(uid, null);
        PoiExcelUtil poi = PoiExcelUtil.getInstance();

        // Flatten each bean into the column-name -> value layout used for export.
        List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
        for(HistortyBean article : articles) {
            Map<String, Object> row = new HashMap<String,Object>();
            row.put("标题", article.getTitle());
            row.put("时间", article.getTime());
            row.put("来源", article.getSource());
            row.put("正文", article.getContent());
            row.put("链接", article.getUrl());
            bodyList.add(row);
        }

        // Column order of the exported sheet.
        String[] columns = {"标题", "来源", "链接", "正文", "时间"};
        List<String> headList = new ArrayList<String>();
        for(String column : columns) {
            headList.add(column);
        }
        poi.exportExcel("D:\\crawlerdata\\自媒体\\qq看点-数据-2661642386.xlsx", "数据", headList, bodyList);
    }
}
package com.zhiwei.keyword;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.testng.annotations.Test;
import com.zhiwei.bean.HistortyBean;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKandian;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class QQKandianKeyWordExample {

    /**
     * Searches QQ Kandian for each keyword of a pipe-separated list, pausing
     * three seconds between keywords, and exports all hits to one Excel sheet.
     */
    @Test
    public void f() {
        String word = "今日头条 算法|今日头条 侵权|今日头条 起诉|字节跳动|张一鸣|抖音 涉黄|抖音 未成年|抖音";
        String[] words = word.split("\\|");
        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        QQKandian crawler = new QQKandian();

        List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
        for(int k = 0; k < words.length; k++) {
            String keyword = words[k];
            System.out.println(keyword);
            List<HistortyBean> hits = crawler.getDataByword(keyword, null);
            System.out.println(keyword + " ---- " + hits.size());
            for(HistortyBean hit : hits) {
                bodyList.add(toRow(hit));
            }
            ZhiWeiTools.sleep(3000);
        }

        // Column order of the exported sheet.
        String[] columns = {"标题", "来源", "链接", "正文", "时间"};
        List<String> headList = new ArrayList<String>();
        for(String column : columns) {
            headList.add(column);
        }
        poi.exportExcel("D:\\crawlerdata\\自媒体\\qq看点-今日头条 算法.xlsx", "马化腾", headList, bodyList);
    }

    /** Flattens one article bean into the column-name -> value layout used for export. */
    private static Map<String,Object> toRow(HistortyBean article) {
        Map<String, Object> row = new HashMap<String,Object>();
        row.put("标题", article.getTitle());
        row.put("时间", article.getTime());
        row.put("来源", article.getSource());
        row.put("正文", article.getContent());
        row.put("链接", article.getUrl());
        return row;
    }
}
package com.zhiwei.keyword;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Toutiao;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class ToutiaoKeyWordExample {

    /**
     * Reads keywords from the {@code 关键词} column of an Excel workbook,
     * searches Toutiao for each one, keeps the hits whose time lies inside a
     * fixed one-day window and whose title or content contains the keyword,
     * and exports them.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        String path = "D:\\crawlerdata\\关键词.xlsx";
        Map<String,Object> workbook = poi.importExcel(path, 0);
        List<Map<String,Object>> keywordRows = (List<Map<String, Object>>) workbook.get("body");
        List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
        String startTime = "2018-06-28 00:00:00";
        String endTime = "2018-06-28 23:59:59";
        String devoid = "54381805805";

        for(Map<String,Object> keywordRow : keywordRows) {
            String word = String.valueOf(keywordRow.get("关键词"));
            List<Map<String,Object>> hits = Toutiao.getKeyWordData(word, null,devoid);
            if(hits != null) {
                for(Map<String,Object> hit : hits) {
                    String time = String.valueOf(hit.get("time"));
                    System.out.println(time);
                    hit.put("word", word);

                    // Timestamps are compared as text; both bounds are inclusive.
                    boolean inWindow = time.compareTo(startTime) >= 0 && time.compareTo(endTime) <= 0;
                    if(!inWindow) {
                        continue;
                    }
                    System.out.println(1);
                    String searchable = hit.get("title") + "--" + hit.get("content");
                    if(searchable.contains(word)) {
                        bodyList.add(hit);
                    }
                }
            }
            // Pause between keywords whether or not the search returned rows.
            ZhiWeiTools.sleep(2000);
        }

        String[] columns = {"title", "time", "content", "source", "url", "word"};
        List<String> headList = new ArrayList<String>();
        for(String column : columns) {
            headList.add(column);
        }
        poi.exportExcel(path, "雅培", headList, bodyList);
    }
}
package com.zhiwei.qqkb;
import java.util.List;
import java.util.Map;
import com.zhiwei.bean.QQkbUser;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKB;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Example driver: reads channel names from an Excel workbook, looks each one up through
 * {@code QQKB.getUserData}, adds the returned user fields to the row, and exports the
 * enriched rows to the same workbook path under a new sheet name.
 */
public class QQkbUserExample {

    /** Columns appended to the imported header, in order. */
    private static final String[] EXTRA_COLUMNS = {"guanzhu", "desc", "url", "vip"};

    /**
     * Runs the user lookup.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        PoiExcelUtil excel = PoiExcelUtil.getInstance();
        String workbookPath = "D:\\crawlerdata\\用户采集\\qq看点用户.xlsx";
        Map<String, Object> sheet = excel.importExcel(workbookPath, 0);

        @SuppressWarnings("unchecked")
        List<Map<String, Object>> rows = (List<Map<String, Object>>) sheet.get("body");
        for (Map<String, Object> row : rows) {
            enrich(row);
            // Pause after every lookup, whether or not it succeeded.
            ZhiWeiTools.sleep(2000);
        }

        // The imported header list is extended in place with the new columns.
        @SuppressWarnings("unchecked")
        List<String> header = (List<String>) sheet.get("head");
        for (String column : EXTRA_COLUMNS) {
            header.add(column);
        }
        excel.exportExcel(workbookPath, sheet.get("sheetName") + "-更新后", header, rows);
    }

    /**
     * Looks up the user named in the row's {@code "渠道"} cell and, when a user is returned,
     * copies its fields into the row and prints the row.
     *
     * @param row the spreadsheet row to enrich in place
     */
    private static void enrich(Map<String, Object> row) {
        String channel = String.valueOf(row.get("渠道"));
        QQkbUser user = QQKB.getUserData(channel);
        if (user == null) {
            return;
        }
        row.put("guanzhu", user.getGuanzhu());
        row.put("desc", user.getDesc());
        row.put("url", user.getUrl());
        row.put("vip", user.getVip());
        System.out.println(row);
    }
}
package com.zhiwei.shipin;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Douyin;
/**
 * Example driver: fetches the data for one Douyin challenge share URL through
 * {@code Douyin.getDouyinHotData} and exports the returned rows to an Excel workbook.
 */
public class DouyinHotExample {

    /** Share URL passed to the crawler. */
    private static final String CHALLENGE_URL =
            "https://www.iesdouyin.com/share/challenge/1604239741363223?utm_campaign=client_share&app=aweme&utm_medium=ios&tt_from=qq&utm_source=qq&iid=36454376501";

    /** Columns written to the output sheet, in order. */
    private static final String[] COLUMNS = {
        "text", "url", "time", "author", "comment_count", "like_count", "share_count"
    };

    /**
     * Runs the crawl and export.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // The crawl happens first; its result is exported as-is, with no null check.
        List<Map<String, Object>> rows = Douyin.getDouyinHotData(CHALLENGE_URL, null);

        List<String> header = new ArrayList<String>();
        for (String column : COLUMNS) {
            header.add(column);
        }

        PoiExcelUtil excel = PoiExcelUtil.getInstance();
        excel.exportExcel("D:\\crawlerdata\\抖音热门采集测试.xlsx", "asd", header, rows);
    }
}
package com.zhiwei.user;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.testng.annotations.Test;
import com.zhiwei.bean.QQKandianUser;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKandian;
import com.zhiwei.tools.tools.ZhiWeiTools;
/**
 * Example TestNG driver: reads channel names from an Excel workbook, queries
 * {@code QQKandian.getUser} for each one, and exports every returned user to the
 * same workbook path.
 */
public class QQkandianExample {

    /** Columns written to the output sheet, in order. */
    private static final String[] COLUMNS = {"name", "url", "verity", "desc"};

    /**
     * Collects users for every channel listed in the workbook and writes them out.
     */
    @Test
    public void f() {
        QQKandian crawler = new QQKandian();
        String workbookPath = "D:\\crawlerdata\\用户采集\\qq看点用户.xlsx";
        PoiExcelUtil excel = PoiExcelUtil.getInstance();
        Map<String, Object> sheet = excel.importExcel(workbookPath, 0);

        @SuppressWarnings("unchecked")
        List<Map<String, Object>> channelRows = (List<Map<String, Object>>) sheet.get("body");

        // Phase 1: gather users for each channel, pausing after every query.
        List<QQKandianUser> collected = new ArrayList<QQKandianUser>();
        for (Map<String, Object> channelRow : channelRows) {
            String channel = String.valueOf(channelRow.get("渠道"));
            System.out.println(channel);
            List<QQKandianUser> found = crawler.getUser(channel, null);
            if (found == null) {
                System.out.println(channel + "--- null");
            } else {
                System.out.println(found.size());
                collected.addAll(found);
            }
            ZhiWeiTools.sleep(3000);
        }

        // Phase 2: turn the gathered users into export rows.
        List<String> header = new ArrayList<String>();
        for (String column : COLUMNS) {
            header.add(column);
        }
        List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
        for (QQKandianUser user : collected) {
            rows.add(toRow(user));
        }
        excel.exportExcel(workbookPath, "数据完成后", header, rows);
    }

    /**
     * Builds one export row from a user.
     *
     * @param user the user to convert
     * @return a map keyed by the output column names
     */
    private static Map<String, Object> toRow(QQKandianUser user) {
        Map<String, Object> row = new HashMap<String, Object>();
        row.put("name", user.getName());
        row.put("url", user.getUrl());
        // The "verity" key spelling is what the export header expects; keep it as-is.
        row.put("verity", user.isVerify());
        row.put("desc", user.getDesc());
        return row;
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment