Commit 2a35dd02 by yangchen

提升版本 修改脉脉采集

parent b3d545a3
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>articlenewscrawler</artifactId> <artifactId>articlenewscrawler</artifactId>
<version>0.0.8-SNAPSHOT</version> <version>0.0.9-SNAPSHOT</version>
<name>articlenewscrawler</name> <name>articlenewscrawler</name>
<description>采集凤凰,一点资讯,搜狐历时文章和文章评论</description> <description>采集凤凰,一点资讯,搜狐历时文章和文章评论</description>
......
...@@ -9,6 +9,7 @@ import org.slf4j.LoggerFactory; ...@@ -9,6 +9,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import okhttp3.Response; import okhttp3.Response;
...@@ -32,7 +33,23 @@ public class HttpClient { ...@@ -32,7 +33,23 @@ public class HttpClient {
logger.error("httpClient 获取数据出现问题:{}", e); logger.error("httpClient 获取数据出现问题:{}", e);
return null; return null;
} }
}
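/**
 * Sends a GET request to {@code url} with the given headers through the given proxy
 * and returns the response body as a string. Any exception is logged and
 * {@code null} is returned instead of being propagated.
 *
 * @param url       address to request
 * @param proxy     proxy holder passed to the HTTP call
 * @param headerMap request headers
 * @return the response body, or {@code null} when the request fails
 */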
public static String executeHttpRequestGet(String url,ProxyHolder proxy,Map<String, String> headerMap) {
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy)){
return response.body().string();
} catch (Exception e) {
logger.error("httpClient 获取数据出现问题:{}", e);
return null;
}
} }
public static String executeHttpRequestPost(String url,Proxy proxy,Map<String, String> headerMap,Map<String, Object> paramMap) { public static String executeHttpRequestPost(String url,Proxy proxy,Map<String, String> headerMap,Map<String, Object> paramMap) {
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.net.Proxy; import static java.util.Objects.nonNull;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import static java.util.Objects.nonNull;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.parse.analysis.AikaCommentAnalysis; import com.zhiwei.parse.analysis.AikaCommentAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -22,7 +23,7 @@ public class Aika { ...@@ -22,7 +23,7 @@ public class Aika {
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot();
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<Map<String,Object>> getAikaComment(String url,Proxy proxy) { public static List<Map<String,Object>> getAikaComment(String url,ProxyHolder proxy) {
String commentId = getCommentId(url); String commentId = getCommentId(url);
if(nonNull(commentId)) { if(nonNull(commentId)) {
List<Map<String,Object>> bodyList = new ArrayList<>(); List<Map<String,Object>> bodyList = new ArrayList<>();
......
...@@ -29,7 +29,7 @@ public class Aiqiyi { ...@@ -29,7 +29,7 @@ public class Aiqiyi {
Map<String,String> headerMap = HeadGet.getAiqiyiBywordHeaderMap(null); Map<String,String> headerMap = HeadGet.getAiqiyiBywordHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<>(); List<Map<String,Object>> dataList = new ArrayList<>();
try { try {
for(int i = 1;i <= 20;i++) { for(int i = 1;i <= 5;i++) {
int count = dataList.size(); int count = dataList.size();
String url = "https://so.iqiyi.com/so/q_"+URLEncoder.encode(word, "UTF-8")+"_ctg__t_0_page_"+i+"_p_1_qc_0_rd__site__m_4_bitrate_"; String url = "https://so.iqiyi.com/so/q_"+URLEncoder.encode(word, "UTF-8")+"_ctg__t_0_page_"+i+"_p_1_qc_0_rd__site__m_4_bitrate_";
System.out.println(url); System.out.println(url);
......
...@@ -2,7 +2,6 @@ package com.zhiwei.parse; ...@@ -2,7 +2,6 @@ package com.zhiwei.parse;
import static java.util.Objects.nonNull; import static java.util.Objects.nonNull;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Date; import java.util.Date;
...@@ -17,6 +16,7 @@ import com.alibaba.fastjson.JSONArray; ...@@ -17,6 +16,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -34,7 +34,7 @@ public class Chejia { ...@@ -34,7 +34,7 @@ public class Chejia {
* @param proxy * @param proxy
* @return * @return
*/ */
public static int getChejiaCommentCount(String url,Proxy proxy) { public static int getChejiaCommentCount(String url,ProxyHolder proxy) {
String id = getCommentUrl(url, proxy); String id = getCommentUrl(url, proxy);
if(nonNull(id)) { if(nonNull(id)) {
System.out.println(id); System.out.println(id);
...@@ -57,7 +57,7 @@ public class Chejia { ...@@ -57,7 +57,7 @@ public class Chejia {
* @param proxy * @param proxy
* @return * @return
*/ */
public static List<Map<String,Object>> getChejiaComment(String url,Proxy proxy) { public static List<Map<String,Object>> getChejiaComment(String url,ProxyHolder proxy) {
String nUrl = getCommentUrl(url, proxy); String nUrl = getCommentUrl(url, proxy);
if(nonNull(nUrl)) { if(nonNull(nUrl)) {
int page = 1; int page = 1;
...@@ -98,7 +98,7 @@ public class Chejia { ...@@ -98,7 +98,7 @@ public class Chejia {
return Collections.emptyList(); return Collections.emptyList();
} }
private static String getCommentUrl(String url,Proxy proxy) { private static String getCommentUrl(String url,ProxyHolder proxy) {
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){ try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){
String objectID = response.body().string().split("pvTrack.object = ")[1].split(";")[0].replace("\"", ""); String objectID = response.body().string().split("pvTrack.object = ")[1].split(";")[0].replace("\"", "");
return "https://reply.autohome.com.cn/api/comments/show.json?appid=21&count=50&id="+objectID; return "https://reply.autohome.com.cn/api/comments/show.json?appid=21&count=50&id="+objectID;
......
...@@ -10,6 +10,7 @@ import java.util.Map; ...@@ -10,6 +10,7 @@ import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.FenghuangAccountAnalysis; import com.zhiwei.parse.analysis.FenghuangAccountAnalysis;
...@@ -64,7 +65,7 @@ public class Fenghuang { ...@@ -64,7 +65,7 @@ public class Fenghuang {
* @param docUrl * @param docUrl
* @return * @return
*/ */
public static List<Map<String,Object>> getFenghuangCommentData(String url,Proxy proxy) { public static List<Map<String,Object>> getFenghuangCommentData(String url,ProxyHolder proxy) {
url = fenghuangCommentAnalysis.getdocUrl(url,proxy); url = fenghuangCommentAnalysis.getdocUrl(url,proxy);
if(url == null) { if(url == null) {
return Collections.emptyList(); return Collections.emptyList();
...@@ -92,7 +93,7 @@ public class Fenghuang { ...@@ -92,7 +93,7 @@ public class Fenghuang {
* @param proxy * @param proxy
* @return * @return
*/ */
public static List<Map<String,Object>> getFenghuangCommentData2(String url,Proxy proxy) { public static List<Map<String,Object>> getFenghuangCommentData2(String url,ProxyHolder proxy) {
url = fenghuangCommentAnalysis.getdocUrl(url,proxy); url = fenghuangCommentAnalysis.getdocUrl(url,proxy);
if(url == null) { if(url == null) {
return Collections.emptyList(); return Collections.emptyList();
...@@ -118,7 +119,7 @@ public class Fenghuang { ...@@ -118,7 +119,7 @@ public class Fenghuang {
* @param url * @param url
* @return * @return
*/ */
public static Map<String,Object> getFenghuangCommentCount(String url,Proxy proxy) { public static Map<String,Object> getFenghuangCommentCount(String url,ProxyHolder proxy) {
url = fenghuangCommentAnalysis.getdocUrl(url,proxy); url = fenghuangCommentAnalysis.getdocUrl(url,proxy);
System.out.println(url); System.out.println(url);
if(url == null) { if(url == null) {
......
...@@ -9,6 +9,7 @@ import java.util.Collections; ...@@ -9,6 +9,7 @@ import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -21,6 +22,7 @@ import com.zhiwei.crawler.proxy.ProxyHolder; ...@@ -21,6 +22,7 @@ import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.MaimaiBywordAnalysis; import com.zhiwei.parse.analysis.MaimaiBywordAnalysis;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response; import okhttp3.Response;
...@@ -51,7 +53,7 @@ public class Maimai { ...@@ -51,7 +53,7 @@ public class Maimai {
int i = 20; int i = 20;
while(f) { while(f) {
String result = HttpClient.executeHttpRequestGet(url, proxy, headerMap); String result = HttpClient.executeHttpRequestGet(url, proxy, headerMap);
Map<String,Object> map = maimaiBywordAnalysis.getData(result, time); Map<String,Object> map = maimaiBywordAnalysis.getData(result, time,key);
f = (boolean) map.get("hasMore"); f = (boolean) map.get("hasMore");
List<Map<String,Object>> daList = (List<Map<String, Object>>) map.get("data"); List<Map<String,Object>> daList = (List<Map<String, Object>>) map.get("data");
if(daList != null && !daList.isEmpty()) { if(daList != null && !daList.isEmpty()) {
...@@ -89,7 +91,7 @@ public class Maimai { ...@@ -89,7 +91,7 @@ public class Maimai {
int i = 20; int i = 20;
while(f) { while(f) {
String result = HttpClient.executeHttpRequestGet(url, proxy, headerMap); String result = HttpClient.executeHttpRequestGet(url, proxy, headerMap);
Map<String,Object> map = maimaiBywordAnalysis.getDataByNoName(result, time); Map<String,Object> map = maimaiBywordAnalysis.getDataByNoName(result, time,key);
f = (boolean) map.get("hasMore"); f = (boolean) map.get("hasMore");
List<Map<String,Object>> daList = (List<Map<String, Object>>) map.get("data"); List<Map<String,Object>> daList = (List<Map<String, Object>>) map.get("data");
if(daList != null && daList.size() > 0) { if(daList != null && daList.size() > 0) {
...@@ -129,6 +131,40 @@ public class Maimai { ...@@ -129,6 +131,40 @@ public class Maimai {
map.put("gid", data.getLong("id")); map.put("gid", data.getLong("id"));
map.put("title", data.getString("text")); map.put("title", data.getString("text"));
map.put("author", data.getString("author")); map.put("author", data.getString("author"));
map.put("userId", data.getString("mmid"));
return map;
} catch (Exception e) {
logger.error(" 脉脉 转评攒 获取失败 {}",e);
}
return Collections.emptyMap();
}
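/**
 * Fetches the like / spread / comment counts of a Maimai gossip detail page.
 * Example url:
 * https://maimai.cn/web/gossip_detail?encode_id=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MTk2MzEyNjYsImlhdCI6MTU0ODI5NzI5NX0.N6SPmcf-fyitLNomzY-a8BEY31eseYnvG7RTUQ3jxYY
 *
 * @param url    gossip detail page address
 * @param cookie value for the {@code cookie} request header; not sent when null or empty
 * @param proxy  proxy holder passed to the HTTP call
 * @return map with the keys like, spreads, cmts, gid, title, author and userId
 */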
public static Map<String,Object> getMaiaiCount(String url,String cookie,ProxyHolder proxy) {
Map<String,Object> headers = new HashMap<>();
if(Objects.nonNull(cookie) && !cookie.isEmpty()) {
headers.put("cookie", cookie);
}
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headers), proxy)){
String result = response.body().string();
result = result.split("JSON.parse\\(\"")[1].split("\"\\);\\</script\\>")[0];
result = ZhiWeiTools.decodeUnicode(result);
JSONObject json = JSONObject.parseObject(result);
Map<String,Object> map = new HashMap<>();
JSONObject data = json.getJSONObject("data").getJSONObject("gossip");
map.put("like", data.getInteger("likes"));
map.put("spreads", data.getInteger("spreads"));
map.put("cmts", data.getInteger("cmts"));
map.put("gid", data.getLong("id"));
map.put("title", data.getString("text"));
map.put("author", data.getString("author"));
map.put("userId", data.getString("mmid"));
return map; return map;
} catch (Exception e) { } catch (Exception e) {
logger.error(" 脉脉 转评攒 获取失败 {}",e); logger.error(" 脉脉 转评攒 获取失败 {}",e);
...@@ -144,9 +180,13 @@ public class Maimai { ...@@ -144,9 +180,13 @@ public class Maimai {
* @return * @return
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<Map<String,Object>> getMaimaiCommentList(String url,ProxyHolder proxy) { public static List<Map<String,Object>> getMaimaiCommentList(String url,String cookie,ProxyHolder proxy) {
List<Map<String,Object>> dataList = new ArrayList<>(); List<Map<String,Object>> dataList = new ArrayList<>();
Map<String,Object> mmid = getMaiaiCount(url, proxy); Map<String,Object> mmid = getMaiaiCount(url,cookie, proxy);
Map<String,Object> headers = new HashMap<>();
if(Objects.nonNull(cookie) && !cookie.isEmpty()) {
headers.put("cookie", cookie);
}
if(mmid!=null) { if(mmid!=null) {
String gid = String.valueOf(mmid.get("gid")); String gid = String.valueOf(mmid.get("gid"));
boolean more = true; boolean more = true;
...@@ -154,7 +194,10 @@ public class Maimai { ...@@ -154,7 +194,10 @@ public class Maimai {
while(more) { while(more) {
try { try {
String link = "https://maimai.cn/sdk/web/gossip/getcmts?gid="+gid+"&page="+page+"&count=50&hotcmts_limit_count=100"; String link = "https://maimai.cn/sdk/web/gossip/getcmts?gid="+gid+"&page="+page+"&count=50&hotcmts_limit_count=100";
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(link),proxy).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(link,headers),proxy).body().string();
if(Objects.nonNull(cookie) && !cookie.isEmpty()) {
ZhiWeiTools.sleep(2000);
}
if(htmlBody!=null && htmlBody.length()>0) { if(htmlBody!=null && htmlBody.length()>0) {
JSONObject dataJson = JSONObject.parseObject(htmlBody); JSONObject dataJson = JSONObject.parseObject(htmlBody);
JSONArray commentJson = dataJson.getJSONArray("comments"); JSONArray commentJson = dataJson.getJSONArray("comments");
...@@ -184,4 +227,54 @@ public class Maimai { ...@@ -184,4 +227,54 @@ public class Maimai {
return Collections.emptyList(); return Collections.emptyList();
} }
/**
 * Searches Maimai contacts matching {@code word} and collects every result page.
 *
 * <p>Pages are requested starting at page 0. Collection stops when a page comes back
 * with no contacts or when a request/parse error occurs; everything collected up to
 * that point is returned.
 *
 * @param word   search keyword; URL-encoded as utf-8 before being placed in the query
 * @param cookie value for the {@code cookie} request header; not sent when null or empty
 * @param proxy  proxy passed to the HTTP call
 * @return the contact maps built by {@code getUserMap}, possibly empty, never null
 */
public static List<Map<String,Object>> getUserList(String word,String cookie,Proxy proxy) {
    // Encode once; the same value is needed for both the query and the referer.
    String encodedWord = URLCodeUtil.getURLEncode(word, "utf-8");
    String url = "https://maimai.cn/search/contacts?count=50&query="+encodedWord+"&dist=0&searchTokens=&highlight=true&jsononly=1&pc=1&page=";
    List<Map<String,Object>> dataList = new ArrayList<>();
    Map<String,Object> headers = new HashMap<>();
    logger.info("{}", url);
    // Same guard as the other Maimai methods: never put a null/empty cookie header.
    if(cookie != null && !cookie.isEmpty()) {
        headers.put("cookie", cookie);
    }
    headers.put("referer", "https://maimai.cn/web/search_center?type=contact&query="+encodedWord+"&highlight=true");
    int page = 0;
    while(true) {
        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url+page, headers), proxy)){
            String result = response.body().string();
            JSONObject json = JSONObject.parseObject(result);
            JSONArray jsonArray = json.getJSONObject("data").getJSONArray("contacts");
            for(int i = 0;i < jsonArray.size();i++) {
                dataList.add(getUserMap(jsonArray.getJSONObject(i)));
            }
            page++;
            logger.info(" 采集到 {} 页 ,一共采集到 {} 条",page,dataList.size());
            // An empty page means the listing is exhausted; leave before the throttle sleep.
            if(jsonArray.isEmpty()) {
                break;
            }
            // Throttle between consecutive page requests.
            ZhiWeiTools.sleep(2000);
        } catch (Exception e) {
            // Unexpected payload or network failure: keep what was collected, record the cause.
            logger.error("maimai user search failed at page {}", page, e);
            break;
        }
    }
    return dataList;
}
/**
 * Flattens one entry of the contact search result into a plain map.
 *
 * <p>{@code uid} is read from the entry itself; name, gender, encode_mmid, rank, compos
 * and city are read from its nested {@code contact} object.
 *
 * @param data one element of the {@code contacts} array
 * @return map with the keys id, name, gender, url, rank, compos and city, or an
 *         immutable empty map when the entry cannot be read
 */
private static Map<String,Object> getUserMap(JSONObject data) {
    try {
        JSONObject contact = data.getJSONObject("contact");
        Map<String,Object> user = new HashMap<>();
        user.put("id", data.getString("uid"));
        user.put("name", contact.getString("name"));
        user.put("gender", contact.getInteger("gender"));
        user.put("url", "https://maimai.cn/contact/detail/"+contact.getString("encode_mmid"));
        user.put("rank", contact.getInteger("rank"));
        user.put("compos", contact.getString("compos"));
        user.put("city", contact.getString("city"));
        return user;
    } catch (Exception e) {
        // Pass the exception to the logger so the reason for the parse failure is not lost.
        logger.error(" 脉脉用户解析出错 ", e);
    }
    return Collections.emptyMap();
}
} }
package com.zhiwei.parse; package com.zhiwei.parse;
import static java.util.Objects.nonNull; import static java.util.Objects.nonNull;
import java.io.IOException; import java.io.IOException;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
...@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory; ...@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.parse.analysis.PcautoCommentAnalysis; import com.zhiwei.parse.analysis.PcautoCommentAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -24,7 +25,7 @@ public class Pcauto { ...@@ -24,7 +25,7 @@ public class Pcauto {
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot();
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<Map<String, Object>> getPcAutoComment(String url,Proxy proxy) { public static List<Map<String, Object>> getPcAutoComment(String url,ProxyHolder proxy) {
String newUrl = getCommentUrl(url, proxy); String newUrl = getCommentUrl(url, proxy);
if(nonNull(newUrl)) { if(nonNull(newUrl)) {
List<Map<String,Object>> dataList = new ArrayList<>(); List<Map<String,Object>> dataList = new ArrayList<>();
...@@ -53,7 +54,7 @@ public class Pcauto { ...@@ -53,7 +54,7 @@ public class Pcauto {
return Collections.emptyList(); return Collections.emptyList();
} }
private static String getCommentUrl(String url,Proxy proxy) { private static String getCommentUrl(String url,ProxyHolder proxy) {
for(int i = 0;i < 3;i++) { for(int i = 0;i < 3;i++) {
try { try {
String result = httpBoot.syncCall(RequestUtils.wrapGet("https://cmt.pcauto.com.cn/action/topic/get_data.jsp?url="+url), proxy).body().string(); String result = httpBoot.syncCall(RequestUtils.wrapGet("https://cmt.pcauto.com.cn/action/topic/get_data.jsp?url="+url), proxy).body().string();
......
...@@ -11,7 +11,6 @@ import org.slf4j.LoggerFactory; ...@@ -11,7 +11,6 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.bean.QQkbUser; import com.zhiwei.bean.QQkbUser;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
...@@ -19,13 +18,10 @@ import com.zhiwei.parse.analysis.QQKBAccountAnalysis; ...@@ -19,13 +18,10 @@ import com.zhiwei.parse.analysis.QQKBAccountAnalysis;
import com.zhiwei.parse.analysis.QQKBCommentAnalysis; import com.zhiwei.parse.analysis.QQKBCommentAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
public class QQKB { public class QQKB {
private static Logger logger = LoggerFactory.getLogger(QQKB.class); private static Logger logger = LoggerFactory.getLogger(QQKB.class);
private static QQKBAccountAnalysis qqAccountAnalysis = new QQKBAccountAnalysis(); private static QQKBAccountAnalysis qqAccountAnalysis = new QQKBAccountAnalysis();
private static QQKBCommentAnalysis qqkbCommentAnalysis = new QQKBCommentAnalysis(); private static QQKBCommentAnalysis qqkbCommentAnalysis = new QQKBCommentAnalysis();
private static HttpBoot httpBoot = new HttpBoot();
/** /**
* *
......
...@@ -32,9 +32,9 @@ public class QQKandian { ...@@ -32,9 +32,9 @@ public class QQKandian {
public List<QQKandianUser> getUser(String name,Proxy proxy) { public List<QQKandianUser> getUser(String name,Proxy proxy) {
if(name != null && name.length() > 0) { if(name != null && name.length() > 0) {
List<QQKandianUser> dataList = new ArrayList<QQKandianUser>(); List<QQKandianUser> dataList = new ArrayList<>();
OkHttpClient okhttp = HttpClientBuilder.newInstance(); OkHttpClient okhttp = HttpClientBuilder.newInstance();
Map<String,String> map = new HashMap<String,String>(); Map<String,String> map = new HashMap<>();
map.put("Host", "sou.qq.com"); map.put("Host", "sou.qq.com");
map.put("Referer", "https://sou.qq.com/kandian/kd.html?_bid=3216&_wv=3&_wwv=1293&_wvSb=0&hotword=%E7%9F%A5%E5%90%8D%E5%A4%A7V%E7%AB%A0%E6%96%87%E6%B6%89%E6%80%A7%E4%BE%B5"); map.put("Referer", "https://sou.qq.com/kandian/kd.html?_bid=3216&_wv=3&_wwv=1293&_wvSb=0&hotword=%E7%9F%A5%E5%90%8D%E5%A4%A7V%E7%AB%A0%E6%96%87%E6%B6%89%E6%80%A7%E4%BE%B5");
map.put("Cookie", "skey=MUzU7gdtRz; uin=o0497332654; RK=rNiJH0RBav; pgv_pvid=8990378504; pt2gguin=o0497332654; ptcz=062d936df33011f468637ee72be262a020a8df79977df7e7bde9c105b2b2ddf6"); map.put("Cookie", "skey=MUzU7gdtRz; uin=o0497332654; RK=rNiJH0RBav; pgv_pvid=8990378504; pt2gguin=o0497332654; ptcz=062d936df33011f468637ee72be262a020a8df79977df7e7bde9c105b2b2ddf6");
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.io.IOException; import java.io.IOException;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -11,6 +10,7 @@ import org.slf4j.LoggerFactory; ...@@ -11,6 +10,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.parse.analysis.QicheHomeKwyWordAnalysis; import com.zhiwei.parse.analysis.QicheHomeKwyWordAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -21,7 +21,7 @@ public class QicheHome { ...@@ -21,7 +21,7 @@ public class QicheHome {
private static QicheHomeKwyWordAnalysis qicheHomeKwyWordAnalysis = new QicheHomeKwyWordAnalysis(); private static QicheHomeKwyWordAnalysis qicheHomeKwyWordAnalysis = new QicheHomeKwyWordAnalysis();
public static List<Map<String,Object>> getQiCheComment(String articleid,Proxy proxy) { public static List<Map<String,Object>> getQiCheComment(String articleid,ProxyHolder proxy) {
List<Map<String,Object>> bodyList = new ArrayList<>(); List<Map<String,Object>> bodyList = new ArrayList<>();
int page = 1; int page = 1;
int count = 2; int count = 2;
...@@ -35,7 +35,7 @@ public class QicheHome { ...@@ -35,7 +35,7 @@ public class QicheHome {
} }
bodyList.addAll(qicheHomeKwyWordAnalysis.getData(result)); bodyList.addAll(qicheHomeKwyWordAnalysis.getData(result));
logger.info("采集 articleid {} 总页数 {} 第 {} 页 , 采集总数 {}",articleid,count,page,bodyList.size()); logger.info("采集 articleid {} 总页数 {} 第 {} 页 , 采集总数 {}",articleid,count,page,bodyList.size());
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(200);
if(page > count) { if(page > count) {
break; break;
} }
......
...@@ -3,7 +3,6 @@ package com.zhiwei.parse; ...@@ -3,7 +3,6 @@ package com.zhiwei.parse;
import static java.util.Objects.nonNull; import static java.util.Objects.nonNull;
import java.io.IOException; import java.io.IOException;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
...@@ -14,6 +13,7 @@ import org.slf4j.LoggerFactory; ...@@ -14,6 +13,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.parse.analysis.SinaKejiCommentAnalysis; import com.zhiwei.parse.analysis.SinaKejiCommentAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -30,7 +30,7 @@ public class SinaKeji { ...@@ -30,7 +30,7 @@ public class SinaKeji {
* @param proxy * @param proxy
* @return * @return
*/ */
public static List<Map<String, Object>> getSinaKejiComment(String url,Proxy proxy) { public static List<Map<String, Object>> getSinaKejiComment(String url,ProxyHolder proxy) {
String commentId = getCommentId(url, proxy); String commentId = getCommentId(url, proxy);
if(nonNull(commentId)) { if(nonNull(commentId)) {
List<Map<String,Object>> dataList = new ArrayList<>(); List<Map<String,Object>> dataList = new ArrayList<>();
...@@ -60,7 +60,7 @@ public class SinaKeji { ...@@ -60,7 +60,7 @@ public class SinaKeji {
return Collections.emptyList(); return Collections.emptyList();
} }
private static String getCommentId(String url,Proxy proxy) { private static String getCommentId(String url,ProxyHolder proxy) {
String commentId = null; String commentId = null;
for(int i = 0;i < 3;i++) { for(int i = 0;i < 3;i++) {
try { try {
......
...@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory; ...@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.SouhuAccountAnalysis; import com.zhiwei.parse.analysis.SouhuAccountAnalysis;
...@@ -34,7 +35,7 @@ public class Souhu { ...@@ -34,7 +35,7 @@ public class Souhu {
* @param url * @param url
* @return * @return
*/ */
public static int getSouhuCommentCount(String url,Proxy proxy) { public static int getSouhuCommentCount(String url,ProxyHolder proxy) {
try { try {
String newurl = souhuCommentAnalysis.getSouhuURL(url,proxy); String newurl = souhuCommentAnalysis.getSouhuURL(url,proxy);
if(nonNull(newurl)) { if(nonNull(newurl)) {
...@@ -139,13 +140,13 @@ public class Souhu { ...@@ -139,13 +140,13 @@ public class Souhu {
* @param cookie * @param cookie
* @return * @return
*/ */
public static List<Map<String,Object>> getSouhuCommentData(String url,Proxy proxy) { public static List<Map<String,Object>> getSouhuCommentData(String url,ProxyHolder proxy) {
Map<String,String> headerMap = HeadGet.getSouhuCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getSouhuCommentHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<>(); List<Map<String,Object>> dataList = new ArrayList<>();
int j = 1; int j = 1;
try { try {
while(true) { while(true) {
String newurl = souhuCommentAnalysis.getSouhuURL(url,ProxyFactory.getNatProxy()) + "&page_no=" + j; String newurl = souhuCommentAnalysis.getSouhuURL(url,proxy) + "&page_no=" + j;
String result = HttpClient.executeHttpRequestGet(newurl,ProxyFactory.getNatProxy(),headerMap); String result = HttpClient.executeHttpRequestGet(newurl,ProxyFactory.getNatProxy(),headerMap);
System.out.println(newurl); System.out.println(newurl);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
......
package com.zhiwei.parse; package com.zhiwei.parse;
import static java.util.Objects.nonNull; import static java.util.Objects.nonNull;
import java.io.IOException; import java.io.IOException;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
...@@ -13,6 +13,7 @@ import org.slf4j.LoggerFactory; ...@@ -13,6 +13,7 @@ import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.parse.analysis.TechTxCommentAnalysis; import com.zhiwei.parse.analysis.TechTxCommentAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -23,7 +24,7 @@ public class TechTx { ...@@ -23,7 +24,7 @@ public class TechTx {
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot();
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static List<Map<String,Object>> getTechTxComment(String url,Proxy proxy) { public static List<Map<String,Object>> getTechTxComment(String url,ProxyHolder proxy) {
String commentID = getCommentId(url, proxy); String commentID = getCommentId(url, proxy);
String next = ""; String next = "";
if(nonNull(commentID)) { if(nonNull(commentID)) {
...@@ -53,7 +54,7 @@ public class TechTx { ...@@ -53,7 +54,7 @@ public class TechTx {
return Collections.emptyList(); return Collections.emptyList();
} }
private static String getCommentId(String url,Proxy proxy) { private static String getCommentId(String url,ProxyHolder proxy) {
String commentID = null; String commentID = null;
for(int i = 0;i < 3;i++) { for(int i = 0;i < 3;i++) {
try { try {
......
...@@ -10,6 +10,7 @@ import org.slf4j.Logger; ...@@ -10,6 +10,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.WangyiCommentAnalysis; import com.zhiwei.parse.analysis.WangyiCommentAnalysis;
...@@ -61,7 +62,7 @@ public class Wangyi { ...@@ -61,7 +62,7 @@ public class Wangyi {
* @param id * @param id
* @return * @return
*/ */
public static int getWangyiCommentCount(String id,Proxy proxy) { public static int getWangyiCommentCount(String id,ProxyHolder proxy) {
try { try {
String url = "http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"+id; String url = "http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"+id;
Map<String,String> headerMap = HeadGet.getWangyiCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getWangyiCommentHeaderMap(null);
......
...@@ -5,7 +5,6 @@ import java.io.UnsupportedEncodingException; ...@@ -5,7 +5,6 @@ import java.io.UnsupportedEncodingException;
import java.net.Proxy; import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.net.Proxy; import static java.util.Objects.nonNull;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
...@@ -14,9 +15,9 @@ import com.alibaba.fastjson.JSONArray; ...@@ -14,9 +15,9 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import static java.util.Objects.nonNull;
import okhttp3.Response; import okhttp3.Response;
public class Yiche { public class Yiche {
...@@ -31,7 +32,7 @@ public class Yiche { ...@@ -31,7 +32,7 @@ public class Yiche {
* @param proxy * @param proxy
* @return * @return
*/ */
public static int getYicheCount(String url,Proxy proxy) { public static int getYicheCount(String url,ProxyHolder proxy) {
String nurl = getnewsId(url, proxy); String nurl = getnewsId(url, proxy);
if(nonNull(nurl)) { if(nonNull(nurl)) {
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(nurl), proxy)){ try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(nurl), proxy)){
...@@ -52,7 +53,7 @@ public class Yiche { ...@@ -52,7 +53,7 @@ public class Yiche {
* @param proxy * @param proxy
* @return * @return
*/ */
public static List<Map<String,Object>> getYicheComment(String url,Proxy proxy) { public static List<Map<String,Object>> getYicheComment(String url,ProxyHolder proxy) {
String nUrl = getnewsId(url, proxy); String nUrl = getnewsId(url, proxy);
if(nonNull(nUrl)) { if(nonNull(nUrl)) {
int page = 1; int page = 1;
...@@ -92,7 +93,7 @@ public class Yiche { ...@@ -92,7 +93,7 @@ public class Yiche {
return Collections.emptyList(); return Collections.emptyList();
} }
private static String getnewsId(String url,Proxy proxy) { private static String getnewsId(String url,ProxyHolder proxy) {
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){ try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){
String result = response.body().string(); String result = response.body().string();
String productId = result.split("productId: ")[1].split(",")[0]; String productId = result.split("productId: ")[1].split(",")[0];
......
...@@ -56,8 +56,6 @@ public class Youku { ...@@ -56,8 +56,6 @@ public class Youku {
} catch (Exception e) { } catch (Exception e) {
logger.error(" Exception {} ",e); logger.error(" Exception {} ",e);
} }
} }
return list; return list;
......
...@@ -13,14 +13,8 @@ import org.jsoup.select.Elements; ...@@ -13,14 +13,8 @@ import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import okhttp3.Response;
public class AiqiyiByWordAnalysis { public class AiqiyiByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(AiqiyiByWordAnalysis.class); private static Logger logger = LoggerFactory.getLogger(AiqiyiByWordAnalysis.class);
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Date; import java.util.Date;
...@@ -15,6 +14,7 @@ import com.alibaba.fastjson.JSONArray; ...@@ -15,6 +14,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
...@@ -25,7 +25,7 @@ public class FenghuangCommentAnalysis { ...@@ -25,7 +25,7 @@ public class FenghuangCommentAnalysis {
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot();
public Map<String,Object> getFenghuangCommentCount(String url,Proxy proxy) { public Map<String,Object> getFenghuangCommentCount(String url,ProxyHolder proxy) {
Map<String,Object> map = new HashMap<>(); Map<String,Object> map = new HashMap<>();
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){ try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){
String result = response.body().string(); String result = response.body().string();
...@@ -46,7 +46,7 @@ public class FenghuangCommentAnalysis { ...@@ -46,7 +46,7 @@ public class FenghuangCommentAnalysis {
* @param url * @param url
* @return * @return
*/ */
public String getdocUrl(String url,Proxy proxy) { public String getdocUrl(String url,ProxyHolder proxy) {
String docUrl = null; String docUrl = null;
for(int i = 0;i < 3;i++) { for(int i = 0;i < 3;i++) {
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){ try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){
...@@ -79,7 +79,7 @@ public class FenghuangCommentAnalysis { ...@@ -79,7 +79,7 @@ public class FenghuangCommentAnalysis {
* @param url * @param url
* @return * @return
*/ */
public List<Map<String,Object>> getData(String url,Proxy proxy) { public List<Map<String,Object>> getData(String url,ProxyHolder proxy) {
Map<String,String> headerMap = HeadGet.getFenghuangCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getFenghuangCommentHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<>(); List<Map<String,Object>> dataList = new ArrayList<>();
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy)){ try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy)){
...@@ -106,7 +106,7 @@ public class FenghuangCommentAnalysis { ...@@ -106,7 +106,7 @@ public class FenghuangCommentAnalysis {
* @param proxy * @param proxy
* @return * @return
*/ */
public List<Map<String,Object>> getData2(String url,Proxy proxy) { public List<Map<String,Object>> getData2(String url,ProxyHolder proxy) {
List<Map<String,Object>> dataList = new ArrayList<>(); List<Map<String,Object>> dataList = new ArrayList<>();
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){ try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){
String result = response.body().string(); String result = response.body().string();
......
...@@ -11,17 +11,17 @@ import com.alibaba.fastjson.JSONObject; ...@@ -11,17 +11,17 @@ import com.alibaba.fastjson.JSONObject;
public class MaimaiBywordAnalysis { public class MaimaiBywordAnalysis {
public Map<String,Object> getData(String result,String time) { public Map<String,Object> getData(String result,String time,String key) {
Map<String,Object> map1 = new HashMap<String,Object>(); Map<String,Object> map1 = new HashMap<>();
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("data").getJSONArray("feeds"); JSONArray jsonArry = json.getJSONObject("data").getJSONArray("feeds");
boolean f = true; boolean f = true;
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<>();
f = json.getJSONObject("data").getInteger("more")==1?true:false; f = json.getJSONObject("data").getInteger("more")==1?true:false;
for(int i = 0;i < jsonArry.size();i++) { for(int i = 0;i < jsonArry.size();i++) {
JSONObject data = jsonArry.getJSONObject(i); JSONObject data = jsonArry.getJSONObject(i);
Map<String,Object> map = new HashMap<String,Object>(); Map<String,Object> map = new HashMap<>();
String url = "https://maimai.cn/article/detail?fid="+data.getJSONObject("feed").getString("id"); String url = "https://maimai.cn/article/detail?fid="+data.getJSONObject("feed").getString("id") + "&efid=" + data.getString("efid");
String atime = data.getJSONObject("feed").getString("crtime_string"); String atime = data.getJSONObject("feed").getString("crtime_string");
if(time.compareTo(atime) > -1) { if(time.compareTo(atime) > -1) {
f = false; f = false;
...@@ -34,6 +34,8 @@ public class MaimaiBywordAnalysis { ...@@ -34,6 +34,8 @@ public class MaimaiBywordAnalysis {
map.put("like", data.getJSONObject("feed").getInteger("likes")); map.put("like", data.getJSONObject("feed").getInteger("likes"));
map.put("comment_count", data.getJSONObject("feed").getInteger("total_cnt")); map.put("comment_count", data.getJSONObject("feed").getInteger("total_cnt"));
map.put("spreads", data.getJSONObject("feed").getInteger("spreads")); //传播数 map.put("spreads", data.getJSONObject("feed").getInteger("spreads")); //传播数
map.put("career", data.getJSONObject("contact").getString("career"));
map.put("word", key);
// System.out.println(map.toString()); // System.out.println(map.toString());
dataList.add(map); dataList.add(map);
} }
...@@ -42,7 +44,7 @@ public class MaimaiBywordAnalysis { ...@@ -42,7 +44,7 @@ public class MaimaiBywordAnalysis {
return map1; return map1;
} }
public Map<String,Object> getDataByNoName(String result,String time) { public Map<String,Object> getDataByNoName(String result,String time,String key) {
Map<String,Object> map1 = new HashMap<String,Object>(); Map<String,Object> map1 = new HashMap<String,Object>();
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("data").getJSONArray("gossips"); JSONArray jsonArry = json.getJSONObject("data").getJSONArray("gossips");
...@@ -65,6 +67,7 @@ public class MaimaiBywordAnalysis { ...@@ -65,6 +67,7 @@ public class MaimaiBywordAnalysis {
map.put("like", data.getJSONObject("gossip").getInteger("likes")); map.put("like", data.getJSONObject("gossip").getInteger("likes"));
map.put("comment_count", data.getJSONObject("gossip").getInteger("total_cnt")); map.put("comment_count", data.getJSONObject("gossip").getInteger("total_cnt"));
map.put("spreads", data.getJSONObject("gossip").getInteger("search_order")); //传播数 map.put("spreads", data.getJSONObject("gossip").getInteger("search_order")); //传播数
map.put("word", key);
// System.out.println(map.toString()); // System.out.println(map.toString());
dataList.add(map); dataList.add(map);
} }
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
...@@ -11,6 +10,7 @@ import org.slf4j.LoggerFactory; ...@@ -11,6 +10,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
...@@ -27,7 +27,7 @@ public class SouhuCommentAnalysis { ...@@ -27,7 +27,7 @@ public class SouhuCommentAnalysis {
* @param url * @param url
* @return * @return
*/ */
public String getSouhuURL(String url,Proxy proxy) { public String getSouhuURL(String url,ProxyHolder proxy) {
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){ try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){
String result = response.body().string(); String result = response.body().string();
String source_id = result.split("news_id: \"")[1].split("\",")[0]; String source_id = result.split("news_id: \"")[1].split("\",")[0];
...@@ -39,7 +39,7 @@ public class SouhuCommentAnalysis { ...@@ -39,7 +39,7 @@ public class SouhuCommentAnalysis {
return null; return null;
} }
public int getSouhuCommentCount(String url,Proxy proxy) { public int getSouhuCommentCount(String url,ProxyHolder proxy) {
Map<String,String> headerMap = HeadGet.getSouhuCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getSouhuCommentHeaderMap(null);
int i; int i;
try { try {
......
...@@ -4,7 +4,6 @@ import java.net.Proxy; ...@@ -4,7 +4,6 @@ import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -55,7 +54,7 @@ public class QQTV { ...@@ -55,7 +54,7 @@ public class QQTV {
String nurl = element.select("h2.result_title").select("a").attr("href"); String nurl = element.select("h2.result_title").select("a").attr("href");
Map<String,Object> map = getUrlData(nurl, ProxyFactory.getNatProxy()); Map<String,Object> map = getUrlData(nurl, ProxyFactory.getNatProxy());
if(Objects.nonNull(map) && time.compareTo(String.valueOf(map.get("time"))) < 1) { if(Objects.nonNull(map) && time.compareTo(String.valueOf(map.get("time"))) < 1) {
System.out.println(map.toString()); // System.out.println(map.toString());
dataList.add(map); dataList.add(map);
} }
ZhiWeiTools.sleep(50); ZhiWeiTools.sleep(50);
...@@ -64,6 +63,7 @@ public class QQTV { ...@@ -64,6 +63,7 @@ public class QQTV {
if(count != dataList.size()) { if(count != dataList.size()) {
continue; continue;
} }
break; break;
} catch (Exception e) { } catch (Exception e) {
logger.error(" 数据采集出错 {} ",e); logger.error(" 数据采集出错 {} ",e);
......
...@@ -35,6 +35,7 @@ public class SohuTV { ...@@ -35,6 +35,7 @@ public class SohuTV {
headers.put("cookie", cookie); headers.put("cookie", cookie);
while(true) { while(true) {
int count = dataList.size(); int count = dataList.size();
System.out.println(url+page);
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url+page,headers), proxy)){ try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url+page,headers), proxy)){
String result = response.body().string(); String result = response.body().string();
Document document = Jsoup.parse(result); Document document = Jsoup.parse(result);
......
//package com.zhiwei.Comment; package com.zhiwei.Comment;
//
//import java.util.ArrayList; import java.util.ArrayList;
//import java.util.List; import java.util.List;
//import java.util.Map; import java.util.Map;
//
//import org.testng.annotations.Test; import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType; import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.crawler.proxy.ProxyHolder;
//import com.zhiwei.parse.Maimai; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Yiche; import com.zhiwei.parse.Maimai;
//import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
//
//public class MaimaiCommentCountTest { public class MaimaiCommentCountTest {
// @Test @Test
// public void f() { public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
// GroupType.PROVIDER); GroupType.PROVIDER);
// PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
//
// Map<String, Object> map = poi Map<String, Object> map = poi
// .importExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉#美团 裁员#汇总截至12月20日10点30分.xlsx(1).xlsx", 0); .importExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx", 0);
// List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body"); List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body");
// List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>(); String cookie = "_buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; sessionid=lejfy3gdu5tf9x9zowxfhtq5o73dubc5; guid=GxsfBBgZGwQYGx4EGBkeVgcYGxkdHhMeHhkcVhwZBB0ZHwVDWEtMS3kKEhMEEh0fGQQaBBsdBU9HRVhCaQoDRUFJT20KT0FDRgoGZmd+YmECChwZBB0ZHwVeQ2FIT31PRlpaawoDHhx9ZX0KERkEHAp+ZApZXUVOREN9AgoaBB8FS0ZGQ1BFZw==; seid=s1550814253444; token=\"rhItcea5qkO6WCSnVcczW/NRVLLCTsq3kQbpUCGAwQ0ceLunVJRjT5rgoFVYrIBA8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiVjJuNHdCVDBncVNacTRxVllGM29jRUVwIiwic3RhdHVzIjp0cnVlLCJfZXhwaXJlIjoxNTUwOTAyMTY3MDQ5LCJfbWF4QWdlIjo4NjQwMDAwMH0=; session.sig=zbs4cHtzTcHWvjtkpjAZmoqLXsQ";
// List<String> headList = (List<String>) map.get("head"); List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>();
// for (Map<String, Object> map1 : list) { List<String> headList = (List<String>) map.get("head");
// String url = map1.get("地址") + ""; for (Map<String, Object> map1 : list) {
// Map<String,Object> map3 = Maimai.getMaiaiCount(url, ProxyFactory.getNatProxy()); String url = map1.get("地址") + "";
// map1.putAll(map3); Map<String,Object> map3 = Maimai.getMaiaiCount(url,null, ProxyHolder.NAT_PROXY);
// ZhiWeiTools.sleep(100); System.out.println(map3.toString());
// } System.out.println(url);
// headList.add("like"); map1.putAll(map3);
// headList.add("spreads"); ZhiWeiTools.sleep(500);
// headList.add("cmts"); System.out.println("--------------------------");
// poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉#美团 裁员#汇总截至12月20日10点30分.xlsx(1).xlsx", "评论采集", headList, }
// list); headList.add("like");
// } headList.add("spreads");
//} headList.add("cmts");
poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx", "评论采集", headList,
list);
}
}
package com.zhiwei.crawler; //package com.zhiwei.crawler;
//
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import org.junit.Test; //import org.junit.Test;
//
import com.zhiwei.common.config.GroupType; //import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory; //import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Aiqiyi; //import com.zhiwei.parse.Aiqiyi;
import com.zhiwei.util.WordReadFile; //import com.zhiwei.util.WordReadFile;
//
public class AiqiyiByWordExample { //public class AiqiyiByWordExample {
//
//
@Test // @Test
public void aiqiyiByWordTest() { // public void aiqiyiByWordTest() {
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER); // ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt"); // List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); // List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(String w : wordList) { // for(String w : wordList) {
List<Map<String,Object>> dataList = Aiqiyi.getAiqiyiByWordData(w,null); // List<Map<String,Object>> dataList = Aiqiyi.getAiqiyiByWordData(w,null);
if(dataList != null && dataList.size() >= 1) { // if(dataList != null && dataList.size() >= 1) {
bodyList.addAll(dataList); // bodyList.addAll(dataList);
} // }
} // }
List<String> headList = new ArrayList<String>(); // List<String> headList = new ArrayList<String>();
headList.add("count"); // headList.add("count");
headList.add("time"); // headList.add("time");
headList.add("source"); // headList.add("source");
headList.add("content"); // headList.add("content");
headList.add("url"); // headList.add("url");
headList.add("title"); // headList.add("title");
PoiExcelUtil poi = PoiExcelUtil.getInstance(); // PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D://crawlerdata/爱奇艺关键词采集.xlsx", "数据", headList, bodyList); // poi.exportExcel("D://crawlerdata/爱奇艺关键词采集.xlsx", "数据", headList, bodyList);
//
//
//
} // }
//
//
//
} //}
package com.zhiwei.crawler; package com.zhiwei.crawler;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -11,8 +12,8 @@ public class MaimaiBywordExample { ...@@ -11,8 +12,8 @@ public class MaimaiBywordExample {
public static void main(String[] args) { public static void main(String[] args) {
String word = "美团|某团|MT|大众点评|新美大|美团点评"; String word = "美团|某团|MT|大众点评|新美大|美团点评";
String cookie = "_buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; sessionid=3oatshv55and4kwcz9gdpie7qdpj27yt; guid=GxsfBBgZGwQYGx4EGBkeVgcYGxkdHxwdGRMcVhwZBB0ZHwVDWEtMS3kKEhMEEh0fGQQaBBsdBU9HRVhCaQoDRUFJT20KT0FDRgoGZmd+YmECChwZBB0ZHwVeQ2FIT31PRlpaawoDHhx9ZX0KERkEHAp+ZApZXUVOREN9AgoaBB8FS0ZGQ1BFZw==; seid=s1548984672861; token=\"Ap1u6QzIdn8FCrohEAEPI86n9mNSKk1qJWlauQ8KeSbn7fDKTu6bN2Yv6B9V19nO8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoibVVlSlRTUW1NdVdUTUUtRjV0SjBZbExtIiwibWlkNDU2ODc2MCI6ZmFsc2UsInN0YXR1cyI6dHJ1ZSwiX2V4cGlyZSI6MTU0OTA3MTEzOTA2NywiX21heEFnZSI6ODY0MDAwMDB9; session.sig=UOz44C2rF-uJFxFvSwHyII5aJxM"; String cookie = "_buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; sessionid=lejfy3gdu5tf9x9zowxfhtq5o73dubc5; guid=GxsfBBgZGwQYGx4EGBkeVgcYGxkdHhMeHhkcVhwZBB0ZHwVDWEtMS3kKEhMEEh0fGQQaBBsdBU9HRVhCaQoDRUFJT20KT0FDRgoGZmd+YmECChwZBB0ZHwVeQ2FIT31PRlpaawoDHhx9ZX0KERkEHAp+ZApZXUVOREN9AgoaBB8FS0ZGQ1BFZw==; seid=s1550814253444; token=\"G8eNNNylPoi3oIPLUr/d/RDaMgtnpZCskxT7wu1pRRrkiy3J8G7StHgTx9DQBq4O8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiVjJuNHdCVDBncVNacTRxVllGM29jRUVwIiwic3RhdHVzIjp0cnVlLCJfZXhwaXJlIjoxNTUwOTAwNjY1Njg4LCJfbWF4QWdlIjo4NjQwMDAwMH0=; session.sig=b_tga85tZskxsgKX8YIM_JKByi0";
String time = "2019-01-24 00:00:00"; String time = "2019-02-15 00:00:00";
String[] words = word.split("\\|"); String[] words = word.split("\\|");
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(String w : words) { for(String w : words) {
...@@ -23,16 +24,9 @@ public class MaimaiBywordExample { ...@@ -23,16 +24,9 @@ public class MaimaiBywordExample {
// bodyList.addAll(c); // bodyList.addAll(c);
bodyList.addAll(c2); bodyList.addAll(c2);
} }
List<String> headList = new ArrayList<String>(); List<String> headList = Arrays.asList("time","url","text","name","like","comment_count","spreads","word");
headList.add("time");
headList.add("url");
headList.add("text");
headList.add("name");
headList.add("like");
headList.add("comment_count");
headList.add("spreads");
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D:\\crawlerdata\\自媒体\\脉脉关键词采集-美团-0201.xlsx", "脉脉关键词", headList, bodyList); poi.exportExcel("D:\\crawlerdata\\自媒体\\脉脉关键词采集-美团-0222.xlsx", "脉脉关键词", headList, bodyList);
} }
} }
package com.zhiwei.crawler; package com.zhiwei.crawler;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -8,13 +7,14 @@ import org.junit.Test; ...@@ -8,13 +7,14 @@ import org.junit.Test;
import com.zhiwei.common.config.GroupType; import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Souhu; import com.zhiwei.parse.Souhu;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class SouhuCommentCountExample { public class SouhuCommentCountExample {
@SuppressWarnings("unchecked")
@Test @Test
public void souhuCommentCountTest() { public void souhuCommentCountTest() {
ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
...@@ -28,7 +28,7 @@ public class SouhuCommentCountExample { ...@@ -28,7 +28,7 @@ public class SouhuCommentCountExample {
try { try {
url = map1.get("url")+""; url = map1.get("url")+"";
System.out.println(url); System.out.println(url);
int i = Souhu.getSouhuCommentCount(url,ProxyFactory.getNatProxy()); int i = Souhu.getSouhuCommentCount(url,ProxyHolder.NAT_PROXY);
map1.put("count", i); map1.put("count", i);
System.out.println(map1.toString()); System.out.println(map1.toString());
} catch (Exception e) { } catch (Exception e) {
......
package com.zhiwei.keyword;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.testng.annotations.Test;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Youku;
/**
 * Manual driver that runs a fixed list of search phrases through
 * {@code Youku.getDataList} and exports the collected rows to an Excel file.
 *
 * <p>NOTE(review): the ZooKeeper address and the output path are hard-coded for
 * one developer machine; this presumably is not meant to run unattended — confirm.
 */
public class YoukuKeyWordTest {

    /** Comma-separated search phrases; the string is split on "," before use. */
    private static final String KEYWORDS = "帮宝适 二噁英," +
            "帮宝适 二恶英," +
            "帮宝适 甲醛," +
            "帮宝适 荧光," +
            "帮宝适 致癌," +
            "帮宝适 有毒," +
            "帮宝适 超标," +
            "帮宝适 防腐剂," +
            "帮宝适 起诉," +
            "帮宝适 伤害," +
            "帮宝适 气味," +
            "帮宝适 异味," +
            "帮宝适 起坨," +
            "帮宝适 异物," +
            "帮宝适 漏," +
            "帮宝适 刺鼻," +
            "帮宝适 勒," +
            "帮宝适 脱皮," +
            "帮宝适 划伤," +
            "绿帮 二噁英," +
            "绿帮 二恶英," +
            "绿帮 甲醛," +
            "绿帮 荧光," +
            "绿帮 致癌," +
            "绿帮 有毒," +
            "绿帮 超标," +
            "绿帮 起诉," +
            "绿帮 气味," +
            "绿帮 异味," +
            "绿帮 异物," +
            "绿帮 漏," +
            "绿帮 刺鼻," +
            "绿帮 勒," +
            "绿帮 脱皮";

    /** Header names passed to the exporter, in this order. */
    private static final String[] COLUMNS = {"title", "time", "url", "uper"};

    /** Destination workbook for the export. */
    private static final String OUTPUT_FILE = "C:\\Users\\byte-zbs\\Desktop\\tx\\优酷数据-txh-0121.xlsx";

    /**
     * Initialises the proxy factory, queries every phrase in {@link #KEYWORDS}
     * one after another, and writes all returned rows to {@link #OUTPUT_FILE}.
     */
    @Test
    public void f() {
        ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);

        List<Map<String,Object>> rows = new ArrayList<>();
        for (String keyword : KEYWORDS.split(",")) {
            // Echo the phrase currently being queried.
            System.out.println(keyword);
            rows.addAll(Youku.getDataList(keyword));
        }

        // Kept as a mutable ArrayList, the same list type the exporter received before.
        List<String> header = new ArrayList<>();
        for (String column : COLUMNS) {
            header.add(column);
        }

        PoiExcelUtil.getInstance().exportExcel(OUTPUT_FILE, "数据", header, rows);
    }
}
...@@ -13,7 +13,7 @@ import com.zhiwei.util.WordReadFile; ...@@ -13,7 +13,7 @@ import com.zhiwei.util.WordReadFile;
public class BilibiliTest { public class BilibiliTest {
@Test @Test
public void f() { public void f() {
List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt"); List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词-2.txt");
List<Map<String, Object>> bodyList = new ArrayList<>(); List<Map<String, Object>> bodyList = new ArrayList<>();
String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274"; String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274";
for (String word : wordList) { for (String word : wordList) {
...@@ -32,7 +32,7 @@ public class BilibiliTest { ...@@ -32,7 +32,7 @@ public class BilibiliTest {
headlist.add("title"); headlist.add("title");
headlist.add("url"); headlist.add("url");
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D://crawlerdata//bilibili关键词采集数据-txh-0130.xlsx", "B站数据", headlist, bodyList); poi.exportExcel("D://crawlerdata//bilibili关键词采集数据-txh-0219-农药.xlsx", "B站数据", headlist, bodyList);
} }
} }
package com.zhiwei.shipin; //package com.zhiwei.shipin;
//
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil; //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Douyin; //import com.zhiwei.parse.Douyin;
//
public class DouyinHotExample { //public class DouyinHotExample {
//
public static void main(String[] args) { // public static void main(String[] args) {
//
List<Map<String,Object>> bodyList = Douyin.getDouyinHotData("https://www.iesdouyin.com/share/challenge/1604239741363223?utm_campaign=client_share&app=aweme&utm_medium=ios&tt_from=qq&utm_source=qq&iid=36454376501",null); // List<Map<String,Object>> bodyList = Douyin.getDouyinHotData("https://www.iesdouyin.com/share/challenge/1604239741363223?utm_campaign=client_share&app=aweme&utm_medium=ios&tt_from=qq&utm_source=qq&iid=36454376501",null);
List<String> headList = new ArrayList<String>(); // List<String> headList = new ArrayList<String>();
headList.add("text"); // headList.add("text");
headList.add("url"); // headList.add("url");
headList.add("time"); // headList.add("time");
headList.add("author"); // headList.add("author");
headList.add("comment_count"); // headList.add("comment_count");
headList.add("like_count"); // headList.add("like_count");
headList.add("share_count"); // headList.add("share_count");
PoiExcelUtil poi = PoiExcelUtil.getInstance(); // PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D:\\crawlerdata\\抖音热门采集测试.xlsx", "asd", headList, bodyList); // poi.exportExcel("D:\\crawlerdata\\抖音热门采集测试.xlsx", "asd", headList, bodyList);
} // }
//
} //}
...@@ -11,7 +11,6 @@ import com.zhiwei.crawler.proxy.ProxyFactory; ...@@ -11,7 +11,6 @@ import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.shipin.QQTV; import com.zhiwei.parse.shipin.QQTV;
import com.zhiwei.parse.shipin.SohuTV;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.util.WordReadFile; import com.zhiwei.util.WordReadFile;
...@@ -19,7 +18,7 @@ public class QQTVTest { ...@@ -19,7 +18,7 @@ public class QQTVTest {
@Test @Test
public void f() { public void f() {
ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",GroupType.PROVIDER); ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",GroupType.PROVIDER);
String time = "2018-07-01 00:00:00"; String time = "1970-07-01 00:00:00";
List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt"); List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
List<Map<String, Object>> bodyList = new ArrayList<>(); List<Map<String, Object>> bodyList = new ArrayList<>();
for (String word : wordList) { for (String word : wordList) {
......
package com.zhiwei.shipin; //package com.zhiwei.shipin;
//
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import org.testng.annotations.Test; //import org.testng.annotations.Test;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil; //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.BiliBili; //import com.zhiwei.parse.shipin.SohuTV;
import com.zhiwei.parse.shipin.SohuTV; //import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.tools.tools.ZhiWeiTools; //import com.zhiwei.util.WordReadFile;
import com.zhiwei.util.WordReadFile; //
//public class SohuTVTest {
public class SohuTVTest { // @Test
@Test // public void f() {
public void f() { // List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt"); // List<Map<String, Object>> bodyList = new ArrayList<>();
List<Map<String, Object>> bodyList = new ArrayList<>(); // String cookie = "SUV=1901101134139015; IPLOC=CN3301; gidinf=x099980109ee0f08567b42835000336ade2ef3762611; fuid=15474616189304048886; newpuid=15474616191372936893; beans_mz_userid=UBThg01XRPg8; pmai=dad35c1c318bdd22; ifoxinstalled=false; beans_freq=1; beans_dmp=%7B%22admaster%22%3A1547461620%2C%22shunfei%22%3A1547461620%2C%22reachmax%22%3A1548816807%2C%22lingji%22%3A1547461620%2C%22yoyi%22%3A1547461620%2C%22ipinyou%22%3A1547461620%2C%22ipinyou_admaster%22%3A1547461620%2C%22miaozhen%22%3A1548816807%2C%22diantong%22%3A1547461620%2C%22huayang%22%3A1547461620%7D; beans_dmp_done=1; reqtype=pc; sokey=%5B%7B%22key%22%3A%22%E7%BE%8E%E5%9B%A2%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%20%E4%BA%8C%E5%99%81%E8%8B%B1%22%7D%5D; t=1548817812321";
String cookie = "SUV=1901101134139015; IPLOC=CN3301; gidinf=x099980109ee0f08567b42835000336ade2ef3762611; fuid=15474616189304048886; newpuid=15474616191372936893; beans_mz_userid=UBThg01XRPg8; pmai=dad35c1c318bdd22; ifoxinstalled=false; beans_freq=1; beans_dmp=%7B%22admaster%22%3A1547461620%2C%22shunfei%22%3A1547461620%2C%22reachmax%22%3A1548816807%2C%22lingji%22%3A1547461620%2C%22yoyi%22%3A1547461620%2C%22ipinyou%22%3A1547461620%2C%22ipinyou_admaster%22%3A1547461620%2C%22miaozhen%22%3A1548816807%2C%22diantong%22%3A1547461620%2C%22huayang%22%3A1547461620%7D; beans_dmp_done=1; reqtype=pc; sokey=%5B%7B%22key%22%3A%22%E7%BE%8E%E5%9B%A2%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%20%E4%BA%8C%E5%99%81%E8%8B%B1%22%7D%5D; t=1548817812321"; // for (String word : wordList) {
for (String word : wordList) { // List<Map<String, Object>> dataList = SohuTV.sohuTVData(word, cookie, null);
List<Map<String, Object>> dataList = SohuTV.sohuTVData(word, cookie, null); // if (dataList != null) {
if (dataList != null) { // System.out.println(word + " ----- " + dataList.size());
System.out.println(word + " ----- " + dataList.size()); // bodyList.addAll(dataList);
bodyList.addAll(dataList); // }
} // ZhiWeiTools.sleep(1000);
ZhiWeiTools.sleep(1000); // }
} // List<String> headlist = new ArrayList<>();
List<String> headlist = new ArrayList<>(); // headlist.add("playCount");
headlist.add("playCount"); // headlist.add("time");
headlist.add("time"); // headlist.add("source");
headlist.add("source"); // headlist.add("title");
headlist.add("title"); // headlist.add("url");
headlist.add("url"); // PoiExcelUtil poi = PoiExcelUtil.getInstance();
PoiExcelUtil poi = PoiExcelUtil.getInstance(); // poi.exportExcel("D://crawlerdata//搜狐视频关键词采集数据-txh-0219.xlsx", "B站数据", headlist, bodyList);
poi.exportExcel("D://crawlerdata//搜狐视频关键词采集数据-txh-0130.xlsx", "B站数据", headlist, bodyList); //
// }
} //}
}
//package com.zhiwei.shipin;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Youku;
//import com.zhiwei.util.WordReadFile;
//
//public class YoukuKeyWordTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
// GroupType.PROVIDER);
// List<String> words = WordReadFile.getWords("D://crawlerdata//关键词.txt");
// List<Map<String,Object>> bodyList = new ArrayList<>();
// for(String w : words) {
// System.out.println(w);
// bodyList.addAll(Youku.getDataList(w));
// }
// List<String> headList = new ArrayList<>();
// headList.add("title");
// headList.add("time");
// headList.add("url");
// headList.add("uper");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("D://crawlerdata//优酷数据-txh-0219.xlsx", "数据", headList, bodyList);
//
// }
//}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment