Commit d979d793 by yangchen

脉脉 评论采集 和部分视频采集

parent 1116d3c5
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>articlenewscrawler</artifactId> <artifactId>articlenewscrawler</artifactId>
<version>0.0.4-SNAPSHOT</version> <version>0.0.8-SNAPSHOT</version>
<name>articlenewscrawler</name> <name>articlenewscrawler</name>
<description>采集凤凰,一点资讯,搜狐历时文章和文章评论</description> <description>采集凤凰,一点资讯,搜狐历时文章和文章评论</description>
......
package com.zhiwei.httpclient; package com.zhiwei.httpclient;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.SocketAddress;
import java.net.URLEncoder;
import java.net.Proxy.Type;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import com.zhiwei.tools.httpclient.HeaderTool;
public class HeadGet { public class HeadGet {
/** /**
...@@ -409,12 +401,10 @@ public class HeadGet { ...@@ -409,12 +401,10 @@ public class HeadGet {
* @return * @return
*/ */
public static Map<String,String> getPearVideoByWordHeaderMap(String cookie) { public static Map<String,String> getPearVideoByWordHeaderMap(String cookie) {
Map<String,String> headerMap = new HashMap<String, String>(); Map<String,String> headerMap = new HashMap<>();
headerMap.put("Host", "www.pearvideo.com"); headerMap.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
headerMap.put("Connection", "keep-alive"); headerMap.put("Accept", "text/html, */*; q=0.01");
headerMap.put("Accept-Language", "zh-CN,zh;q=0.9"); headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36");
headerMap.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");
if(cookie != null) { if(cookie != null) {
headerMap.put("Cookie", cookie); headerMap.put("Cookie", cookie);
} }
...@@ -492,8 +482,8 @@ public class HeadGet { ...@@ -492,8 +482,8 @@ public class HeadGet {
*/ */
public static Map<String,String> getQQKBCommentHeaderMap(String cookie) { public static Map<String,String> getQQKBCommentHeaderMap(String cookie) {
Map<String,String> headerMap = new HashMap<String, String>(); Map<String,String> headerMap = new HashMap<String, String>();
headerMap.put("User-Agent", // headerMap.put("User-Agent",
"天天快报 4.6.2 qnreading (iPhone8,1; iOS 11.2.1; zh_CN; 4.6.2.89)"); // "天天快报 4.6.2 qnreading (iPhone8,1; iOS 11.2.1; zh_CN; 4.6.2.89)");
headerMap.put("Accept", headerMap.put("Accept",
"*/*"); "*/*");
headerMap.put("Accept-Language", "zh-Hans-CN;q=1"); headerMap.put("Accept-Language", "zh-Hans-CN;q=1");
...@@ -514,7 +504,7 @@ public class HeadGet { ...@@ -514,7 +504,7 @@ public class HeadGet {
* @return * @return
*/ */
public static Map<String,Object> getQQKBCommentParamMap(String comment_id,String article_id){ public static Map<String,Object> getQQKBCommentParamMap(String comment_id,String article_id){
Map<String,Object> param = new HashMap<String,Object>(); Map<String,Object> param = new HashMap<>();
param.put("chlid", "daily_timeline"); param.put("chlid", "daily_timeline");
param.put("comment_id", comment_id); param.put("comment_id", comment_id);
param.put("page", 1); param.put("page", 1);
...@@ -944,15 +934,5 @@ public class HeadGet { ...@@ -944,15 +934,5 @@ public class HeadGet {
return paramMap; return paramMap;
} }
public static void main(String[] args) throws UnsupportedEncodingException {
String url = "http://180.186.38.200/rest/n/feed/profile2";
System.out.println(url);
String cookie = "";
Map<String,String> headerMap = HeaderTool.getCommonHead();
Map<String,Object> paramMap = HeadGet.getKuaishouParamMap();
String result = HttpClient.executeHttpRequestPost(url, null, headerMap, paramMap);
System.out.println(result);
System.out.println(result.length());
}
} }
...@@ -7,11 +7,16 @@ import java.util.Map; ...@@ -7,11 +7,16 @@ import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import okhttp3.Response;
public class HttpClient { public class HttpClient {
private static Logger logger = LoggerFactory.getLogger(HttpClient.class); private static Logger logger = LoggerFactory.getLogger(HttpClient.class);
private static HttpBoot httpBoot = new HttpBoot();
/** /**
* *
* @Description (TODO这里用一句话描述这个方法的作用) * @Description (TODO这里用一句话描述这个方法的作用)
...@@ -21,22 +26,20 @@ public class HttpClient { ...@@ -21,22 +26,20 @@ public class HttpClient {
* @throws IOException * @throws IOException
*/ */
public static String executeHttpRequestGet(String url,Proxy proxy,Map<String, String> headerMap) { public static String executeHttpRequestGet(String url,Proxy proxy,Map<String, String> headerMap) {
try { try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy)){
String result = HttpClientTemplateOK.get(url, proxy, headerMap); return response.body().string();
return result;
} catch (Exception e) { } catch (Exception e) {
logger.error("httpClient 获取数据出现问题:{}", e.getMessage()); logger.error("httpClient 获取数据出现问题:{}", e);
return null; return null;
} }
} }
public static String executeHttpRequestPost(String url,Proxy proxy,Map<String, String> headerMap,Map<String, Object> paramMap) { public static String executeHttpRequestPost(String url,Proxy proxy,Map<String, String> headerMap,Map<String, Object> paramMap) {
try { try (Response response = httpBoot.syncCall(RequestUtils.wrapPost(url, headerMap, paramMap), proxy)){
String result = HttpClientTemplateOK.post(url, proxy, headerMap, paramMap); return response.body().string();
return result;
} catch (Exception e) { } catch (Exception e) {
logger.error("httpClient 获取数据出现问题:{}", e.getMessage()); logger.error("httpClient 获取数据出现问题:{}", e);
return null; return null;
} }
......
...@@ -28,7 +28,7 @@ public class Aiqiyi { ...@@ -28,7 +28,7 @@ public class Aiqiyi {
public static List<Map<String,Object>> getAiqiyiByWordData(String word,Proxy proxy) { public static List<Map<String,Object>> getAiqiyiByWordData(String word,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getAiqiyiBywordHeaderMap(null); Map<String,String> headerMap = HeadGet.getAiqiyiBywordHeaderMap(null);
Map<String,String> headerMap1 = HeadGet.getAiqiyiHeaderMap(null); Map<String,String> headerMap1 = HeadGet.getAiqiyiHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<>();
try { try {
for(int i = 1;i <= 20;i++) { for(int i = 1;i <= 20;i++) {
String url = "http://so.iqiyi.com/so/q_"+URLEncoder.encode(word, "UTF-8")+"_ctg_%E7%94%9F%E6%B4%BB_t_0_page_"+i+"_p_1_qc_0_rd__site__m_11_bitrate_?af=true"; String url = "http://so.iqiyi.com/so/q_"+URLEncoder.encode(word, "UTF-8")+"_ctg_%E7%94%9F%E6%B4%BB_t_0_page_"+i+"_p_1_qc_0_rd__site__m_11_bitrate_?af=true";
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.Proxy; import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -23,7 +23,7 @@ import okhttp3.Request; ...@@ -23,7 +23,7 @@ import okhttp3.Request;
public class BiliBili { public class BiliBili {
private static Logger logger = LoggerFactory.getLogger(BiliBili.class); private static final Logger logger = LoggerFactory.getLogger(BiliBili.class);
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot();
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
...@@ -46,6 +46,7 @@ public class BiliBili { ...@@ -46,6 +46,7 @@ public class BiliBili {
while(more) { while(more) {
map.clear(); map.clear();
String ur = url + "&page=" + n; String ur = url + "&page=" + n;
System.out.println(ur);
request = HttpRequestBuilder.newGetRequest(ur, header); request = HttpRequestBuilder.newGetRequest(ur, header);
String result2 = httpBoot.syncCall(request, proxy).body().string(); String result2 = httpBoot.syncCall(request, proxy).body().string();
map = BilibilikeyWordAnalysis.getData(result2); map = BilibilikeyWordAnalysis.getData(result2);
...@@ -60,13 +61,13 @@ public class BiliBili { ...@@ -60,13 +61,13 @@ public class BiliBili {
} }
return bodyList; return bodyList;
} catch (UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
e.printStackTrace(); logger.error("e ",e);
} catch (IOException e) { } catch (Exception e) {
e.printStackTrace(); logger.error("e ",e);
} }
return null; return Collections.emptyList();
} }
public static void main(String[] args) { public static void main(String[] args) {
...@@ -88,7 +89,7 @@ public class BiliBili { ...@@ -88,7 +89,7 @@ public class BiliBili {
headlist.add("title"); headlist.add("title");
headlist.add("url"); headlist.add("url");
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D://crawlerdata//bilibili关键词采集数据-竹鼠.xlsx", "B站数据", headlist, bodyList); poi.exportExcel("D://crawlerdata//bilibili关键词采集数据-txh.xlsx", "B站数据", headlist, bodyList);
} }
......
package com.zhiwei.parse;
import static java.util.Objects.nonNull;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
/**
 * Collects comment data for Autohome "Chejiahao" articles.
 *
 * <p>The article page is fetched first to extract the object id embedded in its
 * {@code pvTrack.object = ...;} script fragment; that id is then used to query the
 * {@code reply.autohome.com.cn} comment API page by page.
 */
public class Chejia {

    private static final Logger logger = LoggerFactory.getLogger(Chejia.class);

    private static final HttpBoot httpBoot = new HttpBoot();

    /** Page size sent to the comment API; also used to decide when the last page has been read. */
    private static final int PAGE_SIZE = 50;

    /**
     * Returns the total number of comments reported by the comment API for an article.
     *
     * @param url   article page URL
     * @param proxy proxy used for both the page request and the API request
     * @return the {@code commentcount} value of the API response, or {@code -1} when the
     *         comment URL cannot be resolved, the request fails, or the field is absent
     */
    public static int getChejiaCommentCount(String url, Proxy proxy) {
        String commentUrl = getCommentUrl(url, proxy);
        if (nonNull(commentUrl)) {
            logger.debug("chejia comment url {}", commentUrl);
            try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(commentUrl), proxy)) {
                JSONObject json = JSONObject.parseObject(response.body().string());
                Integer count = json.getInteger("commentcount");
                if (nonNull(count)) {
                    return count;
                }
                logger.error("commentcount missing in response of {}", commentUrl);
            } catch (Exception e) {
                // The URL fills the placeholder; SLF4J logs the trailing exception with its stack trace.
                logger.error("error {} ", commentUrl, e);
            }
        }
        return -1;
    }

    /**
     * Collects the comments of an article, walking the comment API page by page.
     *
     * <p>Each returned map carries the keys {@code source}, {@code time}, {@code content},
     * {@code like} and {@code id}. Paging stops when the pages read so far cover the reported
     * total, when a page contains no comments, or when a request/parse error occurs; in the
     * error case the comments gathered up to that point are still returned.
     *
     * @param url   article page URL
     * @param proxy proxy used for all requests
     * @return the collected comments; an empty list when the comment URL cannot be resolved
     */
    public static List<Map<String, Object>> getChejiaComment(String url, Proxy proxy) {
        String commentUrl = getCommentUrl(url, proxy);
        if (!nonNull(commentUrl)) {
            return Collections.emptyList();
        }
        List<Map<String, Object>> bodyList = new ArrayList<>();
        int page = 1;
        boolean hasMore = true;
        while (hasMore) {
            String pageUrl = commentUrl + "&page=" + page;
            try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl), proxy)) {
                JSONObject json = JSONObject.parseObject(response.body().string());
                JSONArray comments = json.getJSONArray("commentlist");
                if (comments == null || comments.isEmpty()) {
                    // Nothing on this page: requesting further pages cannot yield more data.
                    hasMore = false;
                } else {
                    for (int i = 0; i < comments.size(); i++) {
                        bodyList.add(toCommentMap(comments.getJSONObject(i)));
                    }
                    // getIntValue yields 0 for a missing field, which ends the loop below.
                    int total = json.getIntValue("commentcount");
                    logger.info(" 一共采集 了 {} 条 采集到 {} 页 一共有 {} 条", bodyList.size(), page, total);
                    // ">=" so that a total that is an exact multiple of the page size
                    // does not trigger one more (empty) request.
                    if (page * PAGE_SIZE >= total) {
                        hasMore = false;
                    }
                }
            } catch (Exception e) {
                logger.error("error {} ", pageUrl, e);
                hasMore = false;
            }
            if (hasMore) {
                // Throttle only between page requests, not after the final one.
                ZhiWeiTools.sleep(2000);
                page++;
            }
        }
        return bodyList;
    }

    /**
     * Converts one element of the API's {@code commentlist} array into the output row.
     *
     * @param data a single comment object
     * @return map with the keys {@code source}, {@code time}, {@code content}, {@code like}, {@code id}
     */
    private static Map<String, Object> toCommentMap(JSONObject data) {
        Map<String, Object> map = new HashMap<>();
        map.put("source", data.getString("RMemberName"));
        // Judging by this parsing, RReplyDate looks like "/Date(<epochMillis>+<offset>)/";
        // only the millisecond part before the '+' is used.
        String millis = data.getString("RReplyDate").split("/Date\\(")[1].split("\\+")[0];
        map.put("time", TimeParse.dateFormartString(new Date(Long.parseLong(millis)), "yyyy-MM-dd HH:mm:ss"));
        map.put("content", data.getString("RContent"));
        map.put("like", data.get("RUp"));
        map.put("id", data.getString("ReplyId"));
        return map;
    }

    /**
     * Fetches the article page and builds the comment API URL from the object id found in
     * its {@code pvTrack.object = ...;} fragment.
     *
     * @param url   article page URL
     * @param proxy proxy used for the request
     * @return the comment API URL (without the {@code page} parameter), or {@code null}
     *         when the page cannot be fetched or the object id is not present
     */
    private static String getCommentUrl(String url, Proxy proxy) {
        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
            String[] parts = response.body().string().split("pvTrack.object = ");
            if (parts.length < 2) {
                logger.error("pvTrack.object not found in page {}", url);
                return null;
            }
            String objectID = parts[1].split(";")[0].replace("\"", "").trim();
            if (objectID.isEmpty()) {
                logger.error("empty pvTrack.object in page {}", url);
                return null;
            }
            return "https://reply.autohome.com.cn/api/comments/show.json?appid=21&count=" + PAGE_SIZE + "&id=" + objectID;
        } catch (Exception e) {
            logger.error("error {} ", url, e);
        }
        return null;
    }
}
...@@ -25,6 +25,7 @@ public class Douyin { ...@@ -25,6 +25,7 @@ public class Douyin {
* @param url * @param url
* @return * @return
*/ */
@SuppressWarnings("unchecked")
public static List<Map<String,Object>> getDouyinHotData(String url,Proxy proxy) { public static List<Map<String,Object>> getDouyinHotData(String url,Proxy proxy) {
String iid = url.split("iid=")[1].split("&")[0]; String iid = url.split("iid=")[1].split("&")[0];
String ch_id = url.split("challenge/")[1].split("\\?")[0]; String ch_id = url.split("challenge/")[1].split("\\?")[0];
......
...@@ -3,7 +3,6 @@ package com.zhiwei.parse; ...@@ -3,7 +3,6 @@ package com.zhiwei.parse;
import java.net.Proxy; import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
......
package com.zhiwei.parse; package com.zhiwei.parse;
import static com.alibaba.fastjson.JSON.toJavaObject;
import java.net.Proxy; import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.MaimaiBywordAnalysis; import com.zhiwei.parse.analysis.MaimaiBywordAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
public class Maimai { public class Maimai {
private static Logger logger = LoggerFactory.getLogger(Maimai.class); private static Logger logger = LoggerFactory.getLogger(Maimai.class);
private static HttpBoot httpBoot = new HttpBoot();
private static MaimaiBywordAnalysis maimaiBywordAnalysis = new MaimaiBywordAnalysis(); private static MaimaiBywordAnalysis maimaiBywordAnalysis = new MaimaiBywordAnalysis();
/**
*
* @Description 实名动态
* @param key
* @param cookie
* @param time
* @param proxy
* @return
*/
@SuppressWarnings("unchecked")
public static List<Map<String,Object>> getData(String key,String cookie,String time,Proxy proxy) { public static List<Map<String,Object>> getData(String key,String cookie,String time,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getMaimaiKeywordHeaderMap(cookie); Map<String,String> headerMap = HeadGet.getMaimaiKeywordHeaderMap(cookie);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<>();
boolean f = true; boolean f = true;
try { try {
String url = "https://maimai.cn/search/feeds?query="+URLEncoder.encode(key, "utf-8")+"&limit=20&offset=0&highlight=true&sortby=time&jsononly=1"; String url = "https://maimai.cn/search/feeds?query="+URLEncoder.encode(key, "utf-8")+"&limit=20&offset=0&highlight=true&sortby=time&jsononly=1";
...@@ -32,11 +54,11 @@ public class Maimai { ...@@ -32,11 +54,11 @@ public class Maimai {
Map<String,Object> map = maimaiBywordAnalysis.getData(result, time); Map<String,Object> map = maimaiBywordAnalysis.getData(result, time);
f = (boolean) map.get("hasMore"); f = (boolean) map.get("hasMore");
List<Map<String,Object>> daList = (List<Map<String, Object>>) map.get("data"); List<Map<String,Object>> daList = (List<Map<String, Object>>) map.get("data");
if(daList != null && daList.size() > 0) { if(daList != null && !daList.isEmpty()) {
dataList.addAll(daList); dataList.addAll(daList);
url = "https://maimai.cn/search/feeds?query="+URLEncoder.encode(key, "utf-8")+"&limit=20&offset="+i+"&highlight=true&sortby=time&jsononly=1"; url = "https://maimai.cn/search/feeds?query="+URLEncoder.encode(key, "utf-8")+"&limit=20&offset="+i+"&highlight=true&sortby=time&jsononly=1";
i+=20; i+=20;
logger.info("{}==采集到的数据量=="+dataList.size(),key); logger.info("{} ==采集到的数据量== {}",dataList.size(),key);
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
}else { }else {
break; break;
...@@ -48,9 +70,19 @@ public class Maimai { ...@@ -48,9 +70,19 @@ public class Maimai {
return dataList; return dataList;
} }
/**
*
* @Description 职言交流
* @param key
* @param cookie
* @param time
* @param proxy
* @return
*/
@SuppressWarnings("unchecked")
public static List<Map<String,Object>> getDataByNoName(String key,String cookie,String time,Proxy proxy) { public static List<Map<String,Object>> getDataByNoName(String key,String cookie,String time,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getMaimaiKeywordHeaderMap(cookie); Map<String,String> headerMap = HeadGet.getMaimaiKeywordHeaderMap(cookie);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<>();
boolean f = true; boolean f = true;
try { try {
String url = "https://maimai.cn/search/gossips?query="+URLEncoder.encode(key, "utf-8")+"&limit=20&offset=0&highlight=true&sortby=time&jsononly=1"; String url = "https://maimai.cn/search/gossips?query="+URLEncoder.encode(key, "utf-8")+"&limit=20&offset=0&highlight=true&sortby=time&jsononly=1";
...@@ -64,7 +96,7 @@ public class Maimai { ...@@ -64,7 +96,7 @@ public class Maimai {
dataList.addAll(daList); dataList.addAll(daList);
url = "https://maimai.cn/search/gossips?query="+URLEncoder.encode(key, "utf-8")+"&limit=20&offset="+i+"highlight=true&sortby=time&jsononly=1"; url = "https://maimai.cn/search/gossips?query="+URLEncoder.encode(key, "utf-8")+"&limit=20&offset="+i+"highlight=true&sortby=time&jsononly=1";
i+=20; i+=20;
logger.info("{}==采集到的数据量=="+dataList.size(),key); logger.info("{} ==采集到的数据量== {} ",dataList.size(),key);
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
}else { }else {
break; break;
...@@ -76,4 +108,80 @@ public class Maimai { ...@@ -76,4 +108,80 @@ public class Maimai {
return dataList; return dataList;
} }
/**
 * Fetches the interaction counters and basic metadata of a Maimai gossip post.
 *
 * <p>Example page URL: {@code https://maimai.cn/web/gossip_detail?encode_id=<token>}.
 * The page embeds its data as a {@code JSON.parse("...")} script argument; that string is
 * cut out, unicode-unescaped and parsed, and the {@code data.gossip} object is read.
 *
 * @param url   gossip detail page URL
 * @param proxy proxy holder used for the request
 * @return a map with the keys {@code like}, {@code spreads}, {@code cmts}, {@code gid},
 *         {@code title} and {@code author}; an empty map when the request or parsing fails
 */
public static Map<String,Object> getMaiaiCount(String url,ProxyHolder proxy) {
    try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
        String result = response.body().string();
        // Cut the embedded JSON string out of the inline script.
        result = result.split("JSON.parse\\(\"")[1].split("\"\\);\\</script\\>")[0];
        result = ZhiWeiTools.decodeUnicode(result);
        JSONObject json = JSONObject.parseObject(result);
        JSONObject data = json.getJSONObject("data").getJSONObject("gossip");
        Map<String,Object> map = new HashMap<>();
        map.put("like", data.getInteger("likes"));
        map.put("spreads", data.getInteger("spreads"));
        map.put("cmts", data.getInteger("cmts"));
        map.put("gid", data.getLong("id"));
        map.put("title", data.getString("text"));
        map.put("author", data.getString("author"));
        return map;
    } catch (Exception e) {
        // The URL fills the placeholder; SLF4J logs the trailing exception with its stack trace.
        logger.error(" 脉脉 转评攒 获取失败 {}", url, e);
    }
    return Collections.emptyMap();
}
/**
 * Collects the comments of a Maimai gossip post.
 *
 * <p>Example page URL: {@code https://maimai.cn/web/gossip_detail?encode_id=<token>}.
 * The post metadata is resolved through {@link #getMaiaiCount(String, ProxyHolder)}; its
 * {@code gid} is then used to page through the {@code getcmts} endpoint. Every comment is
 * returned as a map of its JSON fields, extended with {@code fromUrl} and all metadata
 * entries of the post.
 *
 * <p>Paging stops when the endpoint reports {@code more == 0}, when a page has no
 * comments or an empty body, or when a request/parse error occurs; in the error case the
 * comments gathered so far are still returned.
 *
 * @param url   gossip detail page URL
 * @param proxy proxy holder used for all requests
 * @return the collected comments; an empty list when the post metadata cannot be resolved
 */
@SuppressWarnings("unchecked")
public static List<Map<String,Object>> getMaimaiCommentList(String url,ProxyHolder proxy) {
    Map<String,Object> mmid = getMaiaiCount(url, proxy);
    // getMaiaiCount signals failure with an empty map, so a null check alone is not enough:
    // without a gid the comment endpoint would be queried with "gid=null".
    if (mmid == null || mmid.get("gid") == null) {
        return Collections.emptyList();
    }
    List<Map<String,Object>> dataList = new ArrayList<>();
    String gid = String.valueOf(mmid.get("gid"));
    int page = 0;
    boolean more = true;
    while (more) {
        String link = "https://maimai.cn/sdk/web/gossip/getcmts?gid="+gid+"&page="+page+"&count=50&hotcmts_limit_count=100";
        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(link), proxy)) {
            String htmlBody = response.body().string();
            if (htmlBody == null || htmlBody.isEmpty()) {
                // An empty body would otherwise repeat the same request forever.
                break;
            }
            JSONObject dataJson = JSONObject.parseObject(htmlBody);
            JSONArray commentJson = dataJson.getJSONArray("comments");
            if (commentJson != null && !commentJson.isEmpty()) {
                for (int i = 0; i < commentJson.size(); i++) {
                    Map<String,Object> dataMap = toJavaObject(commentJson.getJSONObject(i), Map.class);
                    dataMap.put("fromUrl", url);
                    dataMap.putAll(mmid);
                    dataList.add(dataMap);
                }
                page++;
            } else {
                more = false;
            }
            if (dataJson.getIntValue("more") == 0) {
                more = false;
            }
        } catch (Exception e) {
            logger.error("数据采集出错 {}", link, e);
            // Stop instead of retrying the same page in a tight, endless loop.
            more = false;
        }
    }
    return dataList;
}
} }
package com.zhiwei.parse; package com.zhiwei.parse;
import java.io.UnsupportedEncodingException;
import java.net.Proxy; import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.PearVideoByWordAnalysis; import com.zhiwei.parse.analysis.PearVideoByWordAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -26,24 +26,31 @@ public class PearVideo { ...@@ -26,24 +26,31 @@ public class PearVideo {
* @return * @return
*/ */
public static List<Map<String,Object>> getPearVideoData(String word,Proxy proxy) { public static List<Map<String,Object>> getPearVideoData(String word,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getPearVideoByWordHeaderMap(null); Map<String,String> headerMap = new HashMap<>();
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); headerMap.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
headerMap.put("Accept", "text/html, */*; q=0.01");
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36");
headerMap.put(":authority", "www.pearvideo.com");
List<Map<String,Object>> dataList = new ArrayList<>();
try { try {
headerMap.put("referer", "https://www.pearvideo.com/search.jsp?start=0&k="+URLEncoder.encode(word, "UTF-8"));
for(int i = 0; i <= 9000;i+=10) { for(int i = 0; i <= 9000;i+=10) {
String url = "http://www.pearvideo.com/search_loading.jsp?start="+i+"&k="+URLEncoder.encode(word, "UTF-8"); String url = "https://www.pearvideo.com/search_loading.jsp?start="+i+"&k="+URLEncoder.encode(word, "UTF-8") + "&sort=first_publish_time";
String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
List<Map<String,Object>> dataList1 = pearVideoByWordAnalysis.getPearVideoData(result); List<Map<String,Object>> dataList1 = pearVideoByWordAnalysis.getPearVideoData(result);
if(dataList1 != null && dataList1.size() > 0) { if(dataList1 != null && !dataList1.isEmpty()) {
dataList.addAll(dataList1); dataList.addAll(dataList1);
} }
System.out.println(i+"=========="+dataList.size()); System.out.println(i+"=========="+dataList.size());
ZhiWeiTools.sleep(4000); ZhiWeiTools.sleep(4000);
} }
return dataList; return dataList;
} catch (UnsupportedEncodingException e) { } catch (Exception e) {
logger.error("获取数据出错",e.getMessage()); logger.error("获取数据出错 {}",e);
e.printStackTrace(); return Collections.emptyList();
return null;
} }
} }
......
...@@ -11,16 +11,21 @@ import org.slf4j.LoggerFactory; ...@@ -11,16 +11,21 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.bean.QQkbUser; import com.zhiwei.bean.QQkbUser;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.QQKBAccountAnalysis; import com.zhiwei.parse.analysis.QQKBAccountAnalysis;
import com.zhiwei.parse.analysis.QQKBCommentAnalysis; import com.zhiwei.parse.analysis.QQKBCommentAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
public class QQKB { public class QQKB {
private static Logger logger = LoggerFactory.getLogger(QQKB.class); private static Logger logger = LoggerFactory.getLogger(QQKB.class);
private static QQKBAccountAnalysis qqAccountAnalysis = new QQKBAccountAnalysis(); private static QQKBAccountAnalysis qqAccountAnalysis = new QQKBAccountAnalysis();
private static QQKBCommentAnalysis qqkbCommentAnalysis = new QQKBCommentAnalysis(); private static QQKBCommentAnalysis qqkbCommentAnalysis = new QQKBCommentAnalysis();
private static HttpBoot httpBoot = new HttpBoot();
/** /**
* *
...@@ -113,18 +118,22 @@ public class QQKB { ...@@ -113,18 +118,22 @@ public class QQKB {
Map<String,Object> paramMap = HeadGet.getQQKBCommentParamMap(comment_id, article_id); Map<String,Object> paramMap = HeadGet.getQQKBCommentParamMap(comment_id, article_id);
int i = 1; int i = 1;
while(true) { while(true) {
try {
String result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/getQQNewsComment",proxy, headerMap, paramMap);
paramMap.clear(); String result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/getQQNewsComment",ProxyFactory.getNatProxy(), headerMap, paramMap);
List<Map<String,Object>> lists = qqkbCommentAnalysis.getCommentData(result,null,comment_id, article_id,proxy); paramMap.clear();
if(lists == null || lists.size() < 1) { List<Map<String,Object>> lists = qqkbCommentAnalysis.getCommentData(result,null,comment_id, article_id,proxy);
break; if(lists == null || lists.size() < 1) {
break;
}
dataList.addAll(lists);
paramMap = qqkbCommentAnalysis.getParamMap(result,i,comment_id,article_id);
i++;
ZhiWeiTools.sleep(300);
} catch (Exception e) {
e.printStackTrace();
} }
dataList.addAll(lists);
paramMap = qqkbCommentAnalysis.getParamMap(result,i,comment_id,article_id);
i++;
ZhiWeiTools.sleep(5000);
} }
return dataList; return dataList;
} catch (Exception e) { } catch (Exception e) {
......
...@@ -13,8 +13,6 @@ import java.util.regex.Pattern; ...@@ -13,8 +13,6 @@ import java.util.regex.Pattern;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
...@@ -31,7 +29,6 @@ import okhttp3.Request; ...@@ -31,7 +29,6 @@ import okhttp3.Request;
public class QQKandian { public class QQKandian {
private static Logger logger = LoggerFactory.getLogger(QQKandian.class);
public List<QQKandianUser> getUser(String name,Proxy proxy) { public List<QQKandianUser> getUser(String name,Proxy proxy) {
if(name != null && name.length() > 0) { if(name != null && name.length() > 0) {
......
...@@ -3,7 +3,6 @@ package com.zhiwei.parse; ...@@ -3,7 +3,6 @@ package com.zhiwei.parse;
import java.io.IOException; import java.io.IOException;
import java.net.Proxy; import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
......
...@@ -5,7 +5,6 @@ import java.io.UnsupportedEncodingException; ...@@ -5,7 +5,6 @@ import java.io.UnsupportedEncodingException;
import java.net.Proxy; import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
......
...@@ -68,6 +68,7 @@ public class SouBao { ...@@ -68,6 +68,7 @@ public class SouBao {
poi.exportExcel("D:\\crawlerdata\\搜报网-EA 品牌 关键词-06.11-06.12.xlsx", "sa", headList, bodyList); poi.exportExcel("D:\\crawlerdata\\搜报网-EA 品牌 关键词-06.11-06.12.xlsx", "sa", headList, bodyList);
} }
@SuppressWarnings("unchecked")
public static Map<String,String> getdata() { public static Map<String,String> getdata() {
Map<String,String> map = new HashMap<String,String>(); Map<String,String> map = new HashMap<String,String>();
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
......
...@@ -13,6 +13,7 @@ import org.slf4j.LoggerFactory; ...@@ -13,6 +13,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.SouhuAccountAnalysis; import com.zhiwei.parse.analysis.SouhuAccountAnalysis;
...@@ -144,8 +145,8 @@ public class Souhu { ...@@ -144,8 +145,8 @@ public class Souhu {
int j = 1; int j = 1;
try { try {
while(true) { while(true) {
String newurl = souhuCommentAnalysis.getSouhuURL(url,proxy) + "&page_no=" + j; String newurl = souhuCommentAnalysis.getSouhuURL(url,ProxyFactory.getNatProxy()) + "&page_no=" + j;
String result = HttpClient.executeHttpRequestGet(newurl,proxy,headerMap); String result = HttpClient.executeHttpRequestGet(newurl,ProxyFactory.getNatProxy(),headerMap);
System.out.println(newurl); System.out.println(newurl);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("jsonObject").getJSONArray("comments"); JSONArray jsonArry = json.getJSONObject("jsonObject").getJSONArray("comments");
...@@ -158,7 +159,7 @@ public class Souhu { ...@@ -158,7 +159,7 @@ public class Souhu {
dataList.add(map); dataList.add(map);
} }
j++; j++;
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(300);
} }
} catch (Exception e) { } catch (Exception e) {
......
...@@ -7,9 +7,6 @@ import java.util.ArrayList; ...@@ -7,9 +7,6 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.ToutiaoKeyWordAnalysis; import com.zhiwei.parse.analysis.ToutiaoKeyWordAnalysis;
...@@ -17,7 +14,6 @@ import com.zhiwei.tools.tools.ZhiWeiTools; ...@@ -17,7 +14,6 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
public class Toutiao { public class Toutiao {
private static Logger logger = LoggerFactory.getLogger(Toutiao.class);
private static ToutiaoKeyWordAnalysis toutiaoKeyWordAnalysis = new ToutiaoKeyWordAnalysis(); private static ToutiaoKeyWordAnalysis toutiaoKeyWordAnalysis = new ToutiaoKeyWordAnalysis();
......
...@@ -5,6 +5,8 @@ import java.io.UnsupportedEncodingException; ...@@ -5,6 +5,8 @@ import java.io.UnsupportedEncodingException;
import java.net.Proxy; import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -12,13 +14,14 @@ import java.util.Map; ...@@ -12,13 +14,14 @@ import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.parse.analysis.XueqiuKeyWordAnalysis; import com.zhiwei.parse.analysis.XueqiuKeyWordAnalysis;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Request; import okhttp3.Request;
import okhttp3.Response;
public class Xueqiu { public class Xueqiu {
...@@ -60,8 +63,26 @@ public class Xueqiu { ...@@ -60,8 +63,26 @@ public class Xueqiu {
break; break;
} }
} }
return bodyList; return bodyList;
} }
/**
 * Downloads the page at {@code url} and reads the like / repost / comment
 * counters out of the JSON object assigned to {@code window.SNOWMAN_STATUS}.
 *
 * <p>The JSON text is taken from just after the {@code window.SNOWMAN_STATUS = }
 * marker up to the {@code window.SNOWMAN_TARGET} marker (or the end of the page
 * when that marker is absent), and is then cut at its last {@code ;}.
 *
 * @param url   page to fetch
 * @param proxy proxy handed to the HTTP call
 * @return a map with the keys {@code like}, {@code repostCount} and
 *         {@code commentCount}; an empty immutable map when the page cannot be
 *         fetched or the embedded JSON cannot be located or parsed
 */
public static Map<String,Object> getUrlData(String url,Proxy proxy) {
    final String statusMarker = "window.SNOWMAN_STATUS = ";
    final String targetMarker = "window.SNOWMAN_TARGET";
    try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
        String result = response.body().string();

        // Locate the embedded JSON explicitly instead of relying on index exceptions.
        int markerAt = result.indexOf(statusMarker);
        if (markerAt < 0) {
            logger.error(" 雪球 数据转评赞获取失败 SNOWMAN_STATUS not found url = {}", url);
            return Collections.emptyMap();
        }
        int from = markerAt + statusMarker.length();
        int to = result.indexOf(targetMarker, from);
        String jsondata = to < 0 ? result.substring(from) : result.substring(from, to);

        // Drop the statement terminator (and anything after it) that follows the object literal.
        int lastSemicolon = jsondata.lastIndexOf(';');
        if (lastSemicolon < 0) {
            logger.error(" 雪球 数据转评赞获取失败 SNOWMAN_STATUS not terminated url = {}", url);
            return Collections.emptyMap();
        }
        JSONObject json = JSONObject.parseObject(jsondata.substring(0, lastSemicolon));

        Map<String,Object> map = new HashMap<>();
        map.put("like", json.getInteger("like_count"));
        map.put("repostCount", json.getInteger("retweet_count"));
        map.put("commentCount", json.getInteger("reply_count"));
        return map;
    } catch (Exception e) {
        // The throwable must be the LAST argument so SLF4J logs its stack trace
        // instead of consuming it as a placeholder value.
        logger.error(" 雪球 数据转评赞获取失败 url = {}", url, e);
    }
    return Collections.emptyMap();
}
} }
...@@ -86,6 +86,7 @@ public class Yiche { ...@@ -86,6 +86,7 @@ public class Yiche {
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
page++; page++;
} }
return bodyList;
} }
return Collections.emptyList(); return Collections.emptyList();
......
package com.zhiwei.parse;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.tools.tools.URLCodeUtil;
import okhttp3.Response;
/**
 * Keyword search scraper for so.youku.com.
 *
 * <p>Requests the search result pages for a keyword (sorted with
 * {@code orderfield=createtime}) and extracts title, link, upload time and
 * uploader text for every result that carries an upload-time label.
 */
public class Youku {

    private static final Logger logger = LoggerFactory.getLogger(Youku.class);

    private static final HttpBoot httpBoot = new HttpBoot();

    /** Value sent as the {@code aaid} query parameter of every search request. */
    private static final String AAID = "9cae49f0e031664b00d8f9c108e586ab";

    /** Number of result pages requested per keyword. */
    private static final int MAX_PAGES = 20;

    /** Label that precedes the upload time in a result's info paragraph. */
    private static final String UPLOAD_TIME_LABEL = "上传时间:";

    /** Label that precedes the uploader name in a result's info paragraph. */
    private static final String UPLOADER_LABEL = "上传者:";

    /**
     * Collects search results for {@code word} from pages 1..{@value #MAX_PAGES}.
     *
     * <p>A page that fails to download or parse is logged and skipped; results
     * gathered before the failure (including those from the same page) are kept.
     *
     * @param word search keyword; it is URL-encoded as UTF-8 before use
     * @return one map per result with the keys {@code title}, {@code url},
     *         {@code time} and {@code uper}; never {@code null}
     */
    public static List<Map<String,Object>> getDataList(String word) {
        List<Map<String,Object>> list = new ArrayList<>();
        // Loop-invariant: encode the keyword once instead of once per page.
        String encodedWord = URLCodeUtil.getURLEncode(word, "UTF-8");
        for (int page = 1; page <= MAX_PAGES; page++) {
            String url = "https://so.youku.com/search_video/q_" + encodedWord
                    + "?spm=a2h0k.11417342.filter.dnew&orderfield=createtime&aaid=" + AAID + "&pg=" + page;
            logger.info(" youku search url = {} ", url);
            try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyFactory.getNatProxy())) {
                collectResults(response.body().string(), list);
                logger.info(" i = {} dataSize = {} ", page, list.size());
            } catch (Exception e) {
                // Throwable goes last (no placeholder) so SLF4J prints the stack trace.
                logger.error(" Exception on youku page {} url = {} ", page, url, e);
            }
        }
        return list;
    }

    /**
     * Parses one search page and appends every usable result to {@code sink}.
     *
     * <p>The result markup is read from the {@code html} field of the JSON
     * argument passed to {@code bigview.view(...)} inside the page.
     */
    private static void collectResults(String page, List<Map<String,Object>> sink) {
        String[] afterCall = page.split("bigview.view\\(");
        if (afterCall.length < 2) {
            // Marker absent: nothing to parse on this page.
            logger.warn(" youku page does not contain bigview.view payload ");
            return;
        }
        String jsondata = afterCall[1].split("\\)\\</script\\>")[0];
        JSONObject json = JSONObject.parseObject(jsondata);
        Document doc = Jsoup.parse(json.getString("html"));
        Elements elements = doc.select("div.sk-result-list").select("div.sk-mod");
        for (Element element : elements) {
            String info = element.select("div.mod-main > div.mod-info > p").text();
            if (!info.contains(UPLOAD_TIME_LABEL)) {
                continue; // only entries with an upload-time label are collected
            }
            Elements link = element.select("div.mod-main > div.mod-header > h2 > a");
            Map<String,Object> map = new HashMap<>();
            map.put("title", link.text());
            map.put("url", toAbsoluteUrl(link.attr("href")));
            map.put("time", info.replace(UPLOAD_TIME_LABEL, "").split(" ")[0]);
            map.put("uper", uploaderPart(info));
            sink.add(map);
        }
    }

    /** Prefixes {@code https:} unless the link already starts with {@code http}. */
    private static String toAbsoluteUrl(String href) {
        return href.startsWith("http") ? href : "https:" + href;
    }

    /**
     * Returns the info text from the uploader label onwards (label included),
     * or an empty string when the label is missing.
     */
    private static String uploaderPart(String info) {
        int at = info.indexOf(UPLOADER_LABEL);
        return at < 0 ? "" : info.substring(at);
    }

}
...@@ -13,13 +13,19 @@ import org.jsoup.select.Elements; ...@@ -13,13 +13,19 @@ import org.jsoup.select.Elements;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
public class AiqiyiByWordAnalysis { public class AiqiyiByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(AiqiyiByWordAnalysis.class); private static Logger logger = LoggerFactory.getLogger(AiqiyiByWordAnalysis.class);
private static HttpBoot httpBoot = new HttpBoot();
/** /**
* *
* @Description 解析出所有有用链接 * @Description 解析出所有有用链接
...@@ -45,9 +51,9 @@ public class AiqiyiByWordAnalysis { ...@@ -45,9 +51,9 @@ public class AiqiyiByWordAnalysis {
} }
public Map<String,Object> getAiqiyiData(String url,Map<String,String> headerMap,Proxy proxy) { public Map<String,Object> getAiqiyiData(String url,Map<String,String> headerMap,Proxy proxy) {
Map<String,Object> dataMap = new HashMap<String,Object>(); Map<String,Object> dataMap = new HashMap<>();
try { try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy)){
String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap); String result = response.body().string();
Document doc = Jsoup.parse(result); Document doc = Jsoup.parse(result);
String time = doc.select("#widget-vshort-ptime").text(); String time = doc.select("#widget-vshort-ptime").text();
if(!time.contains("2017")) { if(!time.contains("2017")) {
...@@ -68,7 +74,7 @@ public class AiqiyiByWordAnalysis { ...@@ -68,7 +74,7 @@ public class AiqiyiByWordAnalysis {
System.out.println(dataMap.toString()); System.out.println(dataMap.toString());
return dataMap; return dataMap;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析出错",e.getMessage()); logger.error("解析出错 {}",e);
return dataMap; return dataMap;
} }
} }
......
...@@ -14,13 +14,17 @@ import org.slf4j.LoggerFactory; ...@@ -14,13 +14,17 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
public class BaijiaAccountAnalysis { public class BaijiaAccountAnalysis {
private static Logger logger = LoggerFactory.getLogger(BaijiaAccountAnalysis.class); private static Logger logger = LoggerFactory.getLogger(BaijiaAccountAnalysis.class);
private static HttpBoot httpBoot = new HttpBoot();
public Map<String,Object> getBaijiaAccount2Data(JSONObject data) { public Map<String,Object> getBaijiaAccount2Data(JSONObject data) {
Map<String,Object> map = new HashMap<String,Object>(); Map<String,Object> map = new HashMap<String,Object>();
...@@ -159,8 +163,8 @@ public class BaijiaAccountAnalysis { ...@@ -159,8 +163,8 @@ public class BaijiaAccountAnalysis {
public String getBaijiaContent(String url,Proxy proxy) { public String getBaijiaContent(String url,Proxy proxy) {
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
Map<String,String> headerMap = HeadGet.getBaijiaAccountHeaderMap(null); Map<String,String> headerMap = HeadGet.getBaijiaAccountHeaderMap(null);
try { try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy)){
String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap); String result = response.body().string();
Document document = Jsoup.parse(result); Document document = Jsoup.parse(result);
return document.select("section.news-content").text(); return document.select("section.news-content").text();
} catch (Exception e) { } catch (Exception e) {
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy; import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
...@@ -14,13 +14,17 @@ import org.slf4j.LoggerFactory; ...@@ -14,13 +14,17 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
public class DayuByWordAnalysis { public class DayuByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(DayuByWordAnalysis.class); private static Logger logger = LoggerFactory.getLogger(DayuByWordAnalysis.class);
private static HttpBoot httpBoot = new HttpBoot();
public List<Map<String,Object>> getDayuByWordData(String result,Proxy proxy) { public List<Map<String,Object>> getDayuByWordData(String result,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
...@@ -28,7 +32,7 @@ public class DayuByWordAnalysis { ...@@ -28,7 +32,7 @@ public class DayuByWordAnalysis {
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("data").getJSONArray("iflowItems"); JSONArray jsonArry = json.getJSONObject("data").getJSONArray("iflowItems");
for(int i = 0;i < jsonArry.size();i++) { for(int i = 0;i < jsonArry.size();i++) {
Map<String,Object> map = new HashMap<String,Object>(); Map<String,Object> map = new HashMap<>();
JSONObject data = jsonArry.getJSONObject(i); JSONObject data = jsonArry.getJSONObject(i);
map.put("title", data.getString("title").replaceAll("<.*?>", "")); map.put("title", data.getString("title").replaceAll("<.*?>", ""));
String url = data.getString("zzd_url"); String url = data.getString("zzd_url");
...@@ -42,7 +46,7 @@ public class DayuByWordAnalysis { ...@@ -42,7 +46,7 @@ public class DayuByWordAnalysis {
} }
return dataList; return dataList;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析出错",e.getMessage()); logger.error("解析出错 {}",e);
return dataList; return dataList;
} }
...@@ -51,22 +55,19 @@ public class DayuByWordAnalysis { ...@@ -51,22 +55,19 @@ public class DayuByWordAnalysis {
public String getContent(String url,Proxy proxy) { public String getContent(String url,Proxy proxy) {
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap); try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy)){
Pattern pat = Pattern.compile("xissJsonData = (.*);"); String result = response.body().string();
Matcher matcher = pat.matcher(result); Pattern pat = Pattern.compile("xissJsonData = (.*);");
try { Matcher matcher = pat.matcher(result);
if(matcher.find()) { if(matcher.find()) {
String s = matcher.group(0); String s = matcher.group(0);
JSONObject json = JSONObject.parseObject(s.substring(15, s.length() - 1)); JSONObject json = JSONObject.parseObject(s.substring(15, s.length() - 1));
String content = json.getString("content").replaceAll("<.*?>", ""); return json.getString("content").replaceAll("<.*?>", "");
return content; }
}
return null;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析文本出错",e.getMessage()); e.printStackTrace();
System.out.println(result);
return null;
} }
return null;
} }
} }
...@@ -10,12 +10,8 @@ import java.util.Map; ...@@ -10,12 +10,8 @@ import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class DayuCommentAnalysis { public class DayuCommentAnalysis {
private static Logger logger = LoggerFactory.getLogger(DayuCommentAnalysis.class); private static Logger logger = LoggerFactory.getLogger(DayuCommentAnalysis.class);
...@@ -33,7 +29,7 @@ public class DayuCommentAnalysis { ...@@ -33,7 +29,7 @@ public class DayuCommentAnalysis {
JSONObject json = JSONObject.parseObject(result).getJSONObject("data").getJSONObject("comments_map"); JSONObject json = JSONObject.parseObject(result).getJSONObject("data").getJSONObject("comments_map");
Map<String,Object> map = (Map<String,Object>)json; Map<String,Object> map = (Map<String,Object>)json;
for(Map.Entry<String, Object> entry : map.entrySet() ) { for(Map.Entry<String, Object> entry : map.entrySet() ) {
Map<String,Object> dataMap = new HashMap<String, Object>(); Map<String,Object> dataMap = new HashMap<>();
JSONObject data = JSONObject.parseObject(entry.getValue().toString()); JSONObject data = JSONObject.parseObject(entry.getValue().toString());
dataMap.put("content", data.getString("content")); dataMap.put("content", data.getString("content"));
dataMap.put("nickname", data.getJSONObject("user").getString("nickname")); dataMap.put("nickname", data.getJSONObject("user").getString("nickname"));
...@@ -45,78 +41,78 @@ public class DayuCommentAnalysis { ...@@ -45,78 +41,78 @@ public class DayuCommentAnalysis {
dataMap.put("time", TimeParse.dateFormartString(new Date(time), "yyyy-MM-dd HH:mm:ss")); dataMap.put("time", TimeParse.dateFormartString(new Date(time), "yyyy-MM-dd HH:mm:ss"));
int i = data.getInteger("reply_cnt"); int i = data.getInteger("reply_cnt");
dataMap.put("replay_count", i); dataMap.put("replay_count", i);
if(i > 0) { // if(i > 0) {
dataList.addAll(getReplayData(id,articleId,proxy)); // dataList.addAll(getReplayData(id,articleId,proxy));
} // }
dataList.add(dataMap); dataList.add(dataMap);
} }
return dataList; return dataList;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析出错",e.getMessage()); logger.error("解析出错 {}",e);
return dataList; return dataList;
} }
} }
/** // /**
* // *
* @Description 解析 // * @Description 解析
* @param id // * @param id
* @param articleId // * @param articleId
* @return // * @return
*/ // */
private List<Map<String,Object>> getReplayData(String id,String articleId,Proxy proxy) { // private List<Map<String,Object>> getReplayData(String id,String articleId,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null); // Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null);
String url = "http://m.uczzd.cn/iflow/api/v2/cmt/detail/"+id+"/comments?articleId="+articleId+"&count=10&ts="; // String url = "http://m.uczzd.cn/iflow/api/v2/cmt/detail/"+id+"/comments?articleId="+articleId+"&count=10&ts=";
String result = HttpClient.executeHttpRequestGet(url+"-1",proxy, headerMap); // String result = HttpClient.executeHttpRequestGet(url+"-1",proxy, headerMap);
List<Map<String,Object>> data = new ArrayList<Map<String,Object>>(); // List<Map<String,Object>> data = new ArrayList<Map<String,Object>>();
List<String> timeList = new ArrayList<String>(); // List<String> timeList = new ArrayList<String>();
while(true) { // while(true) {
ZhiWeiTools.sleep(2000); // ZhiWeiTools.sleep(2000);
long time = analysisReplayData(result,data); // long time = analysisReplayData(result,data);
if(timeList.contains(String.valueOf(time))){ // if(timeList.contains(String.valueOf(time))){
break; // break;
} // }
timeList.add(String.valueOf(time)); // timeList.add(String.valueOf(time));
if(time == 0) { // if(time == 0) {
break; // break;
} // }
result = HttpClient.executeHttpRequestGet(url+time,proxy, headerMap); // result = HttpClient.executeHttpRequestGet(url+time,proxy, headerMap);
} // }
System.out.println("=====================评论下回复获取数=="+data.size()); // System.out.println("=====================评论下回复获取数=="+data.size());
return data; // return data;
} // }
/** // /**
* // *
* @Description 解析 // * @Description 解析
* @param result // * @param result
* @param dataList // * @param dataList
* @return // * @return
*/ // */
private long analysisReplayData(String result,List<Map<String,Object>> dataList) { // private long analysisReplayData(String result,List<Map<String,Object>> dataList) {
long time = 0; // long time = 0;
try { // try {
JSONObject json = JSONObject.parseObject(result); // JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("data").getJSONArray("replies"); // JSONArray jsonArry = json.getJSONObject("data").getJSONArray("replies");
for(int i = 0; i < jsonArry.size();i++) { // for(int i = 0; i < jsonArry.size();i++) {
Map<String,Object> map = new HashMap<String, Object>(); // Map<String,Object> map = new HashMap<String, Object>();
JSONObject data = jsonArry.getJSONObject(i); // JSONObject data = jsonArry.getJSONObject(i);
map.put("content", data.getString("content")); // map.put("content", data.getString("content"));
map.put("nickname", data.getString("nickname")); // map.put("nickname", data.getString("nickname"));
map.put("like", data.getString("up_cnt")); // map.put("like", data.getString("up_cnt"));
map.put("id", data.getString("commentId")); // map.put("id", data.getString("commentId"));
map.put("url", data.getString("shareUrl")); // map.put("url", data.getString("shareUrl"));
time = data.getLong("timeShow"); // time = data.getLong("timeShow");
map.put("time", TimeParse.dateFormartString(new Date(time), "yyyy-MM-dd HH:mm:ss")); // map.put("time", TimeParse.dateFormartString(new Date(time), "yyyy-MM-dd HH:mm:ss"));
map.put("replay_count", data.getInteger("replyCnt")); // map.put("replay_count", data.getInteger("replyCnt"));
dataList.add(map); // dataList.add(map);
} // }
return time; // return time;
} catch (Exception e) { // } catch (Exception e) {
logger.error("获取大鱼号评论出错--回复的",e.getMessage()); // logger.error("获取大鱼号评论出错--回复的",e.getMessage());
return 0; // return 0;
} // }
} // }
......
...@@ -10,8 +10,6 @@ import java.util.Map; ...@@ -10,8 +10,6 @@ import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import javax.swing.plaf.synth.SynthSpinnerUI;
import org.apache.commons.lang3.math.NumberUtils; import org.apache.commons.lang3.math.NumberUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
......
...@@ -6,16 +6,12 @@ import java.util.HashMap; ...@@ -6,16 +6,12 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
public class DouyinHotDataAnalysis { public class DouyinHotDataAnalysis {
private static Logger logger = LoggerFactory.getLogger(DouyinHotDataAnalysis.class);
public Map<String,Object> getData(String result) { public Map<String,Object> getData(String result) {
try { try {
......
...@@ -11,12 +11,17 @@ import org.slf4j.LoggerFactory; ...@@ -11,12 +11,17 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
public class FenghuangAccountAnalysis { public class FenghuangAccountAnalysis {
private static Logger logger = LoggerFactory.getLogger(FenghuangAccountAnalysis.class); private static Logger logger = LoggerFactory.getLogger(FenghuangAccountAnalysis.class);
private static HttpBoot httpBoot = new HttpBoot();
/** /**
* *
...@@ -31,8 +36,8 @@ public class FenghuangAccountAnalysis { ...@@ -31,8 +36,8 @@ public class FenghuangAccountAnalysis {
Map<String,String> headerMap = HeadGet.getFenghuangAccountHeaderMap(null); Map<String,String> headerMap = HeadGet.getFenghuangAccountHeaderMap(null);
JSONArray jsonArry = null; JSONArray jsonArry = null;
for(int i = 0;i < 3;i++) { for(int i = 0;i < 3;i++) {
try { try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy)){
String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap); String result = response.body().string();
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
jsonArry = json.getJSONObject("data").getJSONObject("feeds").getJSONArray("list"); jsonArry = json.getJSONObject("data").getJSONObject("feeds").getJSONArray("list");
if(jsonArry == null || jsonArry.size() < 1) { if(jsonArry == null || jsonArry.size() < 1) {
...@@ -83,7 +88,7 @@ public class FenghuangAccountAnalysis { ...@@ -83,7 +88,7 @@ public class FenghuangAccountAnalysis {
map.put("url", json.getString("shareurl")); map.put("url", json.getString("shareurl"));
map.put("id", json.getString("aid")); map.put("id", json.getString("aid"));
} catch (Exception e) { } catch (Exception e) {
logger.error("解析具体文章的时候出错",e.getMessage()); logger.error("解析具体文章的时候出错 {}",e);
return null; return null;
} }
return map; return map;
......
...@@ -13,17 +13,22 @@ import org.slf4j.LoggerFactory; ...@@ -13,17 +13,22 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import okhttp3.Response;
public class FenghuangCommentAnalysis { public class FenghuangCommentAnalysis {
private static Logger logger = LoggerFactory.getLogger(FenghuangCommentAnalysis.class); private static Logger logger = LoggerFactory.getLogger(FenghuangCommentAnalysis.class);
private static HttpBoot httpBoot = new HttpBoot();
public Map<String,Object> getFenghuangCommentCount(String url,Proxy proxy) { public Map<String,Object> getFenghuangCommentCount(String url,Proxy proxy) {
Map<String,Object> map = new HashMap<>(); Map<String,Object> map = new HashMap<>();
try { try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){
String result = HttpClient.executeHttpRequestGet(url,proxy, null); String result = response.body().string();
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
map.put("real_count", json.getInteger("count")); map.put("real_count", json.getInteger("count"));
map.put("comment_num", json.getInteger("join_count")); map.put("comment_num", json.getInteger("join_count"));
...@@ -44,8 +49,8 @@ public class FenghuangCommentAnalysis { ...@@ -44,8 +49,8 @@ public class FenghuangCommentAnalysis {
public String getdocUrl(String url,Proxy proxy) { public String getdocUrl(String url,Proxy proxy) {
String docUrl = null; String docUrl = null;
for(int i = 0;i < 3;i++) { for(int i = 0;i < 3;i++) {
try { try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){
String result = HttpClient.executeHttpRequestGet(url,proxy, null); String result = response.body().string();
if(result.contains("commentUrl\": \"")) { if(result.contains("commentUrl\": \"")) {
docUrl = result.split("commentUrl\": \"")[1].split("\",")[0]; docUrl = result.split("commentUrl\": \"")[1].split("\",")[0];
break; break;
...@@ -76,24 +81,18 @@ public class FenghuangCommentAnalysis { ...@@ -76,24 +81,18 @@ public class FenghuangCommentAnalysis {
*/ */
public List<Map<String,Object>> getData(String url,Proxy proxy) { public List<Map<String,Object>> getData(String url,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getFenghuangCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getFenghuangCommentHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<>();
String result; try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy)){
try { String result = response.body().string();
result = HttpClient.executeHttpRequestGet(url,proxy, headerMap); JSONObject json = JSONObject.parseObject(result);
} catch (Exception e) { JSONArray jsonArry = json.getJSONArray("data");
logger.error("链接获取信息失败",e.getMessage());
return null;
}
JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONArray("data");
try {
for(int i = 0;i < jsonArry.size(); i ++) { for(int i = 0;i < jsonArry.size(); i ++) {
Map<String,Object> map = getcommentData(jsonArry.getJSONObject(i)); Map<String,Object> map = getcommentData(jsonArry.getJSONObject(i));
dataList.add(map); dataList.add(map);
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("获取信息出错",e.getMessage()); logger.error("链接获取信息失败",e);
return null; return Collections.emptyList();
} }
return dataList; return dataList;
...@@ -109,22 +108,16 @@ public class FenghuangCommentAnalysis { ...@@ -109,22 +108,16 @@ public class FenghuangCommentAnalysis {
*/ */
public List<Map<String,Object>> getData2(String url,Proxy proxy) { public List<Map<String,Object>> getData2(String url,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<>(); List<Map<String,Object>> dataList = new ArrayList<>();
String result; try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)){
try { String result = response.body().string();
result = HttpClient.executeHttpRequestGet(url,proxy, null); JSONObject json = JSONObject.parseObject(result);
} catch (Exception e) { JSONArray jsonArry = json.getJSONArray("comments");
logger.error("链接获取信息失败 {}",e);
return Collections.emptyList();
}
JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONArray("comments");
try {
for(int i = 0;i < jsonArry.size(); i ++) { for(int i = 0;i < jsonArry.size(); i ++) {
Map<String,Object> map = getcommentData2(jsonArry.getJSONObject(i)); Map<String,Object> map = getcommentData2(jsonArry.getJSONObject(i));
dataList.add(map); dataList.add(map);
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("获取信息出错 {}",e); logger.error("链接获取信息失败 {}",e);
return Collections.emptyList(); return Collections.emptyList();
} }
return dataList; return dataList;
...@@ -154,7 +147,7 @@ public class FenghuangCommentAnalysis { ...@@ -154,7 +147,7 @@ public class FenghuangCommentAnalysis {
* @return * @return
*/ */
private Map<String,Object> getcommentData(JSONObject json) { private Map<String,Object> getcommentData(JSONObject json) {
Map<String,Object> map = new HashMap<String, Object>(); Map<String,Object> map = new HashMap<>();
try { try {
JSONObject data = json.getJSONObject("data"); JSONObject data = json.getJSONObject("data");
map.put("nickname", json.getString("nickname")); map.put("nickname", json.getString("nickname"));
...@@ -169,7 +162,7 @@ public class FenghuangCommentAnalysis { ...@@ -169,7 +162,7 @@ public class FenghuangCommentAnalysis {
long time = data.getLong("add_time") * 1000; long time = data.getLong("add_time") * 1000;
map.put("time", TimeParse.dateFormartString(new Date(time), "yyyy-MM-dd HH:mm:ss")); map.put("time", TimeParse.dateFormartString(new Date(time), "yyyy-MM-dd HH:mm:ss"));
} catch (Exception e) { } catch (Exception e) {
logger.error("具体解析一条数据出错",e.getMessage()); logger.error("具体解析一条数据出错 {}",e);
return null; return null;
} }
return map; return map;
......
...@@ -5,15 +5,11 @@ import java.util.HashMap; ...@@ -5,15 +5,11 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
public class MaimaiBywordAnalysis { public class MaimaiBywordAnalysis {
private static Logger logger = LoggerFactory.getLogger(MaimaiBywordAnalysis.class);
public Map<String,Object> getData(String result,String time) { public Map<String,Object> getData(String result,String time) {
Map<String,Object> map1 = new HashMap<String,Object>(); Map<String,Object> map1 = new HashMap<String,Object>();
...@@ -38,7 +34,7 @@ public class MaimaiBywordAnalysis { ...@@ -38,7 +34,7 @@ public class MaimaiBywordAnalysis {
map.put("like", data.getJSONObject("feed").getInteger("likes")); map.put("like", data.getJSONObject("feed").getInteger("likes"));
map.put("comment_count", data.getJSONObject("feed").getInteger("total_cnt")); map.put("comment_count", data.getJSONObject("feed").getInteger("total_cnt"));
map.put("spreads", data.getJSONObject("feed").getInteger("spreads")); //传播数 map.put("spreads", data.getJSONObject("feed").getInteger("spreads")); //传播数
System.out.println(map.toString()); // System.out.println(map.toString());
dataList.add(map); dataList.add(map);
} }
map1.put("data", dataList); map1.put("data", dataList);
...@@ -69,7 +65,7 @@ public class MaimaiBywordAnalysis { ...@@ -69,7 +65,7 @@ public class MaimaiBywordAnalysis {
map.put("like", data.getJSONObject("gossip").getInteger("likes")); map.put("like", data.getJSONObject("gossip").getInteger("likes"));
map.put("comment_count", data.getJSONObject("gossip").getInteger("total_cnt")); map.put("comment_count", data.getJSONObject("gossip").getInteger("total_cnt"));
map.put("spreads", data.getJSONObject("gossip").getInteger("search_order")); //传播数 map.put("spreads", data.getJSONObject("gossip").getInteger("search_order")); //传播数
System.out.println(map.toString()); // System.out.println(map.toString());
dataList.add(map); dataList.add(map);
} }
map1.put("data", dataList); map1.put("data", dataList);
......
...@@ -19,7 +19,6 @@ import com.zhiwei.util.TimeUtil; ...@@ -19,7 +19,6 @@ import com.zhiwei.util.TimeUtil;
public class MeipaiByWordAnalysis { public class MeipaiByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(MeipaiByWordAnalysis.class); private static Logger logger = LoggerFactory.getLogger(MeipaiByWordAnalysis.class);
/** /**
* *
* @Description 解析此页 * @Description 解析此页
......
...@@ -19,7 +19,6 @@ import com.zhiwei.tools.tools.ZhiWeiTools; ...@@ -19,7 +19,6 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
public class QQKBCommentAnalysis { public class QQKBCommentAnalysis {
private static Logger logger = LoggerFactory.getLogger(QQKBCommentAnalysis.class); private static Logger logger = LoggerFactory.getLogger(QQKBCommentAnalysis.class);
/** /**
* *
...@@ -37,10 +36,9 @@ public class QQKBCommentAnalysis { ...@@ -37,10 +36,9 @@ public class QQKBCommentAnalysis {
JSONObject data = jsonArry.getJSONArray(jsonArry.size()-1).getJSONObject(0); JSONObject data = jsonArry.getJSONArray(jsonArry.size()-1).getJSONObject(0);
String coral_scorem = data.getString("coral_score"); String coral_scorem = data.getString("coral_score");
String reply_id = data.getString("reply_id"); String reply_id = data.getString("reply_id");
Map<String,Object> paMap = HeadGet.getQQKBCommentParamMap2(comment_id, page, coral_scorem, article_id, reply_id); return HeadGet.getQQKBCommentParamMap2(comment_id, page, coral_scorem, article_id, reply_id);
return paMap;
} catch (Exception e) { } catch (Exception e) {
logger.error("构造post请求信息失败",e.getMessage()); logger.error("构造post请求信息失败 {}",e);
return null; return null;
} }
} }
...@@ -52,13 +50,13 @@ public class QQKBCommentAnalysis { ...@@ -52,13 +50,13 @@ public class QQKBCommentAnalysis {
* @return * @return
*/ */
public List<Map<String,Object>> getCommentData(String result,String cookie,String comment_id, String article_id,Proxy proxy) { public List<Map<String,Object>> getCommentData(String result,String cookie,String comment_id, String article_id,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<>();
try { try {
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("comments").getJSONArray("hot"); JSONArray jsonArry = json.getJSONObject("comments").getJSONArray("hot");
for(int i = 0; i < jsonArry.size() ;i++) { for(int i = 0; i < jsonArry.size() ;i++) {
JSONObject data = jsonArry.getJSONArray(i).getJSONObject(0); JSONObject data = jsonArry.getJSONArray(i).getJSONObject(0);
Map<String,Object> map = new HashMap<String,Object>(); Map<String,Object> map = new HashMap<>();
map.put("content", data.getString("reply_content")); map.put("content", data.getString("reply_content"));
map.put("time", TimeParse.dateFormartString(new Date(Long.valueOf(data.getString("tipstime")) * 1000L), "yyyy-MM-dd HH:mm:ss")); map.put("time", TimeParse.dateFormartString(new Date(Long.valueOf(data.getString("tipstime")) * 1000L), "yyyy-MM-dd HH:mm:ss"));
map.put("name", data.getString("nick")); map.put("name", data.getString("nick"));
...@@ -66,12 +64,11 @@ public class QQKBCommentAnalysis { ...@@ -66,12 +64,11 @@ public class QQKBCommentAnalysis {
int replay_num = 0; int replay_num = 0;
String reply_id = data.getString("reply_id"); String reply_id = data.getString("reply_id");
if(data.toString().contains("reply_num")) { if(data.toString().contains("reply_num")) {
replay_num = data.getInteger("reply_num"); // replay_num = data.getInteger("reply_num");
List<Map<String,Object>> lists = getReplyCommentData(cookie,reply_id,comment_id, article_id,proxy); // List<Map<String,Object>> lists = getReplyCommentData(cookie,reply_id,comment_id, article_id,proxy);
if(lists != null && lists.size() > 0) { // if(lists != null && lists.size() > 0) {
dataList.addAll(lists); // dataList.addAll(lists);
} // }
map.put("reply_num", replay_num);
} }
map.put("reply_id", reply_id); map.put("reply_id", reply_id);
map.put("reply_num", replay_num); map.put("reply_num", replay_num);
...@@ -80,7 +77,7 @@ public class QQKBCommentAnalysis { ...@@ -80,7 +77,7 @@ public class QQKBCommentAnalysis {
} }
return dataList; return dataList;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析数据出错",e.getMessage()); logger.error("解析数据出错 {}",e);
return dataList; return dataList;
} }
...@@ -93,7 +90,7 @@ public class QQKBCommentAnalysis { ...@@ -93,7 +90,7 @@ public class QQKBCommentAnalysis {
* @return * @return
*/ */
public Map<String,Object> getOneReplyComment(JSONObject data) { public Map<String,Object> getOneReplyComment(JSONObject data) {
Map<String,Object> map = new HashMap<String,Object>(); Map<String,Object> map = new HashMap<>();
try { try {
map.put("content", data.getString("reply_content")); map.put("content", data.getString("reply_content"));
map.put("time", TimeParse.dateFormartString(new Date(Long.valueOf(data.getString("tipstime")) * 1000L), "yyyy-MM-dd HH:mm:ss")); map.put("time", TimeParse.dateFormartString(new Date(Long.valueOf(data.getString("tipstime")) * 1000L), "yyyy-MM-dd HH:mm:ss"));
...@@ -103,13 +100,13 @@ public class QQKBCommentAnalysis { ...@@ -103,13 +100,13 @@ public class QQKBCommentAnalysis {
System.out.println(map.toString()); System.out.println(map.toString());
return map; return map;
} catch (Exception e) { } catch (Exception e) {
logger.error("获取单个回复评论出错",e.getMessage()); logger.error("获取单个回复评论出错 {}",e);
return null; return null;
} }
} }
public List<Map<String,Object>> getReplyCommentData(String cookie,String reply_id,String comment_id, String article_id,Proxy proxy) { public List<Map<String,Object>> getReplyCommentData(String cookie,String reply_id,String comment_id, String article_id,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<>();
Map<String,String> headerMap = HeadGet.getQQKBCommentHeaderMap(cookie); Map<String,String> headerMap = HeadGet.getQQKBCommentHeaderMap(cookie);
try { try {
String old_reply_id = ""; String old_reply_id = "";
......
...@@ -6,16 +6,12 @@ import java.util.HashMap; ...@@ -6,16 +6,12 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
public class QicheHomeKwyWordAnalysis { public class QicheHomeKwyWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(QicheHomeKwyWordAnalysis.class);
public List<Map<String,Object>> getData(String result) { public List<Map<String,Object>> getData(String result) {
try { try {
......
...@@ -21,15 +21,16 @@ public class WangyiHistoryAnalysis { ...@@ -21,15 +21,16 @@ public class WangyiHistoryAnalysis {
private static Logger logger = LoggerFactory.getLogger(WangyiHistoryAnalysis.class); private static Logger logger = LoggerFactory.getLogger(WangyiHistoryAnalysis.class);
public List<Map<String,Object>> getData(String result,Proxy proxy,String endTime,String source) { public List<Map<String,Object>> getData(String result,Proxy proxy,String endTime,String source) {
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("data").getJSONArray("list"); JSONArray jsonArry = json.getJSONObject("data").getJSONArray("list");
Map<String, String> headerMap = HeadGet.getWangyiHistoryHeaderMap(null); Map<String, String> headerMap = HeadGet.getWangyiHistoryHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<>();
for(int i = 0;i < jsonArry.size();i++) { for(int i = 0;i < jsonArry.size();i++) {
try { try {
JSONObject data = jsonArry.getJSONObject(i); JSONObject data = jsonArry.getJSONObject(i);
Map<String,Object> map = new HashMap<String,Object>(); Map<String,Object> map = new HashMap<>();
map.put("title", data.getString("title")); map.put("title", data.getString("title"));
if(endTime != null && endTime.length() > 1) { if(endTime != null && endTime.length() > 1) {
if(data.getString("ptime").compareTo(endTime) <= 0) { if(data.getString("ptime").compareTo(endTime) <= 0) {
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
log4j.appender.stdout.layout.ConversionPattern=<%d>[%5p] %c - %m%n log4j.appender.stdout.layout.ConversionPattern=<%d>[%5p] %c - %m%n
log4j.appender.ROLLING_FILE=org.apache.log4j.DailyRollingFileAppender log4j.appender.ROLLING_FILE=org.apache.log4j.DailyRollingFileAppender
log4j.appender.ROLLING_FILE.Threshold=stdout log4j.appender.ROLLING_FILE.Threshold=stdout
log4j.appender.ROLLING_FILE.File=./Log/wechatcrawler.log log4j.appender.ROLLING_FILE.File=./Log/artivleData.log
log4j.appender.ROLLING_FILE.Append=true log4j.appender.ROLLING_FILE.Append=true
log4j.appender.ROLLING_FILE.layout=org.apache.log4j.PatternLayout log4j.appender.ROLLING_FILE.layout=org.apache.log4j.PatternLayout
log4j.appender.ROLLING_FILE.layout.ConversionPattern=<%d>[%5p] %c - %m%n log4j.appender.ROLLING_FILE.layout.ConversionPattern=<%d>[%5p] %c - %m%n
\ No newline at end of file
//package com.zhiwei.Comment;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Chejia;
//import com.zhiwei.tools.tools.ZhiWeiTools;
//
//public class ChejiaCommentCountTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
// GroupType.PROVIDER);
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
//
// Map<String, Object> map = poi
// .importExcel("D://crawlerdata//自媒体/车家号.xlsx", 0);
// List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body");
// List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>();
// List<String> headList = (List<String>) map.get("head");
// for (Map<String, Object> map1 : list) {
// String url = map1.get("地址") + "";
//// url = "https://chejiahao.autohome.com.cn/info/3073188#reply";
// System.out.println(url);
// Chejia.getChejiaComment(url, ProxyFactory.getNatProxy());
//// int i = Chejia.getChejiaCommentCount(url, ProxyFactory.getNatProxy());
//// System.out.println(i);
//// map1.put("count", i);
// ZhiWeiTools.sleep(100);
// }
// headList.add("count");
// poi.exportExcel("D://crawlerdata//自媒体/车家号.xlsx", "评论采集", headList,
// list);
//
// }
//}
//package com.zhiwei.Comment;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Maimai;
//import com.zhiwei.parse.Yiche;
//import com.zhiwei.tools.tools.ZhiWeiTools;
//
//public class MaimaiCommentCountTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
// GroupType.PROVIDER);
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
//
// Map<String, Object> map = poi
// .importExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉#美团 裁员#汇总截至12月20日10点30分.xlsx(1).xlsx", 0);
// List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body");
// List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>();
// List<String> headList = (List<String>) map.get("head");
// for (Map<String, Object> map1 : list) {
// String url = map1.get("地址") + "";
// Map<String,Object> map3 = Maimai.getMaiaiCount(url, ProxyFactory.getNatProxy());
// map1.putAll(map3);
// ZhiWeiTools.sleep(100);
// }
// headList.add("like");
// headList.add("spreads");
// headList.add("cmts");
// poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉#美团 裁员#汇总截至12月20日10点30分.xlsx(1).xlsx", "评论采集", headList,
// list);
// }
//}
//package com.zhiwei.Comment;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//import java.util.Objects;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Xueqiu;
//import com.zhiwei.tools.tools.ZhiWeiTools;
//
//public class XueqiuCommentCountTest {
// @Test
// public void f() {
//
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
// GroupType.PROVIDER);
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
//
// Map<String, Object> map = poi
// .importExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\雪球-腾讯.xlsx", 0);
// List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body");
// List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>();
// List<String> headList = (List<String>) map.get("head");
// for (Map<String, Object> map1 : list) {
// for(int i = 1;i < 5;i++) {
// String url = map1.get("地址") + "";
// Map<String,Object> map3 = Xueqiu.getUrlData(url, ProxyFactory.getNatProxy());
// ZhiWeiTools.sleep(100);
// if(Objects.nonNull(map3)) {
// System.out.println(map3.toString());
// map1.putAll(map3);
// break;
// }
// }
// }
// headList.add("like");
// headList.add("repostCount");
// headList.add("commentCount");
// poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\雪球-腾讯.xlsx", "评论数采集", headList,
// list);
//
// }
//}
package com.zhiwei.crawler; //package com.zhiwei.crawler;
//
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import org.junit.Test; //import org.junit.Test;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil; //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Aiqiyi; //import com.zhiwei.parse.Aiqiyi;
//
/**
 * Manual example: runs an iQiyi keyword search for a fixed set of words and
 * writes the combined rows to an Excel workbook.
 */
public class AiqiyiByWordExample {

    /**
     * Calls {@code Aiqiyi.getAiqiyiByWordData} once per keyword (second
     * argument {@code null}), keeps every non-empty result list, and exports
     * all collected rows to {@code D://crawlerdata/爱奇艺关键词采集.xlsx}.
     */
    @Test
    public void aiqiyiByWordTest() {
        List<Map<String,Object>> rows = new ArrayList<Map<String,Object>>();
        for (String keyword : "美食,味道,菜".split(",")) {
            List<Map<String,Object>> found = Aiqiyi.getAiqiyiByWordData(keyword, null);
            // Skip keywords that produced nothing (null or empty result).
            if (found == null || found.isEmpty()) {
                continue;
            }
            rows.addAll(found);
        }

        // Column order of the exported sheet.
        List<String> columns = new ArrayList<String>();
        for (String column : new String[] {"count", "time", "source", "content", "url", "title"}) {
            columns.add(column);
        }

        PoiExcelUtil.getInstance()
                .exportExcel("D://crawlerdata/爱奇艺关键词采集.xlsx", "数据", columns, rows);
    }
}
package com.zhiwei.crawler; //package com.zhiwei.crawler;
//
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import org.junit.Test; //import org.junit.Test;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil; //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Baijia; //import com.zhiwei.parse.Baijia;
import com.zhiwei.tools.tools.ZhiWeiTools; //import com.zhiwei.tools.tools.ZhiWeiTools;
//
/**
 * Manual examples that collect Baijiahao account articles through
 * {@code Baijia} and export the rows to Excel workbooks via {@code PoiExcelUtil}.
 */
public class BaijiaAccountExample {

    /**
     * Collects the articles of one hard-coded account and exports them.
     * Not annotated with {@code @Test}, so it is not run automatically.
     */
    public void baijiaAccountTest() {
        String app_id = "1536766276004443";
        String startTime = "2015-01-01 00:00:00";
        List<Map<String,Object>> lists = Baijia.getBaijiaAccountData(app_id, startTime, null);

        List<String> headList = new ArrayList<String>();
        headList.add("title");
        headList.add("time");
        headList.add("read_amount");
        headList.add("app_id");
        headList.add("source");
        headList.add("url");
        headList.add("content");

        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        poi.exportExcel("D://crawlerdata/百家号-马继华.xlsx", "马继华", headList, lists);
    }

    /**
     * Collects the articles of several hard-coded accounts, pausing between
     * requests, and exports the combined rows.
     * Not annotated with {@code @Test}, so it is not run automatically.
     */
    public void baijiaAccount2Test() {
        String startTime = "2018-01-01 00:00:00";

        List<String> idList = new ArrayList<>();
        idList.add("b_1548519002063358");
        idList.add("b_1536766292852334");
        idList.add("b_1536766781763274");
        idList.add("b_1536766200338498");

        List<Map<String,Object>> bodyList = new ArrayList<>();
        for (String id : idList) {
            ZhiWeiTools.sleep(5000);
            List<Map<String,Object>> lists = Baijia.getBaijiaAccount2Data(id, startTime, null);
            // Same null guard as test3: addAll(null) would throw and abort the whole run.
            if (lists != null) {
                bodyList.addAll(lists);
            }
        }

        List<String> headList = new ArrayList<String>();
        headList.add("title");
        headList.add("time");
        headList.add("source");
        headList.add("url");
        headList.add("content");

        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        poi.exportExcel("D://crawlerdata//自媒体/百家号-all.xlsx", "科学的fan", headList, bodyList);
    }

    /**
     * Reads account rows (columns {@code id} and {@code name}) from an input
     * workbook, collects articles for each row, and exports the combined rows.
     * A row that fails is reported on stderr and skipped so the remaining
     * rows are still processed.
     */
    @Test
    public void test3() {
        String path = "D://crawlerdata//自媒体/百家号采集.xlsx";
        String startTime = "2018-05-01 00:00:00";
        String cookie = "__cfduid=d847baca85b97d1967b3da02ebb345b831535524251; BAIDUID=C0F0F81EF770C5219AB9C178654135EC:FG=1; PSTM=1536376257; BIDUPSID=250CCE0442BEBCB3568D8EC515953434; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; locale=zh; delPer=0; H_PS_PSSID=1447_21117_20930; PSINO=5";

        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        Map<String,Object> map = poi.importExcel(path, 0);
        // The importer returns an untyped map; "body" is used as the list of row maps.
        @SuppressWarnings("unchecked")
        List<Map<String,Object>> list = (List<Map<String, Object>>) map.get("body");

        List<Map<String,Object>> bodyList = new ArrayList<>();
        for (Map<String,Object> m : list) {
            try {
                String app_id = m.get("id").toString();
                // NOTE(review): the id read from the sheet is immediately replaced by a
                // fixed id, so every row queries the same account. Looks like a
                // debugging leftover - confirm before removing.
                app_id = "1594158489045754";
                String name = m.get("name").toString();
                List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id, name, startTime, cookie, null);
                if (lists != null) {
                    bodyList.addAll(lists);
                }
            } catch (Exception e) {
                // Best-effort per row, but never silently: report which row failed and why.
                System.err.println("Skipping row " + m + ": " + e);
            }
        }

        List<String> headList = new ArrayList<String>();
        headList.add("title");
        headList.add("time");
        headList.add("source");
        headList.add("url");
        headList.add("content");
        headList.add("read_amount");
        poi.exportExcel("D://crawlerdata//自媒体/百家号-lxj-2.xlsx", "娱乐资本论", headList, bodyList);
    }
}
package com.zhiwei.crawler; //package com.zhiwei.crawler;
//
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import org.junit.Test; //import org.junit.Test;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil; //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Dayu; //import com.zhiwei.parse.Dayu;
//
/**
 * Manual example: reads account rows from an Excel workbook, collects each
 * account's articles through {@code Dayu.getDayuAccountData}, and writes the
 * results back to the same workbook, one sheet per account name.
 */
public class DayuAccountExample {

    /**
     * Processes every row of the first sheet of the input workbook. Rows where
     * both the {@code mid} and {@code name} cells are missing or empty are skipped.
     */
    @Test
    public void dayuAccountTest() {
        // URL kept from the original source for reference:
        // https://api.m.sm.cn/rest?method=Subscribe.list&format=html&from=wh10331&uc_biz_str=S:custom%7CC:search%7CN:true
        String path = "D:\\crawlerdata\\自媒体\\大鱼历史文章.xlsx";
        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        Map<String,Object> map = poi.importExcel(path, 0);
        // The importer returns an untyped map; "body" is used as the list of row maps.
        @SuppressWarnings("unchecked")
        List<Map<String,Object>> lists = (List<Map<String, Object>>) map.get("body");

        List<String> headList = new ArrayList<String>();
        headList.add("title");
        headList.add("time");
        headList.add("content");
        headList.add("source");
        headList.add("url");

        for (Map<String,Object> data : lists) {
            Object midCell = data.get("mid");
            Object nameCell = data.get("name");
            // Check the raw cells: concatenating a null cell with "" yields the
            // four-character string "null", which defeated the old length check.
            if (isEmptyCell(midCell) && isEmptyCell(nameCell)) {
                continue;
            }
            // Values handed on are built exactly as before.
            String mid = midCell + "";
            String name = nameCell + "";
            List<Map<String,Object>> dataList = Dayu.getDayuAccountData(mid, name, null, null);
            poi.exportExcel(path, name, headList, dataList);
        }
    }

    /** Returns true when a spreadsheet cell is absent or its text is empty. */
    private static boolean isEmptyCell(Object cell) {
        return cell == null || cell.toString().isEmpty();
    }
}
package com.zhiwei.crawler; //package com.zhiwei.crawler;
//
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import org.junit.Test; //import org.junit.Test;
//
import com.zhiwei.parse.Dayu; //import com.zhiwei.parse.Dayu;
//
/**
 * Manual example: runs one Dayu keyword search and prints how many rows came back.
 */
public class DayuByWordExample {

    /**
     * Searches for the fixed keyword {@code "11"} (second argument {@code null})
     * and prints the size of the returned list to stdout.
     */
    @Test
    public void dayuByWordTest() {
        final List<Map<String,Object>> hits = Dayu.getDayuByWordData("11", null);
        int hitCount = hits.size();
        System.out.println(hitCount);
    }
}
...@@ -10,15 +10,18 @@ import com.zhiwei.parse.Maimai; ...@@ -10,15 +10,18 @@ import com.zhiwei.parse.Maimai;
public class MaimaiBywordExample { public class MaimaiBywordExample {
public static void main(String[] args) { public static void main(String[] args) {
String word = "美团 晋升"; String word = "美团|某团|MT|大众点评|新美大|美团点评";
String cookie = "sessionid=y87knknqrc3fi6xto2zv0s4kugmleepk; guid=GxsfBBgZGwQYGx4EGBkeVgcYGx4fHhwcGhgbVhwZBB0ZHwVDWEtMS3kKGhobBB0THhkEGgQTHAVPR0VYQmkKA0VBSU9tCk9BQ0YKBmZnfmJhAgocGQQdGR8FXkNhSE99T0ZaWmsKAx4cfWV9ChEZBBwKfmQKWV1FTkRDfQIKGgQfBUtGRkNQRWc=; seid=s1539933372113; token=\"ZTjnEij9jsL4ZCdnKF2CaUAwcJHgcem/zHvAbXp3MXdY+uSPva8scjbe2zHl2gE98CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiSFVMLVhKb2g5TkJGNHRJanljUW5Qa1V5IiwiX2V4cGlyZSI6MTU0MDAxOTc5MTUwNSwiX21heEFnZSI6ODY0MDAwMDB9; session.sig=dJmy52LHX-stqroAbm66u2zJaZA"; String cookie = "guid=GxsfBBgZGwQYGx4EGBkeVhsfGB4aHBpWHBkEHRkfBUNYS0xLeQoSEwQSHR8ZBBoEGx0FT0dFWEJpCgNFQUlPbQpPQUNGCgZmZ35iYQIKHBkEHRkfBV5DYUhPfU9GWlprCgMeHH1lfQoRGQQcCn5kClldRU5EQ30CChoEHwVLRkZDUEVn; token=\"7IGuqjEwgJ2gXX5PZ0UYSxvn81Aws6v5OFrwpSErsbctlSd1e/7+AzYEMMMeeFJJ8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; _buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiOGtDSnF6VG5QcFk0R3ZmVFB4MThIMW1ZIiwiX2V4cGlyZSI6MTU0ODMwODU0MTMyNCwiX21heEFnZSI6ODY0MDAwMDB9; session.sig=cnQ0i1LwYxhjO3_BvQ4Coh0f9PQ";
String time = "2018-10-15 00:00:00"; String time = "2019-01-17 00:00:00";
String[] words = word.split("\\|"); String[] words = word.split("\\|");
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(String w : words) { for(String w : words) {
List<Map<String,Object>> c = Maimai.getData(w, cookie, time, null); //实名动态
// List<Map<String,Object>> c = Maimai.getDataByNoName(w, cookie, time, null); // List<Map<String,Object>> c = Maimai.getData(w, cookie, time, null);
bodyList.addAll(c); //职言交流
List<Map<String,Object>> c2 = Maimai.getDataByNoName(w, cookie, time, null);
// bodyList.addAll(c);
bodyList.addAll(c2);
} }
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("time"); headList.add("time");
...@@ -29,7 +32,7 @@ public class MaimaiBywordExample { ...@@ -29,7 +32,7 @@ public class MaimaiBywordExample {
headList.add("comment_count"); headList.add("comment_count");
headList.add("spreads"); headList.add("spreads");
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D:\\crawlerdata\\自媒体\\脉脉关键词采集-美团 晋升-1015.xlsx", "脉脉关键词", headList, bodyList); poi.exportExcel("D:\\crawlerdata\\自媒体\\脉脉关键词采集-美团-0123.xlsx", "脉脉关键词", headList, bodyList);
} }
} }
...@@ -13,7 +13,7 @@ public class PearVideoByWordExample { ...@@ -13,7 +13,7 @@ public class PearVideoByWordExample {
@Test @Test
public void pearVideoByWordTest() { public void pearVideoByWordTest() {
String word = "美食"; String word = "大宝 甲醛";
List<Map<String,Object>> bodyList = PearVideo.getPearVideoData(word,null); List<Map<String,Object>> bodyList = PearVideo.getPearVideoData(word,null);
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
......
...@@ -6,6 +6,8 @@ import java.util.Map; ...@@ -6,6 +6,8 @@ import java.util.Map;
import org.junit.Test; import org.junit.Test;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKB; import com.zhiwei.parse.QQKB;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -18,7 +20,8 @@ public class QQKBCommentExample { ...@@ -18,7 +20,8 @@ public class QQKBCommentExample {
String url = "https://kuaibao.qq.com/s/20181122A11WQB00"; String url = "https://kuaibao.qq.com/s/20181122A11WQB00";
//https://kuaibao.qq.com/s/20180423A1PI7400?refer=kb_news //https://kuaibao.qq.com/s/20180423A1PI7400?refer=kb_news
// https://kuaibao.qq.com/s/20180423A0L60800?refer=kb_news // https://kuaibao.qq.com/s/20180423A0L60800?refer=kb_news
ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
GroupType.PROVIDER);
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
Map<String,Object> map = poi.importExcel("D://crawlerdata//自媒体/快报评论采集.xlsx", 0); Map<String,Object> map = poi.importExcel("D://crawlerdata//自媒体/快报评论采集.xlsx", 0);
List<Map<String,Object>> list = (List<Map<String,Object>>)map.get("body"); List<Map<String,Object>> list = (List<Map<String,Object>>)map.get("body");
...@@ -40,7 +43,7 @@ public class QQKBCommentExample { ...@@ -40,7 +43,7 @@ public class QQKBCommentExample {
headList.add("time"); //时间 headList.add("time"); //时间
headList.add("content"); //内容 headList.add("content"); //内容
System.out.println(bodyList.size()); System.out.println(bodyList.size());
poi.exportExcel("D:\\crawlerdata\\自媒体\\快报评论采集-zhj.xlsx", "sada", headList, bodyList); poi.exportExcel("D:\\crawlerdata\\自媒体\\快报评论采集.xlsx", "sada", headList, bodyList);
} }
......
package com.zhiwei.crawler; package com.zhiwei.crawler;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.junit.Test; import org.junit.Test;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Souhu; import com.zhiwei.parse.Souhu;
import com.zhiwei.tools.tools.ZhiWeiTools;
public class SouhuCommentCountExample { public class SouhuCommentCountExample {
@Test @Test
public void souhuCommentCountTest() { public void souhuCommentCountTest() {
String url = "http://www.sohu.com/a/281414426_133392"; ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
GroupType.PROVIDER);
int i = Souhu.getSouhuCommentCount(url,null); PoiExcelUtil poi = PoiExcelUtil.getInstance();
System.out.println(i); Map<String,Object> map = poi.importExcel("D://crawlerdata//自媒体//搜狐评论采集.xlsx", 0);
List<Map<String,Object>> list = (List<Map<String,Object>>)map.get("body");
List<String> headList = (List<String>) map.get("head");
for(Map<String,Object> map1 : list) {
String url = "";
try {
url = map1.get("url")+"";
System.out.println(url);
int i = Souhu.getSouhuCommentCount(url,ProxyFactory.getNatProxy());
map1.put("count", i);
System.out.println(map1.toString());
} catch (Exception e) {
System.out.println(url);
e.printStackTrace();
continue;
}
}
headList.add("count");
poi.exportExcel("D://crawlerdata//自媒体//搜狐评论采集.xlsx", "sheet2", headList, list);
} }
......
...@@ -6,6 +6,8 @@ import java.util.Map; ...@@ -6,6 +6,8 @@ import java.util.Map;
import org.junit.Test; import org.junit.Test;
import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.Fenghuang; import com.zhiwei.parse.Fenghuang;
import com.zhiwei.parse.Souhu; import com.zhiwei.parse.Souhu;
...@@ -16,7 +18,8 @@ public class SouhuCommentExample { ...@@ -16,7 +18,8 @@ public class SouhuCommentExample {
@Test @Test
public void souhuCommentTest() { public void souhuCommentTest() {
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
GroupType.PROVIDER);
Map<String,Object> map = poi.importExcel("D://crawlerdata//自媒体//搜狐评论采集.xlsx", 0); Map<String,Object> map = poi.importExcel("D://crawlerdata//自媒体//搜狐评论采集.xlsx", 0);
List<Map<String,Object>> list = (List<Map<String,Object>>)map.get("body"); List<Map<String,Object>> list = (List<Map<String,Object>>)map.get("body");
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
...@@ -30,7 +33,7 @@ public class SouhuCommentExample { ...@@ -30,7 +33,7 @@ public class SouhuCommentExample {
if(dataList.size() <= 0) { if(dataList.size() <= 0) {
urlList.add(url); urlList.add(url);
} }
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(100);
if(dataList != null) { if(dataList != null) {
bodyList.addAll(dataList); bodyList.addAll(dataList);
} }
......
package com.zhiwei.hsitory; //package com.zhiwei.hsitory;
//
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.HashMap; //import java.util.HashMap;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import org.testng.annotations.Test; //import org.testng.annotations.Test;
//
import com.zhiwei.bean.HistortyBean; //import com.zhiwei.bean.HistortyBean;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.parse.QQKandian; //import com.zhiwei.parse.QQKandian;
//
public class QQkandianHistoryExample { //public class QQkandianHistoryExample {
@Test // @Test
public void f() { // public void f() {
String uid = "2661642386"; // String uid = "2661642386";
//
QQKandian qqKandian = new QQKandian(); // QQKandian qqKandian = new QQKandian();
List<HistortyBean> dataList = qqKandian.getHistoryData(uid, null); // List<HistortyBean> dataList = qqKandian.getHistoryData(uid, null);
PoiExcelUtil poi = PoiExcelUtil.getInstance(); // PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); // List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(HistortyBean h : dataList) { // for(HistortyBean h : dataList) {
Map<String, Object> map = new HashMap<String,Object>(); // Map<String, Object> map = new HashMap<String,Object>();
map.put("标题", h.getTitle()); // map.put("标题", h.getTitle());
map.put("时间", h.getTime()); // map.put("时间", h.getTime());
map.put("来源", h.getSource()); // map.put("来源", h.getSource());
map.put("正文", h.getContent()); // map.put("正文", h.getContent());
map.put("链接", h.getUrl()); // map.put("链接", h.getUrl());
bodyList.add(map); // bodyList.add(map);
} // }
List<String> headList = new ArrayList<String>(); // List<String> headList = new ArrayList<String>();
headList.add("标题"); // headList.add("标题");
headList.add("来源"); // headList.add("来源");
headList.add("链接"); // headList.add("链接");
headList.add("正文"); // headList.add("正文");
headList.add("时间"); // headList.add("时间");
poi.exportExcel("D:\\crawlerdata\\自媒体\\qq看点-数据-2661642386.xlsx", "数据", headList, bodyList); // poi.exportExcel("D:\\crawlerdata\\自媒体\\qq看点-数据-2661642386.xlsx", "数据", headList, bodyList);
//
//
} // }
} //}
//package com.zhiwei.keyword; package com.zhiwei.keyword;
//
//import java.util.ArrayList; import java.util.ArrayList;
//import java.util.List; import java.util.List;
//import java.util.Map; import java.util.Map;
//
//import org.testng.annotations.Test; import org.testng.annotations.Test;
//
//import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Gftai; import com.zhiwei.parse.Gftai;
//
//public class GftaiTest { public class GftaiTest {
// @Test @Test
// public void f() { public void f() {
// PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
// String words = "民宿|短租|住宿|途家|爱彼迎|小猪短租|榛果民宿|Airbnb"; String words = "美团 催收|美团 借款|美团 还钱|三快 借钱|三快 生活费|三快 借款|美团 征信";
// String[] ws = words.split("\\|"); String[] ws = words.split("\\|");
// List<Map<String,Object>> bodyList = new ArrayList<>(); List<Map<String,Object>> bodyList = new ArrayList<>();
// for(String word : ws) { for(String word : ws) {
// List<Map<String,Object>> list = Gftai.getData(word, null); List<Map<String,Object>> list = Gftai.getData(word, null);
// bodyList.addAll(list); bodyList.addAll(list);
// System.out.println(word + " --------- " + bodyList.size()); System.out.println(word + " --------- " + bodyList.size());
// } }
// List<String> headList = new ArrayList<>(); List<String> headList = new ArrayList<>();
// headList.add("title"); headList.add("title");
// headList.add("time"); headList.add("time");
// headList.add("content"); headList.add("content");
// headList.add("source"); headList.add("source");
// headList.add("url"); headList.add("url");
//
// poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\国富泰信用.xlsx", "数据", headList, bodyList); poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\国富泰信用-美团-2.xlsx", "数据", headList, bodyList);
// } }
//} }
//package com.zhiwei.keyword; package com.zhiwei.keyword;
//
//import java.util.ArrayList; import java.util.ArrayList;
//import java.util.List; import java.util.List;
//import java.util.Map; import java.util.Map;
//
//import org.testng.annotations.Test; import org.testng.annotations.Test;
//
//import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Gftai; import com.zhiwei.parse.Gftai;
//import com.zhiwei.parse.KuaiTousu; import com.zhiwei.parse.KuaiTousu;
//
//public class KuaiTousuTest { public class KuaiTousuTest {
// @Test @Test
// public void f() { public void f() {
// PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
// String words = "民宿|短租|住宿|途家|爱彼迎|小猪短租|榛果民宿|Airbnb"; String words = "美团 催收|美团 借款|美团 还钱|三快 借钱|三快 生活费|三快 借款|美团 征信";
// String[] ws = words.split("\\|"); String[] ws = words.split("\\|");
// List<Map<String,Object>> bodyList = new ArrayList<>(); List<Map<String,Object>> bodyList = new ArrayList<>();
// for(String word : ws) { for(String word : ws) {
// List<Map<String,Object>> list = KuaiTousu.getData(word, null); List<Map<String,Object>> list = KuaiTousu.getData(word, null);
// bodyList.addAll(list); bodyList.addAll(list);
// System.out.println(word + " --------- " + bodyList.size()); System.out.println(word + " --------- " + bodyList.size());
// } }
// List<String> headList = new ArrayList<>(); List<String> headList = new ArrayList<>();
// headList.add("title"); headList.add("title");
// headList.add("time"); headList.add("time");
// headList.add("content"); headList.add("content");
// headList.add("source"); headList.add("source");
// headList.add("url"); headList.add("url");
//
// poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\新浪广东快投诉.xlsx", "数据", headList, bodyList); poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\新浪广东快投诉-美团-2.xlsx", "数据", headList, bodyList);
//
//
//
//
// } }
//} }
//package com.zhiwei.keyword; package com.zhiwei.keyword;
//
//import java.util.ArrayList; import java.util.ArrayList;
//import java.util.List; import java.util.List;
//import java.util.Map; import java.util.Map;
//
//import org.testng.annotations.Test; import org.testng.annotations.Test;
//
//import com.zhiwei.excelpoi.excel.PoiExcelUtil; import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.KuaiTousu; import com.zhiwei.parse.KuaiTousu;
//import com.zhiwei.parse.SinaTousu; import com.zhiwei.parse.SinaTousu;
//
//public class SinaTousuTest { public class SinaTousuTest {
//
// @Test @Test
// public void getSinaTousuData() { public void getSinaTousuData() {
// PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
// String words = "民宿|短租|住宿|途家|爱彼迎|小猪短租|榛果民宿|Airbnb"; String words = "美团 催收|美团 借款|美团 还钱|三快 借钱|三快 生活费|三快 借款|美团 征信";
// String[] ws = words.split("\\|"); String[] ws = words.split("\\|");
// List<Map<String,Object>> bodyList = new ArrayList<>(); List<Map<String,Object>> bodyList = new ArrayList<>();
// for(String word : ws) { for(String word : ws) {
// List<Map<String,Object>> list = SinaTousu.getSinaTousuData(word, null, "2018-01-01 00:00:00"); List<Map<String,Object>> list = SinaTousu.getSinaTousuData(word, null, "2018-07-01 00:00:00");
// bodyList.addAll(list); bodyList.addAll(list);
// System.out.println(word + " --------- " + bodyList.size()); System.out.println(word + " --------- " + bodyList.size());
// } }
// List<String> headList = new ArrayList<>(); List<String> headList = new ArrayList<>();
// headList.add("title"); headList.add("title");
// headList.add("time"); headList.add("time");
// headList.add("content"); headList.add("content");
// headList.add("source"); headList.add("source");
// headList.add("url"); headList.add("url");
//
// poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\黑猫投诉.xlsx", "数据", headList, bodyList); poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\黑猫投诉-美团-2.xlsx", "数据", headList, bodyList);
//
//
//
// } }
//} }
//package com.zhiwei.keyword;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Youku;
//
//public class YoukuKeyWordTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
// GroupType.PROVIDER);
// String word = "帮宝适 二噁英," +
// "帮宝适 二恶英," +
// "帮宝适 有毒," +
// "帮宝适 剧毒," +
// "帮宝适 致癌," +
// "宝洁 二噁英," +
// "宝洁 二恶英," +
// "宝洁 有毒," +
// "宝洁 剧毒," +
// "宝洁 致癌," +
// "纸尿裤 二噁英," +
// "纸尿裤 二恶英," +
// "纸尿裤 有毒," +
// "纸尿裤 剧毒," +
// "纸尿裤 致癌";
// List<Map<String,Object>> bodyList = new ArrayList<>();
// String[] words = word.split(",");
// for(String w : words) {
// System.out.println(w);
// bodyList.addAll(Youku.getDataList(w));
// }
// List<String> headList = new ArrayList<>();
// headList.add("title");
// headList.add("time");
// headList.add("url");
// headList.add("uper");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\优酷数据-txh-0121.xlsx", "数据", headList, bodyList);
//
// }
//}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment