Commit e77ce092 by yangchen

添加代理爬取 搜狐号增加采集来源

parent 89439323
package com.zhiwei.httpclient; package com.zhiwei.httpclient;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import com.alibaba.fastjson.JSONObject;
import com.sun.net.httpserver.Headers;
public class HeadGet { public class HeadGet {
/** /**
...@@ -788,12 +779,12 @@ public class HeadGet { ...@@ -788,12 +779,12 @@ public class HeadGet {
} }
public static void main(String[] args) { public static void main(String[] args) {
String url = "https://news.baidu.com/sn/api/homesubcribe?forum_id=b_1560023960896882&page=1"; String url = "https://a.jiemian.com/index.php?m=user&a=centerArticle&id=100032140&page=1";
String cookie = "BAIDUID=4DB3FA13736131DBC2094C010E6EBCB0:FG=1; BIDUPSID=250CCE0442BEBCB3568D8EC515953434; PSTM=1522304033; BDUSS=zJEdDI0WFBCUE05M3BVTlhSbnozYkpUflZveW9aaGZ3ODBVTC1WRzVwaUxkZlphQVFBQUFBJCQAAAAAAAAAAAEAAADTCNY9Y3k5MDkyMDk5NTEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIvozlqL6M5ac; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BD_CK_SAM=1; BDSVRTM=98; BDSFRCVID=9g8sJeC62rdtQM7AdMI6hrB7leHy_qbTH6aoIgcaD_KjQB22bioFEG0PDU8g0KubMyQBogKKKgOTHIjP; H_BDCLCKID_SF=tJPOoD-bJI83fP36qRj8hPCsqxby26nQB2ceaJ5nJDoAoqOVWR5N-T-_-f7H3jbQ5RRb3CnvQpP-HJ7TyfCWM5_PhMbhhUcHKaufKl0MLpbYbb0xynoD-lFzLfnMBMni52OnapT_LIFaMII6D5DaejPShMr2aK6KaI58LRu8Kb7VbIOgDbbkbfJBD4QqhR5na26b3R3v2PoIMnRvhbQDD4t7yajK2-bmaN6A3lQ8aI3oD45HDTopQT8rKqAOK5OibCrpaC_Eab3vOpvTXpO1ytIreGLjt5LHJnFOVbD8bRrEDnukhtu_-P4DePjK-nJZ5m7mXp0b04TPjljgqj7jKU_mBpJbW60qXKb7BPF5BDOkbC86D6K5jjjM-f8X-PcKaD70LPI8Kb7VbprDXbbkbfJBDxc4-U_jB26b3tbe2PoIMnRNjl5tQU47yajK2-tfK64qXl5CyPOJftjT3-opQT8rQb_OK5Oib4jZ-fo9ab3vOpvTXpO1ytIreGKJtTF8fnuOV-35b5rtHJrwMtJo5DCHbq8sq4-O-2Q-5KL--JbMVqC6LtOYyjKJK4Kf2PQ7MGOD3fbdJJjoOJ3n-fOryPIuLGKH5tcy3eTxoUJgQCnJhhvG-xcB0fDebPRiB-b9QgbABftLK-oj-DLmD60h3e; PSINO=5; locale=zh; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; FP_UID=f9e064a71741aa2e821e58ca2b30c3da; H_PS_PSSID=1433_21104_20882_20927; userId=1524191310247; Hm_lvt_348091a80fe10e213d94a7de762bbd44=1524191312; Hm_lpvt_348091a80fe10e213d94a7de762bbd44=1524191395"; String cookie = "pgv_pvi=1395917824; pgv_si=s4065829888";
Map<String,String> headerMap = HeadGet.getBaijiaAccount2HeaderMap(null); Map<String,String> headerMap = HeadGet.getAiqiyiBywordHeaderMap(cookie);
// Map<String,Object> paramMap = HeadGet.getTxNewsAccountpageParamMap("1979"); // Map<String,Object> paramMap = HeadGet.getTxNewsAccountpageParamMap("1979");
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,null, headerMap);
// System.out.println(result); System.out.println(result);
System.out.println(result.length()); System.out.println(result.length());
} }
......
package com.zhiwei.httpclient; package com.zhiwei.httpclient;
import java.io.IOException; import java.io.IOException;
import java.net.Proxy;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -19,9 +20,9 @@ public class HttpClient { ...@@ -19,9 +20,9 @@ public class HttpClient {
* @return * @return
* @throws IOException * @throws IOException
*/ */
public static String executeHttpRequestGet(String url,Map<String, String> headerMap) { public static String executeHttpRequestGet(String url,Proxy proxy,Map<String, String> headerMap) {
try { try {
String result = HttpClientTemplateOK.get(url, null, headerMap); String result = HttpClientTemplateOK.get(url, proxy, headerMap);
return result; return result;
} catch (Exception e) { } catch (Exception e) {
logger.error("httpClient 获取数据出现问题:{}", e.getMessage()); logger.error("httpClient 获取数据出现问题:{}", e.getMessage());
...@@ -30,9 +31,9 @@ public class HttpClient { ...@@ -30,9 +31,9 @@ public class HttpClient {
} }
public static String executeHttpRequestPost(String url,Map<String, String> headerMap,Map<String, Object> paramMap) { public static String executeHttpRequestPost(String url,Proxy proxy,Map<String, String> headerMap,Map<String, Object> paramMap) {
try { try {
String result = HttpClientTemplateOK.post(url, null, headerMap, paramMap); String result = HttpClientTemplateOK.post(url, proxy, headerMap, paramMap);
return result; return result;
} catch (Exception e) { } catch (Exception e) {
logger.error("httpClient 获取数据出现问题:{}", e.getMessage()); logger.error("httpClient 获取数据出现问题:{}", e.getMessage());
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
...@@ -24,18 +25,18 @@ public class Aiqiyi { ...@@ -24,18 +25,18 @@ public class Aiqiyi {
* @param word * @param word
* @return * @return
*/ */
public static List<Map<String,Object>> getAiqiyiByWordData(String word) { public static List<Map<String,Object>> getAiqiyiByWordData(String word,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getAiqiyiBywordHeaderMap(null); Map<String,String> headerMap = HeadGet.getAiqiyiBywordHeaderMap(null);
Map<String,String> headerMap1 = HeadGet.getAiqiyiHeaderMap(null); Map<String,String> headerMap1 = HeadGet.getAiqiyiHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
for(int i = 1;i <= 20;i++) { for(int i = 1;i <= 20;i++) {
String url = "http://so.iqiyi.com/so/q_"+URLEncoder.encode(word, "UTF-8")+"_ctg_%E7%94%9F%E6%B4%BB_t_0_page_"+i+"_p_1_qc_0_rd__site__m_11_bitrate_?af=true"; String url = "http://so.iqiyi.com/so/q_"+URLEncoder.encode(word, "UTF-8")+"_ctg_%E7%94%9F%E6%B4%BB_t_0_page_"+i+"_p_1_qc_0_rd__site__m_11_bitrate_?af=true";
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
List<String> urlList = aiqiyiByWordAnalysis.getAiqiyiUrlList(result); List<String> urlList = aiqiyiByWordAnalysis.getAiqiyiUrlList(result);
for(String newurl : urlList) { for(String newurl : urlList) {
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
Map<String,Object> map = aiqiyiByWordAnalysis.getAiqiyiData(newurl, headerMap1); Map<String,Object> map = aiqiyiByWordAnalysis.getAiqiyiData(newurl, headerMap1,proxy);
if(map != null) { if(map != null) {
dataList.add(map); dataList.add(map);
} }
...@@ -45,7 +46,8 @@ public class Aiqiyi { ...@@ -45,7 +46,8 @@ public class Aiqiyi {
return dataList; return dataList;
} catch (UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
e.printStackTrace(); e.printStackTrace();
return null; logger.info("采集数据出错:{}",e.getMessage());
return dataList;
} }
} }
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -13,7 +13,6 @@ import com.alibaba.fastjson.JSONObject; ...@@ -13,7 +13,6 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.BaijiaAccountAnalysis; import com.zhiwei.parse.analysis.BaijiaAccountAnalysis;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class Baijia { public class Baijia {
...@@ -27,13 +26,13 @@ public class Baijia { ...@@ -27,13 +26,13 @@ public class Baijia {
* @param startTime * @param startTime
* @return * @return
*/ */
public static List<Map<String,Object>> getBaijiaAccount2Data(String app_id,String startTime) { public static List<Map<String,Object>> getBaijiaAccount2Data(String app_id,String startTime,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
Map<String,String> headerMap = HeadGet.getBaijiaAccount2HeaderMap(null); Map<String,String> headerMap = HeadGet.getBaijiaAccount2HeaderMap(null);
String url = "https://news.baidu.com/sn/api/homesubcribe?forum_id="+app_id; String url = "https://news.baidu.com/sn/api/homesubcribe?forum_id="+app_id;
boolean f = true; boolean f = true;
while(f) { while(f) {
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("data").getJSONArray("news"); JSONArray jsonArry = json.getJSONObject("data").getJSONArray("news");
for(int i = 0;i < jsonArry.size();i++) { for(int i = 0;i < jsonArry.size();i++) {
...@@ -66,7 +65,7 @@ public class Baijia { ...@@ -66,7 +65,7 @@ public class Baijia {
* @param startTime * @param startTime
* @return * @return
*/ */
public static List<Map<String,Object>> getBaijiaAccountData(String app_id,String startTime) { public static List<Map<String,Object>> getBaijiaAccountData(String app_id,String startTime,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int i = 0; int i = 0;
Map<String,String> headerMap = HeadGet.getBaijiaAccountHeaderMap(null); Map<String,String> headerMap = HeadGet.getBaijiaAccountHeaderMap(null);
...@@ -74,7 +73,7 @@ public class Baijia { ...@@ -74,7 +73,7 @@ public class Baijia {
while(true) { while(true) {
try { try {
String url = "https://baijia.baidu.com/writerlistarticle?ajax=json&app_id="+app_id+"&_limit=20&_skip="; String url = "https://baijia.baidu.com/writerlistarticle?ajax=json&app_id="+app_id+"&_limit=20&_skip=";
String result = HttpClient.executeHttpRequestGet(url + i, headerMap); String result = HttpClient.executeHttpRequestGet(url + i,proxy, headerMap);
List<Map<String,Object>> list = baijiaAccountAnalysis.getBaijiaAccountData(result, startTime); List<Map<String,Object>> list = baijiaAccountAnalysis.getBaijiaAccountData(result, startTime);
if(list == null || list.size() < 1){ if(list == null || list.size() < 1){
break; break;
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
...@@ -29,14 +30,14 @@ public class Dayu { ...@@ -29,14 +30,14 @@ public class Dayu {
* @param mid * @param mid
* @return * @return
*/ */
public static List<Map<String,Object>> getDayuAccountData(String mid,String name,String startTime) { public static List<Map<String,Object>> getDayuAccountData(String mid,String name,String startTime,Proxy proxy) {
int i = 1; int i = 1;
Map<String,String> headerMap = HeadGet.getDayuAccountHeaderMap(null); Map<String,String> headerMap = HeadGet.getDayuAccountHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
while(true) { while(true) {
String url = "http://ff.dayu.com/contents/author/"+mid+"?biz_id=1002&_size=50&_page="+i+"&_order_type=published_at&status=1&_fetch=1"; String url = "http://ff.dayu.com/contents/author/"+mid+"?biz_id=1002&_size=50&_page="+i+"&_order_type=published_at&status=1&_fetch=1";
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
System.out.println(url); System.out.println(url);
List<Map<String,Object>> lists = dayuAccountAnalysis.getDayuAccountData(result,name,startTime); List<Map<String,Object>> lists = dayuAccountAnalysis.getDayuAccountData(result,name,startTime);
if(lists == null) { if(lists == null) {
...@@ -65,22 +66,22 @@ public class Dayu { ...@@ -65,22 +66,22 @@ public class Dayu {
* @param articleId * @param articleId
* @return * @return
*/ */
public static List<Map<String,Object>> getDayuCommentData(String articleId) { public static List<Map<String,Object>> getDayuCommentData(String articleId,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null);
String url = "http://m.uczzd.cn/iflow/api/v2/cmt/article/"+articleId+"/comments/byhot?sn=0&count=10&ts="+new Date().getTime(); String url = "http://m.uczzd.cn/iflow/api/v2/cmt/article/"+articleId+"/comments/byhot?sn=0&count=10&ts="+new Date().getTime();
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int i = 9991; int i = 9991;
try { try {
System.out.println(url); System.out.println(url);
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
List<Map<String,Object>> lists = dayuCommentAnalysis.getDayuCommentData(result,articleId); List<Map<String,Object>> lists = dayuCommentAnalysis.getDayuCommentData(result,articleId,proxy);
dataList.addAll(lists); dataList.addAll(lists);
while(true) { while(true) {
lists.clear(); lists.clear();
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(3000);
System.out.println(url+"&hotValue="+i); System.out.println(url+"&hotValue="+i);
result = HttpClient.executeHttpRequestGet(url+"&hotValue="+i, headerMap); result = HttpClient.executeHttpRequestGet(url+"&hotValue="+i,proxy, headerMap);
lists = dayuCommentAnalysis.getDayuCommentData(result,articleId); lists = dayuCommentAnalysis.getDayuCommentData(result,articleId,proxy);
if(lists == null || lists.size() < 1) { if(lists == null || lists.size() < 1) {
break; break;
} }
...@@ -103,10 +104,10 @@ public class Dayu { ...@@ -103,10 +104,10 @@ public class Dayu {
* @param articleId * @param articleId
* @return * @return
*/ */
public static int getDayuCommentCount(String articleId) { public static int getDayuCommentCount(String articleId,Proxy proxy) {
String url = "http://m.uczzd.cn/iflow/api/v2/cmt/article/"+articleId+"/comments/byhot"; String url = "http://m.uczzd.cn/iflow/api/v2/cmt/article/"+articleId+"/comments/byhot";
Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
return json.getJSONObject("data").getInteger("comment_cnt"); return json.getJSONObject("data").getInteger("comment_cnt");
} }
...@@ -117,7 +118,7 @@ public class Dayu { ...@@ -117,7 +118,7 @@ public class Dayu {
* @param word * @param word
* @return * @return
*/ */
public static List<Map<String,Object>> getDayuByWordData(String word) { public static List<Map<String,Object>> getDayuByWordData(String word,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getDayuByWordHeaderMap(null); Map<String,String> headerMap = HeadGet.getDayuByWordHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int i = 1; int i = 1;
...@@ -125,8 +126,8 @@ public class Dayu { ...@@ -125,8 +126,8 @@ public class Dayu {
while(true) { while(true) {
String url = "http://zzd.sm.cn/iflow/api/v1/article/fsearch?page="+i+"&size=20&sid=&q="+URLEncoder.encode(word, "UTF-8")+"&scene=0"; String url = "http://zzd.sm.cn/iflow/api/v1/article/fsearch?page="+i+"&size=20&sid=&q="+URLEncoder.encode(word, "UTF-8")+"&scene=0";
System.out.println(url); System.out.println(url);
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
List<Map<String,Object>> lists = dayuByWordAnalysis.getDayuByWordData(result); List<Map<String,Object>> lists = dayuByWordAnalysis.getDayuByWordData(result,proxy);
if(lists == null || lists.size() < 1) { if(lists == null || lists.size() < 1) {
break; break;
} }
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.io.UnsupportedEncodingException; import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
...@@ -29,7 +29,7 @@ public class Fenghuang { ...@@ -29,7 +29,7 @@ public class Fenghuang {
* @param startTime 可不传 格式(2017-12-09 17:53:02) * @param startTime 可不传 格式(2017-12-09 17:53:02)
* @return * @return
*/ */
public static List<Map<String,Object>> getFenghuangAccountData(String id,String startTime) { public static List<Map<String,Object>> getFenghuangAccountData(String id,String startTime,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int i = 1; int i = 1;
boolean f = true; boolean f = true;
...@@ -38,7 +38,7 @@ public class Fenghuang { ...@@ -38,7 +38,7 @@ public class Fenghuang {
for(int j = 0;j< 4;j++){ for(int j = 0;j< 4;j++){
f = true; f = true;
String url = "http://api.3g.ifeng.com/api_wemedia_index?followid=weMedia_"+id+"&page="+i+"&pagesize=20&tag=article"; String url = "http://api.3g.ifeng.com/api_wemedia_index?followid=weMedia_"+id+"&page="+i+"&pagesize=20&tag=article";
List<Map<String,Object>> list = fenghuangAccountAnalysis.getArticleData(url, startTime); List<Map<String,Object>> list = fenghuangAccountAnalysis.getArticleData(url, startTime,proxy);
if(list != null && list.size() > 0) { if(list != null && list.size() > 0) {
dataList.addAll(list); dataList.addAll(list);
System.out.println("====================采集第"+i+"页===共获取数据=="+dataList.size()); System.out.println("====================采集第"+i+"页===共获取数据=="+dataList.size());
...@@ -63,8 +63,8 @@ public class Fenghuang { ...@@ -63,8 +63,8 @@ public class Fenghuang {
* @param docUrl * @param docUrl
* @return * @return
*/ */
public static List<Map<String,Object>> getFenghuangCommentData(String url) { public static List<Map<String,Object>> getFenghuangCommentData(String url,Proxy proxy) {
url = fenghuangCommentAnalysis.getdocUrl(url); url = fenghuangCommentAnalysis.getdocUrl(url,proxy);
if(url == null) { if(url == null) {
return null; return null;
} }
...@@ -73,7 +73,7 @@ public class Fenghuang { ...@@ -73,7 +73,7 @@ public class Fenghuang {
while(true) { while(true) {
System.out.println(url+i); System.out.println(url+i);
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
List<Map<String,Object>> list = fenghuangCommentAnalysis.getData(url+i); List<Map<String,Object>> list = fenghuangCommentAnalysis.getData(url+i,proxy);
if(list == null || list.size() < 1) { if(list == null || list.size() < 1) {
break; break;
} }
...@@ -89,12 +89,12 @@ public class Fenghuang { ...@@ -89,12 +89,12 @@ public class Fenghuang {
* @param url * @param url
* @return * @return
*/ */
public static Map<String,Object> getFenghuangCommentCount(String url) { public static Map<String,Object> getFenghuangCommentCount(String url,Proxy proxy) {
url = fenghuangCommentAnalysis.getdocUrl(url); url = fenghuangCommentAnalysis.getdocUrl(url,proxy);
if(url == null) { if(url == null) {
return null; return null;
} }
Map<String,Object> map = fenghuangCommentAnalysis.getFenghuangCommentCount(url); Map<String,Object> map = fenghuangCommentAnalysis.getFenghuangCommentCount(url,proxy);
return map; return map;
} }
...@@ -104,7 +104,7 @@ public class Fenghuang { ...@@ -104,7 +104,7 @@ public class Fenghuang {
* @param word * @param word
* @return * @return
*/ */
public static List<Map<String,Object>> getFenghuangByWord(String word) { public static List<Map<String,Object>> getFenghuangByWord(String word,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int i = 1; int i = 1;
try { try {
...@@ -112,7 +112,7 @@ public class Fenghuang { ...@@ -112,7 +112,7 @@ public class Fenghuang {
try { try {
String url = "http://search.ifeng.com/sofeng/search.action?q="+URLEncoder.encode(word, "UTF-8")+"&c=1&p="; String url = "http://search.ifeng.com/sofeng/search.action?q="+URLEncoder.encode(word, "UTF-8")+"&c=1&p=";
Map<String,String> headerMap = HeadGet.getFenghuangWordHeaderMap(null); Map<String,String> headerMap = HeadGet.getFenghuangWordHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url+i, headerMap); String result = HttpClient.executeHttpRequestGet(url+i,proxy, headerMap);
List<Map<String,Object>> lists = fenghuangByWordAnalysis.getFenghuangByWord(result); List<Map<String,Object>> lists = fenghuangByWordAnalysis.getFenghuangByWord(result);
if(lists == null || lists.size() < 1) { if(lists == null || lists.size() < 1) {
break; break;
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
...@@ -23,7 +24,7 @@ public class Meipai { ...@@ -23,7 +24,7 @@ public class Meipai {
* @param word * @param word
* @return * @return
*/ */
public static List<Map<String,Object>> getMeipaiByWordData(String word) { public static List<Map<String,Object>> getMeipaiByWordData(String word,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
String url = "http://www.meipai.com/search/mv?q="+URLEncoder.encode(word, "UTF-8"); String url = "http://www.meipai.com/search/mv?q="+URLEncoder.encode(word, "UTF-8");
...@@ -31,13 +32,13 @@ public class Meipai { ...@@ -31,13 +32,13 @@ public class Meipai {
int i = 1; int i = 1;
while(true) { while(true) {
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(5000);
String result = HttpClient.executeHttpRequestGet(url+"&page="+i, headerMap); String result = HttpClient.executeHttpRequestGet(url+"&page="+i,proxy, headerMap);
List<String> urlList = meipaiByWordAnalysis.getURl(result); List<String> urlList = meipaiByWordAnalysis.getURl(result);
if(urlList.size() < 1) { if(urlList.size() < 1) {
break; break;
} }
for(String newurl : urlList) { for(String newurl : urlList) {
Map<String,Object> map = meipaiByWordAnalysis.getMeipaiData(headerMap,newurl); Map<String,Object> map = meipaiByWordAnalysis.getMeipaiData(headerMap,newurl,proxy);
if(map != null) { if(map != null) {
dataList.add(map); dataList.add(map);
} }
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.net.Proxy;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet; import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
public class Miaopai { public class Miaopai {
private static Logger logger = LoggerFactory.getLogger(Miaopai.class);
/** /**
* *
...@@ -20,9 +18,9 @@ public class Miaopai { ...@@ -20,9 +18,9 @@ public class Miaopai {
* @param url * @param url
* @return * @return
*/ */
public static Map<String,Object> getMiaopaiDataByURL(String url) { public static Map<String,Object> getMiaopaiDataByURL(String url,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getMiaoPaiByURlHeaderMap(null); Map<String,String> headerMap = HeadGet.getMiaoPaiByURlHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
Map<String,Object> dataMap = new HashMap<String,Object>(); Map<String,Object> dataMap = new HashMap<String,Object>();
Document doc = Jsoup.parse(result); Document doc = Jsoup.parse(result);
String time = doc.select("div.personalData > p.personalDataT > span:nth-child(1)").text(); String time = doc.select("div.personalData > p.personalDataT > span:nth-child(1)").text();
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
...@@ -24,13 +25,13 @@ public class PearVideo { ...@@ -24,13 +25,13 @@ public class PearVideo {
* @param word * @param word
* @return * @return
*/ */
public static List<Map<String,Object>> getPearVideoData(String word) { public static List<Map<String,Object>> getPearVideoData(String word,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getPearVideoByWordHeaderMap(null); Map<String,String> headerMap = HeadGet.getPearVideoByWordHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
for(int i = 0; i <= 9000;i+=10) { for(int i = 0; i <= 9000;i+=10) {
String url = "http://www.pearvideo.com/search_loading.jsp?start="+i+"&k="+URLEncoder.encode(word, "UTF-8"); String url = "http://www.pearvideo.com/search_loading.jsp?start="+i+"&k="+URLEncoder.encode(word, "UTF-8");
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
List<Map<String,Object>> dataList1 = pearVideoByWordAnalysis.getPearVideoData(result); List<Map<String,Object>> dataList1 = pearVideoByWordAnalysis.getPearVideoData(result);
if(dataList1 != null && dataList1.size() > 0) { if(dataList1 != null && dataList1.size() > 0) {
dataList.addAll(dataList1); dataList.addAll(dataList1);
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -12,7 +13,6 @@ import com.zhiwei.httpclient.HeadGet; ...@@ -12,7 +13,6 @@ import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient; import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.QQKBAccountAnalysis; import com.zhiwei.parse.analysis.QQKBAccountAnalysis;
import com.zhiwei.parse.analysis.QQKBCommentAnalysis; import com.zhiwei.parse.analysis.QQKBCommentAnalysis;
import com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class QQKB { public class QQKB {
...@@ -27,7 +27,7 @@ public class QQKB { ...@@ -27,7 +27,7 @@ public class QQKB {
* @param cookie * @param cookie
* @return * @return
*/ */
public static List<Map<String,Object>> getQQAccountData(String child,String cookie) { public static List<Map<String,Object>> getQQAccountData(String child,String cookie,Proxy proxy) {
String url = "http://r.cnews.qq.com/getSubNewsIndex"; String url = "http://r.cnews.qq.com/getSubNewsIndex";
Map<String,String> headerMap = HeadGet.getQQAccountHeaderMap(cookie); Map<String,String> headerMap = HeadGet.getQQAccountHeaderMap(cookie);
Map<String,Object> paramMap = HeadGet.getQQAccountOneParamMap(child); Map<String,Object> paramMap = HeadGet.getQQAccountOneParamMap(child);
...@@ -36,7 +36,7 @@ public class QQKB { ...@@ -36,7 +36,7 @@ public class QQKB {
String result = ""; String result = "";
List<String> idsList = new ArrayList<String>(); List<String> idsList = new ArrayList<String>();
for(int i = 0;i < 3;i++) { for(int i = 0;i < 3;i++) {
result = HttpClient.executeHttpRequestPost(url, headerMap, paramMap); result = HttpClient.executeHttpRequestPost(url,proxy, headerMap, paramMap);
idsList = qqAccountAnalysis.getQQAllIds(result); idsList = qqAccountAnalysis.getQQAllIds(result);
if(idsList.size() > 1) { if(idsList.size() > 1) {
break; break;
...@@ -57,7 +57,7 @@ public class QQKB { ...@@ -57,7 +57,7 @@ public class QQKB {
ZhiWeiTools.sleep(7000); ZhiWeiTools.sleep(7000);
paramMap.clear(); paramMap.clear();
paramMap = HeadGet.getQQAccountOtherParamMap(ids); paramMap = HeadGet.getQQAccountOtherParamMap(ids);
result = HttpClient.executeHttpRequestPost(url, headerMap, paramMap); result = HttpClient.executeHttpRequestPost(url,proxy, headerMap, paramMap);
List<Map<String,Object>> list = qqAccountAnalysis.analysisQQAccountData(result); List<Map<String,Object>> list = qqAccountAnalysis.analysisQQAccountData(result);
ids = ""; ids = "";
i = 0; i = 0;
...@@ -79,7 +79,7 @@ public class QQKB { ...@@ -79,7 +79,7 @@ public class QQKB {
ZhiWeiTools.sleep(8000); ZhiWeiTools.sleep(8000);
paramMap.clear(); paramMap.clear();
paramMap = HeadGet.getQQAccountOtherParamMap(ids); paramMap = HeadGet.getQQAccountOtherParamMap(ids);
result = HttpClient.executeHttpRequestPost(url, headerMap, paramMap); result = HttpClient.executeHttpRequestPost(url,proxy, headerMap, paramMap);
List<Map<String,Object>> list = qqAccountAnalysis.analysisQQAccountData(result); List<Map<String,Object>> list = qqAccountAnalysis.analysisQQAccountData(result);
if(list != null) { if(list != null) {
dataList.addAll(list); dataList.addAll(list);
...@@ -102,9 +102,9 @@ public class QQKB { ...@@ -102,9 +102,9 @@ public class QQKB {
* @param article_id * @param article_id
* @return * @return
*/ */
public static List<Map<String,Object>> getQQKBCommentData(String url) { public static List<Map<String,Object>> getQQKBCommentData(String url,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
String comment_id = getCid(url); String comment_id = getCid(url,proxy);
String article_id = url.split("/")[4]; String article_id = url.split("/")[4];
Map<String,String> headerMap = HeadGet.getQQKBCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getQQKBCommentHeaderMap(null);
try { try {
...@@ -112,10 +112,9 @@ public class QQKB { ...@@ -112,10 +112,9 @@ public class QQKB {
int i = 1; int i = 1;
while(true) { while(true) {
String result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/getQQNewsComment", headerMap, paramMap); String result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/getQQNewsComment",proxy, headerMap, paramMap);
// System.out.println(result);
paramMap.clear(); paramMap.clear();
List<Map<String,Object>> lists = qqkbCommentAnalysis.getCommentData(result,null,comment_id, article_id); List<Map<String,Object>> lists = qqkbCommentAnalysis.getCommentData(result,null,comment_id, article_id,proxy);
if(lists == null || lists.size() < 1) { if(lists == null || lists.size() < 1) {
break; break;
} }
...@@ -138,15 +137,15 @@ public class QQKB { ...@@ -138,15 +137,15 @@ public class QQKB {
* @param url * @param url
* @return * @return
*/ */
private static String getCid(String url) { private static String getCid(String url,Proxy proxy) {
try { try {
if(url.contains("view.inews.qq.com")){ if(url.contains("view.inews.qq.com")){
String result = HttpClient.executeHttpRequestGet(url, null); String result = HttpClient.executeHttpRequestGet(url,proxy, null);
result = result.split("window.__initData = ")[1].split(";</script>")[0]; result = result.split("window.__initData = ")[1].split(";</script>")[0];
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
return json.getJSONObject("content").getString("cid"); return json.getJSONObject("content").getString("cid");
}else if(url.contains("kuaibao.qq.com")) { }else if(url.contains("kuaibao.qq.com")) {
String result = HttpClient.executeHttpRequestGet(url, null); String result = HttpClient.executeHttpRequestGet(url,proxy, null);
String cid = result.split("var commentId = ")[1].split(";")[0]; String cid = result.split("var commentId = ")[1].split(";")[0];
return cid.substring(1, cid.length()-1); return cid.substring(1, cid.length()-1);
} }
...@@ -164,13 +163,13 @@ public class QQKB { ...@@ -164,13 +163,13 @@ public class QQKB {
* @param article_id * @param article_id
* @return * @return
*/ */
public static int getCommentCount(String cookie,String url) { public static int getCommentCount(String cookie,String url,Proxy proxy) {
String comment_id = getCid(url); String comment_id = getCid(url,proxy);
String article_id = url.split("/")[4]; String article_id = url.split("/")[4];
Map<String,String> headerMap = HeadGet.getQQKBCommentHeaderMap(cookie); Map<String,String> headerMap = HeadGet.getQQKBCommentHeaderMap(cookie);
try { try {
Map<String,Object> paramMap = HeadGet.getQQKBCommentParamMap(comment_id, article_id); Map<String,Object> paramMap = HeadGet.getQQKBCommentParamMap(comment_id, article_id);
String result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/getQQNewsComment", headerMap, paramMap); String result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/getQQNewsComment",proxy, headerMap, paramMap);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
return json.getJSONObject("comments").getInteger("count"); return json.getJSONObject("comments").getInteger("count");
} catch (Exception e) { } catch (Exception e) {
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
...@@ -25,13 +26,13 @@ public class Soku { ...@@ -25,13 +26,13 @@ public class Soku {
* @param type * @param type
* @return * @return
*/ */
public static List<Map<String,Object>> getSoKuByWordData(String word,String type) { public static List<Map<String,Object>> getSoKuByWordData(String word,String type,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getSoKuByWordHeaderMap(null); Map<String,String> headerMap = HeadGet.getSoKuByWordHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
for(int i = 1; i < 14;i++) { for(int i = 1; i < 14;i++) {
String url = "http://www.soku.com/search_video_ajax/q_"+URLEncoder.encode(word, "UTF-8")+"_orderby_3_cateid_"+type+"_limitdate_365?site=14&_lg=20&page="; String url = "http://www.soku.com/search_video_ajax/q_"+URLEncoder.encode(word, "UTF-8")+"_orderby_3_cateid_"+type+"_limitdate_365?site=14&_lg=20&page=";
String result = HttpClient.executeHttpRequestGet(url+i, headerMap); String result = HttpClient.executeHttpRequestGet(url+i,proxy, headerMap);
List<Map<String,Object>> lists = soKuByWordAnalysis.getSoKuData(result); List<Map<String,Object>> lists = soKuByWordAnalysis.getSoKuData(result);
if(lists != null && lists.size() > 0) { if(lists != null && lists.size() > 0) {
dataList.addAll(lists); dataList.addAll(lists);
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -29,11 +32,11 @@ public class Souhu { ...@@ -29,11 +32,11 @@ public class Souhu {
* @param url * @param url
* @return * @return
*/ */
public static int getSouhuCommentCount(String url) { public static int getSouhuCommentCount(String url,Proxy proxy) {
String newurl = souhuCommentAnalysis.getSouhuURL(url); String newurl = souhuCommentAnalysis.getSouhuURL(url);
int i; int i;
try { try {
i = souhuCommentAnalysis.getSouhuCommentCount(newurl); i = souhuCommentAnalysis.getSouhuCommentCount(newurl,proxy);
return i; return i;
} catch (Exception e) { } catch (Exception e) {
logger.error("搜狐获取评论数出错了",e.getMessage()); logger.error("搜狐获取评论数出错了",e.getMessage());
...@@ -51,8 +54,10 @@ public class Souhu { ...@@ -51,8 +54,10 @@ public class Souhu {
* @param isCulling 是否采集精选 * @param isCulling 是否采集精选
* @return * @return
*/ */
public static List<Map<String,Object>> getSouHuAccountData(String xpt,String startTime,boolean isCulling) { public static List<Map<String,Object>> getSouHuAccountData(String xpt,String startTime,boolean isCulling,Proxy proxy) {
int i = 1; int i = 1;
String name = getName(xpt,proxy);
ZhiWeiTools.sleep(2000);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
Map<String,String> headerMap = HeadGet.getSouhuAccountHeaderMap(null); Map<String,String> headerMap = HeadGet.getSouhuAccountHeaderMap(null);
boolean f = true; boolean f = true;
...@@ -64,7 +69,7 @@ public class Souhu { ...@@ -64,7 +69,7 @@ public class Souhu {
url = url + "&categoryId=-1"; url = url + "&categoryId=-1";
} }
try { try {
result = HttpClient.executeHttpRequestGet(url,headerMap); result = HttpClient.executeHttpRequestGet(url,proxy,headerMap);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
...@@ -72,7 +77,7 @@ public class Souhu { ...@@ -72,7 +77,7 @@ public class Souhu {
result = result.substring(1, result.length()-1); result = result.substring(1, result.length()-1);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArray = json.getJSONArray("data"); JSONArray jsonArray = json.getJSONArray("data");
List<Map<String,Object>> dataList1 = souhuAccountAnalysis.analysisData(jsonArray); List<Map<String,Object>> dataList1 = souhuAccountAnalysis.analysisData(jsonArray,name);
if(jsonArray.size() < 1) { if(jsonArray.size() < 1) {
break; break;
} }
...@@ -102,6 +107,20 @@ public class Souhu { ...@@ -102,6 +107,20 @@ public class Souhu {
} }
/**
 * Looks up the display name of a Sohu account by fetching its profile page
 * ({@code http://mp.sohu.com/profile?xpt=...}) and reading the text of the
 * {@code p#ff} element.
 *
 * @param xpt   account identifier; appended verbatim to the {@code xpt} query parameter
 * @param proxy proxy passed through to {@code HttpClient} for the request
 * @return the text of the {@code p#ff} element, or {@code null} when the page
 *         could not be fetched or parsed
 */
private static String getName(String xpt,Proxy proxy) {
    Map<String,String> headerMap = HeadGet.getSouhuAccountHeaderMap(null);
    try {
        String result = HttpClient.executeHttpRequestGet("http://mp.sohu.com/profile?xpt="+xpt, proxy, headerMap);
        if (result == null) {
            // No response body: report it explicitly instead of relying on the parser to throw.
            logger.warn("Sohu profile request returned no body, xpt={}", xpt);
            return null;
        }
        Document doc = Jsoup.parse(result);
        String name = doc.select("p#ff").text();
        // Diagnostic output goes through the class logger rather than stdout.
        logger.debug("Sohu account name resolved, xpt={}, name={}", xpt, name);
        return name;
    } catch (Exception e) {
        // The name is optional metadata, so keep the null-on-failure contract,
        // but record the cause so a failed lookup can be diagnosed.
        logger.error("Failed to fetch Sohu account name, xpt={}", xpt, e);
        return null;
    }
}
/** /**
* *
* @Description 传入搜狐文章链接和cookie 可获取此文章所有评论 * @Description 传入搜狐文章链接和cookie 可获取此文章所有评论
...@@ -109,14 +128,14 @@ public class Souhu { ...@@ -109,14 +128,14 @@ public class Souhu {
* @param cookie * @param cookie
* @return * @return
*/ */
public static List<Map<String,Object>> getSouhuCommentData(String url) { public static List<Map<String,Object>> getSouhuCommentData(String url,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getSouhuCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getSouhuCommentHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int j = 1; int j = 1;
try { try {
while(true) { while(true) {
String newurl = souhuCommentAnalysis.getSouhuURL(url) + "&page_no=" + j; String newurl = souhuCommentAnalysis.getSouhuURL(url) + "&page_no=" + j;
String result = HttpClient.executeHttpRequestGet(newurl,headerMap); String result = HttpClient.executeHttpRequestGet(newurl,proxy,headerMap);
System.out.println(newurl); System.out.println(newurl);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONObject("jsonObject").getJSONArray("comments"); JSONArray jsonArry = json.getJSONObject("jsonObject").getJSONArray("comments");
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -18,11 +19,11 @@ public class TXNews { ...@@ -18,11 +19,11 @@ public class TXNews {
private static TXNewsByWordAnalysis txNewsByWordAnalysis = new TXNewsByWordAnalysis(); private static TXNewsByWordAnalysis txNewsByWordAnalysis = new TXNewsByWordAnalysis();
public static boolean hasMore = true; public static boolean hasMore = true;
public static List<Map<String,Object>> getData(String word) { public static List<Map<String,Object>> getData(String word,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
Map<String,String> headerMap = HeadGet.getTxNewspage1HeaderMap(null); Map<String,String> headerMap = HeadGet.getTxNewspage1HeaderMap(null);
Map<String,Object> paramMap = HeadGet.getTxNewspage1ParamMap(word); Map<String,Object> paramMap = HeadGet.getTxNewspage1ParamMap(word);
String result = HttpClient.executeHttpRequestPost("http://r.inews.qq.com/search?appver=11.2.1_qqnews_5.5.60&devid=6D33F35F-880D-42A6-A23F-881BEC6960EC", headerMap, paramMap); String result = HttpClient.executeHttpRequestPost("http://r.inews.qq.com/search?appver=11.2.1_qqnews_5.5.60&devid=6D33F35F-880D-42A6-A23F-881BEC6960EC",proxy, headerMap, paramMap);
List<Map<String,Object>> dList = txNewsByWordAnalysis.getData(result); List<Map<String,Object>> dList = txNewsByWordAnalysis.getData(result);
dataList.addAll(dList); dataList.addAll(dList);
int page = 2; int page = 2;
...@@ -32,7 +33,7 @@ public class TXNews { ...@@ -32,7 +33,7 @@ public class TXNews {
try { try {
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(5000);
Map<String,Object> param2Map = HeadGet.getTxNewspagemoreParamMap(word, page); Map<String,Object> param2Map = HeadGet.getTxNewspagemoreParamMap(word, page);
String result2 = HttpClient.executeHttpRequestPost("http://r.inews.qq.com/searchMore?appver=11.2.1_qqnews_5.5.60&devid=6D33F35F-880D-42A6-A23F-881BEC6960EC", header2Map, param2Map); String result2 = HttpClient.executeHttpRequestPost("http://r.inews.qq.com/searchMore?appver=11.2.1_qqnews_5.5.60&devid=6D33F35F-880D-42A6-A23F-881BEC6960EC",proxy, header2Map, param2Map);
page++; page++;
List<Map<String,Object>> dList2 = txNewsByWordAnalysis.getData(result2); List<Map<String,Object>> dList2 = txNewsByWordAnalysis.getData(result2);
dataList.addAll(dList2); dataList.addAll(dList2);
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -23,7 +24,7 @@ public class Wangyi { ...@@ -23,7 +24,7 @@ public class Wangyi {
* @param id * @param id
* @return * @return
*/ */
public static List<Map<String,Object>> getWangyiCommentData(String id) { public static List<Map<String,Object>> getWangyiCommentData(String id,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getWangyiCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getWangyiCommentHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int i = 0; int i = 0;
...@@ -31,7 +32,7 @@ public class Wangyi { ...@@ -31,7 +32,7 @@ public class Wangyi {
try { try {
while(true) { while(true) {
String url = "http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"+id+"/comments/newList?offset="+i+"&limit=30"; String url = "http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"+id+"/comments/newList?offset="+i+"&limit=30";
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
System.out.println(url); System.out.println(url);
List<Map<String,Object>> lists = wangyiCommentAnalysis.getWangyiCommentData(result,idList); List<Map<String,Object>> lists = wangyiCommentAnalysis.getWangyiCommentData(result,idList);
if(lists == null || lists.size() < 1) { if(lists == null || lists.size() < 1) {
...@@ -57,10 +58,10 @@ public class Wangyi { ...@@ -57,10 +58,10 @@ public class Wangyi {
* @param id * @param id
* @return * @return
*/ */
public static int getWangyiCommentCount(String id) { public static int getWangyiCommentCount(String id,Proxy proxy) {
String url = "http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"+id; String url = "http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"+id;
Map<String,String> headerMap = HeadGet.getWangyiCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getWangyiCommentHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
return json.getInteger("tcount"); return json.getInteger("tcount");
} }
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
...@@ -28,7 +29,7 @@ public class XiGua { ...@@ -28,7 +29,7 @@ public class XiGua {
* @param word * @param word
* @return * @return
*/ */
public static List<Map<String,Object>> getXiguaVideoByWordData(String word) { public static List<Map<String,Object>> getXiguaVideoByWordData(String word,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getXiguaByWordHeaderMap(null); Map<String,String> headerMap = HeadGet.getXiguaByWordHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int i = 0; int i = 0;
...@@ -37,7 +38,7 @@ public class XiGua { ...@@ -37,7 +38,7 @@ public class XiGua {
while(true) { while(true) {
String url = "https://www.ixigua.com/search_content/?format=json&autoload=true&count=20&keyword="+URLEncoder.encode(word, "UTF-8")+"&cur_tab=1&offset="; String url = "https://www.ixigua.com/search_content/?format=json&autoload=true&count=20&keyword="+URLEncoder.encode(word, "UTF-8")+"&cur_tab=1&offset=";
System.out.println(url + i); System.out.println(url + i);
String result = HttpClient.executeHttpRequestGet(url+i, headerMap); String result = HttpClient.executeHttpRequestGet(url+i,proxy, headerMap);
List<Map<String,Object>> lists = xiguaByWordAnalysis.getXiguaData(result); List<Map<String,Object>> lists = xiguaByWordAnalysis.getXiguaData(result);
if(lists != null && lists.size() > 0) { if(lists != null && lists.size() > 0) {
dataList.addAll(lists); dataList.addAll(lists);
...@@ -67,7 +68,7 @@ public class XiGua { ...@@ -67,7 +68,7 @@ public class XiGua {
* @param url * @param url
* @return * @return
*/ */
public static List<Map<String,Object>> getXiguaAccountData(String url,String startTime) { public static List<Map<String,Object>> getXiguaAccountData(String url,String startTime,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
Map<String,String> headerMap = HeadGet.getXiguaByWordHeaderMap(null); Map<String,String> headerMap = HeadGet.getXiguaByWordHeaderMap(null);
String time = "0"; String time = "0";
...@@ -76,7 +77,7 @@ public class XiGua { ...@@ -76,7 +77,7 @@ public class XiGua {
while(true) { while(true) {
String uid = xiguaAccountAnalysis.getUid(url); String uid = xiguaAccountAnalysis.getUid(url);
String newurl = "https://www.ixigua.com/c/user/article/?user_id="+uid+"&max_behot_time="+time+"&count=20"; String newurl = "https://www.ixigua.com/c/user/article/?user_id="+uid+"&max_behot_time="+time+"&count=20";
String result = HttpClient.executeHttpRequestGet(newurl, headerMap); String result = HttpClient.executeHttpRequestGet(newurl,proxy, headerMap);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
time = json.getJSONObject("next").getString("max_behot_time"); time = json.getJSONObject("next").getString("max_behot_time");
List<Map<String,Object>> list = xiguaAccountAnalysis.getXiguaAccountData(json,time1); List<Map<String,Object>> list = xiguaAccountAnalysis.getXiguaAccountData(json,time1);
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
...@@ -18,14 +19,14 @@ public class Xiaomi { ...@@ -18,14 +19,14 @@ public class Xiaomi {
private static Logger logger = LoggerFactory.getLogger(Xiaomi.class); private static Logger logger = LoggerFactory.getLogger(Xiaomi.class);
private static XiaomiShequByWordAnalysis xiaomiShequByWordAnalysis = new XiaomiShequByWordAnalysis(); private static XiaomiShequByWordAnalysis xiaomiShequByWordAnalysis = new XiaomiShequByWordAnalysis();
public static List<Map<String,Object>> getXiaomiByWordData(String word) { public static List<Map<String,Object>> getXiaomiByWordData(String word,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getxiaomiShequByWordHeaderMap(null); Map<String,String> headerMap = HeadGet.getxiaomiShequByWordHeaderMap(null);
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
int i = 1; int i = 1;
try { try {
while(true) { while(true) {
String url = "http://so.bbs.xiaomi.cn/?q="+URLEncoder.encode(word, "UTF-8")+"&p="+i+"&fid=457&time=63072000&order=1"; String url = "http://so.bbs.xiaomi.cn/?q="+URLEncoder.encode(word, "UTF-8")+"&p="+i+"&fid=457&time=63072000&order=1";
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
List<Map<String,Object>> dataList = xiaomiShequByWordAnalysis.getdata(result); List<Map<String,Object>> dataList = xiaomiShequByWordAnalysis.getdata(result);
if(dataList == null || dataList.size() < 1) { if(dataList == null || dataList.size() < 1) {
break; break;
...@@ -38,6 +39,7 @@ public class Xiaomi { ...@@ -38,6 +39,7 @@ public class Xiaomi {
return bodyList; return bodyList;
} catch (UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
e.printStackTrace(); e.printStackTrace();
logger.info("数据采集出错:{}",e.getMessage());
return bodyList; return bodyList;
} }
} }
......
package com.zhiwei.parse; package com.zhiwei.parse;
import java.net.Proxy;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
...@@ -30,7 +31,7 @@ public class Yidianzixun { ...@@ -30,7 +31,7 @@ public class Yidianzixun {
* @param startTime * @param startTime
* @return * @return
*/ */
public static List<Map<String,Object>> getYidianzixunAccountData(String channelid,String startTime) { public static List<Map<String,Object>> getYidianzixunAccountData(String channelid,String startTime,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getYidianzixunAccountHeaderMap(null); Map<String,String> headerMap = HeadGet.getYidianzixunAccountHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int j = 0; int j = 0;
...@@ -38,7 +39,7 @@ public class Yidianzixun { ...@@ -38,7 +39,7 @@ public class Yidianzixun {
try { try {
while(f) { while(f) {
String url = "http://www.yidianzixun.com/home/q/news_list_for_channel?channel_id="+channelid+"&cstart="+j+"&cend="+(j+10); String url = "http://www.yidianzixun.com/home/q/news_list_for_channel?channel_id="+channelid+"&cstart="+j+"&cend="+(j+10);
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONArray("result"); JSONArray jsonArry = json.getJSONArray("result");
if(jsonArry.size() == 0) { if(jsonArry.size() == 0) {
...@@ -74,14 +75,14 @@ public class Yidianzixun { ...@@ -74,14 +75,14 @@ public class Yidianzixun {
* @param cookie * @param cookie
* @return * @return
*/ */
public static List<Map<String,Object>> getYidianzixunCommentData(String url) { public static List<Map<String,Object>> getYidianzixunCommentData(String url,Proxy proxy) {
url = yidianzixunCommentAnalysis.analysisURL(url); url = yidianzixunCommentAnalysis.analysisURL(url);
Map<String, String> headerMap = HeadGet.getYidianzixunCommentHeaderMap(null); Map<String, String> headerMap = HeadGet.getYidianzixunCommentHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
String urlb = url; String urlb = url;
while(true) { while(true) {
String result = HttpClient.executeHttpRequestGet(url,headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy,headerMap);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
JSONArray jsonArry = json.getJSONArray("comments"); JSONArray jsonArry = json.getJSONArray("comments");
String comment_id = ""; String comment_id = "";
...@@ -93,7 +94,7 @@ public class Yidianzixun { ...@@ -93,7 +94,7 @@ public class Yidianzixun {
} }
if (data.toString().contains("replies")) { if (data.toString().contains("replies")) {
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
List<Map<String,Object>> replyList = yidianzixunCommentAnalysis.getrepliesData(map, null, headerMap); List<Map<String,Object>> replyList = yidianzixunCommentAnalysis.getrepliesData(map, null, headerMap,proxy);
if(replyList != null && replyList.size() > 0) { if(replyList != null && replyList.size() > 0) {
dataList.addAll(replyList); dataList.addAll(replyList);
} }
...@@ -119,14 +120,14 @@ public class Yidianzixun { ...@@ -119,14 +120,14 @@ public class Yidianzixun {
* @param word * @param word
* @return * @return
*/ */
public static List<Map<String,Object>> getYidianzixunDataByWord(String word) { public static List<Map<String,Object>> getYidianzixunDataByWord(String word,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
int i = 0; int i = 0;
while(true) { while(true) {
String url = "http://www.yidianzixun.com/home/q/news_list_for_keyword?display="+URLEncoder.encode(word, "UTF-8")+"&cstart="+i+"&cend="+(i+10)+"&word_type=token"; String url = "http://www.yidianzixun.com/home/q/news_list_for_keyword?display="+URLEncoder.encode(word, "UTF-8")+"&cstart="+i+"&cend="+(i+10)+"&word_type=token";
Map<String,String> headerMap = HeadGet.getYidianzixunWordHeaderMap(null); Map<String,String> headerMap = HeadGet.getYidianzixunWordHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
List<Map<String,Object>> list = yidianzixunByWordAnalysis.getOnePageData(result); List<Map<String,Object>> list = yidianzixunByWordAnalysis.getOnePageData(result);
if(list == null || list.size() < 1) { if(list == null || list.size() < 1) {
break; break;
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
...@@ -43,10 +44,10 @@ public class AiqiyiByWordAnalysis { ...@@ -43,10 +44,10 @@ public class AiqiyiByWordAnalysis {
} }
} }
public Map<String,Object> getAiqiyiData(String url,Map<String,String> headerMap) { public Map<String,Object> getAiqiyiData(String url,Map<String,String> headerMap,Proxy proxy) {
Map<String,Object> dataMap = new HashMap<String,Object>(); Map<String,Object> dataMap = new HashMap<String,Object>();
try { try {
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
Document doc = Jsoup.parse(result); Document doc = Jsoup.parse(result);
String time = doc.select("#widget-vshort-ptime").text(); String time = doc.select("#widget-vshort-ptime").text();
if(!time.contains("2017")) { if(!time.contains("2017")) {
...@@ -61,7 +62,7 @@ public class AiqiyiByWordAnalysis { ...@@ -61,7 +62,7 @@ public class AiqiyiByWordAnalysis {
String title = doc.select("#widget-videotitle").attr("title"); String title = doc.select("#widget-videotitle").attr("title");
String id = result.split(" tvId: ")[1].split(",")[0]; String id = result.split(" tvId: ")[1].split(",")[0];
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
int count = getVideo_count(id); int count = getVideo_count(id,proxy);
dataMap.put("count", count); dataMap.put("count", count);
dataMap.put("title", title); dataMap.put("title", title);
System.out.println(dataMap.toString()); System.out.println(dataMap.toString());
...@@ -73,11 +74,11 @@ public class AiqiyiByWordAnalysis { ...@@ -73,11 +74,11 @@ public class AiqiyiByWordAnalysis {
} }
public int getVideo_count(String id) { public int getVideo_count(String id,Proxy proxy) {
try { try {
String url = "http://cache.video.iqiyi.com/jp/pc/"+id+"/"; String url = "http://cache.video.iqiyi.com/jp/pc/"+id+"/";
Map<String,String> headerMap = HeadGet.getAiqiyiForCountHeaderMap(null); Map<String,String> headerMap = HeadGet.getAiqiyiForCountHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
String count = result.split(":")[1].split("\\}")[0]; String count = result.split(":")[1].split("\\}")[0];
return Integer.valueOf(count); return Integer.valueOf(count);
} catch (Exception e) { } catch (Exception e) {
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
...@@ -83,11 +84,11 @@ public class BaijiaAccountAnalysis { ...@@ -83,11 +84,11 @@ public class BaijiaAccountAnalysis {
} }
} }
public String getBaijiaContent(String url) { public String getBaijiaContent(String url,Proxy proxy) {
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
Map<String,String> headerMap = HeadGet.getBaijiaAccountHeaderMap(null); Map<String,String> headerMap = HeadGet.getBaijiaAccountHeaderMap(null);
try { try {
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
Document document = Jsoup.parse(result); Document document = Jsoup.parse(result);
return document.select("section.news-content").text(); return document.select("section.news-content").text();
} catch (Exception e) { } catch (Exception e) {
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
...@@ -8,8 +9,6 @@ import java.util.Map; ...@@ -8,8 +9,6 @@ import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -23,7 +22,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools; ...@@ -23,7 +22,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class DayuByWordAnalysis { public class DayuByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(DayuByWordAnalysis.class); private static Logger logger = LoggerFactory.getLogger(DayuByWordAnalysis.class);
public List<Map<String,Object>> getDayuByWordData(String result) { public List<Map<String,Object>> getDayuByWordData(String result,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
...@@ -37,7 +36,7 @@ public class DayuByWordAnalysis { ...@@ -37,7 +36,7 @@ public class DayuByWordAnalysis {
map.put("time", TimeParse.dateFormartString(new Date(data.getLong("publish_time")), "yyyy-MM-dd HH:mm:ss")); map.put("time", TimeParse.dateFormartString(new Date(data.getLong("publish_time")), "yyyy-MM-dd HH:mm:ss"));
map.put("id", data.getString("id")); map.put("id", data.getString("id"));
map.put("source", data.getString("source_name").replaceAll("<.*?>", "")); map.put("source", data.getString("source_name").replaceAll("<.*?>", ""));
map.put("content", getContent(url)); map.put("content", getContent(url,proxy));
System.out.println(map.toString()); System.out.println(map.toString());
dataList.add(map); dataList.add(map);
} }
...@@ -49,10 +48,10 @@ public class DayuByWordAnalysis { ...@@ -49,10 +48,10 @@ public class DayuByWordAnalysis {
} }
public String getContent(String url) { public String getContent(String url,Proxy proxy) {
ZhiWeiTools.sleep(2000); ZhiWeiTools.sleep(2000);
Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
Pattern pat = Pattern.compile("xissJsonData = (.*);"); Pattern pat = Pattern.compile("xissJsonData = (.*);");
Matcher matcher = pat.matcher(result); Matcher matcher = pat.matcher(result);
try { try {
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
...@@ -26,7 +27,7 @@ public class DayuCommentAnalysis { ...@@ -26,7 +27,7 @@ public class DayuCommentAnalysis {
* @param articleId * @param articleId
* @return * @return
*/ */
public List<Map<String,Object>> getDayuCommentData(String result,String articleId) { public List<Map<String,Object>> getDayuCommentData(String result,String articleId,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
JSONObject json = JSONObject.parseObject(result).getJSONObject("data").getJSONObject("comments_map"); JSONObject json = JSONObject.parseObject(result).getJSONObject("data").getJSONObject("comments_map");
...@@ -45,7 +46,7 @@ public class DayuCommentAnalysis { ...@@ -45,7 +46,7 @@ public class DayuCommentAnalysis {
int i = data.getInteger("reply_cnt"); int i = data.getInteger("reply_cnt");
dataMap.put("replay_count", i); dataMap.put("replay_count", i);
if(i > 0) { if(i > 0) {
dataList.addAll(getReplayData(id,articleId)); dataList.addAll(getReplayData(id,articleId,proxy));
} }
dataList.add(dataMap); dataList.add(dataMap);
} }
...@@ -63,10 +64,10 @@ public class DayuCommentAnalysis { ...@@ -63,10 +64,10 @@ public class DayuCommentAnalysis {
* @param articleId * @param articleId
* @return * @return
*/ */
private List<Map<String,Object>> getReplayData(String id,String articleId) { private List<Map<String,Object>> getReplayData(String id,String articleId,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null);
String url = "http://m.uczzd.cn/iflow/api/v2/cmt/detail/"+id+"/comments?articleId="+articleId+"&count=10&ts="; String url = "http://m.uczzd.cn/iflow/api/v2/cmt/detail/"+id+"/comments?articleId="+articleId+"&count=10&ts=";
String result = HttpClient.executeHttpRequestGet(url+"-1", headerMap); String result = HttpClient.executeHttpRequestGet(url+"-1",proxy, headerMap);
List<Map<String,Object>> data = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> data = new ArrayList<Map<String,Object>>();
List<String> timeList = new ArrayList<String>(); List<String> timeList = new ArrayList<String>();
while(true) { while(true) {
...@@ -79,7 +80,7 @@ public class DayuCommentAnalysis { ...@@ -79,7 +80,7 @@ public class DayuCommentAnalysis {
if(time == 0) { if(time == 0) {
break; break;
} }
result = HttpClient.executeHttpRequestGet(url+time, headerMap); result = HttpClient.executeHttpRequestGet(url+time,proxy, headerMap);
} }
System.out.println("=====================评论下回复获取数=="+data.size()); System.out.println("=====================评论下回复获取数=="+data.size());
return data; return data;
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.io.IOException; import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
...@@ -24,7 +24,7 @@ public class FenghuangAccountAnalysis { ...@@ -24,7 +24,7 @@ public class FenghuangAccountAnalysis {
* @param result * @param result
* @return * @return
*/ */
public List<Map<String,Object>> getArticleData(String url,String startTime) { public List<Map<String,Object>> getArticleData(String url,String startTime,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
...@@ -32,7 +32,7 @@ public class FenghuangAccountAnalysis { ...@@ -32,7 +32,7 @@ public class FenghuangAccountAnalysis {
JSONArray jsonArry = null; JSONArray jsonArry = null;
for(int i = 0;i < 3;i++) { for(int i = 0;i < 3;i++) {
try { try {
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
jsonArry = json.getJSONObject("data").getJSONObject("feeds").getJSONArray("list"); jsonArry = json.getJSONObject("data").getJSONObject("feeds").getJSONArray("list");
if(jsonArry == null || jsonArry.size() < 1) { if(jsonArry == null || jsonArry.size() < 1) {
...@@ -49,7 +49,7 @@ public class FenghuangAccountAnalysis { ...@@ -49,7 +49,7 @@ public class FenghuangAccountAnalysis {
try { try {
JSONObject data = jsonArry.getJSONObject(i); JSONObject data = jsonArry.getJSONObject(i);
String articleurl = data.getString("id"); String articleurl = data.getString("id");
String articleResult = HttpClient.executeHttpRequestGet(articleurl, headerMap); String articleResult = HttpClient.executeHttpRequestGet(articleurl,proxy, headerMap);
Map<String,Object> dataMap = getArticle(articleResult); Map<String,Object> dataMap = getArticle(articleResult);
ZhiWeiTools.sleep(1000); ZhiWeiTools.sleep(1000);
if(dataMap != null) { if(dataMap != null) {
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
...@@ -18,11 +19,11 @@ import com.zhiwei.zhiweiTools.timeParse.TimeParse; ...@@ -18,11 +19,11 @@ import com.zhiwei.zhiweiTools.timeParse.TimeParse;
public class FenghuangCommentAnalysis { public class FenghuangCommentAnalysis {
private static Logger logger = LoggerFactory.getLogger(FenghuangCommentAnalysis.class); private static Logger logger = LoggerFactory.getLogger(FenghuangCommentAnalysis.class);
public Map<String,Object> getFenghuangCommentCount(String url) { public Map<String,Object> getFenghuangCommentCount(String url,Proxy proxy) {
Map<String, String> headerMap = HeadGet.getFenghuangCommentHeaderMap(null); Map<String, String> headerMap = HeadGet.getFenghuangCommentHeaderMap(null);
Map<String,Object> map = new HashMap<String, Object>(); Map<String,Object> map = new HashMap<String, Object>();
try { try {
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
map.put("real_count", json.getInteger("real_num")); map.put("real_count", json.getInteger("real_num"));
map.put("comment_num", json.getInteger("comment_num")); map.put("comment_num", json.getInteger("comment_num"));
...@@ -40,9 +41,9 @@ public class FenghuangCommentAnalysis { ...@@ -40,9 +41,9 @@ public class FenghuangCommentAnalysis {
* @param url * @param url
* @return * @return
*/ */
public String getdocUrl(String url) { public String getdocUrl(String url,Proxy proxy) {
try { try {
String result = HttpClient.executeHttpRequestGet(url, null); String result = HttpClient.executeHttpRequestGet(url,proxy, null);
result = result.split("commentsUrl = '")[1].split("',")[0]; result = result.split("commentsUrl = '")[1].split("',")[0];
System.out.println(result); System.out.println(result);
...@@ -71,12 +72,12 @@ public class FenghuangCommentAnalysis { ...@@ -71,12 +72,12 @@ public class FenghuangCommentAnalysis {
* @param url * @param url
* @return * @return
*/ */
public List<Map<String,Object>> getData(String url) { public List<Map<String,Object>> getData(String url,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getFenghuangCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getFenghuangCommentHeaderMap(null);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
String result; String result;
try { try {
result = HttpClient.executeHttpRequestGet(url, headerMap); result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
} catch (Exception e) { } catch (Exception e) {
logger.error("链接获取信息失败",e.getMessage()); logger.error("链接获取信息失败",e.getMessage());
return null; return null;
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
...@@ -25,10 +26,10 @@ public class MeipaiByWordAnalysis { ...@@ -25,10 +26,10 @@ public class MeipaiByWordAnalysis {
* @param result * @param result
* @return * @return
*/ */
public Map<String,Object> getMeipaiData(Map<String,String> headerMap,String url) { public Map<String,Object> getMeipaiData(Map<String,String> headerMap,String url,Proxy proxy) {
try { try {
Map<String,Object> dataMap = new HashMap<String,Object>(); Map<String,Object> dataMap = new HashMap<String,Object>();
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
Document doc = Jsoup.parse(result); Document doc = Jsoup.parse(result);
String video_count = doc.select("div.detail-location").text().split("播放")[0]; String video_count = doc.select("div.detail-location").text().split("播放")[0];
String time = doc.select("div.detail-time.pa > strong").text(); String time = doc.select("div.detail-time.pa > strong").text();
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class QQKBByWordAnalysis { public class QQKBByWordAnalysis {
private static Logger logger = LoggerFactory.getLogger(QQKBByWordAnalysis.class);
// public List<Map<String,Object>> get
} }
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
...@@ -50,7 +51,7 @@ public class QQKBCommentAnalysis { ...@@ -50,7 +51,7 @@ public class QQKBCommentAnalysis {
* @param result * @param result
* @return * @return
*/ */
public List<Map<String,Object>> getCommentData(String result,String cookie,String comment_id, String article_id) { public List<Map<String,Object>> getCommentData(String result,String cookie,String comment_id, String article_id,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
...@@ -66,7 +67,7 @@ public class QQKBCommentAnalysis { ...@@ -66,7 +67,7 @@ public class QQKBCommentAnalysis {
String reply_id = data.getString("reply_id"); String reply_id = data.getString("reply_id");
if(data.toString().contains("reply_num")) { if(data.toString().contains("reply_num")) {
replay_num = data.getInteger("reply_num"); replay_num = data.getInteger("reply_num");
List<Map<String,Object>> lists = getReplyCommentData(cookie,reply_id,comment_id, article_id); List<Map<String,Object>> lists = getReplyCommentData(cookie,reply_id,comment_id, article_id,proxy);
if(lists != null && lists.size() > 0) { if(lists != null && lists.size() > 0) {
dataList.addAll(lists); dataList.addAll(lists);
} }
...@@ -107,7 +108,7 @@ public class QQKBCommentAnalysis { ...@@ -107,7 +108,7 @@ public class QQKBCommentAnalysis {
} }
} }
public List<Map<String,Object>> getReplyCommentData(String cookie,String reply_id,String comment_id, String article_id) { public List<Map<String,Object>> getReplyCommentData(String cookie,String reply_id,String comment_id, String article_id,Proxy proxy) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
Map<String,String> headerMap = HeadGet.getQQKBCommentHeaderMap(cookie); Map<String,String> headerMap = HeadGet.getQQKBCommentHeaderMap(cookie);
try { try {
...@@ -115,7 +116,7 @@ public class QQKBCommentAnalysis { ...@@ -115,7 +116,7 @@ public class QQKBCommentAnalysis {
Map<String,Object> paramMap = HeadGet.getQQKBCommentReplyParamMap(null,comment_id, article_id, reply_id); Map<String,Object> paramMap = HeadGet.getQQKBCommentReplyParamMap(null,comment_id, article_id, reply_id);
while(true) { while(true) {
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(3000);
String result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/getQQNewsOrigReplyComment", headerMap, paramMap); String result = HttpClient.executeHttpRequestPost("http://r.cnews.qq.com/getQQNewsOrigReplyComment",proxy, headerMap, paramMap);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
if(json.getJSONObject("comments").getString("reply_list") == null) { if(json.getJSONObject("comments").getString("reply_list") == null) {
break; break;
......
...@@ -25,11 +25,11 @@ public class SouhuAccountAnalysis { ...@@ -25,11 +25,11 @@ public class SouhuAccountAnalysis {
* @param startTime * @param startTime
* @return * @return
*/ */
public List<Map<String,Object>> analysisData(JSONArray jsonArray) { public List<Map<String,Object>> analysisData(JSONArray jsonArray,String name) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
for(int i = 0;i < jsonArray.size();i++) { for(int i = 0;i < jsonArray.size();i++) {
JSONObject data = jsonArray.getJSONObject(i); JSONObject data = jsonArray.getJSONObject(i);
Map<String,Object> map = parseHtmlByAccount(data); Map<String,Object> map = parseHtmlByAccount(data,name);
if(map != null) { if(map != null) {
dataList.add(map); dataList.add(map);
} }
...@@ -45,11 +45,12 @@ public class SouhuAccountAnalysis { ...@@ -45,11 +45,12 @@ public class SouhuAccountAnalysis {
* @param data * @param data
* @return * @return
*/ */
private static Map<String,Object> parseHtmlByAccount(JSONObject data) { private static Map<String,Object> parseHtmlByAccount(JSONObject data,String name) {
Map<String,Object> map = new HashMap<String, Object>(); Map<String,Object> map = new HashMap<String, Object>();
try { try {
String title = data.getString("title"); String title = data.getString("title");
map.put("title", URLDecoder.decode(title, "UTF-8")); map.put("title", URLDecoder.decode(title, "UTF-8"));
map.put("source", name);
String content = data.getString("brief"); String content = data.getString("brief");
map.put("content", URLDecoder.decode(content,"UTF-8")); map.put("content", URLDecoder.decode(content,"UTF-8"));
map.put("newsPv", data.getString("newsPv")); map.put("newsPv", data.getString("newsPv"));
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
...@@ -39,11 +40,11 @@ public class SouhuCommentAnalysis { ...@@ -39,11 +40,11 @@ public class SouhuCommentAnalysis {
return newurl; return newurl;
} }
public int getSouhuCommentCount(String url) { public int getSouhuCommentCount(String url,Proxy proxy) {
Map<String,String> headerMap = HeadGet.getSouhuCommentHeaderMap(null); Map<String,String> headerMap = HeadGet.getSouhuCommentHeaderMap(null);
int i; int i;
try { try {
String result = HttpClient.executeHttpRequestGet(url, headerMap); String result = HttpClient.executeHttpRequestGet(url,proxy, headerMap);
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
i = json.getJSONObject("jsonObject").getInteger("participation_sum"); i = json.getJSONObject("jsonObject").getInteger("participation_sum");
return i; return i;
......
...@@ -14,6 +14,7 @@ public class WangyiCommentAnalysis { ...@@ -14,6 +14,7 @@ public class WangyiCommentAnalysis {
private static Logger logger = LoggerFactory.getLogger(WangyiCommentAnalysis.class); private static Logger logger = LoggerFactory.getLogger(WangyiCommentAnalysis.class);
@SuppressWarnings("unchecked")
public List<Map<String,Object>> getWangyiCommentData(String result,List<String> idList) { public List<Map<String,Object>> getWangyiCommentData(String result,List<String> idList) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
try { try {
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
...@@ -41,12 +42,12 @@ public class YidianzixunCommentAnalysis { ...@@ -41,12 +42,12 @@ public class YidianzixunCommentAnalysis {
* @return * @return
*/ */
public List<Map<String, Object>> getrepliesData(Map<String, Object> map, String cookie, public List<Map<String, Object>> getrepliesData(Map<String, Object> map, String cookie,
Map<String, String> headerMap) { Map<String, String> headerMap,Proxy proxy) {
List<Map<String,Object>> replylists = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> replylists = new ArrayList<Map<String,Object>>();
try { try {
String replyurl = "http://www.yidianzixun.com/home/q/getmorereplies?comment_id=" String replyurl = "http://www.yidianzixun.com/home/q/getmorereplies?comment_id="
+ map.get("comment_id"); + map.get("comment_id");
String replyresult = HttpClient.executeHttpRequestGet(replyurl, headerMap); String replyresult = HttpClient.executeHttpRequestGet(replyurl,proxy, headerMap);
JSONObject replyjson = JSONObject.parseObject(replyresult); JSONObject replyjson = JSONObject.parseObject(replyresult);
JSONArray replyjsonArry = replyjson.getJSONObject("comment").getJSONArray("replies"); JSONArray replyjsonArry = replyjson.getJSONObject("comment").getJSONArray("replies");
for (int j = 0; j < replyjsonArry.size(); j++) { for (int j = 0; j < replyjsonArry.size(); j++) {
......
...@@ -18,7 +18,7 @@ public class AiqiyiByWordExample { ...@@ -18,7 +18,7 @@ public class AiqiyiByWordExample {
String[] words = word.split(","); String[] words = word.split(",");
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(String w : words) { for(String w : words) {
List<Map<String,Object>> dataList = Aiqiyi.getAiqiyiByWordData(w); List<Map<String,Object>> dataList = Aiqiyi.getAiqiyiByWordData(w,null);
if(dataList != null && dataList.size() >= 1) { if(dataList != null && dataList.size() >= 1) {
bodyList.addAll(dataList); bodyList.addAll(dataList);
} }
......
...@@ -16,7 +16,7 @@ public class BaijiaAccountExample { ...@@ -16,7 +16,7 @@ public class BaijiaAccountExample {
String app_id = "1536766390576806"; String app_id = "1536766390576806";
String startTime = "2016-01-01 00:00:00"; String startTime = "2016-01-01 00:00:00";
//2017-11-30 17:48:17 //2017-11-30 17:48:17
List<Map<String,Object>> lists = Baijia.getBaijiaAccountData(app_id,startTime); List<Map<String,Object>> lists = Baijia.getBaijiaAccountData(app_id,startTime,null);
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("title"); headList.add("title");
...@@ -34,7 +34,7 @@ public class BaijiaAccountExample { ...@@ -34,7 +34,7 @@ public class BaijiaAccountExample {
String app_id = "b_1536766390576806"; String app_id = "b_1536766390576806";
String startTime = "2016-01-01 00:00:00"; String startTime = "2016-01-01 00:00:00";
//2017-11-30 17:48:17 //2017-11-30 17:48:17
List<Map<String,Object>> lists = Baijia.getBaijiaAccount2Data(app_id,startTime); List<Map<String,Object>> lists = Baijia.getBaijiaAccount2Data(app_id,startTime,null);
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("title"); headList.add("title");
......
...@@ -39,7 +39,7 @@ public class DayuAccountExample { ...@@ -39,7 +39,7 @@ public class DayuAccountExample {
if(mid.length() < 1 && name.length() < 1) { if(mid.length() < 1 && name.length() < 1) {
continue; continue;
} }
List<Map<String,Object>> dataList = Dayu.getDayuAccountData(mid,name,null); List<Map<String,Object>> dataList = Dayu.getDayuAccountData(mid,name,null,null);
poi.exportExcel(path, name, headList, dataList); poi.exportExcel(path, name, headList, dataList);
} }
......
...@@ -14,7 +14,7 @@ public class DayuByWordExample { ...@@ -14,7 +14,7 @@ public class DayuByWordExample {
public void dayuByWordTest() { public void dayuByWordTest() {
String word = "沃尔玛"; String word = "沃尔玛";
List<Map<String,Object>> dataList = Dayu.getDayuByWordData(word); List<Map<String,Object>> dataList = Dayu.getDayuByWordData(word,null);
System.out.println(dataList.size()); System.out.println(dataList.size());
......
...@@ -10,7 +10,7 @@ public class DayuCommentCountExample { ...@@ -10,7 +10,7 @@ public class DayuCommentCountExample {
public void dayuCommentCountTest() { public void dayuCommentCountTest() {
String articleId = "6987993456991247474"; String articleId = "6987993456991247474";
int i = Dayu.getDayuCommentCount(articleId); int i = Dayu.getDayuCommentCount(articleId,null);
System.out.println(i); System.out.println(i);
} }
......
...@@ -32,7 +32,7 @@ public class DayuCommentExample { ...@@ -32,7 +32,7 @@ public class DayuCommentExample {
}else { }else {
articleId = url; articleId = url;
} }
List<Map<String,Object>> dataList = Dayu.getDayuCommentData(articleId); List<Map<String,Object>> dataList = Dayu.getDayuCommentData(articleId,null);
if(dataList.size() <= 0) { if(dataList.size() <= 0) {
urlList.add(url); urlList.add(url);
} }
......
...@@ -20,7 +20,7 @@ public class FenghuangAccountExample { ...@@ -20,7 +20,7 @@ public class FenghuangAccountExample {
for(int i = 0;i < ids.length;i++) { for(int i = 0;i < ids.length;i++) {
try { try {
String startTime = "2017-01-01 00:00:00"; //可为空 String startTime = "2017-01-01 00:00:00"; //可为空
List<Map<String,Object>> dataList = Fenghuang.getFenghuangAccountData(ids[i], startTime); List<Map<String,Object>> dataList = Fenghuang.getFenghuangAccountData(ids[i], startTime,null);
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("title"); headList.add("title");
......
...@@ -19,7 +19,7 @@ public class FenghuangByWordExample { ...@@ -19,7 +19,7 @@ public class FenghuangByWordExample {
List<Map<String,Object>> listAll = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> listAll = new ArrayList<Map<String,Object>>();
for(String word : wordList) { for(String word : wordList) {
try { try {
List<Map<String,Object>> dataList = Fenghuang.getFenghuangByWord(word); List<Map<String,Object>> dataList = Fenghuang.getFenghuangByWord(word,null);
if(dataList != null && dataList.size() > 0) { if(dataList != null && dataList.size() > 0) {
listAll.addAll(dataList); listAll.addAll(dataList);
} }
......
...@@ -14,7 +14,7 @@ public class FenghuangCommentCountExample { ...@@ -14,7 +14,7 @@ public class FenghuangCommentCountExample {
String url = "http://wemedia.ifeng.com/40906977/wemedia.shtml"; String url = "http://wemedia.ifeng.com/40906977/wemedia.shtml";
//http://news.ifeng.com/a/20161229/50492484_0.shtml //http://news.ifeng.com/a/20161229/50492484_0.shtml
//http://wemedia.ifeng.com/4096977/wemedia.shtml //http://wemedia.ifeng.com/4096977/wemedia.shtml
Map<String,Object> map = Fenghuang.getFenghuangCommentCount(url); Map<String,Object> map = Fenghuang.getFenghuangCommentCount(url,null);
System.out.println(map.toString()); System.out.println(map.toString());
} }
......
...@@ -24,7 +24,7 @@ public class FenghuangCommentExample { ...@@ -24,7 +24,7 @@ public class FenghuangCommentExample {
try { try {
url = map1.get("url")+""; url = map1.get("url")+"";
List<Map<String,Object>> dataList = Fenghuang.getFenghuangCommentData(url); List<Map<String,Object>> dataList = Fenghuang.getFenghuangCommentData(url,null);
if(dataList.size() <= 0) { if(dataList.size() <= 0) {
urlList.add(url); urlList.add(url);
} }
......
...@@ -17,7 +17,7 @@ public class MeipaiByWordExample { ...@@ -17,7 +17,7 @@ public class MeipaiByWordExample {
String[] words = word.split(","); String[] words = word.split(",");
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(String w : words) { for(String w : words) {
List<Map<String,Object>> dataList = Meipai.getMeipaiByWordData(w); List<Map<String,Object>> dataList = Meipai.getMeipaiByWordData(w,null);
if(dataList != null) { if(dataList != null) {
bodyList.addAll(dataList); bodyList.addAll(dataList);
} }
......
...@@ -31,7 +31,7 @@ public class MiaopaiByUrlExample { ...@@ -31,7 +31,7 @@ public class MiaopaiByUrlExample {
urlList.add(url); urlList.add(url);
ZhiWeiTools.sleep(5000); ZhiWeiTools.sleep(5000);
System.out.println(url); System.out.println(url);
Map<String,Object> dataMap = Miaopai.getMiaopaiDataByURL(url); Map<String,Object> dataMap = Miaopai.getMiaopaiDataByURL(url,null);
if(dataMap != null) { if(dataMap != null) {
bodyList.add(dataMap); bodyList.add(dataMap);
} }
......
...@@ -15,7 +15,7 @@ public class PearVideoByWordExample { ...@@ -15,7 +15,7 @@ public class PearVideoByWordExample {
public void pearVideoByWordTest() { public void pearVideoByWordTest() {
String word = "美食"; String word = "美食";
List<Map<String,Object>> bodyList = PearVideo.getPearVideoData(word); List<Map<String,Object>> bodyList = PearVideo.getPearVideoData(word,null);
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("time"); headList.add("time");
headList.add("title"); headList.add("title");
......
...@@ -24,7 +24,7 @@ public class QQAccountExample { ...@@ -24,7 +24,7 @@ public class QQAccountExample {
String child = map.get("帐号链接")+""; String child = map.get("帐号链接")+"";
// System.out.println(child.split("chlid=")[1]); // System.out.println(child.split("chlid=")[1]);
System.out.println((String)map.get("child")); System.out.println((String)map.get("child"));
List<Map<String,Object>> lists = QQKB.getQQAccountData((String)map.get("child"), cookie); List<Map<String,Object>> lists = QQKB.getQQAccountData((String)map.get("child"), cookie,null);
if(lists != null) { if(lists != null) {
for(Map<String,Object> map1 : lists) { for(Map<String,Object> map1 : lists) {
map1.put("name", map.get("呢称")); map1.put("name", map.get("呢称"));
......
...@@ -12,7 +12,7 @@ public class QQKBCommentCountExample { ...@@ -12,7 +12,7 @@ public class QQKBCommentCountExample {
String cookie = "phone_id=;%20phone_token=;%20luin=o0497332654;%20lskey=0003000049dd058f533cbebb240223ede63b864224f7eebe0f4aeca6a623572bb290a5800741d191a5768bb0;%20uin=o0497332654;%20skey=MIZmc2Oel3;%20sigA2=4282ABA809551D3534C72F999EE8F2A75219ED9452DEF04E4CBCE6B680C2C893C3E1BA617F5E0F387E558888B2ABEDFE87A4A25B16F9066C1154B2BC7A1133CA7B356AB9D3BA26ED;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwgGT4n96Oq-jHALnMUe8UzpoJghQDouvfSSWdh-JOdgAm3jRJUPbux6fcIPghoNxo24xdED8ennAANksJuHiwdw;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0"; String cookie = "phone_id=;%20phone_token=;%20luin=o0497332654;%20lskey=0003000049dd058f533cbebb240223ede63b864224f7eebe0f4aeca6a623572bb290a5800741d191a5768bb0;%20uin=o0497332654;%20skey=MIZmc2Oel3;%20sigA2=4282ABA809551D3534C72F999EE8F2A75219ED9452DEF04E4CBCE6B680C2C893C3E1BA617F5E0F387E558888B2ABEDFE87A4A25B16F9066C1154B2BC7A1133CA7B356AB9D3BA26ED;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwgGT4n96Oq-jHALnMUe8UzpoJghQDouvfSSWdh-JOdgAm3jRJUPbux6fcIPghoNxo24xdED8ennAANksJuHiwdw;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0";
String url = ""; String url = "";
int i = QQKB.getCommentCount(cookie, url); int i = QQKB.getCommentCount(cookie, url,null);
System.out.println(i); System.out.println(i);
} }
......
...@@ -16,7 +16,7 @@ public class QQKBCommentExample { ...@@ -16,7 +16,7 @@ public class QQKBCommentExample {
public void qqkbCommentTest() { public void qqkbCommentTest() {
String url = "https://kuaibao.qq.com/s/20180116C0EA8G00"; String url = "https://kuaibao.qq.com/s/20180116C0EA8G00";
List<Map<String,Object>> dataList = QQKB.getQQKBCommentData(url); List<Map<String,Object>> dataList = QQKB.getQQKBCommentData(url,null);
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("reply_id"); //id headList.add("reply_id"); //id
headList.add("like"); //点赞数 headList.add("like"); //点赞数
......
...@@ -20,7 +20,7 @@ public class SoKuByWordExample { ...@@ -20,7 +20,7 @@ public class SoKuByWordExample {
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(String w : words ) { for(String w : words ) {
for(String t : types) { for(String t : types) {
List<Map<String,Object>> list = Soku.getSoKuByWordData(w, t); List<Map<String,Object>> list = Soku.getSoKuByWordData(w, t,null);
if(list != null && list.size() > 0) { if(list != null && list.size() > 0) {
bodyList.addAll(list); bodyList.addAll(list);
} }
......
...@@ -16,7 +16,7 @@ public class SouhuAccountExample { ...@@ -16,7 +16,7 @@ public class SouhuAccountExample {
@Test @Test
public void souhuAccountTest() { public void souhuAccountTest() {
List<Map<String,Object>> lists = Souhu.getSouHuAccountData("MjI5MzAyOTMyMEBzaW5hLnNvaHUuY29t","2016-01-01 00:00:00",false); List<Map<String,Object>> lists = Souhu.getSouHuAccountData("MjI5MzAyOTMyMEBzaW5hLnNvaHUuY29t","2016-01-01 00:00:00",false,null);
System.out.println(lists.size()); System.out.println(lists.size());
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("title"); headList.add("title");
...@@ -26,6 +26,7 @@ public class SouhuAccountExample { ...@@ -26,6 +26,7 @@ public class SouhuAccountExample {
headList.add("comment"); headList.add("comment");
headList.add("tags"); headList.add("tags");
headList.add("newsid"); headList.add("newsid");
headList.add("source");
headList.add("newsPv"); headList.add("newsPv");
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi.exportExcel("D:\\crawlerdata\\搜狐号历史文章-太保乱谈.xlsx", "太保乱谈", headList, lists); poi.exportExcel("D:\\crawlerdata\\搜狐号历史文章-太保乱谈.xlsx", "太保乱谈", headList, lists);
......
...@@ -11,7 +11,7 @@ public class SouhuCommentCountExample { ...@@ -11,7 +11,7 @@ public class SouhuCommentCountExample {
public void souhuCommentCountTest() { public void souhuCommentCountTest() {
String url = "https://www.sohu.com/a/210588884_267106?_f=index_news_7"; String url = "https://www.sohu.com/a/210588884_267106?_f=index_news_7";
int i = Souhu.getSouhuCommentCount(url); int i = Souhu.getSouhuCommentCount(url,null);
System.out.println(i); System.out.println(i);
} }
......
...@@ -25,7 +25,7 @@ public class SouhuCommentExample { ...@@ -25,7 +25,7 @@ public class SouhuCommentExample {
try { try {
url = map1.get("url")+""; url = map1.get("url")+"";
List<Map<String,Object>> dataList = Souhu.getSouhuCommentData(url); List<Map<String,Object>> dataList = Souhu.getSouhuCommentData(url,null);
if(dataList.size() <= 0) { if(dataList.size() <= 0) {
urlList.add(url); urlList.add(url);
} }
......
...@@ -11,7 +11,7 @@ public class TXNewsByWordExample { ...@@ -11,7 +11,7 @@ public class TXNewsByWordExample {
public static void main(String[] args) { public static void main(String[] args) {
String word = "唐嫣"; String word = "唐嫣";
List<Map<String,Object>> dataList = TXNews.getData(word); List<Map<String,Object>> dataList = TXNews.getData(word,null);
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("title"); headList.add("title");
......
...@@ -10,7 +10,7 @@ public class WangyiCommentCountExample { ...@@ -10,7 +10,7 @@ public class WangyiCommentCountExample {
public void wangyiCommentCountTest() { public void wangyiCommentCountTest() {
String id = "D77CENT50001875P"; String id = "D77CENT50001875P";
int i = Wangyi.getWangyiCommentCount(id); int i = Wangyi.getWangyiCommentCount(id,null);
System.out.println(i); System.out.println(i);
} }
......
...@@ -22,7 +22,7 @@ public class WangyiCommentExample { ...@@ -22,7 +22,7 @@ public class WangyiCommentExample {
for(String url : urlList) { for(String url : urlList) {
String id = url.split("a/")[1].split(".ht")[0]; String id = url.split("a/")[1].split(".ht")[0];
List<Map<String,Object>> lists = Wangyi.getWangyiCommentData(id); List<Map<String,Object>> lists = Wangyi.getWangyiCommentData(id,null);
System.out.println(lists.size()); System.out.println(lists.size());
if(lists != null) { if(lists != null) {
bodyList.addAll(lists); bodyList.addAll(lists);
......
...@@ -15,7 +15,7 @@ public class XiaomiShequByWordExample { ...@@ -15,7 +15,7 @@ public class XiaomiShequByWordExample {
String[] words = word.split(","); String[] words = word.split(",");
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(String w : words) { for(String w : words) {
List<Map<String,Object>> dataList = Xiaomi.getXiaomiByWordData(w); List<Map<String,Object>> dataList = Xiaomi.getXiaomiByWordData(w,null);
if(dataList != null && dataList.size() > 0) { if(dataList != null && dataList.size() > 0) {
bodyList.addAll(dataList); bodyList.addAll(dataList);
} }
......
...@@ -23,7 +23,7 @@ public class XiguaAccountExample { ...@@ -23,7 +23,7 @@ public class XiguaAccountExample {
for(Map<String,Object> map1 : lists ) { for(Map<String,Object> map1 : lists ) {
String url = map1.get("主页")+""; String url = map1.get("主页")+"";
if(url != null && url.length() > 5) { if(url != null && url.length() > 5) {
List<Map<String,Object>> lists1 = XiGua.getXiguaAccountData(url,startTime); List<Map<String,Object>> lists1 = XiGua.getXiguaAccountData(url,startTime,null);
if(lists1 != null && lists.size() > 0) { if(lists1 != null && lists.size() > 0) {
bodyList.addAll(lists1); bodyList.addAll(lists1);
} }
......
...@@ -19,7 +19,7 @@ public class XiguaByWordExample { ...@@ -19,7 +19,7 @@ public class XiguaByWordExample {
String[] words = word.split(","); String[] words = word.split(",");
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for(String w : words) { for(String w : words) {
List<Map<String,Object>> list = XiGua.getXiguaVideoByWordData(w); List<Map<String,Object>> list = XiGua.getXiguaVideoByWordData(w,null);
if(list != null && list.size() > 0) { if(list != null && list.size() > 0) {
bodyList.addAll(list); bodyList.addAll(list);
} }
......
...@@ -18,7 +18,7 @@ public class YidainzixunByWordExample { ...@@ -18,7 +18,7 @@ public class YidainzixunByWordExample {
List<String> wordList = WordReadFile.getWords("D://crawlerdata/关键词.txt"); List<String> wordList = WordReadFile.getWords("D://crawlerdata/关键词.txt");
List<Map<String,Object>> listAll = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> listAll = new ArrayList<Map<String,Object>>();
for(String word : wordList) { for(String word : wordList) {
List<Map<String,Object>> dataList = Yidianzixun.getYidianzixunDataByWord(word); List<Map<String,Object>> dataList = Yidianzixun.getYidianzixunDataByWord(word,null);
System.out.println(dataList.size()); System.out.println(dataList.size());
listAll.addAll(dataList); listAll.addAll(dataList);
System.out.println(listAll.size()); System.out.println(listAll.size());
......
...@@ -16,7 +16,7 @@ public class YidianzixunAccountExample { ...@@ -16,7 +16,7 @@ public class YidianzixunAccountExample {
public void yidianzixunAccountTest() { public void yidianzixunAccountTest() {
String channelid = "m143901"; String channelid = "m143901";
String startTime = "2017-01-01 00:00:00"; String startTime = "2017-01-01 00:00:00";
List<Map<String,Object>> dataList = Yidianzixun.getYidianzixunAccountData(channelid, startTime); List<Map<String,Object>> dataList = Yidianzixun.getYidianzixunAccountData(channelid, startTime,null);
PoiExcelUtil poi = PoiExcelUtil.getInstance(); PoiExcelUtil poi = PoiExcelUtil.getInstance();
List<String> headList = new ArrayList<String>(); List<String> headList = new ArrayList<String>();
headList.add("title"); headList.add("title");
......
...@@ -12,7 +12,7 @@ public class YidianzixunCommentExample { ...@@ -12,7 +12,7 @@ public class YidianzixunCommentExample {
@Test @Test
public void yidianzixunCommentTest() { public void yidianzixunCommentTest() {
String url = "http://www.yidianzixun.com/article/0ILHigvv"; String url = "http://www.yidianzixun.com/article/0ILHigvv";
List<Map<String,Object>> lists = Yidianzixun.getYidianzixunCommentData(url); List<Map<String,Object>> lists = Yidianzixun.getYidianzixunCommentData(url,null);
System.out.println(lists.size()); System.out.println(lists.size());
for(Map<String,Object> map : lists) { for(Map<String,Object> map : lists) {
System.out.println(map.toString()); System.out.println(map.toString());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment