Commit c514570f by yangchen

部分平台添加

parent 3e3ea4d9
package com.zhiwei.httpclient; package com.zhiwei.httpclient;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.SocketAddress;
import java.net.URLEncoder;
import java.net.Proxy.Type;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.jsoup.nodes.Document;
public class HeadGet { public class HeadGet {
/** /**
...@@ -60,7 +68,7 @@ public class HeadGet { ...@@ -60,7 +68,7 @@ public class HeadGet {
* @return * @return
* @throws IOException * @throws IOException
*/ */
public static Map<String,String> getYidianzixunAccountHeaderMap(String cookie) { public static Map<String,String> getYidianzixunAccountHeaderMap(String cookie,String referer) {
Map<String, String> headerMap = new HashMap<String, String>(); Map<String, String> headerMap = new HashMap<String, String>();
headerMap.put("User-Agent", headerMap.put("User-Agent",
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"); "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");
...@@ -69,6 +77,7 @@ public class HeadGet { ...@@ -69,6 +77,7 @@ public class HeadGet {
headerMap.put("Accept-Language", "zh-CN,zh;q=0.9"); headerMap.put("Accept-Language", "zh-CN,zh;q=0.9");
headerMap.put("Connection", "keep-alive"); headerMap.put("Connection", "keep-alive");
headerMap.put("Host", "www.yidianzixun.com"); headerMap.put("Host", "www.yidianzixun.com");
headerMap.put("referer", referer);
if(cookie != null) { if(cookie != null) {
headerMap.put("Cookie", cookie); headerMap.put("Cookie", cookie);
} }
...@@ -793,14 +802,89 @@ public class HeadGet { ...@@ -793,14 +802,89 @@ public class HeadGet {
return headerMap; return headerMap;
} }
public static void main(String[] args) { public static Map<String,String> getMaimaiKeywordHeaderMap(String cookie) {
String url = "http://dy.163.com/v2/article/detail/CK4OE81O0512974K.html"; Map<String,String> headerMap = new HashMap<String, String>();
// String cookie = "pgv_pvi=1395917824; pgv_si=s4065829888"; headerMap.put("User-Agent",
Map<String,String> headerMap = HeadGet.getWangyiHistoryHeaderMap(null); "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");
// Map<String,Object> paramMap = HeadGet.getTxNewsAccountpageParamMap("1979"); headerMap.put("Accept",
"*/*");
if(cookie != null) {
headerMap.put("Cookie", cookie);
}
return headerMap;
}
/**
 * Builds the HTTP request headers sent with soubao.net search requests.
 *
 * @param cookie value for the {@code Cookie} header; when {@code null} the
 *               header is left out entirely
 * @return a new, mutable map of header names to header values
 */
public static Map<String,String> getSoubaoHeaderMap(String cookie) {
    final Map<String, String> headers = new HashMap<String, String>();
    headers.put("Host", "www.soubao.net");
    headers.put("Referer", "http://www.soubao.net/");
    headers.put("Connection", "keep-alive");
    headers.put("Accept-Language", "zh-CN,zh;q=0.9");
    headers.put("Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
    headers.put("User-Agent",
            "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36");
    if (cookie == null) {
        // No cookie supplied: send the request anonymously.
        return headers;
    }
    headers.put("Cookie", cookie);
    return headers;
}
/**
 * Builds the form parameters for a soubao.net paging POST.
 *
 * The three hidden fields ({@code __VIEWSTATE}, {@code __VIEWSTATEGENERATOR},
 * {@code __EVENTVALIDATION}) are copied from the previously fetched page so
 * they can be sent back with the next request.
 *
 * @param word  search keyword, sent as {@code txtKeyword}
 * @param page  page number, sent as {@code __EVENTARGUMENT}
 * @param doc   previously fetched result page the hidden fields are read from
 * @param start start date, sent as {@code txtStartDate}
 * @param end   end date, sent as {@code txtEndDate}
 * @return a new, mutable map of form field names to values
 */
public static Map<String,Object> getSoubaoParamMap(String word, int page,Document doc,String start,String end) {
    // Read the hidden form state from the previous page first.
    final String viewState = doc.select("#__VIEWSTATE").attr("value");
    final String viewStateGenerator = doc.select("#__VIEWSTATEGENERATOR").attr("value");
    final String eventValidation = doc.select("#__EVENTVALIDATION").attr("value");

    final Map<String,Object> form = new HashMap<String,Object>();
    form.put("__VIEWSTATE", viewState);
    form.put("__VIEWSTATEGENERATOR", viewStateGenerator);
    form.put("__EVENTVALIDATION", eventValidation);
    form.put("__EVENTTARGET", "AspNetPager1");
    form.put("__EVENTARGUMENT", page);
    form.put("txtKeyword", word);
    form.put("txtStartDate", start);
    form.put("txtEndDate", end);
    form.put("timesel", "on");
    form.put("HidTimeSelect", "custom");
    form.put("HiddenMsg", "default");
    form.put("checkNum", "");
    return form;
}
/**
 * Builds the HTTP request headers for www.zhihu.com requests that present
 * themselves with the Zhihu iOS app user agent.
 *
 * Accept, Accept-Language and Connection are not sent; they were
 * commented out in the original code.
 *
 * @param cookie value for the {@code Cookie} header; when {@code null} the
 *               header is left out entirely
 * @return a new, mutable map of header names to header values
 */
public static Map<String,String> getZhihuXiangfaHeaderMap(String cookie) {
    final Map<String, String> headers = new HashMap<String, String>();
    headers.put("Host", "www.zhihu.com");
    headers.put("x-api-version", "3.0.91");
    headers.put("x-app-za", "OS=iOS&Release=11.2.1&Model=iPhone8,1&VersionName=4.18.1&VersionCode=980&Width=750&Height=1334&DeviceType=Phone&Brand=Apple&OperatorType=46002");
    headers.put("Referer", "https://www.zhihu.com/appview/search/general?config=%7B%22search_tab%22:%22collapsed%22,%22is_dark_theme%22:false%7D");
    headers.put("User-Agent",
            "ZhihuHybrid osee2unifiedRelease/980 osee2unifiedReleaseVersion/4.18.1 Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_1 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C153");
    if (cookie == null) {
        return headers;
    }
    headers.put("Cookie", cookie);
    return headers;
}
/**
 * Builds the HTTP request headers for aweme.snssdk.com (Douyin) requests,
 * using the Aweme iOS client user agent.
 *
 * @param cookie value for the {@code Cookie} header; when {@code null} the
 *               header is left out entirely
 * @return a new, mutable map of header names to header values
 */
public static Map<String,String> getDouyinHotHeaderMap(String cookie) {
    final Map<String, String> headers = new HashMap<String, String>();
    headers.put("Host", "aweme.snssdk.com");
    headers.put("Connection", "keep-alive");
    headers.put("Accept-Language", "zh-cn");
    headers.put("User-Agent",
            "Aweme/1.9.0 (iPhone; iOS 11.2.1; Scale/2.00)");
    if (cookie == null) {
        return headers;
    }
    headers.put("Cookie", cookie);
    return headers;
}
public static void main(String[] args) throws UnsupportedEncodingException {
String url = "https://aweme.snssdk.com/aweme/v1/comment/list/?iid=36454376501&device_id=47835193298&os_api=18&app_name=aweme&channel=App%20Store&idfa=FE659B7E-5104-44C2-8A31-F88DEE7A2747&device_platform=iphone&build_number=19007&vid=E66B8A7B-F8E3-4ED2-BA42-D6D8EFAD0A3F&openudid=fa9701d8e1a8892e777693ba287551c226006542&device_type=iPhone8,1&app_version=1.9.0&version_code=1.9.0&os_version=11.2.1&screen_width=750&aid=1128&ac=WIFI&aweme_id=6570655003491437837&comment_style=2&count=20&cursor=0&digged_cid=&mas=006ecc6faa02e1374a12bc4c9a0368283d6a1f2412878507cf4eb8";
System.out.println(url);
String cookie = "__utma=51854390.454838676.1510118174.1528502507.1529542643.327; __utmb=51854390.0.10.1529542643; __utmc=51854390; __utmv=51854390.010--; __utmz=51854390.1510118174.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _zap=4e09de9f-d212-48b9-af42-9173e1092406; d_c0=ACACkXJlZQxLBTcU1Z70bp9TpD_qDHF6sAY=|1529542618; q_c0=2|1:0|10:1528360082|4:q_c0|80:MS4xX0RmNkJRQUFBQUFMQUFBQVlBSlZUWkY1UUZzOXFnWVlQOWRXTVkxNG5kcl80WjJkdzhiTDN3PT0=|c6f0a98f0d30bdfda124c276e9e88ce945dcf50103663cedeab1be9415a42101; q_c1=2d80752a333f4fcd99b1362dfce3e7eb|1529542724000|1506556052000; z_c0=2|1:0|10:1528360082|4:z_c0|80:MS4xX0RmNkJRQUFBQUFMQUFBQVlBSlZUWkY1UUZzOXFnWVlQOWRXTVkxNG5kcl80WjJkdzhiTDN3PT0=|021aae122258a6476003fd206423140b8c2ee162e406b628051471c64021b211; zst_82=1.0ANCm7PYOyA0LAAAASwUAADEuMDv9KlsAAAAAYiqmK0gPDsX0FVknWJlXcKc07NA=; __DAYU_PP=VFZF3fmJavniAMQf2fnf23a6dd5221ec; q_c1=2d80752a333f4fcd99b1362dfce3e7eb|1529542724000|1506556052000";
Map<String,String> headerMap = HeadGet.getDouyinHotHeaderMap(null);
String result = HttpClient.executeHttpRequestGet(url,null, headerMap); String result = HttpClient.executeHttpRequestGet(url,null, headerMap);
System.out.println(result); System.out.println(result);
System.out.println(result.length()); System.out.println(result.length());
} }
} }
package com.zhiwei.parse;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.DouyinHotDataAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
/**
 * Crawler entry points for Douyin (aweme.snssdk.com).
 */
public class Douyin {
    private static Logger logger = LoggerFactory.getLogger(Douyin.class);
    private static DouyinHotDataAnalysis douyinHotDataAnalysis = new DouyinHotDataAnalysis();

    /**
     * Upper bound on back-to-back failed page fetches before paging stops.
     * Without a bound, a persistent failure (network down, response format
     * changed) would keep the loop running forever.
     */
    private static final int MAX_CONSECUTIVE_FAILURES = 5;

    /** Pause between page requests, in milliseconds (also used as back-off after a failure). */
    private static final int PAGE_INTERVAL_MILLIS = 4000;

    /**
     * Fetches the videos listed under a Douyin challenge, page by page.
     *
     * The {@code iid} and challenge id are cut out of {@code url}: the text
     * after {@code "iid="} up to the next {@code '&'}, and the text after
     * {@code "challenge/"} up to the next {@code '?'}. A URL missing either
     * marker makes the array indexing throw
     * {@link ArrayIndexOutOfBoundsException}.
     *
     * Paging stops when the parser reports {@code status == false} (no items
     * on the page) or after {@value #MAX_CONSECUTIVE_FAILURES} consecutive
     * failed attempts; whatever was collected so far is returned.
     *
     * @param url   challenge URL containing {@code iid=} and {@code challenge/}
     * @param proxy proxy passed through to the HTTP client
     * @return the collected items, possibly empty, never {@code null}
     */
    @SuppressWarnings("unchecked")
    public static List<Map<String,Object>> getDouyinHotData(String url,Proxy proxy) {
        String iid = url.split("iid=")[1].split("&")[0];
        String ch_id = url.split("challenge/")[1].split("\\?")[0];
        Map<String,String> headerMap = HeadGet.getDouyinHotHeaderMap(null);
        List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
        String cursor = "0";
        int consecutiveFailures = 0;
        while (consecutiveFailures < MAX_CONSECUTIVE_FAILURES) {
            try {
                String pageUrl = "https://aweme.snssdk.com/aweme/v1/challenge/aweme/?iid="+iid+"&device_id=47835193298&aid=1128&ac=WIFI&ch_id="+ch_id+"&count=18&pull_type=2&query_type=0&type=5&cursor="+cursor;
                String result = HttpClient.executeHttpRequestGet(pageUrl, proxy, headerMap);
                logger.debug("Douyin response for cursor {}: {}", cursor, result);
                Map<String,Object> page = douyinHotDataAnalysis.getData(result);
                if (page == null) {
                    // The parser returns null when the response could not be parsed;
                    // count it as a failure so the same cursor is not retried forever.
                    consecutiveFailures++;
                    logger.warn("Douyin response for cursor {} could not be parsed (failure {}/{})",
                            cursor, consecutiveFailures, MAX_CONSECUTIVE_FAILURES);
                } else {
                    consecutiveFailures = 0;
                    dataList.addAll((List<Map<String,Object>>) page.get("data"));
                    if (!(boolean) page.get("status")) {
                        break;
                    }
                    cursor = page.get("cursor")+"";
                }
                logger.info("采集到的数据总量---"+dataList.size());
                ZhiWeiTools.sleep(PAGE_INTERVAL_MILLIS);
            } catch (Exception e) {
                consecutiveFailures++;
                logger.warn("Douyin request failed for cursor " + cursor
                        + " (failure " + consecutiveFailures + "/" + MAX_CONSECUTIVE_FAILURES + ")", e);
                // Back off before retrying instead of hammering the server in a tight loop.
                ZhiWeiTools.sleep(PAGE_INTERVAL_MILLIS);
            }
        }
        return dataList;
    }

    /**
     * Placeholder for comment crawling; not implemented yet.
     *
     * @param url   unused
     * @param proxy unused
     * @return always {@code null}
     */
    public static List<Map<String,Object>> getDouyinCommentData(String url,Proxy proxy) {
        return null;
    }
}
package com.zhiwei.parse;
import java.net.Proxy;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.parse.analysis.MaimaiBywordAnalysis;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
/**
 * Keyword search crawler for maimai.cn feeds.
 */
public class Maimai {
    private static Logger logger = LoggerFactory.getLogger(Maimai.class);
    private static MaimaiBywordAnalysis maimaiBywordAnalysis = new MaimaiBywordAnalysis();

    /** Number of feeds requested per page; also the offset step between pages. */
    private static final int PAGE_LIMIT = 20;

    /**
     * Collects search results for a keyword, newest first, page by page.
     *
     * Paging continues while the parser reports {@code hasMore} and the page
     * yielded at least one item. Any exception ends the crawl; it is logged
     * and the items collected up to that point are returned.
     *
     * @param key    search keyword (URL-encoded as UTF-8 before use)
     * @param cookie value for the Cookie header, may be {@code null}
     * @param time   cut-off passed to the parser, which compares it as a
     *               string against each feed's creation time
     * @param proxy  proxy passed through to the HTTP client
     * @return the collected items, possibly empty, never {@code null}
     */
    @SuppressWarnings("unchecked")
    public static List<Map<String,Object>> getData(String key,String cookie,String time,Proxy proxy) {
        Map<String,String> headerMap = HeadGet.getMaimaiKeywordHeaderMap(cookie);
        List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
        try {
            // Encode once; the keyword does not change between pages.
            String encodedKey = URLEncoder.encode(key, "utf-8");
            int offset = 0;
            boolean hasMore = true;
            while (hasMore) {
                String url = "https://maimai.cn/search/feeds?query="+encodedKey+"&limit="+PAGE_LIMIT+"&offset="+offset+"&highlight=true&sortby=time&jsononly=1";
                String result = HttpClient.executeHttpRequestGet(url, proxy, headerMap);
                Map<String,Object> page = maimaiBywordAnalysis.getData(result, time);
                hasMore = (boolean) page.get("hasMore");
                List<Map<String,Object>> pageData = (List<Map<String, Object>>) page.get("data");
                if (pageData == null || pageData.isEmpty()) {
                    break;
                }
                dataList.addAll(pageData);
                offset += PAGE_LIMIT;
                logger.info("{}==采集到的数据量=={}", key, dataList.size());
                ZhiWeiTools.sleep(2000);
            }
        } catch (Exception e) {
            // Best effort: keep what was collected, but do not hide the failure.
            logger.error("Maimai crawl aborted for keyword " + key + " after "
                    + dataList.size() + " items", e);
        }
        return dataList;
    }
}
package com.zhiwei.parse;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.httpclient.HeadGet;
import com.zhiwei.httpclient.HttpClient;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
/**
 * Keyword search crawler for www.soubao.net with a one-off export driver.
 */
public class SouBao {
    /** A page with fewer result lists than this is treated as the last page. */
    private static final int FULL_PAGE_SIZE = 10;

    /**
     * Upper bound on back-to-back failed paging requests before paging stops.
     * Without a bound, a persistent failure would keep the loop running forever.
     */
    private static final int MAX_CONSECUTIVE_FAILURES = 3;

    private static final String SEARCH_URL = "http://www.soubao.net/search/searchList.aspx";

    /**
     * One-off driver: reads keywords from a file, crawls each keyword for a
     * fixed date range, keeps the rows whose title/content contain every
     * space-separated part of the keyword, tags them with a brand and
     * exports the result to an Excel file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> wordList = getWords("D:\\crawlerdata\\关键词.txt");
        if (wordList == null) {
            // getWords signals an unreadable file with null; stop instead of failing later.
            System.err.println("Could not read the keyword file; nothing to crawl.");
            return;
        }
        Map<String,String> map1 = getdata();
        String cookie = "UM_distinctid=163edb1f5e369-014b755d3bd662-6f14167a-1fa400-163edb1f5e648c; ASP.NET_SessionId=zy45xibjfmchosyskjqznwz0; CNZZDATA4625144=cnzz_eid%3D240947030-1528700717-%26ntime%3D1528965485; CNZZDATA1260939784=1605411930-1528965615-http%253A%252F%252Fwww.cnepaper.com%252F%7C1528965615";
        List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
        for (String word : wordList) {
            ZhiWeiTools.sleep(2000);
            List<Map<String,Object>> dataList = getData(word, cookie,"2018-06-11","2018-06-12");
            for (Map<String,Object> map : dataList) {
                String time = (String) map.get("time");
                String w = (String) map.get("word");
                String matchContent = (String) map.get("title") + "_" + (String) map.get("content");
                if ("20160101".equals(time)) {
                    continue;
                }
                if (containsAllWords(matchContent, w.split(" "))) {
                    System.out.println(map.toString());
                    map.put("品牌", map1.get(w));
                    bodyList.add(map);
                }
            }
        }
        List<String> headList = new ArrayList<String>();
        headList.add("title");
        headList.add("time");
        headList.add("content");
        headList.add("source");
        headList.add("url");
        headList.add("word");
        headList.add("品牌");
        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        poi.exportExcel("D:\\crawlerdata\\搜报网-EA 品牌 关键词-06.11-06.12.xlsx", "sa", headList, bodyList);
    }

    /**
     * Case-insensitive check that {@code text} contains every entry of {@code words}.
     *
     * @return {@code false} for an empty {@code words} array
     */
    private static boolean containsAllWords(String text, String[] words) {
        if (words.length == 0) {
            return false;
        }
        String lowered = text.toLowerCase();
        for (String word : words) {
            if (!lowered.contains(word.toLowerCase())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Loads the keyword-to-brand mapping from the first sheet of a fixed Excel file.
     *
     * @return map from the "关键词" column to the "品牌" column
     */
    @SuppressWarnings("unchecked")
    public static Map<String,String> getdata() {
        Map<String,String> map = new HashMap<String,String>();
        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        Map<String,Object> m = poi.importExcel("D:\\crawlerdata\\品牌区分.xlsx", 0);
        List<Map<String,Object>> l = (List<Map<String, Object>>) m.get("body");
        for (Map<String,Object> mm : l) {
            map.put((String)mm.get("关键词"), (String)mm.get("品牌"));
        }
        System.out.println(map.toString());
        return map;
    }

    /**
     * Reads the non-empty lines of a GBK-encoded text file.
     *
     * @param wordFileName path of the keyword file
     * @return the non-empty lines in file order, or {@code null} when the
     *         file cannot be read
     */
    public static List<String> getWords(String wordFileName) {
        List<String> list = new ArrayList<String>();
        // try-with-resources closes the stream even when reading fails midway.
        try (FileInputStream in = new FileInputStream(wordFileName);
             BufferedReader br = new BufferedReader(new InputStreamReader(in, "GBK"))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.length() >= 1) {
                    list.add(line);
                }
            }
            return list;
        } catch (IOException e) {
            System.err.println("Failed to read keyword file " + wordFileName + ": " + e);
            return null;
        }
    }

    /**
     * Crawls all result pages for one keyword and date range.
     *
     * The first page is fetched with GET; when it is full, the remaining
     * pages are fetched by {@link #gettwoData}.
     *
     * @param word   search keyword
     * @param cookie value for the Cookie header, may be {@code null}
     * @param start  start date as sent in the {@code startdate} query parameter
     * @param end    end date as sent in the {@code enddate} query parameter
     * @return the collected rows, possibly empty, never {@code null}
     */
    public static List<Map<String,Object>> getData(String word,String cookie,String start,String end) {
        Map<String,String> headerMap = HeadGet.getSoubaoHeaderMap(cookie);
        List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
        try {
            String url = "http://www.soubao.net/search/searchList.aspx?keyword="+URLEncoder.encode(word,"utf-8")+"&startdate="+start+"&enddate="+end+"&timesel=custom&checkNum=";
            String result = HttpClient.executeHttpRequestGet(url, null, headerMap);
            Document doc = Jsoup.parse(result);
            List<Map<String,Object>> firstPage = parseResults(doc, word);
            dataList.addAll(firstPage);
            if (firstPage.size() < FULL_PAGE_SIZE) {
                return dataList;
            }
            dataList.addAll(gettwoData(word, doc, cookie, start, end));
            System.out.println("=================================");
            ZhiWeiTools.sleep(2000);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return dataList;
    }

    /**
     * Crawls result pages 2..n for one keyword by POSTing the paging form.
     *
     * Each request carries the hidden form fields of the most recently
     * parsed page. Paging stops at the first page that is not full, or after
     * {@value #MAX_CONSECUTIVE_FAILURES} consecutive failed attempts; the
     * rows collected so far are returned in both cases.
     *
     * @param word   search keyword
     * @param doc    the already-fetched first page (source of the hidden form fields)
     * @param cookie value for the Cookie header, may be {@code null}
     * @param start  start date
     * @param end    end date
     * @return the rows of pages 2..n, possibly empty, never {@code null}
     */
    public static List<Map<String,Object>> gettwoData(String word,Document doc,String cookie,String start,String end) {
        Map<String, String> headerMap = HeadGet.getSoubaoHeaderMap(cookie);
        List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
        int page = 2;
        int consecutiveFailures = 0;
        while (true) {
            try {
                Map<String,Object> paramMap = HeadGet.getSoubaoParamMap(word, page, doc, start, end);
                String result = HttpClient.executeHttpRequestPost(SEARCH_URL, null, headerMap, paramMap);
                Document pageDoc = Jsoup.parse(result);
                List<Map<String,Object>> pageData = parseResults(pageDoc, word);
                dataList.addAll(pageData);
                // Only replace the form-state source once the new page parsed cleanly,
                // so a failed request can be retried with valid hidden fields.
                doc = pageDoc;
                consecutiveFailures = 0;
                if (pageData.size() < FULL_PAGE_SIZE) {
                    return dataList;
                }
                System.out.println("=================================");
                ZhiWeiTools.sleep(2000);
                page++;
            } catch (Exception e) {
                consecutiveFailures++;
                System.err.println("soubao page " + page + " failed for \"" + word + "\" (attempt "
                        + consecutiveFailures + "/" + MAX_CONSECUTIVE_FAILURES + "): " + e);
                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                    return dataList;
                }
                // Back off before retrying the same page.
                ZhiWeiTools.sleep(2000);
            }
        }
    }

    /**
     * Extracts one row per {@code ul} under {@code div#srh_main} and echoes
     * each row to standard output.
     *
     * @param doc  parsed result page
     * @param word keyword stored in each row under "word"
     * @return the rows of this page, in document order
     */
    private static List<Map<String,Object>> parseResults(Document doc, String word) {
        List<Map<String,Object>> rows = new ArrayList<Map<String,Object>>();
        Elements elements = doc.select("div#srh_main").select("ul");
        for (Element element : elements) {
            Map<String,Object> map = new HashMap<String,Object>();
            map.put("title", element.select("h2").select("a").text());
            map.put("content", element.select("p.newCon").text());
            map.put("source", element.select("p.newsInfo").select("em.paperName").select("span").text());
            map.put("time", element.select("p.newsInfo").select("em.postDate").select("span").text());
            map.put("url","http://www.soubao.net" + element.select("h2").select("a").attr("href"));
            map.put("word", word);
            System.out.println(map.toString());
            rows.add(map);
        }
        return rows;
    }
}
...@@ -79,7 +79,7 @@ public class Wangyi { ...@@ -79,7 +79,7 @@ public class Wangyi {
String source = Jsoup.parse(result).select("body > div.colum_wrap.fl > div > div.colum_des > div.normal > div.colum_info > h4").text(); String source = Jsoup.parse(result).select("body > div.colum_wrap.fl > div > div.colum_des > div.normal > div.colum_info > h4").text();
boolean f = true; boolean f = true;
url = "http://dy.163.com/v2/article/list.do?wemediaId="+wemediaid+"&size=20&pageNo="; url = "http://dy.163.com/v2/article/list.do?wemediaId="+wemediaid+"&size=20&pageNo=";
int i = 1; int i = 20;
ZhiWeiTools.sleep(1000); ZhiWeiTools.sleep(1000);
while(f) { while(f) {
try { try {
...@@ -91,6 +91,9 @@ public class Wangyi { ...@@ -91,6 +91,9 @@ public class Wangyi {
break; break;
} }
bodyList.addAll(dataList); bodyList.addAll(dataList);
if(i == 40) {
break;
}
logger.info("数据采集第{}页;目前采集到数据{}条",i,bodyList.size()); logger.info("数据采集第{}页;目前采集到数据{}条",i,bodyList.size());
f = json.getJSONObject("data").getBoolean("hasNext"); f = json.getJSONObject("data").getBoolean("hasNext");
ZhiWeiTools.sleep(1000); ZhiWeiTools.sleep(1000);
......
...@@ -31,8 +31,8 @@ public class Yidianzixun { ...@@ -31,8 +31,8 @@ public class Yidianzixun {
* @param startTime * @param startTime
* @return * @return
*/ */
public static List<Map<String,Object>> getYidianzixunAccountData(String channelid,String startTime,Proxy proxy) { public static List<Map<String,Object>> getYidianzixunAccountData(String channelid,String startTime,Proxy proxy,String cookie) {
Map<String,String> headerMap = HeadGet.getYidianzixunAccountHeaderMap(null); Map<String,String> headerMap = HeadGet.getYidianzixunAccountHeaderMap(cookie,"http://www.yidianzixun.com/channel/"+channelid);
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
int j = 0; int j = 0;
boolean f = true; boolean f = true;
......
package com.zhiwei.parse.analysis;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
/**
 * Parses the JSON returned by the Douyin challenge listing request.
 */
public class DouyinHotDataAnalysis {
    private static Logger logger = LoggerFactory.getLogger(DouyinHotDataAnalysis.class);

    /**
     * Converts one response page into a list of row maps plus paging state.
     *
     * The returned map holds:
     * <ul>
     *   <li>{@code "data"} - one map per entry of {@code aweme_list} with the keys
     *       time, url, author, text, like_count, comment_count, share_count;</li>
     *   <li>{@code "status"} - {@code false} when the page had no entries,
     *       {@code true} otherwise;</li>
     *   <li>{@code "cursor"} - the response's {@code cursor} value as a string.</li>
     * </ul>
     *
     * @param result raw JSON response body
     * @return the parsed page, or {@code null} when the response cannot be
     *         parsed (the failure is logged)
     */
    public Map<String,Object> getData(String result) {
        try {
            JSONObject json = JSONObject.parseObject(result);
            JSONArray jsonArray = json.getJSONArray("aweme_list");
            List<Map<String,Object>> list = new ArrayList<Map<String,Object>>();
            for (int i = 0; i < jsonArray.size(); i++) {
                JSONObject data = jsonArray.getJSONObject(i);
                JSONObject statistics = data.getJSONObject("statistics");
                Map<String,Object> map = new HashMap<String,Object>();
                // create_time is multiplied by 1000 to get the millisecond value Date expects.
                map.put("time", TimeParse.dateFormartString(new Date(data.getLong("create_time")*1000L), "yyyy-MM-dd HH:mm:ss"));
                map.put("url", data.getString("share_url"));
                map.put("author", data.getJSONObject("author").getString("nickname"));
                map.put("text", data.getString("desc"));
                map.put("like_count", statistics.getInteger("digg_count"));
                map.put("comment_count", statistics.getInteger("comment_count"));
                map.put("share_count", statistics.getInteger("share_count"));
                list.add(map);
            }
            Map<String,Object> ma = new HashMap<String,Object>();
            ma.put("data", list);
            ma.put("status", !list.isEmpty());
            ma.put("cursor", json.getString("cursor"));
            return ma;
        } catch (Exception e) {
            // Callers treat null as "page could not be parsed"; record why.
            logger.warn("Failed to parse Douyin response", e);
            return null;
        }
    }
}
package com.zhiwei.parse.analysis;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
/**
 * Parses the JSON returned by the maimai.cn feed search.
 */
public class MaimaiBywordAnalysis {
    private static Logger logger = LoggerFactory.getLogger(MaimaiBywordAnalysis.class);

    /**
     * Converts one search response page into row maps plus a paging flag.
     *
     * The returned map holds:
     * <ul>
     *   <li>{@code "data"} - one map per kept feed with the keys time, url,
     *       text, name, like, comment_count, spreads;</li>
     *   <li>{@code "hasMore"} - {@code true} only when the response's
     *       {@code more} field equals 1 and no feed on this page was dropped
     *       by the cut-off.</li>
     * </ul>
     *
     * A feed is dropped when {@code time} compares greater than or equal to
     * the feed's {@code crtime_string} (plain string comparison).
     *
     * @param result raw JSON response body
     * @param time   cut-off; presumably in the same textual format as
     *               {@code crtime_string} - TODO confirm against callers
     * @return the parsed page, never {@code null}
     * @throws RuntimeException if the response is not the expected JSON shape
     *         (for example a missing {@code data} or {@code feed} object)
     */
    public Map<String,Object> getData(String result,String time) {
        JSONObject json = JSONObject.parseObject(result);
        JSONObject body = json.getJSONObject("data");
        JSONArray feeds = body.getJSONArray("feeds");
        // getInteger returns a boxed Integer; compare null-safely so a response
        // without "more" means "no further pages" rather than a NullPointerException.
        Integer more = body.getInteger("more");
        boolean hasMore = more != null && more.intValue() == 1;
        List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
        for (int i = 0; i < feeds.size(); i++) {
            JSONObject data = feeds.getJSONObject(i);
            JSONObject feed = data.getJSONObject("feed");
            String url = "https://maimai.cn/article/detail?fid="+feed.getString("id");
            String atime = feed.getString("crtime_string");
            if (time.compareTo(atime) >= 0) {
                // Feed is at or before the cut-off: skip it and stop paging after this page.
                hasMore = false;
                continue;
            }
            Map<String,Object> map = new HashMap<String,Object>();
            map.put("time", atime);
            map.put("url", url);
            map.put("text", feed.getString("text"));
            map.put("name", data.getJSONObject("contact").getString("name"));
            map.put("like", feed.getInteger("likes"));
            map.put("comment_count", feed.getInteger("total_cnt"));
            map.put("spreads", feed.getInteger("spreads")); // spread (reshare) count
            dataList.add(map);
        }
        Map<String,Object> page = new HashMap<String,Object>();
        page.put("data", dataList);
        page.put("hasMore", hasMore);
        return page;
    }
}
...@@ -44,7 +44,6 @@ public class WangyiHistoryAnalysis { ...@@ -44,7 +44,6 @@ public class WangyiHistoryAnalysis {
map.put("content", doc.select("div.content").text()); map.put("content", doc.select("div.content").text());
map.put("url", url); map.put("url", url);
map.put("source", source); map.put("source", source);
System.out.println(map.toString());
dataList.add(map); dataList.add(map);
ZhiWeiTools.sleep(1000); ZhiWeiTools.sleep(1000);
} catch (Exception e) { } catch (Exception e) {
......
# Root logger: INFO and above, written to the console and to a daily rolling file.
log4j.rootLogger=INFO,stdout,ROLLING_FILE
# Console appender.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=<%d>[%5p] %c - %m%n
# File appender, rolled over daily.
log4j.appender.ROLLING_FILE=org.apache.log4j.DailyRollingFileAppender
# Threshold must be a level name. The previous value "stdout" is an appender
# name, not a level; INFO matches the root logger level.
log4j.appender.ROLLING_FILE.Threshold=INFO
log4j.appender.ROLLING_FILE.File=./Log/wechatcrawler.log
log4j.appender.ROLLING_FILE.Append=true
log4j.appender.ROLLING_FILE.layout=org.apache.log4j.PatternLayout
log4j.appender.ROLLING_FILE.layout.ConversionPattern=<%d>[%5p] %c - %m%n
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment