Commit 7e601698 by yangchen

修改聚投诉采集规则(增加 referer 和 cookie 传入;不传入 cookie 只能采集 10 页)

parent 31b11eff
package com.zhiwei.parse; package com.zhiwei.parse;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
...@@ -21,18 +23,26 @@ public class Ts21cn { ...@@ -21,18 +23,26 @@ public class Ts21cn {
private static Ts21cnAnalysis ts21cnAnalysis = new Ts21cnAnalysis(); private static Ts21cnAnalysis ts21cnAnalysis = new Ts21cnAnalysis();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(2).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(2).build();
public static List<Map<String,Object>> getdata(String word,ProxyHolder proxy,String endTime) { public static List<Map<String,Object>> getdata(String word,String cookie,ProxyHolder proxy,String endTime) {
int page = 1; int page = 1;
int count1 = 1; int count1 = 1;
List<Map<String,Object>> dataList = new ArrayList<>(); List<Map<String,Object>> dataList = new ArrayList<>();
Map<String, Object> headers = new HashMap<>();
try {
headers.put("referer", "https://ts.21cn.com/home/search?keyword=" + URLEncoder.encode(word, "utf-8"));
headers.put("host", "ts.21cn.com");
headers.put("cookie", cookie);
} catch (UnsupportedEncodingException e1) {
e1.printStackTrace();
}
while(true) { while(true) {
try { try {
if(count1 > 3) { if(count1 > 3) {
break; break;
} }
String url = "http://ts.21cn.com/front/api/search/searchPostList.do?title="+URLEncoder.encode(word, "utf-8")+"&listType=1&pageNo="; String url = "http://ts.21cn.com/front/api/search/searchPostList.do?title="+URLEncoder.encode(word, "utf-8")+"&listType=1&pageNo=";
ZhiWeiTools.sleep(100); ZhiWeiTools.sleep(1000);
String result = httpBoot.syncCall(RequestUtils.wrapGet(url+page), proxy).body().string(); String result = httpBoot.syncCall(RequestUtils.wrapGet(url+page,headers), proxy).body().string();
int count = JSONObject.parseObject(result).getInteger("count"); int count = JSONObject.parseObject(result).getInteger("count");
List<Map<String,Object>> bodyList = ts21cnAnalysis.getdata(result,endTime); List<Map<String,Object>> bodyList = ts21cnAnalysis.getdata(result,endTime);
if(bodyList.isEmpty() || dataList.size() > count) { if(bodyList.isEmpty() || dataList.size() > count) {
......
package com.zhiwei.parse.analysis; package com.zhiwei.parse.analysis;
import static java.util.Objects.isNull;
import static java.util.Objects.nonNull; import static java.util.Objects.nonNull;
import java.util.ArrayList; import java.util.ArrayList;
...@@ -28,7 +29,7 @@ public class Ts21cnAnalysis { ...@@ -28,7 +29,7 @@ public class Ts21cnAnalysis {
for(int i = 0;i < jsonArray.size();i++) { for(int i = 0;i < jsonArray.size();i++) {
JSONObject data = jsonArray.getJSONObject(i); JSONObject data = jsonArray.getJSONObject(i);
String ctime = data.getString("ctimeStr"); String ctime = data.getString("ctimeStr");
if(!nonNull(time) || ctime.compareTo(time) <= 0) { if(nonNull(time) && ctime.compareTo(time) > -1) {
continue; continue;
} }
Map<String,Object> map = new HashMap<>(); Map<String,Object> map = new HashMap<>();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment