Commit e5fa93cf by cwy

客户端采集修改为 外部控制翻页

parent b60d852a
......@@ -423,73 +423,66 @@ public class TouTiaoArticleParse {
* @param maxBehotTime
* @return
*/
public static List<Map<String,Object>> getClientMicroToutiaoCrawler(String userId, ProxyHolder proxy,
public static Map<String, Object> getClientMicroToutiaoCrawler(String userId, ProxyHolder proxy,
Long maxBehotTime) {
Map<String, Object> rMap = new HashMap<>();
List<Map<String,Object>> dataList = new ArrayList<>();
String ma = "";
while(true) {
String url = "https://profile.zjurl.cn/api/feed/profile/v1/?category=profile_all&visited_uid="+userId+"&stream_api_version=82&offset="+maxBehotTime+"&version_code=7.5.3&version_name=70503&user_id="+userId+"&request_source=1&active_tab=dongtai&device_id=65&app_name=news_article";
ma = String.valueOf(maxBehotTime);
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url),proxy)){
String result = response.body().string();
JSONObject json = JSONObject.parseObject(result);
maxBehotTime = json.getLongValue("offset");
JSONArray jsonArray = json.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) {
try {
JSONObject data = jsonArray.getJSONObject(i);
JSONObject dataJSON = data.getJSONObject("content");
if(Objects.nonNull(dataJSON.getLongValue("id"))) {
Map<String,Object> map = new HashMap<>();
String source = null;
Date date = null;
String content = null;
String title = null;
if(Objects.nonNull(dataJSON.getJSONObject("user_info"))) {
source = dataJSON.getJSONObject("user_info").getString("name");
userId = dataJSON.getJSONObject("user_info").getString("user_id");
date = new Date(dataJSON.getLongValue("publish_time") * 1000);
content = dataJSON.getString("abstract");
title = dataJSON.getString("title");
map.put("type", "文章");
}
if(Objects.nonNull(dataJSON.getJSONObject("user"))) {
source = dataJSON.getJSONObject("user").getString("name");
userId = dataJSON.getJSONObject("user").getString("user_id");
date = new Date(dataJSON.getLongValue("create_time") * 1000);
content = dataJSON.getString("content");
map.put("type", "微头条");
}
if(Objects.nonNull(source)) {
String href = "https://www.toutiao.com/a" + dataJSON.getLongValue("id") + "/";
String readNum = String.valueOf(dataJSON.getInteger("read_count"));
String commentNum = String.valueOf(dataJSON.getInteger("comment_count"));
map.put("title", title);
map.put("time", date);
map.put("href", href);
map.put("source", source);
map.put("content", content);
map.put("readNum", readNum);
map.put("commentNum", commentNum);
map.put("user_id", userId);
System.out.println(map.toString());
dataList.add(map);
}
String url = "https://profile.zjurl.cn/api/feed/profile/v1/?category=profile_all&visited_uid="+userId+"&stream_api_version=82&offset="+maxBehotTime+"&version_code=7.5.3&version_name=70503&user_id="+userId+"&request_source=1&active_tab=dongtai&device_id=65&app_name=news_article";
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url),proxy)){
String result = response.body().string();
JSONObject json = JSONObject.parseObject(result);
maxBehotTime = json.getLongValue("offset");
JSONArray jsonArray = json.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) {
try {
JSONObject data = jsonArray.getJSONObject(i);
JSONObject dataJSON = data.getJSONObject("content");
if(Objects.nonNull(dataJSON.getLongValue("id"))) {
Map<String,Object> map = new HashMap<>();
String source = null;
Date date = null;
String content = null;
String title = null;
if(Objects.nonNull(dataJSON.getJSONObject("user_info"))) {
source = dataJSON.getJSONObject("user_info").getString("name");
userId = dataJSON.getJSONObject("user_info").getString("user_id");
date = new Date(dataJSON.getLongValue("publish_time") * 1000);
content = dataJSON.getString("abstract");
title = dataJSON.getString("title");
map.put("type", "文章");
}
if(Objects.nonNull(dataJSON.getJSONObject("user"))) {
source = dataJSON.getJSONObject("user").getString("name");
userId = dataJSON.getJSONObject("user").getString("user_id");
date = new Date(dataJSON.getLongValue("create_time") * 1000);
content = dataJSON.getString("content");
map.put("type", "微头条");
}
if(Objects.nonNull(source)) {
String href = "https://www.toutiao.com/a" + dataJSON.getLongValue("id") + "/";
String readNum = String.valueOf(dataJSON.getInteger("read_count"));
String commentNum = String.valueOf(dataJSON.getInteger("comment_count"));
map.put("title", title);
map.put("time", date);
map.put("href", href);
map.put("source", source);
map.put("content", content);
map.put("readNum", readNum);
map.put("commentNum", commentNum);
map.put("user_id", userId);
dataList.add(map);
}
} catch (Exception e) {
e.printStackTrace();
}
} catch (Exception e) {
e.printStackTrace();
}
System.out.println(" 采集到 条 == "+dataList.size() + " -- " +ma + " -- " + maxBehotTime);
if(ma.equals(String.valueOf(maxBehotTime))) {
break;
}
} catch (Exception e) {
logger.info("客户端微头条采集错误 {}",e);
}
}
return dataList;
rMap.put("data", dataList);
rMap.put("maxBehotTime", maxBehotTime);
} catch (Exception e) {
logger.info("客户端微头条采集错误 {}",e);
}
return rMap;
}
/**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment