Commit e5fa93cf by cwy

客户端采集修改为 外部控制翻页

parent b60d852a
...@@ -423,73 +423,66 @@ public class TouTiaoArticleParse { ...@@ -423,73 +423,66 @@ public class TouTiaoArticleParse {
* @param maxBehotTime * @param maxBehotTime
* @return * @return
*/ */
public static List<Map<String,Object>> getClientMicroToutiaoCrawler(String userId, ProxyHolder proxy, public static Map<String, Object> getClientMicroToutiaoCrawler(String userId, ProxyHolder proxy,
Long maxBehotTime) { Long maxBehotTime) {
Map<String, Object> rMap = new HashMap<>();
List<Map<String,Object>> dataList = new ArrayList<>(); List<Map<String,Object>> dataList = new ArrayList<>();
String ma = ""; String url = "https://profile.zjurl.cn/api/feed/profile/v1/?category=profile_all&visited_uid="+userId+"&stream_api_version=82&offset="+maxBehotTime+"&version_code=7.5.3&version_name=70503&user_id="+userId+"&request_source=1&active_tab=dongtai&device_id=65&app_name=news_article";
while(true) { try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url),proxy)){
String url = "https://profile.zjurl.cn/api/feed/profile/v1/?category=profile_all&visited_uid="+userId+"&stream_api_version=82&offset="+maxBehotTime+"&version_code=7.5.3&version_name=70503&user_id="+userId+"&request_source=1&active_tab=dongtai&device_id=65&app_name=news_article"; String result = response.body().string();
ma = String.valueOf(maxBehotTime); JSONObject json = JSONObject.parseObject(result);
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url),proxy)){ maxBehotTime = json.getLongValue("offset");
String result = response.body().string(); JSONArray jsonArray = json.getJSONArray("data");
JSONObject json = JSONObject.parseObject(result); for (int i = 0; i < jsonArray.size(); i++) {
maxBehotTime = json.getLongValue("offset"); try {
JSONArray jsonArray = json.getJSONArray("data"); JSONObject data = jsonArray.getJSONObject(i);
for (int i = 0; i < jsonArray.size(); i++) { JSONObject dataJSON = data.getJSONObject("content");
try { if(Objects.nonNull(dataJSON.getLongValue("id"))) {
JSONObject data = jsonArray.getJSONObject(i); Map<String,Object> map = new HashMap<>();
JSONObject dataJSON = data.getJSONObject("content"); String source = null;
if(Objects.nonNull(dataJSON.getLongValue("id"))) { Date date = null;
Map<String,Object> map = new HashMap<>(); String content = null;
String source = null; String title = null;
Date date = null; if(Objects.nonNull(dataJSON.getJSONObject("user_info"))) {
String content = null; source = dataJSON.getJSONObject("user_info").getString("name");
String title = null; userId = dataJSON.getJSONObject("user_info").getString("user_id");
if(Objects.nonNull(dataJSON.getJSONObject("user_info"))) { date = new Date(dataJSON.getLongValue("publish_time") * 1000);
source = dataJSON.getJSONObject("user_info").getString("name"); content = dataJSON.getString("abstract");
userId = dataJSON.getJSONObject("user_info").getString("user_id"); title = dataJSON.getString("title");
date = new Date(dataJSON.getLongValue("publish_time") * 1000); map.put("type", "文章");
content = dataJSON.getString("abstract"); }
title = dataJSON.getString("title"); if(Objects.nonNull(dataJSON.getJSONObject("user"))) {
map.put("type", "文章"); source = dataJSON.getJSONObject("user").getString("name");
} userId = dataJSON.getJSONObject("user").getString("user_id");
if(Objects.nonNull(dataJSON.getJSONObject("user"))) { date = new Date(dataJSON.getLongValue("create_time") * 1000);
source = dataJSON.getJSONObject("user").getString("name"); content = dataJSON.getString("content");
userId = dataJSON.getJSONObject("user").getString("user_id"); map.put("type", "微头条");
date = new Date(dataJSON.getLongValue("create_time") * 1000); }
content = dataJSON.getString("content"); if(Objects.nonNull(source)) {
map.put("type", "微头条"); String href = "https://www.toutiao.com/a" + dataJSON.getLongValue("id") + "/";
} String readNum = String.valueOf(dataJSON.getInteger("read_count"));
if(Objects.nonNull(source)) { String commentNum = String.valueOf(dataJSON.getInteger("comment_count"));
String href = "https://www.toutiao.com/a" + dataJSON.getLongValue("id") + "/"; map.put("title", title);
String readNum = String.valueOf(dataJSON.getInteger("read_count")); map.put("time", date);
String commentNum = String.valueOf(dataJSON.getInteger("comment_count")); map.put("href", href);
map.put("title", title); map.put("source", source);
map.put("time", date); map.put("content", content);
map.put("href", href); map.put("readNum", readNum);
map.put("source", source); map.put("commentNum", commentNum);
map.put("content", content); map.put("user_id", userId);
map.put("readNum", readNum); dataList.add(map);
map.put("commentNum", commentNum);
map.put("user_id", userId);
System.out.println(map.toString());
dataList.add(map);
}
} }
} catch (Exception e) {
e.printStackTrace();
} }
} catch (Exception e) {
e.printStackTrace();
} }
System.out.println(" 采集到 条 == "+dataList.size() + " -- " +ma + " -- " + maxBehotTime);
if(ma.equals(String.valueOf(maxBehotTime))) {
break;
}
} catch (Exception e) {
logger.info("客户端微头条采集错误 {}",e);
} }
} rMap.put("data", dataList);
return dataList; rMap.put("maxBehotTime", maxBehotTime);
} catch (Exception e) {
logger.info("客户端微头条采集错误 {}",e);
}
return rMap;
} }
/** /**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment