Commit b60d852a by cwy

头条客户端历史采集修改 并提升版本

parent 1d1ff5f8
......@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>toutiao</artifactId>
<version>0.4.2-SNAPSHOT</version>
<version>0.4.3-SNAPSHOT</version>
<dependencies>
<dependency>
......
......@@ -13,7 +13,13 @@ package com.zhiwei.toutiao.parse;
import java.io.IOException;
import java.net.Proxy;
import java.util.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
......@@ -422,7 +428,7 @@ public class TouTiaoArticleParse {
List<Map<String,Object>> dataList = new ArrayList<>();
String ma = "";
while(true) {
String url = "https://i.snssdk.com/api/feed/profile/v1/?visited_uid="+userId+"&offset="+ maxBehotTime;
String url = "https://profile.zjurl.cn/api/feed/profile/v1/?category=profile_all&visited_uid="+userId+"&stream_api_version=82&offset="+maxBehotTime+"&version_code=7.5.3&version_name=70503&user_id="+userId+"&request_source=1&active_tab=dongtai&device_id=65&app_name=news_article";
ma = String.valueOf(maxBehotTime);
try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url),proxy)){
String result = response.body().string();
......@@ -430,25 +436,35 @@ public class TouTiaoArticleParse {
maxBehotTime = json.getLongValue("offset");
JSONArray jsonArray = json.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) {
JSONObject data = jsonArray.getJSONObject(i);
try {
JSONObject dataJSON = data.getJSONObject("content").getJSONObject("raw_data");
JSONObject data = jsonArray.getJSONObject(i);
JSONObject dataJSON = data.getJSONObject("content");
if(Objects.nonNull(dataJSON.getLongValue("id"))) {
Map<String,Object> map = new HashMap<>();
if(dataJSON.containsKey("comment_base") && dataJSON.getJSONObject("comment_base")!=null) {
JSONObject commentBase = dataJSON.getJSONObject("comment_base");
Date date = new Date(commentBase.getLongValue("create_time") * 1000);
String href = "http://weitoutiao.zjurl.cn/ugc/share/wap/comment/" + dataJSON.getLongValue("id");
String source = commentBase.getJSONObject("user").getJSONObject("info").getString("name");
String content = commentBase.getString("content");
String readNum = commentBase.getJSONObject("action").getInteger("read_count") + "";
String commentNum = commentBase.getJSONObject("action").getInteger("comment_count") + "";
userId = commentBase.getJSONObject("user").getJSONObject("info").getString("user_id");
if(dataJSON.containsKey("origin_group")) {
String replayUrl = dataJSON.getJSONObject("origin_group").getString("article_url");
String title = dataJSON.getJSONObject("origin_group").getString("title");
map.put("title", title);
map.put("replayUrl", replayUrl);
String source = null;
Date date = null;
String content = null;
String title = null;
if(Objects.nonNull(dataJSON.getJSONObject("user_info"))) {
source = dataJSON.getJSONObject("user_info").getString("name");
userId = dataJSON.getJSONObject("user_info").getString("user_id");
date = new Date(dataJSON.getLongValue("publish_time") * 1000);
content = dataJSON.getString("abstract");
title = dataJSON.getString("title");
map.put("type", "文章");
}
if(Objects.nonNull(dataJSON.getJSONObject("user"))) {
source = dataJSON.getJSONObject("user").getString("name");
userId = dataJSON.getJSONObject("user").getString("user_id");
date = new Date(dataJSON.getLongValue("create_time") * 1000);
content = dataJSON.getString("content");
map.put("type", "微头条");
}
if(Objects.nonNull(source)) {
String href = "https://www.toutiao.com/a" + dataJSON.getLongValue("id") + "/";
String readNum = String.valueOf(dataJSON.getInteger("read_count"));
String commentNum = String.valueOf(dataJSON.getInteger("comment_count"));
map.put("title", title);
map.put("time", date);
map.put("href", href);
map.put("source", source);
......@@ -456,10 +472,11 @@ public class TouTiaoArticleParse {
map.put("readNum", readNum);
map.put("commentNum", commentNum);
map.put("user_id", userId);
System.out.println(map.toString());
dataList.add(map);
}
}
} catch (Exception e) {
// System.out.println(data.toString());
e.printStackTrace();
}
}
......@@ -686,7 +703,7 @@ public class TouTiaoArticleParse {
String userId = data.getJSONObject("media_user").getLong("id").toString();
String source = data.getString("source");
String title = data.getString("title");
String link = data.getString("url");
// String link = data.getString("url");
String content = data.getString("content");
if(data.containsKey("content") && StringUtils.isNotBlank(content)){
content = Jsoup.parse(content).text();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment