Commit 5578cebf by zhiwei

修复微头条采集问题

parent ae21017e
......@@ -12,7 +12,11 @@ package com.zhiwei.toutiao.bean;
import java.io.Serializable;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSONObject;
/**
* @Description:
......@@ -165,5 +169,4 @@ public class TouTiaoArticle implements Serializable{
+ "]";
}
}
......@@ -360,10 +360,11 @@ public class TouTiaoArticleParse {
String maxBehotTime) throws IOException {
String url = "https://www.toutiao.com/api/pc/feed/?category=pc_profile_ugc&utm_source=toutiao&visit_user_id=" + userId;
if (maxBehotTime != null) {
url = url + "?max_behot_time=" + maxBehotTime;
url = url + "&max_behot_time=" + maxBehotTime;
}
System.out.println(url);
Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/?tab=weitoutiao");
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/");
try {
String htmlBody = downloadHtml(url, proxy, headerMap);
if (htmlBody != null) {
......@@ -389,7 +390,7 @@ public class TouTiaoArticleParse {
}
logger.info("微头条采集链接:::{}", url);
Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/?tab=weitoutiao");
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/");
try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if (htmlBody != null && htmlBody.contains("create_time")) {
......@@ -441,7 +442,7 @@ public class TouTiaoArticleParse {
String content = commentBase.getString("content");
String readNum = commentBase.getJSONObject("action").getInteger("read_count") + "";
String commentNum = commentBase.getJSONObject("action").getInteger("comment_count") + "";
String user_id = commentBase.getJSONObject("user").getJSONObject("info").getString("user_id");
userId = commentBase.getJSONObject("user").getJSONObject("info").getString("user_id");
if(dataJSON.containsKey("origin_group")) {
String replayUrl = dataJSON.getJSONObject("origin_group").getString("article_url");
String title = dataJSON.getJSONObject("origin_group").getString("title");
......@@ -454,7 +455,7 @@ public class TouTiaoArticleParse {
map.put("content", content);
map.put("readNum", readNum);
map.put("commentNum", commentNum);
map.put("user_id", user_id);
map.put("user_id", userId);
dataList.add(map);
}
} catch (Exception e) {
......@@ -496,72 +497,80 @@ public class TouTiaoArticleParse {
if(json.containsKey("has_more")) {
more = json.getBoolean("has_more");
}
maxBehotTime = json.getJSONObject("next").getLongValue("max_behot_time");
JSONArray jsonArray = json.getJSONArray("data");
if(json.containsKey("next")) {
maxBehotTime = json.getJSONObject("next").getLongValue("max_behot_time");
}
Date date = null;
String href = null;
String source = null;
String title = null;
String content = null;
String readNum = null;
String commentNum = null;
String playNum = null;
String user_id = null;
String likeNum = null;
String articleType = null;
int count = 16;
for (int i = 0; i < jsonArray.size(); i++) {
try {
JSONObject data = jsonArray.getJSONObject(i);
String text = null;
if(data.containsKey("stream_cell") && data.getJSONObject("stream_cell")!=null) {
text = data.getJSONObject("stream_cell").getString("raw_data");
}else if(data.containsKey("concern_talk_cell")) {
text = data.getJSONObject("concern_talk_cell").getString("packed_json_str");
}
JSONObject dataJSON = JSONObject.parseObject(text);
if(dataJSON.containsKey("comment_base") && dataJSON.getJSONObject("comment_base")!=null) {
JSONObject comment_base = dataJSON.getJSONObject("comment_base");
date = new Date(comment_base.getLongValue("create_time") * 1000);
href = "https://www.toutiao.com/a" + dataJSON.getLongValue("id");
source = comment_base.getJSONObject("user").getJSONObject("info").getString("name");
content = dataJSON.getString("content");
readNum = dataJSON.getJSONObject("action").getInteger("read_count") + "";
likeNum = dataJSON.getJSONObject("action").getInteger("digg_count")+"";
commentNum = dataJSON.getJSONObject("action").getInteger("comment_count") + "";
user_id = comment_base.getJSONObject("user").getJSONObject("info").getString("user_id");
if (content != null && !"".equals(content)) {
if (content.length() < 16) {
count = content.length();
}
title = content.substring(0, count);
if(json.containsKey("data")) {
JSONArray jsonArray = json.getJSONArray("data");
String href = null;
String source = null;
String title = null;
String content = null;
String readNum = null;
String commentNum = null;
String playNum = null;
String userId = null;
String likeNum = null;
String articleType = null;
int count = 16;
for (int i = 0; i < jsonArray.size(); i++) {
try {
JSONObject data = jsonArray.getJSONObject(i);
String text = null;
if(data.containsKey("stream_cell") && data.getJSONObject("stream_cell")!=null) {
text = data.getJSONObject("stream_cell").getString("raw_data");
}else if(data.containsKey("concern_talk_cell")) {
text = data.getJSONObject("concern_talk_cell").getString("packed_json_str");
}
}else {
date = new Date(dataJSON.getLongValue("create_time") * 1000);
href = "https://www.toutiao.com/a" + dataJSON.getString("thread_id");
source = dataJSON.getJSONObject("user").getString("name");
content = dataJSON.getString("content");
readNum = dataJSON.getInteger("read_count") + "";
commentNum = dataJSON.getInteger("comment_count") + "";
likeNum = dataJSON.getInteger("digg_count")+"";
user_id = dataJSON.getJSONObject("user").getString("user_id");
if (content != null && !"".equals(content)) {
if (content.length() < 16) {
count = content.length();
JSONObject dataJSON = JSONObject.parseObject(text);
if(dataJSON.containsKey("comment_base") && dataJSON.getJSONObject("comment_base")!=null) {
JSONObject commentBase = dataJSON.getJSONObject("comment_base");
date = new Date(commentBase.getLongValue("create_time") * 1000);
href = "https://www.toutiao.com/a" + dataJSON.getLongValue("id");
source = commentBase.getJSONObject("user").getJSONObject("info").getString("name");
content = dataJSON.getString("content");
readNum = dataJSON.getJSONObject("action").getInteger("read_count") + "";
likeNum = dataJSON.getJSONObject("action").getInteger("digg_count")+"";
commentNum = dataJSON.getJSONObject("action").getInteger("comment_count") + "";
userId = commentBase.getJSONObject("user").getJSONObject("info").getString("user_id");
if (content != null && !"".equals(content)) {
if (content.length() < 16) {
count = content.length();
}
title = content.substring(0, count);
}
}else {
date = new Date(dataJSON.getLongValue("create_time") * 1000);
href = "https://www.toutiao.com/a" + dataJSON.getString("thread_id");
source = dataJSON.getJSONObject("user").getString("name");
content = dataJSON.getString("content");
readNum = dataJSON.getInteger("read_count") + "";
commentNum = dataJSON.getInteger("comment_count") + "";
likeNum = dataJSON.getInteger("digg_count")+"";
userId = dataJSON.getJSONObject("user").getString("user_id");
if (content != null && !"".equals(content)) {
if (content.length() < 16) {
count = content.length();
}
title = content.substring(0, count);
}
title = content.substring(0, count);
}
TouTiaoArticle tt = new TouTiaoArticle(href, title, userId, source, date, content, commentNum,
playNum, readNum, "0", "微头条", articleType,likeNum);
dataList.add(tt);
} catch (Exception e) {
continue;
}
TouTiaoArticle tt = new TouTiaoArticle(href, title, user_id, source, date, content, commentNum,
playNum, readNum, "0", "微头条", articleType,likeNum);
dataList.add(tt);
} catch (Exception e) {
continue;
}
}else {
System.out.println(json);
}
/** 验证是否有下一页数据 **/
if (more) {
if (maxBehotTime != null && maxBehotTime != 0) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment