Commit 5578cebf by zhiwei

修复微头条采集问题

parent ae21017e
...@@ -12,7 +12,11 @@ package com.zhiwei.toutiao.bean; ...@@ -12,7 +12,11 @@ package com.zhiwei.toutiao.bean;
import java.io.Serializable; import java.io.Serializable;
import java.util.Date; import java.util.Date;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSONObject;
/** /**
* @Description: * @Description:
...@@ -165,5 +169,4 @@ public class TouTiaoArticle implements Serializable{ ...@@ -165,5 +169,4 @@ public class TouTiaoArticle implements Serializable{
+ "]"; + "]";
} }
} }
...@@ -360,10 +360,11 @@ public class TouTiaoArticleParse { ...@@ -360,10 +360,11 @@ public class TouTiaoArticleParse {
String maxBehotTime) throws IOException { String maxBehotTime) throws IOException {
String url = "https://www.toutiao.com/api/pc/feed/?category=pc_profile_ugc&utm_source=toutiao&visit_user_id=" + userId; String url = "https://www.toutiao.com/api/pc/feed/?category=pc_profile_ugc&utm_source=toutiao&visit_user_id=" + userId;
if (maxBehotTime != null) { if (maxBehotTime != null) {
url = url + "?max_behot_time=" + maxBehotTime; url = url + "&max_behot_time=" + maxBehotTime;
} }
System.out.println(url);
Map<String, String> headerMap = Tools.getTouTiaoHeader(); Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/?tab=weitoutiao"); headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/");
try { try {
String htmlBody = downloadHtml(url, proxy, headerMap); String htmlBody = downloadHtml(url, proxy, headerMap);
if (htmlBody != null) { if (htmlBody != null) {
...@@ -389,7 +390,7 @@ public class TouTiaoArticleParse { ...@@ -389,7 +390,7 @@ public class TouTiaoArticleParse {
} }
logger.info("微头条采集链接:::{}", url); logger.info("微头条采集链接:::{}", url);
Map<String, String> headerMap = Tools.getTouTiaoHeader(); Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/?tab=weitoutiao"); headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/");
try { try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if (htmlBody != null && htmlBody.contains("create_time")) { if (htmlBody != null && htmlBody.contains("create_time")) {
...@@ -441,7 +442,7 @@ public class TouTiaoArticleParse { ...@@ -441,7 +442,7 @@ public class TouTiaoArticleParse {
String content = commentBase.getString("content"); String content = commentBase.getString("content");
String readNum = commentBase.getJSONObject("action").getInteger("read_count") + ""; String readNum = commentBase.getJSONObject("action").getInteger("read_count") + "";
String commentNum = commentBase.getJSONObject("action").getInteger("comment_count") + ""; String commentNum = commentBase.getJSONObject("action").getInteger("comment_count") + "";
String user_id = commentBase.getJSONObject("user").getJSONObject("info").getString("user_id"); userId = commentBase.getJSONObject("user").getJSONObject("info").getString("user_id");
if(dataJSON.containsKey("origin_group")) { if(dataJSON.containsKey("origin_group")) {
String replayUrl = dataJSON.getJSONObject("origin_group").getString("article_url"); String replayUrl = dataJSON.getJSONObject("origin_group").getString("article_url");
String title = dataJSON.getJSONObject("origin_group").getString("title"); String title = dataJSON.getJSONObject("origin_group").getString("title");
...@@ -454,7 +455,7 @@ public class TouTiaoArticleParse { ...@@ -454,7 +455,7 @@ public class TouTiaoArticleParse {
map.put("content", content); map.put("content", content);
map.put("readNum", readNum); map.put("readNum", readNum);
map.put("commentNum", commentNum); map.put("commentNum", commentNum);
map.put("user_id", user_id); map.put("user_id", userId);
dataList.add(map); dataList.add(map);
} }
} catch (Exception e) { } catch (Exception e) {
...@@ -496,9 +497,13 @@ public class TouTiaoArticleParse { ...@@ -496,9 +497,13 @@ public class TouTiaoArticleParse {
if(json.containsKey("has_more")) { if(json.containsKey("has_more")) {
more = json.getBoolean("has_more"); more = json.getBoolean("has_more");
} }
if(json.containsKey("next")) {
maxBehotTime = json.getJSONObject("next").getLongValue("max_behot_time"); maxBehotTime = json.getJSONObject("next").getLongValue("max_behot_time");
JSONArray jsonArray = json.getJSONArray("data"); }
Date date = null; Date date = null;
if(json.containsKey("data")) {
JSONArray jsonArray = json.getJSONArray("data");
String href = null; String href = null;
String source = null; String source = null;
String title = null; String title = null;
...@@ -506,7 +511,7 @@ public class TouTiaoArticleParse { ...@@ -506,7 +511,7 @@ public class TouTiaoArticleParse {
String readNum = null; String readNum = null;
String commentNum = null; String commentNum = null;
String playNum = null; String playNum = null;
String user_id = null; String userId = null;
String likeNum = null; String likeNum = null;
String articleType = null; String articleType = null;
int count = 16; int count = 16;
...@@ -522,15 +527,15 @@ public class TouTiaoArticleParse { ...@@ -522,15 +527,15 @@ public class TouTiaoArticleParse {
JSONObject dataJSON = JSONObject.parseObject(text); JSONObject dataJSON = JSONObject.parseObject(text);
if(dataJSON.containsKey("comment_base") && dataJSON.getJSONObject("comment_base")!=null) { if(dataJSON.containsKey("comment_base") && dataJSON.getJSONObject("comment_base")!=null) {
JSONObject comment_base = dataJSON.getJSONObject("comment_base"); JSONObject commentBase = dataJSON.getJSONObject("comment_base");
date = new Date(comment_base.getLongValue("create_time") * 1000); date = new Date(commentBase.getLongValue("create_time") * 1000);
href = "https://www.toutiao.com/a" + dataJSON.getLongValue("id"); href = "https://www.toutiao.com/a" + dataJSON.getLongValue("id");
source = comment_base.getJSONObject("user").getJSONObject("info").getString("name"); source = commentBase.getJSONObject("user").getJSONObject("info").getString("name");
content = dataJSON.getString("content"); content = dataJSON.getString("content");
readNum = dataJSON.getJSONObject("action").getInteger("read_count") + ""; readNum = dataJSON.getJSONObject("action").getInteger("read_count") + "";
likeNum = dataJSON.getJSONObject("action").getInteger("digg_count")+""; likeNum = dataJSON.getJSONObject("action").getInteger("digg_count")+"";
commentNum = dataJSON.getJSONObject("action").getInteger("comment_count") + ""; commentNum = dataJSON.getJSONObject("action").getInteger("comment_count") + "";
user_id = comment_base.getJSONObject("user").getJSONObject("info").getString("user_id"); userId = commentBase.getJSONObject("user").getJSONObject("info").getString("user_id");
if (content != null && !"".equals(content)) { if (content != null && !"".equals(content)) {
if (content.length() < 16) { if (content.length() < 16) {
count = content.length(); count = content.length();
...@@ -546,7 +551,7 @@ public class TouTiaoArticleParse { ...@@ -546,7 +551,7 @@ public class TouTiaoArticleParse {
readNum = dataJSON.getInteger("read_count") + ""; readNum = dataJSON.getInteger("read_count") + "";
commentNum = dataJSON.getInteger("comment_count") + ""; commentNum = dataJSON.getInteger("comment_count") + "";
likeNum = dataJSON.getInteger("digg_count")+""; likeNum = dataJSON.getInteger("digg_count")+"";
user_id = dataJSON.getJSONObject("user").getString("user_id"); userId = dataJSON.getJSONObject("user").getString("user_id");
if (content != null && !"".equals(content)) { if (content != null && !"".equals(content)) {
if (content.length() < 16) { if (content.length() < 16) {
count = content.length(); count = content.length();
...@@ -554,13 +559,17 @@ public class TouTiaoArticleParse { ...@@ -554,13 +559,17 @@ public class TouTiaoArticleParse {
title = content.substring(0, count); title = content.substring(0, count);
} }
} }
TouTiaoArticle tt = new TouTiaoArticle(href, title, user_id, source, date, content, commentNum, TouTiaoArticle tt = new TouTiaoArticle(href, title, userId, source, date, content, commentNum,
playNum, readNum, "0", "微头条", articleType,likeNum); playNum, readNum, "0", "微头条", articleType,likeNum);
dataList.add(tt); dataList.add(tt);
} catch (Exception e) { } catch (Exception e) {
continue; continue;
} }
} }
}else {
System.out.println(json);
}
/** 验证是否有下一页数据 **/ /** 验证是否有下一页数据 **/
if (more) { if (more) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment