Commit 5578cebf by zhiwei

修复微头条采集问题

parent ae21017e
......@@ -12,7 +12,11 @@ package com.zhiwei.toutiao.bean;
import java.io.Serializable;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.alibaba.fastjson.JSONObject;
/**
* @Description:
......@@ -165,5 +169,4 @@ public class TouTiaoArticle implements Serializable{
+ "]";
}
}
......@@ -360,10 +360,11 @@ public class TouTiaoArticleParse {
String maxBehotTime) throws IOException {
String url = "https://www.toutiao.com/api/pc/feed/?category=pc_profile_ugc&utm_source=toutiao&visit_user_id=" + userId;
if (maxBehotTime != null) {
url = url + "?max_behot_time=" + maxBehotTime;
url = url + "&max_behot_time=" + maxBehotTime;
}
System.out.println(url);
Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/?tab=weitoutiao");
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/");
try {
String htmlBody = downloadHtml(url, proxy, headerMap);
if (htmlBody != null) {
......@@ -389,7 +390,7 @@ public class TouTiaoArticleParse {
}
logger.info("微头条采集链接:::{}", url);
Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/?tab=weitoutiao");
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + userId + "/");
try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if (htmlBody != null && htmlBody.contains("create_time")) {
......@@ -441,7 +442,7 @@ public class TouTiaoArticleParse {
String content = commentBase.getString("content");
String readNum = commentBase.getJSONObject("action").getInteger("read_count") + "";
String commentNum = commentBase.getJSONObject("action").getInteger("comment_count") + "";
String user_id = commentBase.getJSONObject("user").getJSONObject("info").getString("user_id");
userId = commentBase.getJSONObject("user").getJSONObject("info").getString("user_id");
if(dataJSON.containsKey("origin_group")) {
String replayUrl = dataJSON.getJSONObject("origin_group").getString("article_url");
String title = dataJSON.getJSONObject("origin_group").getString("title");
......@@ -454,7 +455,7 @@ public class TouTiaoArticleParse {
map.put("content", content);
map.put("readNum", readNum);
map.put("commentNum", commentNum);
map.put("user_id", user_id);
map.put("user_id", userId);
dataList.add(map);
}
} catch (Exception e) {
......@@ -496,9 +497,13 @@ public class TouTiaoArticleParse {
if(json.containsKey("has_more")) {
more = json.getBoolean("has_more");
}
if(json.containsKey("next")) {
maxBehotTime = json.getJSONObject("next").getLongValue("max_behot_time");
JSONArray jsonArray = json.getJSONArray("data");
}
Date date = null;
if(json.containsKey("data")) {
JSONArray jsonArray = json.getJSONArray("data");
String href = null;
String source = null;
String title = null;
......@@ -506,7 +511,7 @@ public class TouTiaoArticleParse {
String readNum = null;
String commentNum = null;
String playNum = null;
String user_id = null;
String userId = null;
String likeNum = null;
String articleType = null;
int count = 16;
......@@ -522,15 +527,15 @@ public class TouTiaoArticleParse {
JSONObject dataJSON = JSONObject.parseObject(text);
if(dataJSON.containsKey("comment_base") && dataJSON.getJSONObject("comment_base")!=null) {
JSONObject comment_base = dataJSON.getJSONObject("comment_base");
date = new Date(comment_base.getLongValue("create_time") * 1000);
JSONObject commentBase = dataJSON.getJSONObject("comment_base");
date = new Date(commentBase.getLongValue("create_time") * 1000);
href = "https://www.toutiao.com/a" + dataJSON.getLongValue("id");
source = comment_base.getJSONObject("user").getJSONObject("info").getString("name");
source = commentBase.getJSONObject("user").getJSONObject("info").getString("name");
content = dataJSON.getString("content");
readNum = dataJSON.getJSONObject("action").getInteger("read_count") + "";
likeNum = dataJSON.getJSONObject("action").getInteger("digg_count")+"";
commentNum = dataJSON.getJSONObject("action").getInteger("comment_count") + "";
user_id = comment_base.getJSONObject("user").getJSONObject("info").getString("user_id");
userId = commentBase.getJSONObject("user").getJSONObject("info").getString("user_id");
if (content != null && !"".equals(content)) {
if (content.length() < 16) {
count = content.length();
......@@ -546,7 +551,7 @@ public class TouTiaoArticleParse {
readNum = dataJSON.getInteger("read_count") + "";
commentNum = dataJSON.getInteger("comment_count") + "";
likeNum = dataJSON.getInteger("digg_count")+"";
user_id = dataJSON.getJSONObject("user").getString("user_id");
userId = dataJSON.getJSONObject("user").getString("user_id");
if (content != null && !"".equals(content)) {
if (content.length() < 16) {
count = content.length();
......@@ -554,13 +559,17 @@ public class TouTiaoArticleParse {
title = content.substring(0, count);
}
}
TouTiaoArticle tt = new TouTiaoArticle(href, title, user_id, source, date, content, commentNum,
TouTiaoArticle tt = new TouTiaoArticle(href, title, userId, source, date, content, commentNum,
playNum, readNum, "0", "微头条", articleType,likeNum);
dataList.add(tt);
} catch (Exception e) {
continue;
}
}
}else {
System.out.println(json);
}
/** 验证是否有下一页数据 **/
if (more) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment