You need to sign in or sign up before continuing.
Commit b66c4c38 by zhiwei

修复今日头条微头条采集

parent 474fd84b
......@@ -378,11 +378,12 @@ public class TouTiaoArticleParse {
if (max_behot_time != null) {
url = url + "?max_behot_time=" + max_behot_time;
}
logger.info("微头条采集链接:::{}", url);
Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + user_id + "/?tab=weitoutiao");
try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if (htmlBody != null) {
if (htmlBody != null && htmlBody.contains("create_time")) {
Map<String, Object> dataMap = parseHtmlByMicroAccount(htmlBody, endDate);
if (dataMap != null && dataMap.size() > 0) {
return dataMap;
......@@ -415,7 +416,10 @@ public class TouTiaoArticleParse {
List<TouTiaoArticle> dataList = new ArrayList<TouTiaoArticle>();
try {
JSONObject json = JSONObject.parseObject(htmlBody);
boolean more = json.getBoolean("has_more");
boolean more = false;
if(json.containsKey("has_more")) {
more = json.getBoolean("has_more");
}
max_behot_time = json.getJSONObject("next").getLongValue("max_behot_time");
JSONArray jsonArray = json.getJSONArray("data");
Date date = null;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment