Commit 9d384b56 by zhiwei

添加更新今日头条阅读数功能

parent 34d3c078
......@@ -13,12 +13,7 @@ package com.zhiwei.toutiao.parse;
import java.io.IOException;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
......@@ -621,6 +616,95 @@ public class TouTiaoArticleParse {
}
/**
 * Resolves the Toutiao {@code itemId} embedded in an article page.
 *
 * <p>The page is downloaded through {@code downloadHtml} using the headers from
 * {@code Tools.getTouTiaoHeader()}, and the id is read from the first
 * {@code itemId: '<value>',} fragment found in the returned HTML.
 *
 * @param url   article page URL
 * @param proxy proxy handed to {@code downloadHtml}
 * @return the trimmed itemId, or {@code null} when the page could not be downloaded
 *         or does not contain a non-empty, properly terminated itemId fragment
 * @throws Exception propagated unchanged from the download step
 */
private static String getItemIdByUrl(String url,Proxy proxy) throws Exception
{
    final String startMarker = "itemId: '";
    final String endMarker = "',";

    Map<String,String> headerMap = Tools.getTouTiaoHeader();
    String htmlBody = downloadHtml(url, proxy, headerMap);
    if(htmlBody == null)
    {
        logger.info("获取itemId失败,链接地址为:{}",url);
        return null;
    }

    // Search for the exact marker instead of the looser "itemId" substring: a page that
    // only mentions itemId in another form must yield null, not an index error.
    int markerPos = htmlBody.indexOf(startMarker);
    if(markerPos < 0)
    {
        logger.info("获取itemId失败,链接地址为:{}",url);
        return null;
    }

    int valueStart = markerPos + startMarker.length();
    int valueEnd = htmlBody.indexOf(endMarker, valueStart);
    if(valueEnd < 0)
    {
        // Without the terminator the "id" would be the rest of the document; reject it.
        logger.info("获取itemId失败,链接地址为:{}",url);
        return null;
    }

    String itemId = htmlBody.substring(valueStart, valueEnd).trim();
    if(itemId.isEmpty())
    {
        logger.info("获取itemId失败,链接地址为:{}",url);
        return null;
    }
    return itemId;
}
/**
 * Fetches article details (title, source, author id, publish time, text content and the
 * comment / read / play counters) for a Toutiao article URL.
 *
 * <p>The itemId is resolved with {@code getItemIdByUrl}; the mobile info endpoint
 * {@code https://m.toutiao.com/i{itemId}/info/} is then requested up to three times.
 * The first response whose JSON carries a {@code data} object is mapped to a
 * {@link TouTiaoArticle}. Fields missing from the response are left {@code null}
 * instead of failing the whole article.
 *
 * @param url   article page URL; stored unchanged as the article's url
 * @param proxy proxy handed to the download helper
 * @return the populated article, or {@code null} when no itemId could be resolved or
 *         every attempt failed
 * @throws Exception propagated from resolving the itemId; failures of the individual
 *                   info requests are logged and retried rather than thrown
 */
public static TouTiaoArticle getToutiaoArticleInfoByUrl(String url, Proxy proxy) throws Exception
{
    String itemId = getItemIdByUrl(url, proxy);
    if(itemId == null)
    {
        return null;
    }

    String infoUrl = "https://m.toutiao.com/i" + itemId + "/info/?_signature=&i="+ itemId;
    String referer = "https://m.toutiao.com/i" + itemId + "/";

    for(int attempt=1; attempt<=3; attempt++)
    {
        try {
            // Headers are rebuilt for every attempt, exactly as before, in case the
            // helper hands out different values per call.
            Map<String,String> headerMap = Tools.getTouTiaoHeader();
            headerMap.put("Referer", referer);
            headerMap.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Mobile Safari/537.36");

            String htmlBody = downloadHtml(infoUrl, proxy, headerMap);
            if(htmlBody == null)
            {
                continue;
            }

            JSONObject root = JSONObject.parseObject(htmlBody);
            JSONObject data = root == null ? null : root.getJSONObject("data");
            if(data == null)
            {
                logger.warn("文章信息接口未返回data,链接地址为:{}", url);
                continue;
            }
            return buildArticleFromInfoData(url, data);
        } catch (Exception e) {
            // The throwable is passed after the placeholder arguments so the logger
            // records its stack trace instead of formatting it into the message.
            logger.error("获取文章信息时出现问题,链接地址为:{}", url, e);
        }
    }
    return null;
}

/**
 * Maps the {@code data} object of the info response onto a {@link TouTiaoArticle}.
 * Absent JSON fields result in {@code null} properties.
 */
private static TouTiaoArticle buildArticleFromInfoData(String url, JSONObject data)
{
    JSONObject mediaUser = data.getJSONObject("media_user");
    Long publishTime = data.getLong("publish_time");

    String content = data.getString("content");
    if(StringUtils.isNotBlank(content))
    {
        // The content field is HTML; keep only its visible text.
        content = Jsoup.parse(content).text();
    }

    TouTiaoArticle touTiaoArticle = new TouTiaoArticle();
    touTiaoArticle.setUrl(url);
    touTiaoArticle.setTitle(data.getString("title"));
    touTiaoArticle.setUser_id(mediaUser == null ? null : nullSafeToString(mediaUser.getLong("id")));
    touTiaoArticle.setSource(data.getString("source"));
    // publish_time is scaled by 1000 to the milliseconds Date expects
    // (presumably epoch seconds; confirm against the API).
    touTiaoArticle.setTime(publishTime == null ? null : new Date(publishTime * 1000));
    touTiaoArticle.setContent(content);
    touTiaoArticle.setCommentCount(nullSafeToString(data.getInteger("comment_count")));
    touTiaoArticle.setReadNum(nullSafeToString(data.getInteger("impression_count")));
    touTiaoArticle.setPlayCount(nullSafeToString(data.getInteger("video_play_count")));
    return touTiaoArticle;
}

/** Returns {@code value.toString()}, or {@code null} when {@code value} is {@code null}. */
private static String nullSafeToString(Object value)
{
    return value == null ? null : value.toString();
}
/**
* 下载数据
* @param url
......
......@@ -8,6 +8,8 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import com.zhiwei.toutiao.bean.TouTiaoArticle;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
......@@ -21,6 +23,7 @@ import com.zhiwei.toutiao.bean.TouTiaoComment;
import com.zhiwei.toutiao.util.Tools;
import okhttp3.Response;
import org.jsoup.Jsoup;
/**
* @ClassName: TouTiaoComment
......@@ -232,7 +235,7 @@ public class TouTiaoCommentParse {
* @return int 返回类型
* @throws IOException
*/
public static int getCommentCount(String url,Proxy proxy)
public static int getCommentCount(String url,Proxy proxy) throws Exception
{
String group_id = getGroupId(url, proxy);
for(int i=0; i<3; i++){
......@@ -260,7 +263,6 @@ public class TouTiaoCommentParse {
}
/**
* @Title: getGroupId
* @Description: TODO(获取groupId用于更新评论列表)
......@@ -268,7 +270,7 @@ public class TouTiaoCommentParse {
* @param @return 设定文件
* @return String 返回类型
*/
private static String getGroupId(String url,Proxy proxy)
private static String getGroupId(String url,Proxy proxy) throws Exception
{
String groupId = null;
if(url.contains("/a")||url.contains("/group/"))
......@@ -282,7 +284,7 @@ public class TouTiaoCommentParse {
}
}else if(url.contains("/i")||url.contains("/item/"))
{
groupId = gettGroupIdByUrl(url, proxy);
groupId = getGroupIdByUrl(url, proxy);
}
return groupId;
......@@ -295,11 +297,10 @@ public class TouTiaoCommentParse {
* @param @return 设定文件
* @return String 返回类型
*/
private static String gettGroupIdByUrl(String url,Proxy proxy)
private static String getGroupIdByUrl(String url,Proxy proxy) throws Exception
{
String groupId = null;
Map<String,String> headerMap = Tools.getTouTiaoHeader();
try {
String htmlBody = downloadHtml(url, proxy, headerMap);
if(htmlBody != null)
{
......@@ -312,13 +313,16 @@ public class TouTiaoCommentParse {
{
logger.info("获取groupId失败,链接地址为:{}",url);
}
} catch (Exception e) {
e.printStackTrace();
logger.error("获取groupId失败,链接地址为:{}",url,e);
}
return groupId;
}
/**
* 下载数据
* @param url
* @param proxy
* @param headerMap
* @return
*/
private static String downloadHtml(String url, Proxy proxy, Map<String,String> headerMap) {
// 下载数据页面
for (int i = 1; i <= 3; i++) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment