Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
toutiao
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
toutiao
Commits
9d384b56
Commit
9d384b56
authored
Oct 28, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加更新今日头条阅读数功能
parent
34d3c078
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
104 additions
and
16 deletions
+104
-16
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
+90
-6
src/main/java/com/zhiwei/toutiao/parse/TouTiaoCommentParse.java
+14
-10
No files found.
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
View file @
9d384b56
...
...
@@ -13,12 +13,7 @@ package com.zhiwei.toutiao.parse;
import
java.io.IOException
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.*
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
...
...
@@ -621,6 +616,95 @@ public class TouTiaoArticleParse {
}
/**
 * Extracts the article's itemId from the HTML of the given article page.
 *
 * <p>The page is downloaded with the headers returned by {@code Tools.getTouTiaoHeader()}
 * and searched for the literal marker {@code itemId: '}. The id is the text between that
 * marker and the next {@code ',}, trimmed.
 *
 * @param url   article page URL to download
 * @param proxy proxy passed through to {@code downloadHtml}
 * @return the trimmed itemId, or {@code null} when the page could not be downloaded or
 *         does not contain a non-empty itemId
 * @throws Exception declared by the signature; this method throws nothing itself, so
 *                   only failures of the called helpers can surface here
 */
private static String getItemIdByUrl(String url, Proxy proxy) throws Exception {
    final String startMarker = "itemId: '";
    final String endMarker = "',";

    Map<String, String> headerMap = Tools.getTouTiaoHeader();
    String htmlBody = downloadHtml(url, proxy, headerMap);
    if (htmlBody == null) {
        logger.info("获取itemId失败,链接地址为:{}", url);
        return null;
    }

    // Search for the exact marker that is extracted below. Checking only for the bare
    // word "itemId" and then indexing split(...)[1] fails with
    // ArrayIndexOutOfBoundsException when the word is present but the marker is not.
    int markerPos = htmlBody.indexOf(startMarker);
    if (markerPos < 0) {
        logger.info("获取itemId失败,链接地址为:{}", url);
        return null;
    }

    int valueStart = markerPos + startMarker.length();
    int valueEnd = htmlBody.indexOf(endMarker, valueStart);
    // Without a closing marker, fall back to the rest of the page (what split()[0] did).
    String rawId = valueEnd < 0
            ? htmlBody.substring(valueStart)
            : htmlBody.substring(valueStart, valueEnd);

    String itemId = rawId.trim();
    if (itemId.isEmpty()) {
        // An empty id would only produce a meaningless request URL downstream.
        logger.info("获取itemId失败,链接地址为:{}", url);
        return null;
    }
    return itemId;
}
/**
 * Fetches article information (title, source, author id, publish time, plain-text
 * content, and comment / read / play counters) for the article at the given URL.
 *
 * <p>The itemId is resolved through {@code getItemIdByUrl}. The endpoint
 * {@code https://m.toutiao.com/i{itemId}/info/} is then requested up to three times, and
 * the {@code data} object of the first response that parses is mapped onto a
 * {@link TouTiaoArticle}. An attempt that fails to download or parse is logged and
 * retried.
 *
 * @param url   article page URL; also stored on the returned article
 * @param proxy proxy passed through to the download helper
 * @return the populated article, or {@code null} when the itemId cannot be resolved or
 *         every attempt fails
 * @throws Exception declared by the signature; failures while resolving the itemId
 *                   propagate, failures inside an attempt are logged and retried
 */
public static TouTiaoArticle getToutiaoArticleInfoByUrl(String url, Proxy proxy) throws Exception {
    final int maxAttempts = 3;

    String itemId = getItemIdByUrl(url, proxy);
    if (itemId == null) {
        return null;
    }

    // Both URLs depend only on itemId, so build them once instead of per attempt.
    String referer = "https://m.toutiao.com/i" + itemId + "/";
    String infoUrl = "https://m.toutiao.com/i" + itemId + "/info/?_signature=&i=" + itemId;

    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
            // Headers are requested fresh for every attempt, as before.
            Map<String, String> headerMap = Tools.getTouTiaoHeader();
            headerMap.put("Referer", referer);
            headerMap.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Mobile Safari/537.36");

            String htmlBody = downloadHtml(infoUrl, proxy, headerMap);
            if (htmlBody == null) {
                continue;
            }

            JSONObject root = JSONObject.parseObject(htmlBody);
            JSONObject data = root == null ? null : root.getJSONObject("data");
            if (data == null) {
                // Report the missing payload explicitly rather than through an NPE.
                logger.info("获取文章信息失败,返回内容缺少data,链接地址为:{}", url);
                continue;
            }

            // A missing counter, media_user or publish_time still fails this attempt
            // (NullPointerException, handled by the catch below), exactly as before.
            String commentNum = data.getInteger("comment_count").toString();
            String readNum = data.getInteger("impression_count").toString();
            String playCount = data.getInteger("video_play_count").toString();
            String userId = data.getJSONObject("media_user").getLong("id").toString();
            String source = data.getString("source");
            String title = data.getString("title");
            String link = data.getString("url");

            // The content field is HTML; keep only its text.
            String content = data.getString("content");
            if (StringUtils.isNotBlank(content)) {
                content = Jsoup.parse(content).text();
            }

            // publish_time is multiplied by 1000 to obtain the millisecond value Date expects.
            Date time = new Date(data.getLong("publish_time") * 1000L);

            TouTiaoArticle touTiaoArticle = new TouTiaoArticle();
            touTiaoArticle.setUrl(url);
            touTiaoArticle.setTitle(title);
            touTiaoArticle.setUser_id(userId);
            touTiaoArticle.setSource(source);
            touTiaoArticle.setTime(time);
            touTiaoArticle.setContent(content);
            touTiaoArticle.setCommentCount(commentNum);
            touTiaoArticle.setReadNum(readNum);
            touTiaoArticle.setPlayCount(playCount);
            return touTiaoArticle;
        } catch (Exception e) {
            // Passing the exception as the trailing argument, with no placeholder left
            // for it, keeps the stack trace in the log.
            logger.error("获取文章信息时出现问题,链接地址为:{}", url, e);
        }
    }
    return null;
}
/**
* 下载数据
* @param url
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoCommentParse.java
View file @
9d384b56
...
...
@@ -8,6 +8,8 @@ import java.util.List;
import
java.util.Map
;
import
java.util.Objects
;
import
com.zhiwei.toutiao.bean.TouTiaoArticle
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
...
...
@@ -21,6 +23,7 @@ import com.zhiwei.toutiao.bean.TouTiaoComment;
import
com.zhiwei.toutiao.util.Tools
;
import
okhttp3.Response
;
import
org.jsoup.Jsoup
;
/**
* @ClassName: TouTiaoComment
...
...
@@ -232,7 +235,7 @@ public class TouTiaoCommentParse {
* @return int 返回类型
* @throws IOException
*/
public
static
int
getCommentCount
(
String
url
,
Proxy
proxy
)
public
static
int
getCommentCount
(
String
url
,
Proxy
proxy
)
throws
Exception
{
String
group_id
=
getGroupId
(
url
,
proxy
);
for
(
int
i
=
0
;
i
<
3
;
i
++){
...
...
@@ -260,7 +263,6 @@ public class TouTiaoCommentParse {
}
/**
* @Title: getGroupId
* @Description: TODO(获取groupId用于更新评论列表)
...
...
@@ -268,7 +270,7 @@ public class TouTiaoCommentParse {
* @param @return 设定文件
* @return String 返回类型
*/
private
static
String
getGroupId
(
String
url
,
Proxy
proxy
)
private
static
String
getGroupId
(
String
url
,
Proxy
proxy
)
throws
Exception
{
String
groupId
=
null
;
if
(
url
.
contains
(
"/a"
)||
url
.
contains
(
"/group/"
))
...
...
@@ -282,7 +284,7 @@ public class TouTiaoCommentParse {
}
}
else
if
(
url
.
contains
(
"/i"
)||
url
.
contains
(
"/item/"
))
{
groupId
=
get
tGroupIdByUrl
(
url
,
proxy
);
groupId
=
get
GroupIdByUrl
(
url
,
proxy
);
}
return
groupId
;
...
...
@@ -295,11 +297,10 @@ public class TouTiaoCommentParse {
* @param @return 设定文件
* @return String 返回类型
*/
private
static
String
get
tGroupIdByUrl
(
String
url
,
Proxy
proxy
)
private
static
String
get
GroupIdByUrl
(
String
url
,
Proxy
proxy
)
throws
Exception
{
String
groupId
=
null
;
Map
<
String
,
String
>
headerMap
=
Tools
.
getTouTiaoHeader
();
try
{
String
htmlBody
=
downloadHtml
(
url
,
proxy
,
headerMap
);
if
(
htmlBody
!=
null
)
{
...
...
@@ -312,13 +313,16 @@ public class TouTiaoCommentParse {
{
logger
.
info
(
"获取groupId失败,链接地址为:{}"
,
url
);
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
logger
.
error
(
"获取groupId失败,链接地址为:{}"
,
url
,
e
);
}
return
groupId
;
}
/**
* 下载数据
* @param url
* @param proxy
* @param headerMap
* @return
*/
private
static
String
downloadHtml
(
String
url
,
Proxy
proxy
,
Map
<
String
,
String
>
headerMap
)
{
// 下载数据页面
for
(
int
i
=
1
;
i
<=
3
;
i
++)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment