Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
toutiao
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
toutiao
Commits
7eca1950
Commit
7eca1950
authored
Mar 09, 2019
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
头条评论数获取失败返回-1
parent
b0b37b1a
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
79 additions
and
14 deletions
+79
-14
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
+71
-0
src/main/java/com/zhiwei/toutiao/parse/TouTiaoCommentParse.java
+8
-14
No files found.
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
View file @
7eca1950
...
...
@@ -399,6 +399,77 @@ public class TouTiaoArticleParse {
}
/**
 * Crawls a user's micro-headline posts through the client feed API.
 *
 * <p>Pages are requested from {@code https://i.snssdk.com/api/feed/profile/v1/}
 * with {@code offset} set to {@code max_behot_time}. After each page the
 * {@code offset} field of the response becomes the cursor for the next
 * request, and paging stops as soon as the cursor no longer changes.
 *
 * <p>A failed page (request error, empty body, unparsable JSON) is retried,
 * but only a bounded number of times in a row; after that the records
 * collected so far are returned instead of looping forever.
 *
 * @param userId          value sent as the {@code visited_uid} query parameter
 * @param proxy           proxy handed to {@code httpBoot.syncCall}
 * @param max_behot_time  initial value of the {@code offset} query parameter;
 *                        it is concatenated into the URL as-is
 * @return one map per feed entry that carries a {@code comment_base} object,
 *         with keys {@code time}, {@code href}, {@code source},
 *         {@code content}, {@code readNum}, {@code commentNum},
 *         {@code user_id} and, when an {@code origin_group} is present,
 *         {@code title} and {@code replayUrl}; never {@code null}
 */
public static List<Map<String, Object>> getClientMicroToutiaoCrawler(String userId, ProxyHolder proxy, Long max_behot_time) {
    // Upper bound on back-to-back failed page requests. Without it a
    // persistent failure would retry the same URL indefinitely.
    final int maxConsecutiveFailures = 3;

    List<Map<String, Object>> dataList = new ArrayList<>();
    int consecutiveFailures = 0;

    while (true) {
        String url = "https://i.snssdk.com/api/feed/profile/v1/?visited_uid=" + userId + "&offset=" + max_behot_time;
        logger.debug("{}", url);
        // Cursor used for this request, kept to detect the end of the feed.
        String previousOffset = String.valueOf(max_behot_time);

        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
            String result = response.body().string();
            JSONObject json = JSONObject.parseObject(result);
            if (json == null) {
                // Routed through the catch below so it counts as a failed page.
                throw new IllegalStateException("empty response body for " + url);
            }
            max_behot_time = json.getLongValue("offset");
            JSONArray jsonArray = json.getJSONArray("data");
            logger.debug("{}", json);

            // A response without a "data" array is treated as an empty page.
            int itemCount = jsonArray == null ? 0 : jsonArray.size();
            for (int i = 0; i < itemCount; i++) {
                try {
                    JSONObject dataJSON = jsonArray.getJSONObject(i)
                            .getJSONObject("content")
                            .getJSONObject("raw_data");
                    Map<String, Object> map = buildClientMicroToutiaoRecord(dataJSON);
                    if (map != null) {
                        dataList.add(map);
                    }
                } catch (Exception e) {
                    // One malformed entry must not abort the rest of the page.
                    logger.warn("客户端微头条解析错误 {}", i, e);
                }
            }

            consecutiveFailures = 0;
            logger.debug(" 采集到 条 == {} -- {} -- {}", dataList.size(), previousOffset, max_behot_time);

            // The cursor did not advance: there are no further pages.
            if (previousOffset.equals(String.valueOf(max_behot_time))) {
                break;
            }
        } catch (Exception e) {
            consecutiveFailures++;
            logger.warn("客户端微头条采集错误 {}", url, e);
            if (consecutiveFailures >= maxConsecutiveFailures) {
                break;
            }
        }
    }
    return dataList;
}

/**
 * Converts the {@code raw_data} object of one feed entry into a result map.
 *
 * @param rawData the {@code raw_data} JSON object of a feed entry
 * @return the populated map, or {@code null} when the entry has no
 *         {@code comment_base} object and is therefore skipped
 */
private static Map<String, Object> buildClientMicroToutiaoRecord(JSONObject rawData) {
    if (rawData == null) {
        return null;
    }
    logger.debug("{}", rawData);

    JSONObject commentBase = rawData.getJSONObject("comment_base");
    if (commentBase == null) {
        return null;
    }

    JSONObject userInfo = commentBase.getJSONObject("user").getJSONObject("info");
    JSONObject action = commentBase.getJSONObject("action");

    Map<String, Object> map = new HashMap<>();

    JSONObject originGroup = rawData.getJSONObject("origin_group");
    if (originGroup != null) {
        map.put("title", originGroup.getString("title"));
        map.put("replayUrl", originGroup.getString("article_url"));
    }

    // create_time is multiplied by 1000 before being passed to Date,
    // which takes milliseconds.
    map.put("time", new Date(commentBase.getLongValue("create_time") * 1000L));
    map.put("href", "http://weitoutiao.zjurl.cn/ugc/share/wap/comment/" + rawData.getLongValue("id"));
    map.put("source", userInfo.getString("name"));
    map.put("content", commentBase.getString("content"));
    map.put("readNum", String.valueOf(action.getInteger("read_count")));
    map.put("commentNum", String.valueOf(action.getInteger("comment_count")));
    map.put("user_id", userInfo.getString("user_id"));
    return map;
}
/**
* @Title: parseHtmlByMicroAccount
* @author hero
* @Description: 解析微头条数据
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoCommentParse.java
View file @
7eca1950
...
...
@@ -103,7 +103,7 @@ public class TouTiaoCommentParse {
*/
private
static
List
<
TouTiaoComment
>
analySisComment
(
String
htmlBody
,
String
url
)
{
List
<
TouTiaoComment
>
list
=
new
ArrayList
<
TouTiaoComment
>();
List
<
TouTiaoComment
>
list
=
new
ArrayList
<>();
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONArray
commentes
=
json
.
getJSONArray
(
"data"
);
...
...
@@ -124,9 +124,7 @@ public class TouTiaoCommentParse {
list
.
add
(
ttComment
);
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
logger
.
debug
(
"解析今日头条评论列表出现为题,{}"
,
e
.
getMessage
());
return
null
;
logger
.
debug
(
"解析今日头条评论列表出现为题,{}"
,
e
);
}
return
list
;
}
...
...
@@ -154,12 +152,10 @@ public class TouTiaoCommentParse {
return
(
int
)
Math
.
ceil
((
double
)
count
/
20.0
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
logger
.
info
(
"获取评论总页数时出现问题:{}"
,
e
.
getMessage
());
return
0
;
logger
.
info
(
"获取评论总页数时出现问题:{}"
,
e
);
}
}
return
0
;
return
-
1
;
}
...
...
@@ -224,7 +220,7 @@ public class TouTiaoCommentParse {
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析头条评论数错误:::{}"
,
e
.
fillInStackTrace
());
}
return
0
;
return
-
1
;
}
/**
...
...
@@ -249,19 +245,17 @@ public class TouTiaoCommentParse {
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
data
=
json
.
getJSONObject
(
"data"
);
int
count
=
data
.
getIntValue
(
"total"
);
return
data
.
getIntValue
(
"total"
);
return
count
;
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
logger
.
info
(
"获取评论总页数时出现问题:{}"
,
e
.
getMessage
());
logger
.
info
(
"获取评论总页数时出现问题:{}"
,
e
);
}
}
}
catch
(
Exception
e
)
{
continue
;
}
}
return
0
;
return
-
1
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment