Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
toutiao
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
toutiao
Commits
b60d852a
Commit
b60d852a
authored
Dec 19, 2019
by
cwy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
头条客户端历史采集修改 并提升版本
parent
1d1ff5f8
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
38 additions
and
21 deletions
+38
-21
pom.xml
+1
-1
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
+37
-20
No files found.
pom.xml
View file @
b60d852a
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
<modelVersion>
4.0.0
</modelVersion>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
<artifactId>
toutiao
</artifactId>
<artifactId>
toutiao
</artifactId>
<version>
0.4.
2
-SNAPSHOT
</version>
<version>
0.4.
3
-SNAPSHOT
</version>
<dependencies>
<dependencies>
<dependency>
<dependency>
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
View file @
b60d852a
...
@@ -13,7 +13,13 @@ package com.zhiwei.toutiao.parse;
...
@@ -13,7 +13,13 @@ package com.zhiwei.toutiao.parse;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.util.*
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Objects
;
import
java.util.regex.Matcher
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
java.util.regex.Pattern
;
...
@@ -422,7 +428,7 @@ public class TouTiaoArticleParse {
...
@@ -422,7 +428,7 @@ public class TouTiaoArticleParse {
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
String
ma
=
""
;
String
ma
=
""
;
while
(
true
)
{
while
(
true
)
{
String
url
=
"https://
i.snssdk.com/api/feed/profile/v1/?visited_uid="
+
userId
+
"&offset="
+
maxBehotTime
;
String
url
=
"https://
profile.zjurl.cn/api/feed/profile/v1/?category=profile_all&visited_uid="
+
userId
+
"&stream_api_version=82&offset="
+
maxBehotTime
+
"&version_code=7.5.3&version_name=70503&user_id="
+
userId
+
"&request_source=1&active_tab=dongtai&device_id=65&app_name=news_article"
;
ma
=
String
.
valueOf
(
maxBehotTime
);
ma
=
String
.
valueOf
(
maxBehotTime
);
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
String
result
=
response
.
body
().
string
();
...
@@ -430,25 +436,35 @@ public class TouTiaoArticleParse {
...
@@ -430,25 +436,35 @@ public class TouTiaoArticleParse {
maxBehotTime
=
json
.
getLongValue
(
"offset"
);
maxBehotTime
=
json
.
getLongValue
(
"offset"
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"data"
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"data"
);
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
JSONObject
data
=
jsonArray
.
getJSONObject
(
i
);
try
{
try
{
JSONObject
dataJSON
=
data
.
getJSONObject
(
"content"
).
getJSONObject
(
"raw_data"
);
JSONObject
data
=
jsonArray
.
getJSONObject
(
i
);
JSONObject
dataJSON
=
data
.
getJSONObject
(
"content"
);
if
(
Objects
.
nonNull
(
dataJSON
.
getLongValue
(
"id"
)))
{
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
if
(
dataJSON
.
containsKey
(
"comment_base"
)
&&
dataJSON
.
getJSONObject
(
"comment_base"
)!=
null
)
{
String
source
=
null
;
JSONObject
commentBase
=
dataJSON
.
getJSONObject
(
"comment_base"
);
Date
date
=
null
;
Date
date
=
new
Date
(
commentBase
.
getLongValue
(
"create_time"
)
*
1000
);
String
content
=
null
;
String
href
=
"http://weitoutiao.zjurl.cn/ugc/share/wap/comment/"
+
dataJSON
.
getLongValue
(
"id"
);
String
title
=
null
;
String
source
=
commentBase
.
getJSONObject
(
"user"
).
getJSONObject
(
"info"
).
getString
(
"name"
);
if
(
Objects
.
nonNull
(
dataJSON
.
getJSONObject
(
"user_info"
)))
{
String
content
=
commentBase
.
getString
(
"content"
);
source
=
dataJSON
.
getJSONObject
(
"user_info"
).
getString
(
"name"
);
String
readNum
=
commentBase
.
getJSONObject
(
"action"
).
getInteger
(
"read_count"
)
+
""
;
userId
=
dataJSON
.
getJSONObject
(
"user_info"
).
getString
(
"user_id"
);
String
commentNum
=
commentBase
.
getJSONObject
(
"action"
).
getInteger
(
"comment_count"
)
+
""
;
date
=
new
Date
(
dataJSON
.
getLongValue
(
"publish_time"
)
*
1000
);
userId
=
commentBase
.
getJSONObject
(
"user"
).
getJSONObject
(
"info"
).
getString
(
"user_id"
);
content
=
dataJSON
.
getString
(
"abstract"
);
if
(
dataJSON
.
containsKey
(
"origin_group"
))
{
title
=
dataJSON
.
getString
(
"title"
);
String
replayUrl
=
dataJSON
.
getJSONObject
(
"origin_group"
).
getString
(
"article_url"
);
map
.
put
(
"type"
,
"文章"
);
String
title
=
dataJSON
.
getJSONObject
(
"origin_group"
).
getString
(
"title"
);
}
map
.
put
(
"title"
,
title
);
if
(
Objects
.
nonNull
(
dataJSON
.
getJSONObject
(
"user"
)))
{
map
.
put
(
"replayUrl"
,
replayUrl
);
source
=
dataJSON
.
getJSONObject
(
"user"
).
getString
(
"name"
);
userId
=
dataJSON
.
getJSONObject
(
"user"
).
getString
(
"user_id"
);
date
=
new
Date
(
dataJSON
.
getLongValue
(
"create_time"
)
*
1000
);
content
=
dataJSON
.
getString
(
"content"
);
map
.
put
(
"type"
,
"微头条"
);
}
}
if
(
Objects
.
nonNull
(
source
))
{
String
href
=
"https://www.toutiao.com/a"
+
dataJSON
.
getLongValue
(
"id"
)
+
"/"
;
String
readNum
=
String
.
valueOf
(
dataJSON
.
getInteger
(
"read_count"
));
String
commentNum
=
String
.
valueOf
(
dataJSON
.
getInteger
(
"comment_count"
));
map
.
put
(
"title"
,
title
);
map
.
put
(
"time"
,
date
);
map
.
put
(
"time"
,
date
);
map
.
put
(
"href"
,
href
);
map
.
put
(
"href"
,
href
);
map
.
put
(
"source"
,
source
);
map
.
put
(
"source"
,
source
);
...
@@ -456,10 +472,11 @@ public class TouTiaoArticleParse {
...
@@ -456,10 +472,11 @@ public class TouTiaoArticleParse {
map
.
put
(
"readNum"
,
readNum
);
map
.
put
(
"readNum"
,
readNum
);
map
.
put
(
"commentNum"
,
commentNum
);
map
.
put
(
"commentNum"
,
commentNum
);
map
.
put
(
"user_id"
,
userId
);
map
.
put
(
"user_id"
,
userId
);
System
.
out
.
println
(
map
.
toString
());
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
// System.out.println(data.toString());
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
}
}
...
@@ -686,7 +703,7 @@ public class TouTiaoArticleParse {
...
@@ -686,7 +703,7 @@ public class TouTiaoArticleParse {
String
userId
=
data
.
getJSONObject
(
"media_user"
).
getLong
(
"id"
).
toString
();
String
userId
=
data
.
getJSONObject
(
"media_user"
).
getLong
(
"id"
).
toString
();
String
source
=
data
.
getString
(
"source"
);
String
source
=
data
.
getString
(
"source"
);
String
title
=
data
.
getString
(
"title"
);
String
title
=
data
.
getString
(
"title"
);
String
link
=
data
.
getString
(
"url"
);
//
String link = data.getString("url");
String
content
=
data
.
getString
(
"content"
);
String
content
=
data
.
getString
(
"content"
);
if
(
data
.
containsKey
(
"content"
)
&&
StringUtils
.
isNotBlank
(
content
)){
if
(
data
.
containsKey
(
"content"
)
&&
StringUtils
.
isNotBlank
(
content
)){
content
=
Jsoup
.
parse
(
content
).
text
();
content
=
Jsoup
.
parse
(
content
).
text
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment