Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
toutiao
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
toutiao
Commits
255db67e
Commit
255db67e
authored
Nov 10, 2018
by
[zhangzhiwei]
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
文章添加采集标签
parent
cf30abcd
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
60 additions
and
26 deletions
+60
-26
src/main/java/com/zhiwei/toutiao/bean/TouTiaoArticle.java
+13
-2
src/main/java/com/zhiwei/toutiao/parse/TouTiaoAccountParse.java
+21
-12
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
+26
-12
No files found.
src/main/java/com/zhiwei/toutiao/bean/TouTiaoArticle.java
View file @
255db67e
...
@@ -12,6 +12,7 @@ package com.zhiwei.toutiao.bean;
...
@@ -12,6 +12,7 @@ package com.zhiwei.toutiao.bean;
import
java.io.Serializable
;
import
java.io.Serializable
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
/**
/**
* @Description:
* @Description:
...
@@ -33,6 +34,8 @@ public class TouTiaoArticle implements Serializable{
...
@@ -33,6 +34,8 @@ public class TouTiaoArticle implements Serializable{
private
String
playCount
;
private
String
playCount
;
private
String
readNum
;
private
String
readNum
;
private
String
shareNum
;
private
String
shareNum
;
private
List
<
String
>
labelList
;
public
String
getCommentCount
()
{
public
String
getCommentCount
()
{
return
commentCount
;
return
commentCount
;
}
}
...
@@ -82,8 +85,6 @@ public class TouTiaoArticle implements Serializable{
...
@@ -82,8 +85,6 @@ public class TouTiaoArticle implements Serializable{
this
.
content
=
content
;
this
.
content
=
content
;
}
}
public
TouTiaoArticle
(){}
public
String
getUser_id
()
{
public
String
getUser_id
()
{
return
user_id
;
return
user_id
;
}
}
...
@@ -102,6 +103,14 @@ public class TouTiaoArticle implements Serializable{
...
@@ -102,6 +103,14 @@ public class TouTiaoArticle implements Serializable{
public
void
setShareNum
(
String
shareNum
)
{
public
void
setShareNum
(
String
shareNum
)
{
this
.
shareNum
=
shareNum
;
this
.
shareNum
=
shareNum
;
}
}
public
List
<
String
>
getLabelList
()
{
return
labelList
;
}
public
void
setLabelList
(
List
<
String
>
labelList
)
{
this
.
labelList
=
labelList
;
}
public
TouTiaoArticle
(){}
public
TouTiaoArticle
(
String
url
,
String
title
,
String
user_id
,
public
TouTiaoArticle
(
String
url
,
String
title
,
String
user_id
,
String
source
,
Date
time
,
String
content
,
String
commentCount
,
String
source
,
Date
time
,
String
content
,
String
commentCount
,
String
playCount
,
String
readNum
,
String
shareNum
,
String
type
)
String
playCount
,
String
readNum
,
String
shareNum
,
String
type
)
...
@@ -133,7 +142,9 @@ public class TouTiaoArticle implements Serializable{
...
@@ -133,7 +142,9 @@ public class TouTiaoArticle implements Serializable{
+
", playCount = "
+
playCount
+
", playCount = "
+
playCount
+
", readNum = "
+
readNum
+
", readNum = "
+
readNum
+
", shareNum = "
+
shareNum
+
", shareNum = "
+
shareNum
+
", labelList = "
+
labelList
+
"]"
;
+
"]"
;
}
}
}
}
src/main/java/com/zhiwei/toutiao/parse/TouTiaoAccountParse.java
View file @
255db67e
...
@@ -191,37 +191,46 @@ public class TouTiaoAccountParse {
...
@@ -191,37 +191,46 @@ public class TouTiaoAccountParse {
try
{
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"data"
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"data"
);
Long
user_id
=
null
;
String
name
=
null
;
Long
media_id
=
-
1L
;
String
description
=
null
;
Integer
user_verified
=
-
1
;
String
verify_content
=
null
;
Integer
follow_count
=
-
1
;
Date
create_time
=
null
;
String
img_url
=
null
;
String
gender
=
null
;
String
user_type
=
null
;
TouTiaoAccount
tta
=
null
;
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
try
{
try
{
JSONObject
data
=
jsonArray
.
getJSONObject
(
i
);
JSONObject
data
=
jsonArray
.
getJSONObject
(
i
);
if
(
data
.
getLong
(
"id"
)
==
null
)
{
if
(
data
.
getLong
(
"id"
)
==
null
)
{
continue
;
continue
;
}
}
long
user_id
=
data
.
getLong
(
"id"
);
user_id
=
data
.
getLong
(
"id"
);
String
name
=
data
.
getString
(
"name"
);
name
=
data
.
getString
(
"name"
);
long
media_id
=
-
1L
;
if
(
data
.
getLong
(
"media_id"
)
!=
null
)
{
if
(
data
.
getLong
(
"media_id"
)
!=
null
)
{
media_id
=
data
.
getLong
(
"media_id"
);
media_id
=
data
.
getLong
(
"media_id"
);
}
}
String
description
=
data
.
getString
(
"description"
);
description
=
data
.
getString
(
"description"
);
int
user_verified
=
-
1
;
if
(
data
.
getInteger
(
"user_verified"
)
!=
null
)
{
if
(
data
.
getInteger
(
"user_verified"
)
!=
null
)
{
user_verified
=
data
.
getInteger
(
"user_verified"
);
user_verified
=
data
.
getInteger
(
"user_verified"
);
}
}
String
verify_content
=
data
.
getString
(
"verify_content"
);
verify_content
=
data
.
getString
(
"verify_content"
);
int
follow_count
=
-
1
;
if
(
data
.
getInteger
(
"follow_count"
)
!=
null
)
{
if
(
data
.
getInteger
(
"follow_count"
)
!=
null
)
{
follow_count
=
data
.
getInteger
(
"follow_count"
);
follow_count
=
data
.
getInteger
(
"follow_count"
);
}
}
String
img_url
=
"https:"
+
data
.
getString
(
"avatar_url"
);
img_url
=
"https:"
+
data
.
getString
(
"avatar_url"
);
Date
create_time
=
null
;
if
(
data
.
getString
(
"create_time"
)
!=
null
)
{
if
(
data
.
getString
(
"create_time"
)
!=
null
)
{
create_time
=
new
Date
(
Long
.
valueOf
(
data
.
getString
(
"create_time"
))*
1000
);
create_time
=
new
Date
(
Long
.
valueOf
(
data
.
getString
(
"create_time"
))*
1000
);
}
}
String
gender
=
data
.
getString
(
"gender"
);
gender
=
data
.
getString
(
"gender"
);
String
user_type
=
data
.
getString
(
"user_type"
);
user_type
=
data
.
getString
(
"user_type"
);
if
(
name
!=
null
&&
name
.
equals
(
word
)){
if
(
name
!=
null
&&
name
.
equals
(
word
)){
TouTiaoAccount
tta
=
new
TouTiaoAccount
(
user_id
,
name
,
media_id
,
description
,
user_verified
,
tta
=
new
TouTiaoAccount
(
user_id
,
name
,
media_id
,
description
,
user_verified
,
verify_content
,
follow_count
,
img_url
,
create_time
,
gender
,
user_type
);
verify_content
,
follow_count
,
img_url
,
create_time
,
gender
,
user_type
);
ZhiWeiTools
.
sleep
(
1000
);
ZhiWeiTools
.
sleep
(
1000
);
TouTiaoAccount
ttaUpdate
=
getTouTiaoAccountInfoByUserId
(
user_id
+
""
,
proxy
);
TouTiaoAccount
ttaUpdate
=
getTouTiaoAccountInfoByUserId
(
user_id
+
""
,
proxy
);
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
View file @
255db67e
...
@@ -57,7 +57,7 @@ public class TouTiaoArticleParse {
...
@@ -57,7 +57,7 @@ public class TouTiaoArticleParse {
if
(
max_behot_time
!=
null
){
if
(
max_behot_time
!=
null
){
url
=
url
+
"&max_behot_time="
+
max_behot_time
;
url
=
url
+
"&max_behot_time="
+
max_behot_time
;
}
}
System
.
out
.
println
(
url
);
System
.
out
.
println
(
"url=========="
+
url
);
Map
<
String
,
String
>
headerMap
=
Tools
.
getTouTiaoHeader
();
Map
<
String
,
String
>
headerMap
=
Tools
.
getTouTiaoHeader
();
headerMap
.
put
(
"Referer"
,
url
);
headerMap
.
put
(
"Referer"
,
url
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
...
@@ -96,28 +96,42 @@ public class TouTiaoArticleParse {
...
@@ -96,28 +96,42 @@ public class TouTiaoArticleParse {
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"data"
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"data"
);
max_behot_time
=
json
.
getJSONObject
(
"next"
).
getString
(
"max_behot_time"
);
max_behot_time
=
json
.
getJSONObject
(
"next"
).
getString
(
"max_behot_time"
);
String
title
=
null
;
String
content
=
null
;
String
time
=
null
;
Date
date
=
null
;
String
readNum
=
null
;
String
commentNum
=
null
;
String
playNum
=
null
;
String
shareNum
=
null
;
String
source
=
null
;
String
user_id
=
null
;
List
<
String
>
labelList
=
null
;
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
try
{
try
{
JSONObject
data
=
jsonArray
.
getJSONObject
(
i
);
JSONObject
data
=
jsonArray
.
getJSONObject
(
i
);
String
href
=
"https://www.toutiao.com/"
;
String
href
=
"https://www.toutiao.com/"
;
if
(
data
.
containsKey
(
"group_id"
)){
if
(
data
.
containsKey
(
"group_id"
)){
href
=
href
+
"a"
+
data
.
getLongValue
(
"group_id"
);
href
=
href
+
"a"
+
data
.
getLongValue
(
"group_id"
);
String
title
=
data
.
getString
(
"title"
);
title
=
data
.
getString
(
"title"
);
String
content
=
data
.
getString
(
"abstract"
);
content
=
data
.
getString
(
"abstract"
);
String
time
=
data
.
getLongValue
(
"behot_time"
)*
1000
+
""
;
time
=
data
.
getLongValue
(
"behot_time"
)*
1000
+
""
;
Date
date
=
TimeParse
.
stringFormartDate
(
time
);
date
=
TimeParse
.
stringFormartDate
(
time
);
String
readNum
=
data
.
getString
(
"total_read_count"
);
readNum
=
data
.
getString
(
"total_read_count"
);
if
(
readNum
==
null
)
{
if
(
readNum
==
null
)
{
readNum
=
data
.
getInteger
(
"internal_visit_count"
)+
""
;
readNum
=
data
.
getInteger
(
"internal_visit_count"
)+
""
;
}
}
String
commentNum
=
data
.
getString
(
"comments_count"
);
commentNum
=
data
.
getString
(
"comments_count"
);
String
playNum
=
data
.
getString
(
"detail_play_effective_count"
);
playNum
=
data
.
getString
(
"detail_play_effective_count"
);
String
shareNum
=
data
.
getString
(
"share_count"
);
shareNum
=
data
.
getString
(
"share_count"
);
String
source
=
data
.
getString
(
"source"
);
source
=
data
.
getString
(
"source"
);
String
user_id
=
data
.
getLong
(
"creator_uid"
)+
""
;
user_id
=
data
.
getLong
(
"creator_uid"
)+
""
;
TouTiaoArticle
tt
=
new
TouTiaoArticle
(
href
,
title
,
user_id
,
source
,
date
,
content
,
commentNum
,
playNum
,
readNum
,
shareNum
,
"今日头条"
);
TouTiaoArticle
tt
=
new
TouTiaoArticle
(
href
,
title
,
user_id
,
source
,
date
,
content
,
commentNum
,
playNum
,
readNum
,
shareNum
,
"今日头条"
);
if
(
data
.
containsKey
(
"label"
)){
labelList
=
data
.
getJSONArray
(
"label"
).
toJavaList
(
String
.
class
);
tt
.
setLabelList
(
labelList
);
}
dataList
.
add
(
tt
);
dataList
.
add
(
tt
);
System
.
out
.
println
(
tt
.
toString
());
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"数据解析出现问题,{}"
,
e
.
getMessage
());
logger
.
error
(
"数据解析出现问题,{}"
,
e
.
getMessage
());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment