Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
toutiao
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
toutiao
Commits
47079954
Commit
47079954
authored
Dec 14, 2018
by
[zhangzhiwei]
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
采集核心包升级
parent
3e39658a
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
19 deletions
+16
-19
pom.xml
+2
-2
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
+13
-16
src/test/java/com/zhiwei/toutiao/test/TouTiaoExample.java
+1
-1
No files found.
pom.xml
View file @
47079954
...
...
@@ -9,12 +9,12 @@
<dependency>
<groupId>
com.zhiwei.tools
</groupId>
<artifactId>
zhiwei-tools
</artifactId>
<version>
0.1.
0
-SNAPSHOT
</version>
<version>
0.1.
1
-SNAPSHOT
</version>
</dependency>
<dependency>
<groupId>
com.zhiwei.crawler
</groupId>
<artifactId>
crawler-core
</artifactId>
<version>
0.1.
0
-RELEASE
</version>
<version>
0.1.
1
-RELEASE
</version>
</dependency>
</dependencies>
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
View file @
47079954
...
...
@@ -124,9 +124,6 @@ public class TouTiaoArticleParse {
String
cp
=
signature
.
getCp
();
String
_signature
=
signature
.
getSignature
();
String
url
=
"https://www.toutiao.com/c/user/article/?page_type=1&user_id="
+
user_id
+
"&max_behot_time="
+
max_behot_time
+
"&count=20&as="
+
as
+
"&cp="
+
cp
+
"&_signature="
+
_signature
;
if
(
max_behot_time
!=
null
){
url
=
url
+
"&max_behot_time="
+
max_behot_time
;
}
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
headerMap
.
put
(
"user-agent"
,
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36"
);
headerMap
.
put
(
"referer"
,
"https://www.toutiao.com/c/user/"
+
user_id
+
"/"
);
...
...
@@ -152,28 +149,28 @@ public class TouTiaoArticleParse {
}
public
static
Map
<
String
,
Object
>
getTouTiaoHistory
(
String
user_id
,
String
max_behot_time
,
Date
endData
,
ProxyHolder
proxy
)
throws
Exception
{
Signature
signature
=
new
Signature
(
user_id
,
max_behot_time
);
String
as
=
signature
.
getAs
();
String
cp
=
signature
.
getCp
();
String
_signature
=
signature
.
getSignature
();
String
url
=
"https://www.toutiao.com/c/user/article/?page_type=1&user_id="
+
user_id
+
"&max_behot_time="
+
max_behot_time
+
"&count=20&as="
+
as
+
"&cp="
+
cp
+
"&_signature="
+
_signature
;
if
(
max_behot_time
!=
null
){
url
=
url
+
"&max_behot_time="
+
max_behot_time
;
}
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
headerMap
.
put
(
"user-agent"
,
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36"
);
headerMap
.
put
(
"referer"
,
"https://www.toutiao.com/c/user/"
+
user_id
+
"/"
);
String
htmlBody
=
null
;
for
(
int
i
=
0
;
i
<
3
;
i
++){
Signature
signature
=
new
Signature
(
user_id
,
max_behot_time
);
String
as
=
signature
.
getAs
();
String
cp
=
signature
.
getCp
();
String
_signature
=
signature
.
getSignature
();
String
url
=
"https://www.toutiao.com/c/user/article/?page_type=1&user_id="
+
user_id
+
"&max_behot_time="
+
max_behot_time
+
"&count=20&as="
+
as
+
"&cp="
+
cp
+
"&_signature="
+
_signature
;
logger
.
info
(
"当前采集的历史文章链接:::{}"
,
url
);
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
headerMap
.
put
(
"user-agent"
,
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36"
);
headerMap
.
put
(
"referer"
,
"https://www.toutiao.com/c/user/"
+
user_id
+
"/"
);
String
htmlBody
=
null
;
try
{
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"behot_time"
)){
Map
<
String
,
Object
>
ttList
=
parseHtmlByAccount
(
user_id
,
htmlBody
,
endData
);
if
(
ttList
!=
null
&&
ttList
.
size
()>
0
){
return
ttList
;
}
else
{
break
;
}
}
else
{
logger
.
info
(
"数据为null
"
);
logger
.
info
(
"数据为null
,获取到的文本为:::{}"
,
htmlBody
);
continue
;
}
}
catch
(
Exception
e
)
{
...
...
src/test/java/com/zhiwei/toutiao/test/TouTiaoExample.java
View file @
47079954
...
...
@@ -39,7 +39,7 @@ public class TouTiaoExample {
ProxyFactory
.
init
(
registry
,
group
,
GroupType
.
PROVIDER
);
List
<
String
>
urlList
=
new
ArrayList
<
String
>();
urlList
.
add
(
"
6075371636
"
);
urlList
.
add
(
"
1920576965
"
);
Date
endTime
=
TimeParse
.
stringFormartDate
(
"2018-10-01"
);
for
(
String
url
:
urlList
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment