Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
toutiao
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
toutiao
Commits
9e236728
Commit
9e236728
authored
Nov 19, 2018
by
[zhangzhiwei]
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
微头条添加头信息中cookie,不然无数据
parent
fdb8e380
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
72 additions
and
69 deletions
+72
-69
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
+5
-3
src/main/java/com/zhiwei/toutiao/util/Tools.java
+1
-0
src/test/java/com/zhiwei/toutiao/test/TouTiaoExample.java
+66
-66
No files found.
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
View file @
9e236728
...
@@ -175,8 +175,10 @@ public class TouTiaoArticleParse {
...
@@ -175,8 +175,10 @@ public class TouTiaoArticleParse {
}
}
Map
<
String
,
String
>
headerMap
=
Tools
.
getTouTiaoHeader
();
Map
<
String
,
String
>
headerMap
=
Tools
.
getTouTiaoHeader
();
headerMap
.
put
(
"Referer"
,
"https://www.toutiao.com/c/user/"
+
user_id
+
"/"
);
headerMap
.
put
(
"Referer"
,
"https://www.toutiao.com/c/user/"
+
user_id
+
"/"
);
System
.
out
.
println
(
url
);
try
{
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
System
.
out
.
println
(
htmlBody
);
if
(
htmlBody
!=
null
)
{
if
(
htmlBody
!=
null
)
{
Map
<
String
,
Object
>
dataMap
=
parseHtmlByMicroAccount
(
htmlBody
,
endDate
);
Map
<
String
,
Object
>
dataMap
=
parseHtmlByMicroAccount
(
htmlBody
,
endDate
);
if
(
dataMap
!=
null
&&
dataMap
.
size
()>
0
){
if
(
dataMap
!=
null
&&
dataMap
.
size
()>
0
){
...
@@ -189,7 +191,6 @@ public class TouTiaoArticleParse {
...
@@ -189,7 +191,6 @@ public class TouTiaoArticleParse {
logger
.
info
(
"获取数据出错::{},数据为null"
,
e
.
fillInStackTrace
());
logger
.
info
(
"获取数据出错::{},数据为null"
,
e
.
fillInStackTrace
());
return
null
;
return
null
;
}
}
return
null
;
return
null
;
}
}
...
@@ -245,9 +246,10 @@ public class TouTiaoArticleParse {
...
@@ -245,9 +246,10 @@ public class TouTiaoArticleParse {
continue
;
continue
;
}
}
}
}
System
.
out
.
println
(
source
+
"========="
+
dataList
.
size
());
/**验证是否有下一页数据**/
/**验证是否有下一页数据**/
if
(
more
){
if
(
more
){
if
(
max_behot_time
!=
null
&&
!
"0"
.
equals
(
max_behot_time
)
){
if
(
max_behot_time
!=
null
&&
max_behot_time
!=
0
){
if
(
endDate
.
after
(
date
)){
if
(
endDate
.
after
(
date
)){
max_behot_time
=
null
;
max_behot_time
=
null
;
}
}
...
@@ -259,7 +261,7 @@ public class TouTiaoArticleParse {
...
@@ -259,7 +261,7 @@ public class TouTiaoArticleParse {
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
map
.
put
(
"max_behot_time"
,
max_behot_time
);
map
.
put
(
"max_behot_time"
,
max_behot_time
+
""
);
map
.
put
(
"data"
,
dataList
);
map
.
put
(
"data"
,
dataList
);
return
map
;
return
map
;
...
...
src/main/java/com/zhiwei/toutiao/util/Tools.java
View file @
9e236728
...
@@ -161,6 +161,7 @@ public class Tools {
...
@@ -161,6 +161,7 @@ public class Tools {
headerMap
.
put
(
"Upgrade-Insecure-Requests"
,
"1"
);
headerMap
.
put
(
"Upgrade-Insecure-Requests"
,
"1"
);
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
);
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
);
headerMap
.
put
(
"Accept-Encoding"
,
"deflate, br"
);
headerMap
.
put
(
"Accept-Encoding"
,
"deflate, br"
);
headerMap
.
put
(
"cookie"
,
""
);
return
headerMap
;
return
headerMap
;
}
}
...
...
src/test/java/com/zhiwei/toutiao/test/TouTiaoExample.java
View file @
9e236728
/
//
**
/**
//
* @Title: TouTiaoExample.java
* @Title: TouTiaoExample.java
//
* @Package com.zhiwei.toutiao.test
* @Package com.zhiwei.toutiao.test
//
* @Description:
* @Description:
//
* @author hero
* @author hero
//
* @date 2016年9月2日 上午11:48:51
* @date 2016年9月2日 上午11:48:51
//
* @version V1.0
* @version V1.0
//
*/
*/
/
//
**
/**
//
*
*
//
*/
*/
//
package com.zhiwei.toutiao.test;
package
com
.
zhiwei
.
toutiao
.
test
;
//
//
import java.util.ArrayList;
import
java.util.ArrayList
;
//
import java.util.Date;
import
java.util.Date
;
//
import java.util.List;
import
java.util.List
;
//
import java.util.Map;
import
java.util.Map
;
//
//
import com.zhiwei.tools.timeparse.TimeParse;
import
com.zhiwei.tools.timeparse.TimeParse
;
//
import com.zhiwei.toutiao.bean.TouTiaoArticle;
import
com.zhiwei.toutiao.bean.TouTiaoArticle
;
//
import com.zhiwei.toutiao.parse.TouTiaoArticleParse;
import
com.zhiwei.toutiao.parse.TouTiaoArticleParse
;
//
/
//
**
/**
//
* @Description:
* @Description:
//
* @author hero
* @author hero
//
* @date 2016年9月2日 上午11:48:51
* @date 2016年9月2日 上午11:48:51
//
*/
*/
//
public class TouTiaoExample {
public
class
TouTiaoExample
{
//
//
@SuppressWarnings("unchecked")
@SuppressWarnings
(
"unchecked"
)
//
public static void main(String[] args) throws Exception {
public
static
void
main
(
String
[]
args
)
throws
Exception
{
//
long a = System.currentTimeMillis();
long
a
=
System
.
currentTimeMillis
();
//
List<String> urlList = new ArrayList<String>();
List
<
String
>
urlList
=
new
ArrayList
<
String
>();
// urlList.add("6859134443
");
urlList
.
add
(
"5757091251
"
);
//
//
System.out.println(urlList.size());
System
.
out
.
println
(
urlList
.
size
());
//
//
Date endTime = TimeParse.stringFormartDate("2018-04-01");
Date
endTime
=
TimeParse
.
stringFormartDate
(
"2018-04-01"
);
//
//
for (String url : urlList) {
for
(
String
url
:
urlList
)
{
//
String mid = url;
String
mid
=
url
;
//
String max_behot_time = "0";
String
max_behot_time
=
"0"
;
//
while (true) {
while
(
true
)
{
//
Map<String, Object> dataMap = null;
Map
<
String
,
Object
>
dataMap
=
null
;
// dataMap = TouTiaoArticleParse.getTouTiaoList(mid, max_behot_time, endTime,null
);
dataMap
=
TouTiaoArticleParse
.
getMicroTouTiaoCrawler
(
mid
,
endTime
,
null
,
max_behot_time
);
//
if (dataMap != null) {
if
(
dataMap
!=
null
)
{
//
List<TouTiaoArticle> ttlist = (List<TouTiaoArticle>) dataMap.get("data");
List
<
TouTiaoArticle
>
ttlist
=
(
List
<
TouTiaoArticle
>)
dataMap
.
get
(
"data"
);
//
max_behot_time = (String) dataMap.get("max_behot_time");
max_behot_time
=
(
String
)
dataMap
.
get
(
"max_behot_time"
);
//
System.out.println(max_behot_time + "=======" + ttlist.size());
System
.
out
.
println
(
max_behot_time
+
"======="
+
ttlist
.
size
());
//
if (max_behot_time == null || ttlist.isEmpty()) {
if
(
max_behot_time
==
null
||
ttlist
.
isEmpty
())
{
//
break;
break
;
//
} else {
}
else
{
//
if (ttlist.size() > 0) {
if
(
ttlist
.
size
()
>
0
)
{
//
for (TouTiaoArticle tt : ttlist) {
for
(
TouTiaoArticle
tt
:
ttlist
)
{
//
System.out.println(tt);
System
.
out
.
println
(
tt
);
//
}
}
//
}
}
//
}
}
//
}
}
//
}
}
//
}
}
//
long b = System.currentTimeMillis();
long
b
=
System
.
currentTimeMillis
();
//
System.out.println("一轮的采集时间为:" + (b - a) / 1000);
System
.
out
.
println
(
"一轮的采集时间为:"
+
(
b
-
a
)
/
1000
);
//
}
}
//
//
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment