Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
toutiao
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
toutiao
Commits
1d4533e0
Commit
1d4533e0
authored
Dec 08, 2018
by
[zhangzhiwei]
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加sign加密参数解密,并实现pc端网页头条号历史文章及关注列表采集
parent
c2e5c825
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
477 additions
and
155 deletions
+477
-155
pom.xml
+7
-2
src/main/java/com/zhiwei/toutiao/bean/Signature.java
+204
-0
src/main/java/com/zhiwei/toutiao/parse/TouTiaoAccountParse.java
+94
-2
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
+151
-19
src/main/java/com/zhiwei/toutiao/util/Tools.java
+5
-130
src/main/resources/signature.js
+12
-0
src/test/java/com/zhiwei/toutiao/test/TouTiaoChannelExample.java
+4
-2
No files found.
pom.xml
View file @
1d4533e0
...
@@ -3,13 +3,18 @@
...
@@ -3,13 +3,18 @@
<modelVersion>
4.0.0
</modelVersion>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
<artifactId>
toutiao
</artifactId>
<artifactId>
toutiao
</artifactId>
<version>
0.2.
4
-SNAPSHOT
</version>
<version>
0.2.
6
-SNAPSHOT
</version>
<dependencies>
<dependencies>
<dependency>
<dependency>
<groupId>
com.zhiwei.tools
</groupId>
<groupId>
com.zhiwei.tools
</groupId>
<artifactId>
zhiwei-tools
</artifactId>
<artifactId>
zhiwei-tools
</artifactId>
<version>
0.0.9-SNAPSHOT
</version>
<version>
0.1.0-SNAPSHOT
</version>
</dependency>
<dependency>
<groupId>
com.zhiwei.crawler
</groupId>
<artifactId>
crawler-core
</artifactId>
<version>
0.0.6-RELEASE
</version>
</dependency>
</dependency>
</dependencies>
</dependencies>
...
...
src/main/java/com/zhiwei/toutiao/bean/Signature.java
0 → 100644
View file @
1d4533e0
package
com
.
zhiwei
.
toutiao
.
bean
;
import
java.io.BufferedReader
;
import
java.io.IOException
;
import
java.io.InputStream
;
import
java.io.InputStreamReader
;
import
java.security.MessageDigest
;
import
java.security.NoSuchAlgorithmException
;
import
java.util.Date
;
import
javax.script.Invocable
;
import
javax.script.ScriptEngine
;
import
javax.script.ScriptEngineManager
;
/**
 * Signing parameters for Toutiao (今日头条) requests.
 *
 * <p>Every instance carries the time-derived {@code as}/{@code cp} pair. Instances built with
 * {@link #Signature(String, String)} additionally carry the value returned by the
 * {@code merge} function of the bundled {@code signature.js} script.
 *
 * @author bewiler hk
 */
public class Signature {

    /** Classpath resource that defines the JavaScript {@code merge} function. */
    private static final String SCRIPT_RESOURCE = "signature.js";

    /** Charset used for hashing input and for reading the script resource. */
    private static final java.nio.charset.Charset UTF8 = java.nio.charset.StandardCharsets.UTF_8;

    private String cp;
    private String as;
    private String signature;

    /**
     * Builds the script-based signature for {@code userId + max_behot_time} together with the
     * {@code as}/{@code cp} pair.
     *
     * @param userId         user id fed to the script; when {@code null} the script receives "0"
     * @param max_behot_time paging value appended to {@code userId} before signing
     */
    public Signature(String userId, String max_behot_time) {
        this.signature = this.getSign(userId, max_behot_time);
        getASCP();
    }

    /** Builds only the {@code as}/{@code cp} pair; {@link #getSignature()} stays {@code null}. */
    public Signature() {
        getASCP();
    }

    /** @return the {@code cp} value computed at construction time */
    public String getCp() {
        return cp;
    }

    /** @return the {@code as} value computed at construction time */
    public String getAs() {
        return as;
    }

    /**
     * @return the script-generated signature, or {@code null} when it was not requested or could
     *         not be computed (script resource missing, no JavaScript engine, script failure)
     */
    public String getSignature() {
        return signature;
    }

    /**
     * Evaluates {@code signature.js} and calls its {@code merge} function.
     *
     * @return the string form of the script result, or {@code null} on any failure
     */
    private String getSign(String userId, String max_behot_time) {
        String jsText = getJSText();
        if (jsText == null) {
            System.err.println("Signature: script resource '" + SCRIPT_RESOURCE + "' could not be read");
            return null;
        }
        // getEngineByName returns null when no matching engine is installed on this JVM.
        ScriptEngine engine = new ScriptEngineManager().getEngineByName("javascript");
        if (!(engine instanceof Invocable)) {
            System.err.println("Signature: no invocable 'javascript' script engine available");
            return null;
        }
        String str = "0";
        if (userId != null) {
            str = userId + max_behot_time;
        }
        try {
            engine.eval(jsText);
            Object sign = ((Invocable) engine).invokeFunction("merge", str);
            return sign == null ? null : sign.toString();
        } catch (Exception e) {
            // Best effort: script engines may raise runtime exceptions besides ScriptException.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Computes the {@code as} and {@code cp} fields from the current epoch second: characters of
     * its upper-case hex form are interleaved with the first and last five characters of the
     * upper-case MD5 of its decimal form.
     */
    private void getASCP() {
        long seconds = new Date().getTime() / 1000L;
        // The indexing below reads hex positions 0..7, i.e. it relies on an 8-digit hex timestamp.
        String hex = Long.toHexString(seconds).toUpperCase();
        String digest = parseStrToMd5L32(Long.toString(seconds)).toUpperCase();
        int tailStart = digest.length() - 5;

        StringBuilder asMiddle = new StringBuilder(10);
        StringBuilder cpMiddle = new StringBuilder(10);
        for (int k = 0; k < 5; k++) {
            asMiddle.append(digest.charAt(k)).append(hex.charAt(k));
            cpMiddle.append(hex.charAt(k + 3)).append(digest.charAt(tailStart + k));
        }

        this.as = "A1" + asMiddle + hex.substring(hex.length() - 3);
        this.cp = hex.substring(0, 3) + cpMiddle + "E1";
    }

    /**
     * Computes the MD5 of the UTF-8 bytes of a string.
     *
     * @param str text to hash; may be {@code null}
     * @return 32 lower-case hex characters, or {@code null} when {@code str} is {@code null}
     */
    public static String md5(String str) {
        if (str == null) {
            return null;
        }
        return md5Hex(str.getBytes(UTF8));
    }

    /**
     * Converts bytes to a lower-case hex string.
     *
     * @param bytes data to encode
     * @return two hex characters per byte, or {@code null} for a {@code null} or empty array
     */
    private static String bytesToHexString(byte bytes[]) {
        if (bytes == null || bytes.length == 0) {
            return null;
        }
        StringBuilder sb = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            int unsigned = b & 0xFF;
            if (unsigned < 16) {
                sb.append('0');
            }
            sb.append(Integer.toHexString(unsigned));
        }
        return sb.toString();
    }

    /**
     * Computes the 32-character lower-case MD5 of a string (UTF-8 encoded, so the result does
     * not depend on the platform default charset).
     *
     * @param str text to hash; must not be {@code null}
     * @return 32 lower-case hex characters
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String parseStrToMd5L32(String str) {
        return md5Hex(str.getBytes(UTF8));
    }

    /** Shared MD5 routine; returns {@code null} only if the JVM lacks the MD5 algorithm. */
    private static String md5Hex(byte[] input) {
        try {
            return bytesToHexString(MessageDigest.getInstance("MD5").digest(input));
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the {@code signature.js} classpath resource.
     *
     * @return the script text with line breaks preserved, or {@code null} when the resource is
     *         missing or cannot be read
     */
    private String getJSText() {
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        if (loader == null) {
            loader = Signature.class.getClassLoader();
        }
        InputStream is = loader == null
                ? ClassLoader.getSystemResourceAsStream(SCRIPT_RESOURCE)
                : loader.getResourceAsStream(SCRIPT_RESOURCE);
        if (is == null) {
            return null;
        }
        // NOTE(review): the resource is decoded as UTF-8 — confirm the file's encoding.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(is, UTF8))) {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                // Keep line breaks so "//" comments and statement boundaries in the script survive.
                sb.append(line).append('\n');
            }
            return sb.toString();
        } catch (IOException e) {
            return null;
        }
    }
}
src/main/java/com/zhiwei/toutiao/parse/TouTiaoAccountParse.java
View file @
1d4533e0
...
@@ -13,8 +13,10 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -13,8 +13,10 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.toutiao.bean.Signature
;
import
com.zhiwei.toutiao.bean.TouTiaoAccount
;
import
com.zhiwei.toutiao.bean.TouTiaoAccount
;
import
com.zhiwei.toutiao.util.Tools
;
import
com.zhiwei.toutiao.util.Tools
;
...
@@ -142,16 +144,18 @@ public class TouTiaoAccountParse {
...
@@ -142,16 +144,18 @@ public class TouTiaoAccountParse {
* @param @return 设定文件
* @param @return 设定文件
* @return List<TouTiaoAccount> 返回类型
* @return List<TouTiaoAccount> 返回类型
*/
*/
@Deprecated
public
static
List
<
TouTiaoAccount
>
getFriendsList
(
String
userid
,
Proxy
proxy
,
long
sleep
){
public
static
List
<
TouTiaoAccount
>
getFriendsList
(
String
userid
,
Proxy
proxy
,
long
sleep
){
List
<
TouTiaoAccount
>
ttaList
=
new
ArrayList
<>();
List
<
TouTiaoAccount
>
ttaList
=
new
ArrayList
<>();
boolean
more
=
true
;
boolean
more
=
true
;
int
page
=
0
;
int
page
=
0
;
while
(
more
){
while
(
more
){
String
url
=
"http://i
s.snssdk.com/user/following/?offset="
+
page
*
50
+
"&device_id=35330393347&count=50&user_id="
+
userid
+
"
&ts="
+
System
.
currentTimeMillis
()/
1000
;
String
url
=
"http://i
t-hl.snssdk.com/user/relation/following/v2/?user_id="
+
userid
+
"&device_id=54560738994&cursor=&iid=53238029655&offset="
+
page
*
50
+
"&count=50
&ts="
+
System
.
currentTimeMillis
()/
1000
;
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
page
++;
page
++;
headerMap
=
Tools
.
getTouTiaoHeader
();
headerMap
=
Tools
.
getTouTiaoHeader
();
headerMap
.
put
(
"Host"
,
"is.snssdk.com"
);
headerMap
.
put
(
"User-Agent"
,
"Dalvik/2.1.0 (Linux; U; Android 8.1.0; MI 8 MIUI/V10.0.11.0.OEACNFH) NewsArticle/7.0.1 cronet/TTNetVersion:pre_blink_merge-277498-gd2bb364e 2018-08-24"
);
headerMap
.
put
(
"Host"
,
"it-hl.snssdk.com"
);
try
{
try
{
String
htmlBody
=
null
;
String
htmlBody
=
null
;
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
...
@@ -178,6 +182,40 @@ public class TouTiaoAccountParse {
...
@@ -178,6 +182,40 @@ public class TouTiaoAccountParse {
}
}
/**
 * Collects the accounts a user follows via the {@code www.toutiao.com/c/user/following/}
 * endpoint, signing the request with {@link Signature}.
 *
 * @param userid id of the user whose following list is requested
 * @param proxy  proxy holder handed to {@code httpBoot.syncCall}
 * @return the accounts parsed from the responses (possibly empty), or {@code null} when a
 *         request or the response handling throws
 */
public static List<TouTiaoAccount> getFriendsList(String userid, ProxyHolder proxy) {
    List<TouTiaoAccount> ttaList = new ArrayList<>();
    String _signature = new Signature(userid, "0").getSignature();
    boolean more = true;
    while (more) {
        // NOTE(review): cursor is hard-coded to 0 and the signature never changes, so every
        // iteration sends the same request. If the server keeps answering has_more=true this
        // loop will not terminate and will re-add the same page. Confirm which response field
        // carries the next cursor and feed it into the URL (and the signature input).
        String url = "https://www.toutiao.com/c/user/following/?user_id=" + userid
                + "&cursor=0&count=100&_signature=" + _signature;
        System.out.println(url);
        headerMap = Tools.getTouTiaoHeader();
        headerMap.put("referer", "https://www.toutiao.com/c/user/relation/" + userid + "/?tab=following");
        headerMap.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36");
        try {
            String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
            if (htmlBody != null && htmlBody.contains("name")) {
                JSONObject json = JSONObject.parseObject(htmlBody);
                more = json.getBooleanValue("has_more");
                List<TouTiaoAccount> dataList = parseFans(json);
                if (dataList != null && !dataList.isEmpty()) {
                    ttaList.addAll(dataList);
                } else {
                    // Nothing parsed from this page: stop paging.
                    more = false;
                }
            } else {
                more = false;
            }
        } catch (Exception e) {
            // Log the exception as thrown; fillInStackTrace() would overwrite its origin.
            logger.error("获取今日头条帐号数据连接超时", e);
            return null;
        }
    }
    return ttaList;
}
/**
/**
* @Title: parseHtmlByAccount
* @Title: parseHtmlByAccount
...
@@ -356,6 +394,60 @@ public class TouTiaoAccountParse {
...
@@ -356,6 +394,60 @@ public class TouTiaoAccountParse {
}
}
/**
 * Parses the {@code data} array of a following-list response into accounts.
 *
 * <p>Only user id, media id, name, avatar URL (prefixed with {@code "https:"}), verification
 * flag and verification text are read from each entry; the remaining constructor arguments
 * are passed as {@code null} / {@code 0}. Entries that fail to parse are logged and skipped.
 *
 * @param json response object containing a {@code data} array
 * @return the parsed accounts, or {@code null} when the array itself cannot be processed
 */
private static List<TouTiaoAccount> parseFans(JSONObject json) {
    // Values this endpoint does not provide; kept as typed locals for the constructor call.
    final String description = null;
    final int follow_count = 0;
    final Date create_time = null;
    final String gender = null;
    final String user_type = null;

    List<TouTiaoAccount> accounts = new ArrayList<>();
    try {
        JSONArray entries = json.getJSONArray("data");
        for (int idx = 0; idx < entries.size(); idx++) {
            try {
                JSONObject entry = entries.getJSONObject(idx);
                Long user_id = entry.getLong("user_id");
                Long media_id = entry.getLong("media_id");
                String name = entry.getString("name");
                String img_url = "https:" + entry.getString("avatar_url");
                Integer user_verified = entry.getInteger("user_verified");
                String verify_content = entry.getString("verified_content");
                accounts.add(new TouTiaoAccount(user_id, name, media_id, description, user_verified,
                        verify_content, follow_count, img_url, create_time, gender, user_type));
            } catch (Exception e) {
                // Skip the broken entry and keep going with the next one.
                logger.error("数据解析出现问题,{}", e);
            }
        }
    } catch (Exception e) {
        logger.error("数据解析出现问题,{}", e);
        return null;
    }
    return accounts;
}
/***
/***
* @Title: parseHtmlByFans
* @Title: parseHtmlByFans
* @author hero
* @author hero
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
View file @
1d4533e0
...
@@ -28,6 +28,7 @@ import com.zhiwei.crawler.core.HttpBoot;
...
@@ -28,6 +28,7 @@ import com.zhiwei.crawler.core.HttpBoot;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.toutiao.bean.Signature
;
import
com.zhiwei.toutiao.bean.TouTiaoArticle
;
import
com.zhiwei.toutiao.bean.TouTiaoArticle
;
import
com.zhiwei.toutiao.util.Tools
;
import
com.zhiwei.toutiao.util.Tools
;
...
@@ -51,10 +52,10 @@ public class TouTiaoArticleParse {
...
@@ -51,10 +52,10 @@ public class TouTiaoArticleParse {
* @return List<TouTiao> 返回类型
* @return List<TouTiao> 返回类型
* @throws Exception
* @throws Exception
*/
*/
@Deprecated
public
static
Map
<
String
,
Object
>
getTouTiaoList
(
String
media_id
,
String
max_behot_time
,
Date
endData
,
Proxy
proxy
)
throws
Exception
{
public
static
Map
<
String
,
Object
>
getTouTiaoList
(
String
media_id
,
String
max_behot_time
,
Date
endData
,
Proxy
proxy
)
throws
Exception
{
String
as
=
Tools
.
getAS
().
split
(
"_"
)[
0
];
Signature
signature
=
new
Signature
();
String
cp
=
Tools
.
getAS
().
split
(
"_"
)[
1
];
String
url
=
"https://www.toutiao.com/pgc/ma/?page_type=1&media_id="
+
media_id
+
"&count=20&as="
+
signature
.
getAs
()+
"&cp="
+
signature
.
getCp
();
String
url
=
"https://www.toutiao.com/pgc/ma/?page_type=1&media_id="
+
media_id
+
"&count=20&as="
+
as
+
"&cp="
+
cp
;
if
(
max_behot_time
!=
null
){
if
(
max_behot_time
!=
null
){
url
=
url
+
"&max_behot_time="
+
max_behot_time
;
url
=
url
+
"&max_behot_time="
+
max_behot_time
;
}
}
...
@@ -79,10 +80,11 @@ public class TouTiaoArticleParse {
...
@@ -79,10 +80,11 @@ public class TouTiaoArticleParse {
return
null
;
return
null
;
}
}
@Deprecated
public
static
Map
<
String
,
Object
>
getTouTiaoList
(
String
media_id
,
Long
max_behot_time
,
Date
endData
,
ProxyHolder
proxy
)
throws
Exception
{
public
static
Map
<
String
,
Object
>
getTouTiaoList
(
String
media_id
,
Long
max_behot_time
,
Date
endData
,
ProxyHolder
proxy
)
throws
Exception
{
String
as
=
Tools
.
getAS
().
split
(
"_"
)[
0
];
Signature
signature
=
new
Signature
();
String
cp
=
Tools
.
getAS
().
split
(
"_"
)[
1
];
String
as
=
signature
.
getAs
();
String
cp
=
signature
.
getCp
();
String
url
=
"https://www.toutiao.com/pgc/ma/?page_type=1&media_id="
+
media_id
+
"&count=20&as="
+
as
+
"&cp="
+
cp
;
String
url
=
"https://www.toutiao.com/pgc/ma/?page_type=1&media_id="
+
media_id
+
"&count=20&as="
+
as
+
"&cp="
+
cp
;
if
(
max_behot_time
!=
null
){
if
(
max_behot_time
!=
null
){
url
=
url
+
"&max_behot_time="
+
max_behot_time
;
url
=
url
+
"&max_behot_time="
+
max_behot_time
;
...
@@ -102,12 +104,81 @@ public class TouTiaoArticleParse {
...
@@ -102,12 +104,81 @@ public class TouTiaoArticleParse {
logger
.
info
(
"数据为null"
);
logger
.
info
(
"数据为null"
);
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"获取今日头条帐号数据连接超时"
,
e
);
throw
e
;
}
return
null
;
}
/**
* 获取今日头条历史文章接口新
* @param user_id
* @param max_behot_time
* @param endData
* @param proxy
* @return
* @throws Exception
*/
public
static
Map
<
String
,
Object
>
getTouTiaoHistory
(
String
user_id
,
String
max_behot_time
,
Date
endData
,
Proxy
proxy
)
throws
Exception
{
Signature
signature
=
new
Signature
(
user_id
,
max_behot_time
);
String
as
=
signature
.
getAs
();
String
cp
=
signature
.
getCp
();
String
url
=
"https://www.toutiao.com/c/user/article/?page_type=1&user_id="
+
user_id
+
"&max_behot_time="
+
max_behot_time
+
"&count=20&as="
+
as
+
"&cp="
+
cp
+
"&_signature="
+
signature
;
if
(
max_behot_time
!=
null
){
url
=
url
+
"&max_behot_time="
+
max_behot_time
;
}
System
.
out
.
println
(
"url=========="
+
url
);
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
headerMap
.
put
(
"user-agent"
,
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36"
);
headerMap
.
put
(
"referer"
,
"https://www.toutiao.com/c/user/"
+
user_id
+
"/"
);
String
htmlBody
=
null
;
try
{
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"behot_time"
)){
Map
<
String
,
Object
>
ttList
=
parseHtmlByAccount
(
user_id
,
htmlBody
,
endData
);
if
(
ttList
!=
null
&&
ttList
.
size
()>
0
){
return
ttList
;
}
}
else
{
logger
.
info
(
"数据为null"
);
}
}
catch
(
Exception
e
)
{
logger
.
error
(
"获取今日头条帐号数据连接超时"
,
e
.
fillInStackTrace
());
logger
.
error
(
"获取今日头条帐号数据连接超时"
,
e
.
fillInStackTrace
());
throw
e
;
throw
e
;
}
}
return
null
;
return
null
;
}
}
/**
 * Fetches one page of a user's article history from
 * {@code www.toutiao.com/c/user/article/} through a {@link ProxyHolder}.
 *
 * @param user_id        id of the user whose articles are requested
 * @param max_behot_time paging value sent as {@code max_behot_time} and used as signature input
 * @param endData        cut-off date forwarded to {@code parseHtmlByAccount}
 * @param proxy          proxy holder handed to {@code httpBoot.syncCall}
 * @return the non-empty map produced by {@code parseHtmlByAccount}; {@code null} when the
 *         response lacks {@code behot_time} or nothing could be parsed
 * @throws Exception whatever the HTTP call or response handling throws, after logging it
 */
public static Map<String, Object> getTouTiaoHistory(String user_id, String max_behot_time, Date endData, ProxyHolder proxy) throws Exception {
    Signature signature = new Signature(user_id, max_behot_time);
    String as = signature.getAs();
    String cp = signature.getCp();
    // Send the computed signature string, not the Signature object's Object.toString().
    // NOTE(review): getSignature() is null when the script could not be evaluated, and a null
    // max_behot_time is still sent as the literal "null" — confirm the intended first-page value.
    String url = "https://www.toutiao.com/c/user/article/?page_type=1&user_id=" + user_id
            + "&max_behot_time=" + max_behot_time
            + "&count=20&as=" + as
            + "&cp=" + cp
            + "&_signature=" + signature.getSignature();
    System.out.println("url==========" + url);

    Map<String, String> headerMap = new HashMap<String, String>();
    headerMap.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36");
    headerMap.put("referer", "https://www.toutiao.com/c/user/" + user_id + "/");

    try {
        String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
        if (htmlBody != null && htmlBody.contains("behot_time")) {
            Map<String, Object> ttList = parseHtmlByAccount(user_id, htmlBody, endData);
            if (ttList != null && ttList.size() > 0) {
                return ttList;
            }
        } else {
            logger.info("数据为null");
        }
    } catch (Exception e) {
        logger.error("获取今日头条帐号数据连接超时", e);
        throw e;
    }
    return null;
}
/***
/***
* 根据帐号解析历史文章地址
* 根据帐号解析历史文章地址
...
@@ -119,14 +190,13 @@ public class TouTiaoArticleParse {
...
@@ -119,14 +190,13 @@ public class TouTiaoArticleParse {
* @return List<String> 返回类型
* @return List<String> 返回类型
*/
*/
private
static
Map
<
String
,
Object
>
parseHtmlByAccount
(
String
htmlBody
,
Date
endDate
)
{
private
static
Map
<
String
,
Object
>
parseHtmlByAccount
(
String
htmlBody
,
Date
endDate
)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Stri
ng
max_behot_time
=
null
;
Lo
ng
max_behot_time
=
null
;
List
<
TouTiaoArticle
>
dataList
=
new
ArrayList
<
TouTiaoArticle
>();
List
<
TouTiaoArticle
>
dataList
=
new
ArrayList
<
TouTiaoArticle
>();
try
{
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"data"
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"data"
);
max_behot_time
=
json
.
getJSONObject
(
"next"
).
getString
(
"max_behot_time"
);
max_behot_time
=
Long
.
valueOf
(
json
.
getJSONObject
(
"next"
).
getString
(
"max_behot_time"
)
);
String
title
=
null
;
String
title
=
null
;
String
content
=
null
;
String
content
=
null
;
String
time
=
null
;
String
time
=
null
;
...
@@ -148,10 +218,7 @@ public class TouTiaoArticleParse {
...
@@ -148,10 +218,7 @@ public class TouTiaoArticleParse {
content
=
data
.
getString
(
"abstract"
);
content
=
data
.
getString
(
"abstract"
);
time
=
data
.
getLongValue
(
"behot_time"
)*
1000
+
""
;
time
=
data
.
getLongValue
(
"behot_time"
)*
1000
+
""
;
date
=
TimeParse
.
stringFormartDate
(
time
);
date
=
TimeParse
.
stringFormartDate
(
time
);
readNum
=
data
.
getString
(
"total_read_count"
);
readNum
=
data
.
getString
(
"go_detail_count"
);
if
(
readNum
==
null
)
{
readNum
=
data
.
getInteger
(
"internal_visit_count"
)+
""
;
}
commentNum
=
data
.
getString
(
"comments_count"
);
commentNum
=
data
.
getString
(
"comments_count"
);
playNum
=
data
.
getString
(
"detail_play_effective_count"
);
playNum
=
data
.
getString
(
"detail_play_effective_count"
);
shareNum
=
data
.
getString
(
"share_count"
);
shareNum
=
data
.
getString
(
"share_count"
);
...
@@ -174,20 +241,85 @@ public class TouTiaoArticleParse {
...
@@ -174,20 +241,85 @@ public class TouTiaoArticleParse {
return
null
;
return
null
;
}
}
if
(
max_behot_time
!=
null
&&
!
"0"
.
equals
(
max_behot_time
)){
if
(
endDate
!=
null
){
Date
nextDate
=
new
Date
(
Long
.
valueOf
(
max_behot_time
+
"000"
));
if
(
max_behot_time
!=
null
&&
!
"0"
.
equals
(
max_behot_time
)){
if
(
endDate
.
after
(
nextDate
)){
Date
nextDate
=
new
Date
(
Long
.
valueOf
(
max_behot_time
+
"000"
));
max_behot_time
=
null
;
if
(
endDate
.
after
(
nextDate
)){
max_behot_time
=
null
;
}
}
}
}
}
map
.
put
(
"max_behot_time"
,
max_behot_time
);
map
.
put
(
"max_behot_time"
,
max_behot_time
);
map
.
put
(
"data"
,
dataList
);
map
.
put
(
"data"
,
dataList
);
return
map
;
return
map
;
}
}
/**
 * Parses an article-history response for one user.
 *
 * <p>Entries of the {@code data} array that carry a {@code group_id} become
 * {@code TouTiaoArticle} objects; entries that fail to parse are logged and skipped.
 *
 * @param user_id  user id stored on every parsed article
 * @param htmlBody JSON response text
 * @param endDate  optional cut-off; when it lies after the next page's time the returned
 *                 {@code max_behot_time} is cleared to {@code null}
 * @return a map with keys {@code "max_behot_time"} (Long or {@code null}) and {@code "data"}
 *         (list of articles), or {@code null} when the response as a whole cannot be parsed
 */
private static Map<String, Object> parseHtmlByAccount(String user_id, String htmlBody, Date endDate) {
    List<TouTiaoArticle> articles = new ArrayList<TouTiaoArticle>();
    Long max_behot_time = null;
    try {
        JSONObject root = JSONObject.parseObject(htmlBody);
        JSONArray entries = root.getJSONArray("data");
        max_behot_time = Long.valueOf(root.getJSONObject("next").getString("max_behot_time"));
        for (int idx = 0; idx < entries.size(); idx++) {
            try {
                JSONObject entry = entries.getJSONObject(idx);
                if (!entry.containsKey("group_id")) {
                    continue;
                }
                String href = "https://www.toutiao.com/" + "a" + entry.getLongValue("group_id");
                String title = entry.getString("title");
                String content = entry.getString("abstract");
                // behot_time is multiplied by 1000 before being handed to the date parser.
                String time = entry.getLongValue("behot_time") * 1000 + "";
                Date date = TimeParse.stringFormartDate(time);
                String readNum = entry.getString("go_detail_count");
                String commentNum = entry.getString("comments_count");
                String playNum = entry.getString("detail_play_effective_count");
                String shareNum = entry.getString("share_count");
                String source = entry.getString("source");

                TouTiaoArticle article = new TouTiaoArticle(href, title, user_id, source, date, content,
                        commentNum, playNum, readNum, shareNum, "今日头条");
                if (entry.containsKey("label")) {
                    List<String> labelList = entry.getJSONArray("label").toJavaList(String.class);
                    article.setLabelList(labelList);
                }
                articles.add(article);
            } catch (Exception e) {
                // Skip the broken entry and continue with the next one.
                logger.error("数据解析出现问题,{}", e.getMessage());
            }
        }
    } catch (Exception e) {
        logger.error("数据解析出现问题,{}", e.getMessage());
        return null;
    }

    // NOTE(review): max_behot_time is a Long, so "0".equals(max_behot_time) is always false
    // and this test reduces to the null check. Kept as-is to preserve current paging behaviour.
    if (endDate != null && max_behot_time != null && !"0".equals(max_behot_time)) {
        // String concatenation: appending "000" to the value multiplies it by 1000 before Date.
        Date nextDate = new Date(Long.valueOf(max_behot_time + "000"));
        if (endDate.after(nextDate)) {
            max_behot_time = null;
        }
    }

    Map<String, Object> result = new HashMap<String, Object>();
    result.put("max_behot_time", max_behot_time);
    result.put("data", articles);
    return result;
}
/**
/**
* @Title: getMicroTouTiaoCrawler
* @Title: getMicroTouTiaoCrawler
* @author hero
* @author hero
...
...
src/main/java/com/zhiwei/toutiao/util/Tools.java
View file @
1d4533e0
package
com
.
zhiwei
.
toutiao
.
util
;
package
com
.
zhiwei
.
toutiao
.
util
;
import
java.io.BufferedReader
;
import
java.io.IOException
;
import
java.io.InputStream
;
import
java.io.InputStreamReader
;
import
java.security.MessageDigest
;
import
java.security.NoSuchAlgorithmException
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -14,132 +7,10 @@ public class Tools {
...
@@ -14,132 +7,10 @@ public class Tools {
public
static
String
getText
(
String
textFileName
)
{
try
{
StringBuffer
sb
=
new
StringBuffer
();
InputStream
is
=
Thread
.
currentThread
().
getContextClassLoader
()
.
getResourceAsStream
(
"tac_sign.txt"
);
BufferedReader
br
=
new
BufferedReader
(
new
InputStreamReader
(
is
));
String
line
=
""
;
while
((
line
=
br
.
readLine
())!=
null
)
{
sb
.
append
(
line
);
}
br
.
close
();
return
sb
.
toString
();
}
catch
(
IOException
e
)
{
return
null
;
}
}
/**
* @Title: getAS
* @Description: TODO(获取今日头条加密值)
* @param @return 设定文件
* @return String 返回类型
*/
public
static
String
getAS
()
{
long
i
=
(
long
)
Math
.
floor
(
new
Date
().
getTime
()/
1000L
);
String
t
=
Long
.
toHexString
(
i
).
toUpperCase
();
char
[]
ts
=
t
.
toCharArray
();
String
e
=
parseStrToMd5L32
(
i
+
""
).
toString
().
toUpperCase
();
// System.out.println(i+"========"+t);
char
[]
s
=
e
.
substring
(
0
,
5
).
toCharArray
();
char
[]
a
=
e
.
substring
(
e
.
length
()-
5
,
e
.
length
()).
toCharArray
();
String
c
=
""
;
String
o
=
""
;
for
(
int
n
=
0
;
5
>
n
;
n
++)
{
o
+=
""
+
s
[
n
]
+
ts
[
n
];
}
for
(
int
r
=
0
;
5
>
r
;
r
++)
{
c
+=
""
+
ts
[
r
+
3
]
+
a
[
r
];
}
String
as
=
"A1"
+
o
+
t
.
substring
(
t
.
length
()-
3
,
t
.
length
());
String
cp
=
t
.
substring
(
0
,
3
)
+
c
+
"E1"
;
return
as
+
"_"
+
cp
;
}
// public static void main(String[] args) {
// Tools.getAS();
// }
/**
* 计算字符串Md5
* @Title: md5
* @param str
* @return String
*/
public
static
String
md5
(
String
str
)
{
String
result
=
null
;
try
{
MessageDigest
md
=
MessageDigest
.
getInstance
(
"MD5"
);
byte
[]
bytes
=
str
.
getBytes
(
"utf-8"
);
md
.
update
(
bytes
);
bytes
=
md
.
digest
();
result
=
bytesToHexString
(
bytes
);
}
catch
(
Exception
e
)
{}
return
result
;
}
/**
* 将二进制转换成16进制字符串
* @Title bytesToHexString
* @param buf
* @return String
*/
private
static
String
bytesToHexString
(
byte
bytes
[])
{
String
result
=
null
;
if
(
bytes
!=
null
)
{
if
(
bytes
.
length
>
0
)
{
StringBuffer
sb
=
new
StringBuffer
();
for
(
int
i
=
0
;
i
<
bytes
.
length
;
i
++)
{
String
hex
=
Integer
.
toHexString
(
bytes
[
i
]
&
0xFF
);
if
(
hex
.
length
()
==
1
)
{
hex
=
'0'
+
hex
;
}
sb
.
append
(
hex
);
}
result
=
sb
.
toString
().
toLowerCase
();
}
}
return
result
;
}
/**
* @param str
* @return
* @Date: 2013-9-6
* @Author: lulei
* @Description: 32位小写MD5
*/
public
static
String
parseStrToMd5L32
(
String
str
){
String
reStr
=
null
;
try
{
MessageDigest
md5
=
MessageDigest
.
getInstance
(
"MD5"
);
byte
[]
bytes
=
md5
.
digest
(
str
.
getBytes
());
StringBuffer
stringBuffer
=
new
StringBuffer
();
for
(
byte
b
:
bytes
){
int
bt
=
b
&
0xff
;
if
(
bt
<
16
){
stringBuffer
.
append
(
0
);
}
stringBuffer
.
append
(
Integer
.
toHexString
(
bt
));
}
reStr
=
stringBuffer
.
toString
();
}
catch
(
NoSuchAlgorithmException
e
)
{
e
.
printStackTrace
();
}
return
reStr
;
}
/**
/**
* @Title: getTouTiaoHeader
* @Title: getTouTiaoHeader
...
@@ -266,4 +137,8 @@ public class Tools {
...
@@ -266,4 +137,8 @@ public class Tools {
return
headerMap
;
return
headerMap
;
}
}
}
}
src/main/resources/signature.js
0 → 100644
View file @
1d4533e0
File added
src/test/java/com/zhiwei/toutiao/test/TouTiaoChannelExample.java
View file @
1d4533e0
...
@@ -3,6 +3,7 @@ package com.zhiwei.toutiao.test;
...
@@ -3,6 +3,7 @@ package com.zhiwei.toutiao.test;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
com.zhiwei.toutiao.bean.Signature
;
import
com.zhiwei.toutiao.bean.TouTiaoArticle
;
import
com.zhiwei.toutiao.bean.TouTiaoArticle
;
import
com.zhiwei.toutiao.parse.TouTiaoChannelParse
;
import
com.zhiwei.toutiao.parse.TouTiaoChannelParse
;
import
com.zhiwei.toutiao.util.Tools
;
import
com.zhiwei.toutiao.util.Tools
;
...
@@ -23,8 +24,9 @@ public class TouTiaoChannelExample {
...
@@ -23,8 +24,9 @@ public class TouTiaoChannelExample {
if
(
i
==
0
){
if
(
i
==
0
){
max_behot_time
=
0
;
max_behot_time
=
0
;
}
}
String
as
=
Tools
.
getAS
().
split
(
"_"
)[
0
];
Signature
signature
=
new
Signature
();
String
cp
=
Tools
.
getAS
().
split
(
"_"
)[
1
];
String
as
=
signature
.
getAs
();
String
cp
=
signature
.
getCp
();
String
url
=
"http://www.toutiao.com/api/pc/feed/?category=news_tech&utm_source=toutiao"
String
url
=
"http://www.toutiao.com/api/pc/feed/?category=news_tech&utm_source=toutiao"
+
"&widen=1&max_behot_time="
+
max_behot_time
+
"&max_behot_time_tmp="
+
max_behot_time
+
"&widen=1&max_behot_time="
+
max_behot_time
+
"&max_behot_time_tmp="
+
max_behot_time
+
"&tadrequire=true&as="
+
as
+
"&cp="
+
cp
;
+
"&tadrequire=true&as="
+
as
+
"&cp="
+
cp
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment