Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
toutiao
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
toutiao
Commits
c3de2f29
Commit
c3de2f29
authored
Oct 18, 2018
by
[zhangzhiwei]
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
因修改采集核心包版本,修改相应的方法
parent
48b45d95
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
14 additions
and
13 deletions
+14
-13
pom.xml
+2
-2
src/main/java/com/zhiwei/toutiao/parse/TouTiaoAccountParse.java
+7
-7
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
+3
-3
src/main/java/com/zhiwei/wangyi/parse/WangyiNewParse.java
+2
-1
No files found.
pom.xml
View file @
c3de2f29
...
@@ -3,13 +3,13 @@
...
@@ -3,13 +3,13 @@
<modelVersion>
4.0.0
</modelVersion>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
<artifactId>
toutiao
</artifactId>
<artifactId>
toutiao
</artifactId>
<version>
0.2.
2
-SNAPSHOT
</version>
<version>
0.2.
3
-SNAPSHOT
</version>
<dependencies>
<dependencies>
<dependency>
<dependency>
<groupId>
com.zhiwei.tools
</groupId>
<groupId>
com.zhiwei.tools
</groupId>
<artifactId>
zhiwei-tools
</artifactId>
<artifactId>
zhiwei-tools
</artifactId>
<version>
0.0.
5
-SNAPSHOT
</version>
<version>
0.0.
8
-SNAPSHOT
</version>
</dependency>
</dependency>
</dependencies>
</dependencies>
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoAccountParse.java
View file @
c3de2f29
...
@@ -29,7 +29,7 @@ public class TouTiaoAccountParse {
...
@@ -29,7 +29,7 @@ public class TouTiaoAccountParse {
private
TouTiaoAccountParse
()
{}
private
TouTiaoAccountParse
()
{}
private
static
Map
<
String
,
String
>
headerMap
;
private
static
Map
<
String
,
String
>
headerMap
;
private
static
Logger
logger
=
LogManager
.
getLogger
(
TouTiaoAccountParse
.
class
);
private
static
Logger
logger
=
LogManager
.
getLogger
(
TouTiaoAccountParse
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
/**
/**
* @Title: getTouTiaoAccountInfo
* @Title: getTouTiaoAccountInfo
* @author hero
* @author hero
...
@@ -45,13 +45,13 @@ public class TouTiaoAccountParse {
...
@@ -45,13 +45,13 @@ public class TouTiaoAccountParse {
TouTiaoAccount
tta
=
null
;
TouTiaoAccount
tta
=
null
;
try
{
try
{
String
htmlBody
=
null
;
String
htmlBody
=
null
;
htmlBody
=
H
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
htmlBody
=
h
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
if
(
htmlBody
!=
null
){
if
(
htmlBody
!=
null
){
tta
=
parseHtmlByAccount
(
htmlBody
,
name
,
proxy
);
tta
=
parseHtmlByAccount
(
htmlBody
,
name
,
proxy
);
if
(
tta
==
null
){
if
(
tta
==
null
){
url
=
"https://www.toutiao.com/search_content/?offset=0&format=json&keyword="
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
)+
"&autoload=true&count=20&cur_tab=4&from=media"
;
url
=
"https://www.toutiao.com/search_content/?offset=0&format=json&keyword="
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
)+
"&autoload=true&count=20&cur_tab=4&from=media"
;
headerMap
.
put
(
"Referer"
,
"https://www.toutiao.com/search/?keyword="
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
));
headerMap
.
put
(
"Referer"
,
"https://www.toutiao.com/search/?keyword="
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
));
htmlBody
=
H
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
htmlBody
=
h
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
if
(
htmlBody
!=
null
){
if
(
htmlBody
!=
null
){
tta
=
parseHtmlByAccount
(
htmlBody
,
name
,
proxy
);
tta
=
parseHtmlByAccount
(
htmlBody
,
name
,
proxy
);
}
}
...
@@ -59,7 +59,7 @@ public class TouTiaoAccountParse {
...
@@ -59,7 +59,7 @@ public class TouTiaoAccountParse {
}
else
{
}
else
{
url
=
"https://www.toutiao.com/search_content/?offset=0&format=json&keyword="
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
)+
"&autoload=true&count=20&cur_tab=4&from=media"
;
url
=
"https://www.toutiao.com/search_content/?offset=0&format=json&keyword="
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
)+
"&autoload=true&count=20&cur_tab=4&from=media"
;
headerMap
.
put
(
"Referer"
,
"https://www.toutiao.com/search/?keyword="
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
));
headerMap
.
put
(
"Referer"
,
"https://www.toutiao.com/search/?keyword="
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
));
htmlBody
=
H
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
htmlBody
=
h
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
if
(
htmlBody
!=
null
){
if
(
htmlBody
!=
null
){
tta
=
parseHtmlByAccount
(
htmlBody
,
name
,
proxy
);
tta
=
parseHtmlByAccount
(
htmlBody
,
name
,
proxy
);
}
}
...
@@ -80,7 +80,7 @@ public class TouTiaoAccountParse {
...
@@ -80,7 +80,7 @@ public class TouTiaoAccountParse {
TouTiaoAccount
tta
=
null
;
TouTiaoAccount
tta
=
null
;
try
{
try
{
String
htmlBody
=
null
;
String
htmlBody
=
null
;
htmlBody
=
H
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
htmlBody
=
h
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
if
(
htmlBody
!=
null
){
if
(
htmlBody
!=
null
){
tta
=
parseAccountByUserId
(
htmlBody
,
user_id
,
proxy
);
tta
=
parseAccountByUserId
(
htmlBody
,
user_id
,
proxy
);
}
}
...
@@ -113,7 +113,7 @@ public class TouTiaoAccountParse {
...
@@ -113,7 +113,7 @@ public class TouTiaoAccountParse {
headerMap
=
Tools
.
getTouTiaoHeader
();
headerMap
=
Tools
.
getTouTiaoHeader
();
try
{
try
{
String
htmlBody
=
null
;
String
htmlBody
=
null
;
htmlBody
=
H
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
htmlBody
=
h
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
if
(
htmlBody
!=
null
){
if
(
htmlBody
!=
null
){
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
list
.
addAll
(
parseHtmlByWord
(
json
,
proxy
));
list
.
addAll
(
parseHtmlByWord
(
json
,
proxy
));
...
@@ -153,7 +153,7 @@ public class TouTiaoAccountParse {
...
@@ -153,7 +153,7 @@ public class TouTiaoAccountParse {
headerMap
.
put
(
"Host"
,
"is.snssdk.com"
);
headerMap
.
put
(
"Host"
,
"is.snssdk.com"
);
try
{
try
{
String
htmlBody
=
null
;
String
htmlBody
=
null
;
htmlBody
=
H
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
htmlBody
=
h
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"name"
)){
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"name"
)){
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
more
=
json
.
getJSONObject
(
"data"
).
getBooleanValue
(
"has_more"
);
more
=
json
.
getJSONObject
(
"data"
).
getBooleanValue
(
"has_more"
);
...
...
src/main/java/com/zhiwei/toutiao/parse/TouTiaoArticleParse.java
View file @
c3de2f29
...
@@ -38,7 +38,7 @@ import com.zhiwei.toutiao.util.Tools;
...
@@ -38,7 +38,7 @@ import com.zhiwei.toutiao.util.Tools;
public
class
TouTiaoArticleParse
{
public
class
TouTiaoArticleParse
{
private
TouTiaoArticleParse
()
{}
private
TouTiaoArticleParse
()
{}
private
static
Logger
logger
=
LogManager
.
getLogger
(
TouTiaoArticleParse
.
class
);
private
static
Logger
logger
=
LogManager
.
getLogger
(
TouTiaoArticleParse
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
/***
/***
* 获取头条数据
* 获取头条数据
*
*
...
@@ -60,7 +60,7 @@ public class TouTiaoArticleParse {
...
@@ -60,7 +60,7 @@ public class TouTiaoArticleParse {
headerMap
.
put
(
"Referer"
,
url
);
headerMap
.
put
(
"Referer"
,
url
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
try
{
try
{
htmlBody
=
H
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
htmlBody
=
h
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"behot_time"
)){
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"behot_time"
)){
Map
<
String
,
Object
>
ttList
=
parseHtmlByAccount
(
htmlBody
,
endData
);
Map
<
String
,
Object
>
ttList
=
parseHtmlByAccount
(
htmlBody
,
endData
);
if
(
ttList
!=
null
&&
ttList
.
size
()>
0
){
if
(
ttList
!=
null
&&
ttList
.
size
()>
0
){
...
@@ -155,7 +155,7 @@ public class TouTiaoArticleParse {
...
@@ -155,7 +155,7 @@ public class TouTiaoArticleParse {
Map
<
String
,
String
>
headerMap
=
Tools
.
getTouTiaoHeader
();
Map
<
String
,
String
>
headerMap
=
Tools
.
getTouTiaoHeader
();
headerMap
.
put
(
"Referer"
,
"https://www.toutiao.com/c/user/"
+
user_id
+
"/"
);
headerMap
.
put
(
"Referer"
,
"https://www.toutiao.com/c/user/"
+
user_id
+
"/"
);
try
{
try
{
String
htmlBody
=
H
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
String
htmlBody
=
h
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
if
(
htmlBody
!=
null
)
{
if
(
htmlBody
!=
null
)
{
Map
<
String
,
Object
>
dataMap
=
parseHtmlByMicroAccount
(
htmlBody
,
endDate
);
Map
<
String
,
Object
>
dataMap
=
parseHtmlByMicroAccount
(
htmlBody
,
endDate
);
if
(
dataMap
!=
null
&&
dataMap
.
size
()>
0
){
if
(
dataMap
!=
null
&&
dataMap
.
size
()>
0
){
...
...
src/main/java/com/zhiwei/wangyi/parse/WangyiNewParse.java
View file @
c3de2f29
...
@@ -19,6 +19,7 @@ import com.zhiwei.wangyi.bean.WangYiNews;
...
@@ -19,6 +19,7 @@ import com.zhiwei.wangyi.bean.WangYiNews;
public
class
WangyiNewParse
{
public
class
WangyiNewParse
{
private
static
Logger
logger
=
LogManager
.
getLogger
(
WangyiNewParse
.
class
);
private
static
Logger
logger
=
LogManager
.
getLogger
(
WangyiNewParse
.
class
);
private
static
boolean
finish
=
true
;
private
static
boolean
finish
=
true
;
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
/**
/**
* @Title: getWYHistory
* @Title: getWYHistory
* @Description: TODO(根据文章地址解析网易号历史文章)
* @Description: TODO(根据文章地址解析网易号历史文章)
...
@@ -38,7 +39,7 @@ public class WangyiNewParse {
...
@@ -38,7 +39,7 @@ public class WangyiNewParse {
{
{
String
url
=
"http://c.m.163.com/nc/subscribe/list/"
+
tid
+
"/all/"
+
page
*
20
+
"-20.html"
;
String
url
=
"http://c.m.163.com/nc/subscribe/list/"
+
tid
+
"/all/"
+
page
*
20
+
"-20.html"
;
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
String
htmlBody
=
H
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
String
htmlBody
=
h
ttpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
if
(
htmlBody
!=
null
)
if
(
htmlBody
!=
null
)
{
{
List
<
WangYiNews
>
wyList
=
analysis
(
htmlBody
,
endTime
);
List
<
WangYiNews
>
wyList
=
analysis
(
htmlBody
,
endTime
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment