Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
articlenewscrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
articlenewscrawler
Commits
6e601527
Commit
6e601527
authored
Feb 10, 2018
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
快报参数修改
parent
a8ebdd2c
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
51 additions
and
16 deletions
+51
-16
src/main/java/com/zhiwei/httpclient/HeadGet.java
+11
-3
src/main/java/com/zhiwei/parse/QQKB.java
+32
-3
src/test/java/com/zhiwei/crawler/QQKBCommentCountExample.java
+2
-3
src/test/java/com/zhiwei/crawler/QQKBCommentExample.java
+5
-6
src/test/java/com/zhiwei/crawler/YidianzixunCommentExample.java
+1
-1
No files found.
src/main/java/com/zhiwei/httpclient/HeadGet.java
View file @
6e601527
...
...
@@ -644,11 +644,19 @@ public class HeadGet {
return
headerMap
;
}
/**
 * Builds the HTTP header map used for QQ Kuaibao comment requests.
 *
 * @param cookie value to send as the {@code Cookie} header; may be {@code null}
 * @return a new mutable map holding a single {@code "Cookie"} entry when
 *         {@code cookie} is non-null, otherwise an empty map
 */
public static Map<String, String> getQQkuaiCommentHeaderMap(String cookie) {
    final Map<String, String> headers = new HashMap<String, String>();
    // No cookie supplied: hand back the empty map rather than storing a null value.
    if (cookie == null) {
        return headers;
    }
    headers.put("Cookie", cookie);
    return headers;
}
public
static
void
main
(
String
[]
args
)
{
String
url
=
"http
://so.bbs.xiaomi.cn/?q=%E5%B0%8F%E7%B1%B3%20%E7%94%B5%E9%A5%AD%E7%85%B2%20%E5%BC%80%E8%A3%82&p=1&fid=0&time=63072000&order=1
"
;
String
url
=
"http
s://view.inews.qq.com/a/NEW2018021000440002
"
;
String
cookie
=
"mstuid=1518141097798_2540; Hm_lvt_71558e7b4aa822e282e758f8dc0b88b0=1518141098; lastsource=so.bbs.xiaomi.cn; mstz=||795199218.38||http%3A%2F%2Fso.bbs.xiaomi.cn%2F%3Fq%3D%25e5%25b0%258f%25e7%25b1%25b3%2520%25e7%2594%25b5%25e9%25a5%25ad%25e7%2585%25b2%2520%25e5%25bc%2580%25e8%25a3%2582%7Cp%3D1%7Cfid%3D0%7Ctime%3D31536000%7Corder%3D1|http%3A%2F%2Fso.bbs.xiaomi.cn%2F%3Fq%3D%25e5%25b0%258f%25e7%25b1%25b3%2520%25e7%2594%25b5%25e9%25a5%25ad%25e7%2585%25b2%2520%25e5%25bc%2580%25e8%25a3%2582%7Cp%3D1%7Cfid%3D0%7Ctime%3D63072000%7Corder%3D1; xm_vistor=1518141097798_2540_1518141097798-1518142530797; msttime=http%3A%2F%2Fso.bbs.xiaomi.cn%2F%3Fq%3D%25E5%25B0%258F%25E7%25B1%25B3%2520%25E7%2594%25B5%25E9%25A5%25AD%25E7%2585%25B2%2520%25E5%25BC%2580%25E8%25A3%2582%26p%3D1%26fid%3D0%26time%3D63072000%26order%3D1; msttime1=http%3A%2F%2Fso.bbs.xiaomi.cn%2F%3Fq%3D%25E5%25B0%258F%25E7%25B1%25B3%2520%25E7%2594%25B5%25E9%25A5%25AD%25E7%2585%25B2%2520%25E5%25BC%2580%25E8%25A3%2582%26p%3D1%26fid%3D0%26time%3D63072000%26order%3D1; Hm_lpvt_71558e7b4aa822e282e758f8dc0b88b0=1518142531"
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
get
xiaomiShequByWord
HeaderMap
(
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
get
QQkuaiComment
HeaderMap
(
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
null
);
System
.
out
.
println
(
result
);
System
.
out
.
println
(
result
.
length
());
}
...
...
src/main/java/com/zhiwei/parse/QQKB.java
View file @
6e601527
...
...
@@ -12,6 +12,7 @@ import com.zhiwei.httpclient.HeadGet;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.QQKBAccountAnalysis
;
import
com.zhiwei.parse.analysis.QQKBCommentAnalysis
;
import
com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK
;
import
com.zhiwei.zhiweiTools.tools.ZhiWeiTools
;
public
class
QQKB
{
...
...
@@ -86,8 +87,10 @@ public class QQKB {
* @param article_id
* @return
*/
public
static
List
<
Map
<
String
,
Object
>>
getQQKBCommentData
(
String
cookie
,
String
comment_id
,
String
article_id
)
{
public
static
List
<
Map
<
String
,
Object
>>
getQQKBCommentData
(
String
cookie
,
String
url
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
String
comment_id
=
getCid
(
url
);
String
article_id
=
url
.
split
(
"/"
)[
4
];
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
cookie
);
try
{
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQKBCommentParamMap
(
comment_id
,
article_id
);
...
...
@@ -95,7 +98,7 @@ public class QQKB {
while
(
true
)
{
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.cnews.qq.com/getQQNewsComment"
,
headerMap
,
paramMap
);
System
.
out
.
println
(
result
);
//
System.out.println(result);
paramMap
.
clear
();
List
<
Map
<
String
,
Object
>>
lists
=
qqkbCommentAnalysis
.
getCommentData
(
result
,
cookie
,
comment_id
,
article_id
);
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
...
...
@@ -116,13 +119,39 @@ public class QQKB {
/**
 * Resolves the comment id ("cid") for an article page by downloading the page
 * and extracting the id from its embedded script data.
 *
 * <p>Two page flavours are recognised by substring match on the URL:
 * {@code view.inews.qq.com} (checked first) and {@code kuaibao.qq.com}.
 *
 * @param url article page URL
 * @return the extracted cid; {@code null} when the URL matches neither host
 *         or when anything goes wrong while fetching or parsing the page
 */
private static String getCid(String url) {
    try {
        boolean inewsPage = url.contains("view.inews.qq.com");
        boolean kuaibaoPage = !inewsPage && url.contains("kuaibao.qq.com");
        // Unknown host: give up before issuing any request.
        if (!inewsPage && !kuaibaoPage) {
            return null;
        }

        String html = HttpClient.executeHttpRequestGet(url, null);

        if (inewsPage) {
            // The page embeds a JSON object assigned to window.__initData.
            String afterMarker = html.split("window.__initData = ")[1];
            String initData = afterMarker.split(";</script>")[0];
            JSONObject root = JSONObject.parseObject(initData);
            return root.getJSONObject("content").getString("cid");
        }

        // kuaibao pages declare the id as a script variable; the first and
        // last characters are dropped (presumably the surrounding quotes).
        String rawCid = html.split("var commentId = ")[1].split(";")[0];
        return rawCid.substring(1, rawCid.length() - 1);
    } catch (Exception e) {
        // Best-effort lookup: any failure (null url, network error, missing
        // marker, malformed JSON) is reported to the caller as null.
        return null;
    }
}
/**
*
* @Description 获取天天快报评论数
* @param cookie
* @param comment_id
* @param article_id
* @return
*/
public
static
int
getCommentCount
(
String
cookie
,
String
comment_id
,
String
article_id
)
{
public
static
int
getCommentCount
(
String
cookie
,
String
url
)
{
String
comment_id
=
getCid
(
url
);
String
article_id
=
url
.
split
(
"/"
)[
4
];
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
cookie
);
try
{
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQKBCommentParamMap
(
comment_id
,
article_id
);
...
...
src/test/java/com/zhiwei/crawler/QQKBCommentCountExample.java
View file @
6e601527
...
...
@@ -10,10 +10,9 @@ public class QQKBCommentCountExample {
@Test
public
void
qqkbCommentCountTest
()
{
String
cookie
=
"phone_id=;%20phone_token=;%20luin=o0497332654;%20lskey=0003000049dd058f533cbebb240223ede63b864224f7eebe0f4aeca6a623572bb290a5800741d191a5768bb0;%20uin=o0497332654;%20skey=MIZmc2Oel3;%20sigA2=4282ABA809551D3534C72F999EE8F2A75219ED9452DEF04E4CBCE6B680C2C893C3E1BA617F5E0F387E558888B2ABEDFE87A4A25B16F9066C1154B2BC7A1133CA7B356AB9D3BA26ED;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwgGT4n96Oq-jHALnMUe8UzpoJghQDouvfSSWdh-JOdgAm3jRJUPbux6fcIPghoNxo24xdED8ennAANksJuHiwdw;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0"
;
String
comment_id
=
"2334642420"
;
String
article_id
=
"20180103A09WKN00"
;
String
url
=
""
;
int
i
=
QQKB
.
getCommentCount
(
cookie
,
comment_id
,
article_id
);
int
i
=
QQKB
.
getCommentCount
(
cookie
,
url
);
System
.
out
.
println
(
i
);
}
...
...
src/test/java/com/zhiwei/crawler/QQKBCommentExample.java
View file @
6e601527
...
...
@@ -12,15 +12,14 @@ public class QQKBCommentExample {
@Test
public
void
qqkbCommentTest
()
{
String
cookie
=
"phone_id=;%20phone_token=;%20luin=o0497332654;%20lskey=0003000049dd058f533cbebb240223ede63b864224f7eebe0f4aeca6a623572bb290a5800741d191a5768bb0;%20uin=o0497332654;%20skey=MIZmc2Oel3;%20sigA2=4282ABA809551D3534C72F999EE8F2A75219ED9452DEF04E4CBCE6B680C2C893C3E1BA617F5E0F387E558888B2ABEDFE87A4A25B16F9066C1154B2BC7A1133CA7B356AB9D3BA26ED;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwgGT4n96Oq-jHALnMUe8UzpoJghQDouvfSSWdh-JOdgAm3jRJUPbux6fcIPghoNxo24xdED8ennAANksJuHiwdw;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0"
;
String
comment_id
=
"2334642420"
;
String
article_id
=
"20180103A09WKN00"
;
String
cookie
=
"pt2gguin=o0497332654; RK=rNiJG0RRet; ptcz=eddb2b402bdd8edb07d61b33c48be64a5d872b5142ae43783fcffbe66757f55e; pgv_pvid=686711225; o_cookie=497332654; pac_uid=1_497332654; pgv_pvi=6052195328; pgv_si=s6545613824; _qpsvr_localtk=0.8777812441478456; uin=o0497332654; ptisp=ctc; pgv_info=ssid=s6960235172; tvfe_boss_uuid=4391dae664ebb053"
;
String
url
=
"https://kuaibao.qq.com/s/NEW2018021000440000"
;
List
<
Map
<
String
,
Object
>>
dataList
=
QQKB
.
getQQKBCommentData
(
cookie
,
comment_id
,
article_id
);
List
<
Map
<
String
,
Object
>>
dataList
=
QQKB
.
getQQKBCommentData
(
cookie
,
url
);
System
.
out
.
println
(
dataList
.
size
());
}
}
}
src/test/java/com/zhiwei/crawler/YidianzixunCommentExample.java
View file @
6e601527
...
...
@@ -11,7 +11,7 @@ public class YidianzixunCommentExample {
@Test
public
void
yidianzixunCommentTest
()
{
String
url
=
"http://www.yidianzixun.com/article/0
HjrjVFY
"
;
String
url
=
"http://www.yidianzixun.com/article/0
ILHigvv
"
;
List
<
Map
<
String
,
Object
>>
lists
=
Yidianzixun
.
getYidianzixunCommentData
(
url
);
System
.
out
.
println
(
lists
.
size
());
for
(
Map
<
String
,
Object
>
map
:
lists
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment