Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
articlenewscrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
articlenewscrawler
Commits
e5ce0110
Commit
e5ce0110
authored
Feb 10, 2018
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
参数修改
parent
6e601527
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
28 additions
and
18 deletions
+28
-18
src/main/java/com/zhiwei/parse/QQKB.java
+3
-3
src/main/java/com/zhiwei/parse/analysis/FenghuangCommentAnalysis.java
+7
-8
src/test/java/com/zhiwei/crawler/QQKBCommentExample.java
+15
-6
src/test/java/com/zhiwei/crawler/WangyiCommentExample.java
+3
-1
No files found.
src/main/java/com/zhiwei/parse/QQKB.java
View file @
e5ce0110
...
...
@@ -87,11 +87,11 @@ public class QQKB {
* @param article_id
* @return
*/
public
static
List
<
Map
<
String
,
Object
>>
getQQKBCommentData
(
String
cookie
,
String
url
)
{
public
static
List
<
Map
<
String
,
Object
>>
getQQKBCommentData
(
String
url
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
String
comment_id
=
getCid
(
url
);
String
article_id
=
url
.
split
(
"/"
)[
4
];
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
null
);
try
{
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQKBCommentParamMap
(
comment_id
,
article_id
);
int
i
=
1
;
...
...
@@ -100,7 +100,7 @@ public class QQKB {
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.cnews.qq.com/getQQNewsComment"
,
headerMap
,
paramMap
);
// System.out.println(result);
paramMap
.
clear
();
List
<
Map
<
String
,
Object
>>
lists
=
qqkbCommentAnalysis
.
getCommentData
(
result
,
cookie
,
comment_id
,
article_id
);
List
<
Map
<
String
,
Object
>>
lists
=
qqkbCommentAnalysis
.
getCommentData
(
result
,
null
,
comment_id
,
article_id
);
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
break
;
}
...
...
src/main/java/com/zhiwei/parse/analysis/FenghuangCommentAnalysis.java
View file @
e5ce0110
...
...
@@ -42,16 +42,15 @@ public class FenghuangCommentAnalysis {
*/
public
String
getdocUrl
(
String
url
)
{
try
{
if
(
url
.
contains
(
"/a"
))
{
url
=
url
.
replace
(
":"
,
"%3A"
);
url
=
"https://user.iclient.ifeng.com/Social_Api_Comment/getCommentList?comments_url="
+
url
+
"&hasChild=1&limit=30&page="
;
// System.out.println(url);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
null
);
result
=
result
.
split
(
"commentsUrl = '"
)[
1
].
split
(
"',"
)[
0
];
System
.
out
.
println
(
result
);
if
(
result
.
contains
(
"/a"
))
{
result
=
result
.
replace
(
":"
,
"%3A"
);
url
=
"https://user.iclient.ifeng.com/Social_Api_Comment/getCommentList?comments_url="
+
result
+
"&hasChild=1&limit=30&page="
;
}
else
{
String
docUrl
=
""
;
// if(url.contains("?")) {
// url = url.split("\\?")[0];
// }
// docUrl = url.split("//")[1].split("/")[1];
docUrl
=
url
.
substring
(
url
.
length
()-
8
,
url
.
length
());
url
=
"https://user.iclient.ifeng.com/Social_Api_Comment/getCommentList?comments_url=sub_"
+
docUrl
+
"&hasChild=1&limit=30&page="
;
...
...
src/test/java/com/zhiwei/crawler/QQKBCommentExample.java
View file @
e5ce0110
package
com
.
zhiwei
.
crawler
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Map
;
import
org.junit.Test
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.QQKB
;
public
class
QQKBCommentExample
{
//天天快报与腾讯新闻都可用 不用cookie
@Test
public
void
qqkbCommentTest
()
{
String
cookie
=
"pt2gguin=o0497332654; RK=rNiJG0RRet; ptcz=eddb2b402bdd8edb07d61b33c48be64a5d872b5142ae43783fcffbe66757f55e; pgv_pvid=686711225; o_cookie=497332654; pac_uid=1_497332654; pgv_pvi=6052195328; pgv_si=s6545613824; _qpsvr_localtk=0.8777812441478456; uin=o0497332654; ptisp=ctc; pgv_info=ssid=s6960235172; tvfe_boss_uuid=4391dae664ebb053"
;
String
url
=
"https://kuaibao.qq.com/s/NEW2018021000440000"
;
List
<
Map
<
String
,
Object
>>
dataList
=
QQKB
.
getQQKBCommentData
(
cookie
,
url
);
String
url
=
"https://kuaibao.qq.com/s/20180116C0EA8G00"
;
List
<
Map
<
String
,
Object
>>
dataList
=
QQKB
.
getQQKBCommentData
(
url
);
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"reply_id"
);
//id
headList
.
add
(
"like"
);
//点赞数
headList
.
add
(
"name"
);
//呢称
headList
.
add
(
"reply_num"
);
//回复数
headList
.
add
(
"time"
);
//时间
headList
.
add
(
"content"
);
//内容
System
.
out
.
println
(
dataList
.
size
());
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
poi
.
exportExcel
(
"D:\\crawlerdata\\快报评论采集.xlsx"
,
"sada"
,
headList
,
dataList
);
}
...
...
src/test/java/com/zhiwei/crawler/WangyiCommentExample.java
View file @
e5ce0110
...
...
@@ -14,7 +14,9 @@ public class WangyiCommentExample {
//若出错 可能数据有重复 以id为准
@Test
public
void
wangyiCommentTest
()
{
String
id
=
"D77CENT50001875P"
;
String
url
=
"http://news.163.com/18/0210/09/DA9B8PVJ000189FH.html"
;
String
id
=
url
.
split
(
"/"
)[
6
].
split
(
".ht"
)[
0
];
List
<
Map
<
String
,
Object
>>
lists
=
Wangyi
.
getWangyiCommentData
(
id
);
System
.
out
.
println
(
lists
.
size
());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment