Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
articlenewscrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
articlenewscrawler
Commits
2a35dd02
Commit
2a35dd02
authored
Feb 25, 2019
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
提升版本 修改脉脉采集
parent
b3d545a3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
35 changed files
with
377 additions
and
306 deletions
+377
-306
pom.xml
+1
-1
src/main/java/com/zhiwei/httpclient/HttpClient.java
+17
-0
src/main/java/com/zhiwei/parse/Aika.java
+4
-3
src/main/java/com/zhiwei/parse/Aiqiyi.java
+1
-1
src/main/java/com/zhiwei/parse/Chejia.java
+4
-4
src/main/java/com/zhiwei/parse/Fenghuang.java
+4
-3
src/main/java/com/zhiwei/parse/Maimai.java
+98
-5
src/main/java/com/zhiwei/parse/Pcauto.java
+4
-3
src/main/java/com/zhiwei/parse/QQKB.java
+0
-4
src/main/java/com/zhiwei/parse/QQKandian.java
+2
-2
src/main/java/com/zhiwei/parse/QicheHome.java
+3
-3
src/main/java/com/zhiwei/parse/SinaKeji.java
+3
-3
src/main/java/com/zhiwei/parse/SouBao.java
+0
-0
src/main/java/com/zhiwei/parse/Souhu.java
+4
-3
src/main/java/com/zhiwei/parse/TechTx.java
+4
-3
src/main/java/com/zhiwei/parse/Wangyi.java
+2
-1
src/main/java/com/zhiwei/parse/Xueqiu.java
+0
-1
src/main/java/com/zhiwei/parse/Yiche.java
+6
-5
src/main/java/com/zhiwei/parse/Youku.java
+0
-2
src/main/java/com/zhiwei/parse/analysis/AiqiyiByWordAnalysis.java
+0
-6
src/main/java/com/zhiwei/parse/analysis/FenghuangCommentAnalysis.java
+5
-5
src/main/java/com/zhiwei/parse/analysis/MaimaiBywordAnalysis.java
+9
-6
src/main/java/com/zhiwei/parse/analysis/SouhuCommentAnalysis.java
+3
-3
src/main/java/com/zhiwei/parse/shipin/QQTV.java
+2
-2
src/main/java/com/zhiwei/parse/shipin/SohuTV.java
+1
-0
src/test/java/com/zhiwei/Comment/MaimaiCommentCountTest.java
+44
-40
src/test/java/com/zhiwei/crawler/AiqiyiByWordExample.java
+45
-45
src/test/java/com/zhiwei/crawler/MaimaiBywordExample.java
+5
-11
src/test/java/com/zhiwei/crawler/SouhuCommentCountExample.java
+3
-3
src/test/java/com/zhiwei/keyword/YoukuKeyWordTest.java
+0
-68
src/test/java/com/zhiwei/shipin/BilibiliTest.java
+2
-2
src/test/java/com/zhiwei/shipin/DouyinHotExample.java
+27
-27
src/test/java/com/zhiwei/shipin/QQTVTest.java
+1
-2
src/test/java/com/zhiwei/shipin/SohuTVTest.java
+38
-39
src/test/java/com/zhiwei/shipin/YoukuKeyWordTest.java
+35
-0
No files found.
pom.xml
View file @
2a35dd02
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
<modelVersion>
4.0.0
</modelVersion>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
<artifactId>
articlenewscrawler
</artifactId>
<artifactId>
articlenewscrawler
</artifactId>
<version>
0.0.
8
-SNAPSHOT
</version>
<version>
0.0.
9
-SNAPSHOT
</version>
<name>
articlenewscrawler
</name>
<name>
articlenewscrawler
</name>
<description>
采集凤凰,一点资讯,搜狐历时文章和文章评论
</description>
<description>
采集凤凰,一点资讯,搜狐历时文章和文章评论
</description>
...
...
src/main/java/com/zhiwei/httpclient/HttpClient.java
View file @
2a35dd02
...
@@ -9,6 +9,7 @@ import org.slf4j.LoggerFactory;
...
@@ -9,6 +9,7 @@ import org.slf4j.LoggerFactory;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
okhttp3.Response
;
import
okhttp3.Response
;
...
@@ -32,7 +33,23 @@ public class HttpClient {
...
@@ -32,7 +33,23 @@ public class HttpClient {
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
);
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
);
return
null
;
return
null
;
}
}
}
/**
 * Performs a synchronous HTTP GET through the shared {@code httpBoot} client and
 * returns the response body as text.
 *
 * @param url       address to request
 * @param proxy     proxy holder handed to {@code httpBoot.syncCall}
 * @param headerMap request headers handed to {@code RequestUtils.wrapGet}
 * @return the response body, or {@code null} when the request or the body read fails
 *         (the failure is logged, never rethrown)
 */
public static String executeHttpRequestGet(String url, ProxyHolder proxy, Map<String, String> headerMap) {
    try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy)) {
        return response.body().string();
    } catch (Exception e) {
        // SLF4J takes a trailing Throwable as the exception to log, so it never fills
        // a placeholder; pass the URL for "{}" and keep e last for the stack trace.
        logger.error("httpClient 获取数据出现问题:{}", url, e);
        return null;
    }
}
}
public
static
String
executeHttpRequestPost
(
String
url
,
Proxy
proxy
,
Map
<
String
,
String
>
headerMap
,
Map
<
String
,
Object
>
paramMap
)
{
public
static
String
executeHttpRequestPost
(
String
url
,
Proxy
proxy
,
Map
<
String
,
String
>
headerMap
,
Map
<
String
,
Object
>
paramMap
)
{
...
...
src/main/java/com/zhiwei/parse/Aika.java
View file @
2a35dd02
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.parse.analysis.AikaCommentAnalysis
;
import
com.zhiwei.parse.analysis.AikaCommentAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -22,7 +23,7 @@ public class Aika {
...
@@ -22,7 +23,7 @@ public class Aika {
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
@SuppressWarnings
(
"unchecked"
)
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
Map
<
String
,
Object
>>
getAikaComment
(
String
url
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getAikaComment
(
String
url
,
Proxy
Holder
proxy
)
{
String
commentId
=
getCommentId
(
url
);
String
commentId
=
getCommentId
(
url
);
if
(
nonNull
(
commentId
))
{
if
(
nonNull
(
commentId
))
{
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
...
...
src/main/java/com/zhiwei/parse/Aiqiyi.java
View file @
2a35dd02
...
@@ -29,7 +29,7 @@ public class Aiqiyi {
...
@@ -29,7 +29,7 @@ public class Aiqiyi {
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getAiqiyiBywordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getAiqiyiBywordHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
try
{
try
{
for
(
int
i
=
1
;
i
<=
20
;
i
++)
{
for
(
int
i
=
1
;
i
<=
5
;
i
++)
{
int
count
=
dataList
.
size
();
int
count
=
dataList
.
size
();
String
url
=
"https://so.iqiyi.com/so/q_"
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"_ctg__t_0_page_"
+
i
+
"_p_1_qc_0_rd__site__m_4_bitrate_"
;
String
url
=
"https://so.iqiyi.com/so/q_"
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"_ctg__t_0_page_"
+
i
+
"_p_1_qc_0_rd__site__m_4_bitrate_"
;
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
...
...
src/main/java/com/zhiwei/parse/Chejia.java
View file @
2a35dd02
...
@@ -2,7 +2,6 @@ package com.zhiwei.parse;
...
@@ -2,7 +2,6 @@ package com.zhiwei.parse;
import
static
java
.
util
.
Objects
.
nonNull
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.Date
;
import
java.util.Date
;
...
@@ -17,6 +16,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -17,6 +16,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -34,7 +34,7 @@ public class Chejia {
...
@@ -34,7 +34,7 @@ public class Chejia {
* @param proxy
* @param proxy
* @return
* @return
*/
*/
public
static
int
getChejiaCommentCount
(
String
url
,
Proxy
proxy
)
{
public
static
int
getChejiaCommentCount
(
String
url
,
Proxy
Holder
proxy
)
{
String
id
=
getCommentUrl
(
url
,
proxy
);
String
id
=
getCommentUrl
(
url
,
proxy
);
if
(
nonNull
(
id
))
{
if
(
nonNull
(
id
))
{
System
.
out
.
println
(
id
);
System
.
out
.
println
(
id
);
...
@@ -57,7 +57,7 @@ public class Chejia {
...
@@ -57,7 +57,7 @@ public class Chejia {
* @param proxy
* @param proxy
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getChejiaComment
(
String
url
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getChejiaComment
(
String
url
,
Proxy
Holder
proxy
)
{
String
nUrl
=
getCommentUrl
(
url
,
proxy
);
String
nUrl
=
getCommentUrl
(
url
,
proxy
);
if
(
nonNull
(
nUrl
))
{
if
(
nonNull
(
nUrl
))
{
int
page
=
1
;
int
page
=
1
;
...
@@ -98,7 +98,7 @@ public class Chejia {
...
@@ -98,7 +98,7 @@ public class Chejia {
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
private
static
String
getCommentUrl
(
String
url
,
Proxy
proxy
)
{
private
static
String
getCommentUrl
(
String
url
,
Proxy
Holder
proxy
)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
objectID
=
response
.
body
().
string
().
split
(
"pvTrack.object = "
)[
1
].
split
(
";"
)[
0
].
replace
(
"\""
,
""
);
String
objectID
=
response
.
body
().
string
().
split
(
"pvTrack.object = "
)[
1
].
split
(
";"
)[
0
].
replace
(
"\""
,
""
);
return
"https://reply.autohome.com.cn/api/comments/show.json?appid=21&count=50&id="
+
objectID
;
return
"https://reply.autohome.com.cn/api/comments/show.json?appid=21&count=50&id="
+
objectID
;
...
...
src/main/java/com/zhiwei/parse/Fenghuang.java
View file @
2a35dd02
...
@@ -10,6 +10,7 @@ import java.util.Map;
...
@@ -10,6 +10,7 @@ import java.util.Map;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.FenghuangAccountAnalysis
;
import
com.zhiwei.parse.analysis.FenghuangAccountAnalysis
;
...
@@ -64,7 +65,7 @@ public class Fenghuang {
...
@@ -64,7 +65,7 @@ public class Fenghuang {
* @param docUrl
* @param docUrl
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getFenghuangCommentData
(
String
url
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getFenghuangCommentData
(
String
url
,
Proxy
Holder
proxy
)
{
url
=
fenghuangCommentAnalysis
.
getdocUrl
(
url
,
proxy
);
url
=
fenghuangCommentAnalysis
.
getdocUrl
(
url
,
proxy
);
if
(
url
==
null
)
{
if
(
url
==
null
)
{
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
...
@@ -92,7 +93,7 @@ public class Fenghuang {
...
@@ -92,7 +93,7 @@ public class Fenghuang {
* @param proxy
* @param proxy
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getFenghuangCommentData2
(
String
url
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getFenghuangCommentData2
(
String
url
,
Proxy
Holder
proxy
)
{
url
=
fenghuangCommentAnalysis
.
getdocUrl
(
url
,
proxy
);
url
=
fenghuangCommentAnalysis
.
getdocUrl
(
url
,
proxy
);
if
(
url
==
null
)
{
if
(
url
==
null
)
{
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
...
@@ -118,7 +119,7 @@ public class Fenghuang {
...
@@ -118,7 +119,7 @@ public class Fenghuang {
* @param url
* @param url
* @return
* @return
*/
*/
public
static
Map
<
String
,
Object
>
getFenghuangCommentCount
(
String
url
,
Proxy
proxy
)
{
public
static
Map
<
String
,
Object
>
getFenghuangCommentCount
(
String
url
,
Proxy
Holder
proxy
)
{
url
=
fenghuangCommentAnalysis
.
getdocUrl
(
url
,
proxy
);
url
=
fenghuangCommentAnalysis
.
getdocUrl
(
url
,
proxy
);
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
if
(
url
==
null
)
{
if
(
url
==
null
)
{
...
...
src/main/java/com/zhiwei/parse/Maimai.java
View file @
2a35dd02
...
@@ -9,6 +9,7 @@ import java.util.Collections;
...
@@ -9,6 +9,7 @@ import java.util.Collections;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Objects
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
...
@@ -21,6 +22,7 @@ import com.zhiwei.crawler.proxy.ProxyHolder;
...
@@ -21,6 +22,7 @@ import com.zhiwei.crawler.proxy.ProxyHolder;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.MaimaiBywordAnalysis
;
import
com.zhiwei.parse.analysis.MaimaiBywordAnalysis
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
okhttp3.Response
;
...
@@ -51,7 +53,7 @@ public class Maimai {
...
@@ -51,7 +53,7 @@ public class Maimai {
int
i
=
20
;
int
i
=
20
;
while
(
f
)
{
while
(
f
)
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
Map
<
String
,
Object
>
map
=
maimaiBywordAnalysis
.
getData
(
result
,
time
);
Map
<
String
,
Object
>
map
=
maimaiBywordAnalysis
.
getData
(
result
,
time
,
key
);
f
=
(
boolean
)
map
.
get
(
"hasMore"
);
f
=
(
boolean
)
map
.
get
(
"hasMore"
);
List
<
Map
<
String
,
Object
>>
daList
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
List
<
Map
<
String
,
Object
>>
daList
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
if
(
daList
!=
null
&&
!
daList
.
isEmpty
())
{
if
(
daList
!=
null
&&
!
daList
.
isEmpty
())
{
...
@@ -89,7 +91,7 @@ public class Maimai {
...
@@ -89,7 +91,7 @@ public class Maimai {
int
i
=
20
;
int
i
=
20
;
while
(
f
)
{
while
(
f
)
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
Map
<
String
,
Object
>
map
=
maimaiBywordAnalysis
.
getDataByNoName
(
result
,
time
);
Map
<
String
,
Object
>
map
=
maimaiBywordAnalysis
.
getDataByNoName
(
result
,
time
,
key
);
f
=
(
boolean
)
map
.
get
(
"hasMore"
);
f
=
(
boolean
)
map
.
get
(
"hasMore"
);
List
<
Map
<
String
,
Object
>>
daList
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
List
<
Map
<
String
,
Object
>>
daList
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
if
(
daList
!=
null
&&
daList
.
size
()
>
0
)
{
if
(
daList
!=
null
&&
daList
.
size
()
>
0
)
{
...
@@ -129,6 +131,40 @@ public class Maimai {
...
@@ -129,6 +131,40 @@ public class Maimai {
map
.
put
(
"gid"
,
data
.
getLong
(
"id"
));
map
.
put
(
"gid"
,
data
.
getLong
(
"id"
));
map
.
put
(
"title"
,
data
.
getString
(
"text"
));
map
.
put
(
"title"
,
data
.
getString
(
"text"
));
map
.
put
(
"author"
,
data
.
getString
(
"author"
));
map
.
put
(
"author"
,
data
.
getString
(
"author"
));
map
.
put
(
"userId"
,
data
.
getString
(
"mmid"
));
return
map
;
}
catch
(
Exception
e
)
{
logger
.
error
(
" 脉脉 转评攒 获取失败 {}"
,
e
);
}
return
Collections
.
emptyMap
();
}
/**
* //https://maimai.cn/web/gossip_detail?encode_id=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MTk2MzEyNjYsImlhdCI6MTU0ODI5NzI5NX0.N6SPmcf-fyitLNomzY-a8BEY31eseYnvG7RTUQ3jxYY
* @Description 获取脉脉转评赞
* @param url
* @param proxy
* @return
*/
public
static
Map
<
String
,
Object
>
getMaiaiCount
(
String
url
,
String
cookie
,
ProxyHolder
proxy
)
{
Map
<
String
,
Object
>
headers
=
new
HashMap
<>();
if
(
Objects
.
nonNull
(
cookie
)
&&
!
cookie
.
isEmpty
())
{
headers
.
put
(
"cookie"
,
cookie
);
}
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headers
),
proxy
)){
String
result
=
response
.
body
().
string
();
result
=
result
.
split
(
"JSON.parse\\(\""
)[
1
].
split
(
"\"\\);\\</script\\>"
)[
0
];
result
=
ZhiWeiTools
.
decodeUnicode
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
JSONObject
data
=
json
.
getJSONObject
(
"data"
).
getJSONObject
(
"gossip"
);
map
.
put
(
"like"
,
data
.
getInteger
(
"likes"
));
map
.
put
(
"spreads"
,
data
.
getInteger
(
"spreads"
));
map
.
put
(
"cmts"
,
data
.
getInteger
(
"cmts"
));
map
.
put
(
"gid"
,
data
.
getLong
(
"id"
));
map
.
put
(
"title"
,
data
.
getString
(
"text"
));
map
.
put
(
"author"
,
data
.
getString
(
"author"
));
map
.
put
(
"userId"
,
data
.
getString
(
"mmid"
));
return
map
;
return
map
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
" 脉脉 转评攒 获取失败 {}"
,
e
);
logger
.
error
(
" 脉脉 转评攒 获取失败 {}"
,
e
);
...
@@ -144,9 +180,13 @@ public class Maimai {
...
@@ -144,9 +180,13 @@ public class Maimai {
* @return
* @return
*/
*/
@SuppressWarnings
(
"unchecked"
)
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
Map
<
String
,
Object
>>
getMaimaiCommentList
(
String
url
,
ProxyHolder
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getMaimaiCommentList
(
String
url
,
String
cookie
,
ProxyHolder
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
Map
<
String
,
Object
>
mmid
=
getMaiaiCount
(
url
,
proxy
);
Map
<
String
,
Object
>
mmid
=
getMaiaiCount
(
url
,
cookie
,
proxy
);
Map
<
String
,
Object
>
headers
=
new
HashMap
<>();
if
(
Objects
.
nonNull
(
cookie
)
&&
!
cookie
.
isEmpty
())
{
headers
.
put
(
"cookie"
,
cookie
);
}
if
(
mmid
!=
null
)
{
if
(
mmid
!=
null
)
{
String
gid
=
String
.
valueOf
(
mmid
.
get
(
"gid"
));
String
gid
=
String
.
valueOf
(
mmid
.
get
(
"gid"
));
boolean
more
=
true
;
boolean
more
=
true
;
...
@@ -154,7 +194,10 @@ public class Maimai {
...
@@ -154,7 +194,10 @@ public class Maimai {
while
(
more
)
{
while
(
more
)
{
try
{
try
{
String
link
=
"https://maimai.cn/sdk/web/gossip/getcmts?gid="
+
gid
+
"&page="
+
page
+
"&count=50&hotcmts_limit_count=100"
;
String
link
=
"https://maimai.cn/sdk/web/gossip/getcmts?gid="
+
gid
+
"&page="
+
page
+
"&count=50&hotcmts_limit_count=100"
;
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
link
),
proxy
).
body
().
string
();
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
link
,
headers
),
proxy
).
body
().
string
();
if
(
Objects
.
nonNull
(
cookie
)
&&
!
cookie
.
isEmpty
())
{
ZhiWeiTools
.
sleep
(
2000
);
}
if
(
htmlBody
!=
null
&&
htmlBody
.
length
()>
0
)
{
if
(
htmlBody
!=
null
&&
htmlBody
.
length
()>
0
)
{
JSONObject
dataJson
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
dataJson
=
JSONObject
.
parseObject
(
htmlBody
);
JSONArray
commentJson
=
dataJson
.
getJSONArray
(
"comments"
);
JSONArray
commentJson
=
dataJson
.
getJSONArray
(
"comments"
);
...
@@ -184,4 +227,54 @@ public class Maimai {
...
@@ -184,4 +227,54 @@ public class Maimai {
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
/**
 * Searches Maimai contacts by keyword and walks the result pages until an empty
 * page is returned or a request/parse error occurs.
 *
 * @param word   search keyword; URL-encoded before being placed in the query
 * @param cookie value for the {@code cookie} request header; skipped when null or empty
 * @param proxy  proxy handed to {@code httpBoot.syncCall}
 * @return the users collected so far (possibly empty, never {@code null}); entries
 *         that {@code getUserMap} could not parse are left out
 */
public static List<Map<String, Object>> getUserList(String word, String cookie, Proxy proxy) {
    String encodedWord = URLCodeUtil.getURLEncode(word, "utf-8");
    String url = "https://maimai.cn/search/contacts?count=50&query=" + encodedWord
            + "&dist=0&searchTokens=&highlight=true&jsononly=1&pc=1&page=";
    List<Map<String, Object>> dataList = new ArrayList<>();
    Map<String, Object> headers = new HashMap<>();
    logger.debug(" 脉脉用户搜索 url {}", url);
    // Same guard as the other Maimai requests: never send a null/empty cookie header.
    if (cookie != null && !cookie.isEmpty()) {
        headers.put("cookie", cookie);
    }
    headers.put("referer", "https://maimai.cn/web/search_center?type=contact&query=" + encodedWord
            + "&highlight=true");
    int page = 0;
    while (true) {
        try {
            JSONArray contacts;
            // Close the response as soon as the body is parsed so the connection is
            // not held open during the politeness delay below.
            try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url + page, headers), proxy)) {
                contacts = JSONObject.parseObject(response.body().string())
                        .getJSONObject("data")
                        .getJSONArray("contacts");
            }
            for (int i = 0; i < contacts.size(); i++) {
                Map<String, Object> user = getUserMap(contacts.getJSONObject(i));
                // getUserMap returns an empty map when parsing fails; skip those placeholders.
                if (!user.isEmpty()) {
                    dataList.add(user);
                }
            }
            page++;
            logger.info(" 采集到 {} 页 ,一共采集到 {} 条", page, dataList.size());
            // An empty page marks the end of the results; stop before sleeping.
            if (contacts.isEmpty()) {
                break;
            }
            ZhiWeiTools.sleep(2000);
        } catch (Exception e) {
            logger.error(" 脉脉用户搜索采集失败 word {} page {}", word, page, e);
            break;
        }
    }
    return dataList;
}
/**
 * Flattens one entry of the Maimai contact-search result into a plain map.
 *
 * <p>{@code uid} is read from the entry itself; the remaining fields are read from
 * its nested {@code contact} object.
 *
 * @param data one element of the {@code contacts} array
 * @return a map with the keys {@code id}, {@code name}, {@code gender}, {@code url},
 *         {@code rank}, {@code compos} and {@code city}; an empty immutable map when
 *         the entry cannot be parsed
 */
private static Map<String, Object> getUserMap(JSONObject data) {
    try {
        JSONObject contact = data.getJSONObject("contact");
        Map<String, Object> user = new HashMap<>();
        user.put("id", data.getString("uid"));
        user.put("name", contact.getString("name"));
        user.put("gender", contact.getInteger("gender"));
        user.put("url", "https://maimai.cn/contact/detail/" + contact.getString("encode_mmid"));
        user.put("rank", contact.getInteger("rank"));
        user.put("compos", contact.getString("compos"));
        user.put("city", contact.getString("city"));
        return user;
    } catch (Exception e) {
        // Pass the exception so the cause of the parse failure is logged.
        logger.error(" 脉脉用户解析出错 ", e);
    }
    return Collections.emptyMap();
}
}
}
src/main/java/com/zhiwei/parse/Pcauto.java
View file @
2a35dd02
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
...
@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory;
...
@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.parse.analysis.PcautoCommentAnalysis
;
import
com.zhiwei.parse.analysis.PcautoCommentAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -24,7 +25,7 @@ public class Pcauto {
...
@@ -24,7 +25,7 @@ public class Pcauto {
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
@SuppressWarnings
(
"unchecked"
)
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
Map
<
String
,
Object
>>
getPcAutoComment
(
String
url
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getPcAutoComment
(
String
url
,
Proxy
Holder
proxy
)
{
String
newUrl
=
getCommentUrl
(
url
,
proxy
);
String
newUrl
=
getCommentUrl
(
url
,
proxy
);
if
(
nonNull
(
newUrl
))
{
if
(
nonNull
(
newUrl
))
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
...
@@ -53,7 +54,7 @@ public class Pcauto {
...
@@ -53,7 +54,7 @@ public class Pcauto {
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
private
static
String
getCommentUrl
(
String
url
,
Proxy
proxy
)
{
private
static
String
getCommentUrl
(
String
url
,
Proxy
Holder
proxy
)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
{
try
{
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
"https://cmt.pcauto.com.cn/action/topic/get_data.jsp?url="
+
url
),
proxy
).
body
().
string
();
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
"https://cmt.pcauto.com.cn/action/topic/get_data.jsp?url="
+
url
),
proxy
).
body
().
string
();
...
...
src/main/java/com/zhiwei/parse/QQKB.java
View file @
2a35dd02
...
@@ -11,7 +11,6 @@ import org.slf4j.LoggerFactory;
...
@@ -11,7 +11,6 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.bean.QQkbUser
;
import
com.zhiwei.bean.QQkbUser
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
...
@@ -19,13 +18,10 @@ import com.zhiwei.parse.analysis.QQKBAccountAnalysis;
...
@@ -19,13 +18,10 @@ import com.zhiwei.parse.analysis.QQKBAccountAnalysis;
import
com.zhiwei.parse.analysis.QQKBCommentAnalysis
;
import
com.zhiwei.parse.analysis.QQKBCommentAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
QQKB
{
public
class
QQKB
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
QQKB
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
QQKB
.
class
);
private
static
QQKBAccountAnalysis
qqAccountAnalysis
=
new
QQKBAccountAnalysis
();
private
static
QQKBAccountAnalysis
qqAccountAnalysis
=
new
QQKBAccountAnalysis
();
private
static
QQKBCommentAnalysis
qqkbCommentAnalysis
=
new
QQKBCommentAnalysis
();
private
static
QQKBCommentAnalysis
qqkbCommentAnalysis
=
new
QQKBCommentAnalysis
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
/**
/**
*
*
...
...
src/main/java/com/zhiwei/parse/QQKandian.java
View file @
2a35dd02
...
@@ -32,9 +32,9 @@ public class QQKandian {
...
@@ -32,9 +32,9 @@ public class QQKandian {
public
List
<
QQKandianUser
>
getUser
(
String
name
,
Proxy
proxy
)
{
public
List
<
QQKandianUser
>
getUser
(
String
name
,
Proxy
proxy
)
{
if
(
name
!=
null
&&
name
.
length
()
>
0
)
{
if
(
name
!=
null
&&
name
.
length
()
>
0
)
{
List
<
QQKandianUser
>
dataList
=
new
ArrayList
<
QQKandianUser
>();
List
<
QQKandianUser
>
dataList
=
new
ArrayList
<>();
OkHttpClient
okhttp
=
HttpClientBuilder
.
newInstance
();
OkHttpClient
okhttp
=
HttpClientBuilder
.
newInstance
();
Map
<
String
,
String
>
map
=
new
HashMap
<
String
,
String
>();
Map
<
String
,
String
>
map
=
new
HashMap
<>();
map
.
put
(
"Host"
,
"sou.qq.com"
);
map
.
put
(
"Host"
,
"sou.qq.com"
);
map
.
put
(
"Referer"
,
"https://sou.qq.com/kandian/kd.html?_bid=3216&_wv=3&_wwv=1293&_wvSb=0&hotword=%E7%9F%A5%E5%90%8D%E5%A4%A7V%E7%AB%A0%E6%96%87%E6%B6%89%E6%80%A7%E4%BE%B5"
);
map
.
put
(
"Referer"
,
"https://sou.qq.com/kandian/kd.html?_bid=3216&_wv=3&_wwv=1293&_wvSb=0&hotword=%E7%9F%A5%E5%90%8D%E5%A4%A7V%E7%AB%A0%E6%96%87%E6%B6%89%E6%80%A7%E4%BE%B5"
);
map
.
put
(
"Cookie"
,
"skey=MUzU7gdtRz; uin=o0497332654; RK=rNiJH0RBav; pgv_pvid=8990378504; pt2gguin=o0497332654; ptcz=062d936df33011f468637ee72be262a020a8df79977df7e7bde9c105b2b2ddf6"
);
map
.
put
(
"Cookie"
,
"skey=MUzU7gdtRz; uin=o0497332654; RK=rNiJH0RBav; pgv_pvid=8990378504; pt2gguin=o0497332654; ptcz=062d936df33011f468637ee72be262a020a8df79977df7e7bde9c105b2b2ddf6"
);
...
...
src/main/java/com/zhiwei/parse/QicheHome.java
View file @
2a35dd02
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -11,6 +10,7 @@ import org.slf4j.LoggerFactory;
...
@@ -11,6 +10,7 @@ import org.slf4j.LoggerFactory;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.parse.analysis.QicheHomeKwyWordAnalysis
;
import
com.zhiwei.parse.analysis.QicheHomeKwyWordAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -21,7 +21,7 @@ public class QicheHome {
...
@@ -21,7 +21,7 @@ public class QicheHome {
private
static
QicheHomeKwyWordAnalysis
qicheHomeKwyWordAnalysis
=
new
QicheHomeKwyWordAnalysis
();
private
static
QicheHomeKwyWordAnalysis
qicheHomeKwyWordAnalysis
=
new
QicheHomeKwyWordAnalysis
();
public
static
List
<
Map
<
String
,
Object
>>
getQiCheComment
(
String
articleid
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getQiCheComment
(
String
articleid
,
Proxy
Holder
proxy
)
{
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
int
page
=
1
;
int
page
=
1
;
int
count
=
2
;
int
count
=
2
;
...
@@ -35,7 +35,7 @@ public class QicheHome {
...
@@ -35,7 +35,7 @@ public class QicheHome {
}
}
bodyList
.
addAll
(
qicheHomeKwyWordAnalysis
.
getData
(
result
));
bodyList
.
addAll
(
qicheHomeKwyWordAnalysis
.
getData
(
result
));
logger
.
info
(
"采集 articleid {} 总页数 {} 第 {} 页 , 采集总数 {}"
,
articleid
,
count
,
page
,
bodyList
.
size
());
logger
.
info
(
"采集 articleid {} 总页数 {} 第 {} 页 , 采集总数 {}"
,
articleid
,
count
,
page
,
bodyList
.
size
());
ZhiWeiTools
.
sleep
(
30
00
);
ZhiWeiTools
.
sleep
(
2
00
);
if
(
page
>
count
)
{
if
(
page
>
count
)
{
break
;
break
;
}
}
...
...
src/main/java/com/zhiwei/parse/SinaKeji.java
View file @
2a35dd02
...
@@ -3,7 +3,6 @@ package com.zhiwei.parse;
...
@@ -3,7 +3,6 @@ package com.zhiwei.parse;
import
static
java
.
util
.
Objects
.
nonNull
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
...
@@ -14,6 +13,7 @@ import org.slf4j.LoggerFactory;
...
@@ -14,6 +13,7 @@ import org.slf4j.LoggerFactory;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.parse.analysis.SinaKejiCommentAnalysis
;
import
com.zhiwei.parse.analysis.SinaKejiCommentAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -30,7 +30,7 @@ public class SinaKeji {
...
@@ -30,7 +30,7 @@ public class SinaKeji {
* @param proxy
* @param proxy
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getSinaKejiComment
(
String
url
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getSinaKejiComment
(
String
url
,
Proxy
Holder
proxy
)
{
String
commentId
=
getCommentId
(
url
,
proxy
);
String
commentId
=
getCommentId
(
url
,
proxy
);
if
(
nonNull
(
commentId
))
{
if
(
nonNull
(
commentId
))
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
...
@@ -60,7 +60,7 @@ public class SinaKeji {
...
@@ -60,7 +60,7 @@ public class SinaKeji {
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
private
static
String
getCommentId
(
String
url
,
Proxy
proxy
)
{
private
static
String
getCommentId
(
String
url
,
Proxy
Holder
proxy
)
{
String
commentId
=
null
;
String
commentId
=
null
;
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
{
try
{
...
...
src/main/java/com/zhiwei/parse/SouBao.java
View file @
2a35dd02
This diff is collapsed.
Click to expand it.
src/main/java/com/zhiwei/parse/Souhu.java
View file @
2a35dd02
...
@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory;
...
@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.SouhuAccountAnalysis
;
import
com.zhiwei.parse.analysis.SouhuAccountAnalysis
;
...
@@ -34,7 +35,7 @@ public class Souhu {
...
@@ -34,7 +35,7 @@ public class Souhu {
* @param url
* @param url
* @return
* @return
*/
*/
public
static
int
getSouhuCommentCount
(
String
url
,
Proxy
proxy
)
{
public
static
int
getSouhuCommentCount
(
String
url
,
Proxy
Holder
proxy
)
{
try
{
try
{
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
,
proxy
);
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
,
proxy
);
if
(
nonNull
(
newurl
))
{
if
(
nonNull
(
newurl
))
{
...
@@ -139,13 +140,13 @@ public class Souhu {
...
@@ -139,13 +140,13 @@ public class Souhu {
* @param cookie
* @param cookie
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getSouhuCommentData
(
String
url
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getSouhuCommentData
(
String
url
,
Proxy
Holder
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
int
j
=
1
;
int
j
=
1
;
try
{
try
{
while
(
true
)
{
while
(
true
)
{
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
,
ProxyFactory
.
getNatProxy
()
)
+
"&page_no="
+
j
;
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
,
proxy
)
+
"&page_no="
+
j
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
newurl
,
ProxyFactory
.
getNatProxy
(),
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
newurl
,
ProxyFactory
.
getNatProxy
(),
headerMap
);
System
.
out
.
println
(
newurl
);
System
.
out
.
println
(
newurl
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
...
...
src/main/java/com/zhiwei/parse/TechTx.java
View file @
2a35dd02
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
...
@@ -13,6 +13,7 @@ import org.slf4j.LoggerFactory;
...
@@ -13,6 +13,7 @@ import org.slf4j.LoggerFactory;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.parse.analysis.TechTxCommentAnalysis
;
import
com.zhiwei.parse.analysis.TechTxCommentAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -23,7 +24,7 @@ public class TechTx {
...
@@ -23,7 +24,7 @@ public class TechTx {
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
@SuppressWarnings
(
"unchecked"
)
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
Map
<
String
,
Object
>>
getTechTxComment
(
String
url
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getTechTxComment
(
String
url
,
Proxy
Holder
proxy
)
{
String
commentID
=
getCommentId
(
url
,
proxy
);
String
commentID
=
getCommentId
(
url
,
proxy
);
String
next
=
""
;
String
next
=
""
;
if
(
nonNull
(
commentID
))
{
if
(
nonNull
(
commentID
))
{
...
@@ -53,7 +54,7 @@ public class TechTx {
...
@@ -53,7 +54,7 @@ public class TechTx {
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
private
static
String
getCommentId
(
String
url
,
Proxy
proxy
)
{
private
static
String
getCommentId
(
String
url
,
Proxy
Holder
proxy
)
{
String
commentID
=
null
;
String
commentID
=
null
;
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
{
try
{
...
...
src/main/java/com/zhiwei/parse/Wangyi.java
View file @
2a35dd02
...
@@ -10,6 +10,7 @@ import org.slf4j.Logger;
...
@@ -10,6 +10,7 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.WangyiCommentAnalysis
;
import
com.zhiwei.parse.analysis.WangyiCommentAnalysis
;
...
@@ -61,7 +62,7 @@ public class Wangyi {
...
@@ -61,7 +62,7 @@ public class Wangyi {
* @param id
* @param id
* @return
* @return
*/
*/
public
static
int
getWangyiCommentCount
(
String
id
,
Proxy
proxy
)
{
public
static
int
getWangyiCommentCount
(
String
id
,
Proxy
Holder
proxy
)
{
try
{
try
{
String
url
=
"http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"
+
id
;
String
url
=
"http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"
+
id
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getWangyiCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getWangyiCommentHeaderMap
(
null
);
...
...
src/main/java/com/zhiwei/parse/Xueqiu.java
View file @
2a35dd02
...
@@ -5,7 +5,6 @@ import java.io.UnsupportedEncodingException;
...
@@ -5,7 +5,6 @@ import java.io.UnsupportedEncodingException;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collection
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
...
...
src/main/java/com/zhiwei/parse/Yiche.java
View file @
2a35dd02
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashMap
;
...
@@ -14,9 +15,9 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -14,9 +15,9 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
okhttp3.Response
;
import
okhttp3.Response
;
public
class
Yiche
{
public
class
Yiche
{
...
@@ -31,7 +32,7 @@ public class Yiche {
...
@@ -31,7 +32,7 @@ public class Yiche {
* @param proxy
* @param proxy
* @return
* @return
*/
*/
public
static
int
getYicheCount
(
String
url
,
Proxy
proxy
)
{
public
static
int
getYicheCount
(
String
url
,
Proxy
Holder
proxy
)
{
String
nurl
=
getnewsId
(
url
,
proxy
);
String
nurl
=
getnewsId
(
url
,
proxy
);
if
(
nonNull
(
nurl
))
{
if
(
nonNull
(
nurl
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
nurl
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
nurl
),
proxy
)){
...
@@ -52,7 +53,7 @@ public class Yiche {
...
@@ -52,7 +53,7 @@ public class Yiche {
* @param proxy
* @param proxy
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getYicheComment
(
String
url
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getYicheComment
(
String
url
,
Proxy
Holder
proxy
)
{
String
nUrl
=
getnewsId
(
url
,
proxy
);
String
nUrl
=
getnewsId
(
url
,
proxy
);
if
(
nonNull
(
nUrl
))
{
if
(
nonNull
(
nUrl
))
{
int
page
=
1
;
int
page
=
1
;
...
@@ -92,7 +93,7 @@ public class Yiche {
...
@@ -92,7 +93,7 @@ public class Yiche {
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
private
static
String
getnewsId
(
String
url
,
Proxy
proxy
)
{
private
static
String
getnewsId
(
String
url
,
Proxy
Holder
proxy
)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
String
result
=
response
.
body
().
string
();
String
productId
=
result
.
split
(
"productId: "
)[
1
].
split
(
","
)[
0
];
String
productId
=
result
.
split
(
"productId: "
)[
1
].
split
(
","
)[
0
];
...
...
src/main/java/com/zhiwei/parse/Youku.java
View file @
2a35dd02
...
@@ -56,8 +56,6 @@ public class Youku {
...
@@ -56,8 +56,6 @@ public class Youku {
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
" Exception {} "
,
e
);
logger
.
error
(
" Exception {} "
,
e
);
}
}
}
}
return
list
;
return
list
;
...
...
src/main/java/com/zhiwei/parse/analysis/AiqiyiByWordAnalysis.java
View file @
2a35dd02
...
@@ -13,14 +13,8 @@ import org.jsoup.select.Elements;
...
@@ -13,14 +13,8 @@ import org.jsoup.select.Elements;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
okhttp3.Response
;
public
class
AiqiyiByWordAnalysis
{
public
class
AiqiyiByWordAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
AiqiyiByWordAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
AiqiyiByWordAnalysis
.
class
);
...
...
src/main/java/com/zhiwei/parse/analysis/FenghuangCommentAnalysis.java
View file @
2a35dd02
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.Date
;
import
java.util.Date
;
...
@@ -15,6 +14,7 @@ import com.alibaba.fastjson.JSONArray;
...
@@ -15,6 +14,7 @@ import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
...
@@ -25,7 +25,7 @@ public class FenghuangCommentAnalysis {
...
@@ -25,7 +25,7 @@ public class FenghuangCommentAnalysis {
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
public
Map
<
String
,
Object
>
getFenghuangCommentCount
(
String
url
,
Proxy
proxy
)
{
public
Map
<
String
,
Object
>
getFenghuangCommentCount
(
String
url
,
Proxy
Holder
proxy
)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
String
result
=
response
.
body
().
string
();
...
@@ -46,7 +46,7 @@ public class FenghuangCommentAnalysis {
...
@@ -46,7 +46,7 @@ public class FenghuangCommentAnalysis {
* @param url
* @param url
* @return
* @return
*/
*/
public
String
getdocUrl
(
String
url
,
Proxy
proxy
)
{
public
String
getdocUrl
(
String
url
,
Proxy
Holder
proxy
)
{
String
docUrl
=
null
;
String
docUrl
=
null
;
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
...
@@ -79,7 +79,7 @@ public class FenghuangCommentAnalysis {
...
@@ -79,7 +79,7 @@ public class FenghuangCommentAnalysis {
* @param url
* @param url
* @return
* @return
*/
*/
public
List
<
Map
<
String
,
Object
>>
getData
(
String
url
,
Proxy
proxy
)
{
public
List
<
Map
<
String
,
Object
>>
getData
(
String
url
,
Proxy
Holder
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangCommentHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
)){
...
@@ -106,7 +106,7 @@ public class FenghuangCommentAnalysis {
...
@@ -106,7 +106,7 @@ public class FenghuangCommentAnalysis {
* @param proxy
* @param proxy
* @return
* @return
*/
*/
public
List
<
Map
<
String
,
Object
>>
getData2
(
String
url
,
Proxy
proxy
)
{
public
List
<
Map
<
String
,
Object
>>
getData2
(
String
url
,
Proxy
Holder
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
String
result
=
response
.
body
().
string
();
...
...
src/main/java/com/zhiwei/parse/analysis/MaimaiBywordAnalysis.java
View file @
2a35dd02
...
@@ -11,17 +11,17 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -11,17 +11,17 @@ import com.alibaba.fastjson.JSONObject;
public
class
MaimaiBywordAnalysis
{
public
class
MaimaiBywordAnalysis
{
public
Map
<
String
,
Object
>
getData
(
String
result
,
String
time
)
{
public
Map
<
String
,
Object
>
getData
(
String
result
,
String
time
,
String
key
)
{
Map
<
String
,
Object
>
map1
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map1
=
new
HashMap
<>();
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"feeds"
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"feeds"
);
boolean
f
=
true
;
boolean
f
=
true
;
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
f
=
json
.
getJSONObject
(
"data"
).
getInteger
(
"more"
)==
1
?
true
:
false
;
f
=
json
.
getJSONObject
(
"data"
).
getInteger
(
"more"
)==
1
?
true
:
false
;
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
String
url
=
"https://maimai.cn/article/detail?fid="
+
data
.
getJSONObject
(
"feed"
).
getString
(
"id"
);
String
url
=
"https://maimai.cn/article/detail?fid="
+
data
.
getJSONObject
(
"feed"
).
getString
(
"id"
)
+
"&efid="
+
data
.
getString
(
"efid"
)
;
String
atime
=
data
.
getJSONObject
(
"feed"
).
getString
(
"crtime_string"
);
String
atime
=
data
.
getJSONObject
(
"feed"
).
getString
(
"crtime_string"
);
if
(
time
.
compareTo
(
atime
)
>
-
1
)
{
if
(
time
.
compareTo
(
atime
)
>
-
1
)
{
f
=
false
;
f
=
false
;
...
@@ -34,6 +34,8 @@ public class MaimaiBywordAnalysis {
...
@@ -34,6 +34,8 @@ public class MaimaiBywordAnalysis {
map
.
put
(
"like"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"likes"
));
map
.
put
(
"like"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"likes"
));
map
.
put
(
"comment_count"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"total_cnt"
));
map
.
put
(
"comment_count"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"total_cnt"
));
map
.
put
(
"spreads"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"spreads"
));
//传播数
map
.
put
(
"spreads"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"spreads"
));
//传播数
map
.
put
(
"career"
,
data
.
getJSONObject
(
"contact"
).
getString
(
"career"
));
map
.
put
(
"word"
,
key
);
// System.out.println(map.toString());
// System.out.println(map.toString());
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
...
@@ -42,7 +44,7 @@ public class MaimaiBywordAnalysis {
...
@@ -42,7 +44,7 @@ public class MaimaiBywordAnalysis {
return
map1
;
return
map1
;
}
}
public
Map
<
String
,
Object
>
getDataByNoName
(
String
result
,
String
time
)
{
public
Map
<
String
,
Object
>
getDataByNoName
(
String
result
,
String
time
,
String
key
)
{
Map
<
String
,
Object
>
map1
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map1
=
new
HashMap
<
String
,
Object
>();
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"gossips"
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"gossips"
);
...
@@ -65,6 +67,7 @@ public class MaimaiBywordAnalysis {
...
@@ -65,6 +67,7 @@ public class MaimaiBywordAnalysis {
map
.
put
(
"like"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"likes"
));
map
.
put
(
"like"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"likes"
));
map
.
put
(
"comment_count"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"total_cnt"
));
map
.
put
(
"comment_count"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"total_cnt"
));
map
.
put
(
"spreads"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"search_order"
));
//传播数
map
.
put
(
"spreads"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"search_order"
));
//传播数
map
.
put
(
"word"
,
key
);
// System.out.println(map.toString());
// System.out.println(map.toString());
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
...
...
src/main/java/com/zhiwei/parse/analysis/SouhuCommentAnalysis.java
View file @
2a35dd02
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -11,6 +10,7 @@ import org.slf4j.LoggerFactory;
...
@@ -11,6 +10,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
...
@@ -27,7 +27,7 @@ public class SouhuCommentAnalysis {
...
@@ -27,7 +27,7 @@ public class SouhuCommentAnalysis {
* @param url
* @param url
* @return
* @return
*/
*/
public
String
getSouhuURL
(
String
url
,
Proxy
proxy
)
{
public
String
getSouhuURL
(
String
url
,
Proxy
Holder
proxy
)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
String
result
=
response
.
body
().
string
();
String
source_id
=
result
.
split
(
"news_id: \""
)[
1
].
split
(
"\","
)[
0
];
String
source_id
=
result
.
split
(
"news_id: \""
)[
1
].
split
(
"\","
)[
0
];
...
@@ -39,7 +39,7 @@ public class SouhuCommentAnalysis {
...
@@ -39,7 +39,7 @@ public class SouhuCommentAnalysis {
return
null
;
return
null
;
}
}
public
int
getSouhuCommentCount
(
String
url
,
Proxy
proxy
)
{
public
int
getSouhuCommentCount
(
String
url
,
Proxy
Holder
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
int
i
;
int
i
;
try
{
try
{
...
...
src/main/java/com/zhiwei/parse/shipin/QQTV.java
View file @
2a35dd02
...
@@ -4,7 +4,6 @@ import java.net.Proxy;
...
@@ -4,7 +4,6 @@ import java.net.Proxy;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -55,7 +54,7 @@ public class QQTV {
...
@@ -55,7 +54,7 @@ public class QQTV {
String
nurl
=
element
.
select
(
"h2.result_title"
).
select
(
"a"
).
attr
(
"href"
);
String
nurl
=
element
.
select
(
"h2.result_title"
).
select
(
"a"
).
attr
(
"href"
);
Map
<
String
,
Object
>
map
=
getUrlData
(
nurl
,
ProxyFactory
.
getNatProxy
());
Map
<
String
,
Object
>
map
=
getUrlData
(
nurl
,
ProxyFactory
.
getNatProxy
());
if
(
Objects
.
nonNull
(
map
)
&&
time
.
compareTo
(
String
.
valueOf
(
map
.
get
(
"time"
)))
<
1
)
{
if
(
Objects
.
nonNull
(
map
)
&&
time
.
compareTo
(
String
.
valueOf
(
map
.
get
(
"time"
)))
<
1
)
{
System
.
out
.
println
(
map
.
toString
());
//
System.out.println(map.toString());
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
ZhiWeiTools
.
sleep
(
50
);
ZhiWeiTools
.
sleep
(
50
);
...
@@ -64,6 +63,7 @@ public class QQTV {
...
@@ -64,6 +63,7 @@ public class QQTV {
if
(
count
!=
dataList
.
size
())
{
if
(
count
!=
dataList
.
size
())
{
continue
;
continue
;
}
}
break
;
break
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
" 数据采集出错 {} "
,
e
);
logger
.
error
(
" 数据采集出错 {} "
,
e
);
...
...
src/main/java/com/zhiwei/parse/shipin/SohuTV.java
View file @
2a35dd02
...
@@ -35,6 +35,7 @@ public class SohuTV {
...
@@ -35,6 +35,7 @@ public class SohuTV {
headers
.
put
(
"cookie"
,
cookie
);
headers
.
put
(
"cookie"
,
cookie
);
while
(
true
)
{
while
(
true
)
{
int
count
=
dataList
.
size
();
int
count
=
dataList
.
size
();
System
.
out
.
println
(
url
+
page
);
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
+
page
,
headers
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
+
page
,
headers
),
proxy
)){
String
result
=
response
.
body
().
string
();
String
result
=
response
.
body
().
string
();
Document
document
=
Jsoup
.
parse
(
result
);
Document
document
=
Jsoup
.
parse
(
result
);
...
...
src/test/java/com/zhiwei/Comment/MaimaiCommentCountTest.java
View file @
2a35dd02
//package com.zhiwei.Comment;
package
com
.
zhiwei
.
Comment
;
//
//import java.util.ArrayList;
import
java.util.ArrayList
;
//import java.util.List;
import
java.util.List
;
//import java.util.Map;
import
java.util.Map
;
//
//import org.testng.annotations.Test;
import
org.testng.annotations.Test
;
//
//import com.zhiwei.common.config.GroupType;
import
com.zhiwei.common.config.GroupType
;
//import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
//import com.zhiwei.parse.Maimai;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//import com.zhiwei.parse.Yiche;
import
com.zhiwei.parse.Maimai
;
//import com.zhiwei.tools.tools.ZhiWeiTools;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
//
//public class MaimaiCommentCountTest {
public
class
MaimaiCommentCountTest
{
// @Test
@Test
// public void f() {
public
void
f
()
{
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
// GroupType.PROVIDER);
GroupType
.
PROVIDER
);
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
// Map<String, Object> map = poi
Map
<
String
,
Object
>
map
=
poi
// .importExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉#美团 裁员#汇总截至12月20日10点30分.xlsx(1).xlsx", 0);
.
importExcel
(
"C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx"
,
0
);
// List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body");
List
<
Map
<
String
,
Object
>>
list
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
// List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>();
String
cookie
=
"_buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; sessionid=lejfy3gdu5tf9x9zowxfhtq5o73dubc5; guid=GxsfBBgZGwQYGx4EGBkeVgcYGxkdHhMeHhkcVhwZBB0ZHwVDWEtMS3kKEhMEEh0fGQQaBBsdBU9HRVhCaQoDRUFJT20KT0FDRgoGZmd+YmECChwZBB0ZHwVeQ2FIT31PRlpaawoDHhx9ZX0KERkEHAp+ZApZXUVOREN9AgoaBB8FS0ZGQ1BFZw==; seid=s1550814253444; token=\"rhItcea5qkO6WCSnVcczW/NRVLLCTsq3kQbpUCGAwQ0ceLunVJRjT5rgoFVYrIBA8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiVjJuNHdCVDBncVNacTRxVllGM29jRUVwIiwic3RhdHVzIjp0cnVlLCJfZXhwaXJlIjoxNTUwOTAyMTY3MDQ5LCJfbWF4QWdlIjo4NjQwMDAwMH0=; session.sig=zbs4cHtzTcHWvjtkpjAZmoqLXsQ"
;
// List<String> headList = (List<String>) map.get("head");
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
// for (Map<String, Object> map1 : list) {
List
<
String
>
headList
=
(
List
<
String
>)
map
.
get
(
"head"
);
// String url = map1.get("地址") + "";
for
(
Map
<
String
,
Object
>
map1
:
list
)
{
// Map<String,Object> map3 = Maimai.getMaiaiCount(url, ProxyFactory.getNatProxy());
String
url
=
map1
.
get
(
"地址"
)
+
""
;
// map1.putAll(map3);
Map
<
String
,
Object
>
map3
=
Maimai
.
getMaiaiCount
(
url
,
null
,
ProxyHolder
.
NAT_PROXY
);
// ZhiWeiTools.sleep(100);
System
.
out
.
println
(
map3
.
toString
());
// }
System
.
out
.
println
(
url
);
// headList.add("like");
map1
.
putAll
(
map3
);
// headList.add("spreads");
ZhiWeiTools
.
sleep
(
500
);
// headList.add("cmts");
System
.
out
.
println
(
"--------------------------"
);
// poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉#美团 裁员#汇总截至12月20日10点30分.xlsx(1).xlsx", "评论采集", headList,
}
// list);
headList
.
add
(
"like"
);
// }
headList
.
add
(
"spreads"
);
//}
headList
.
add
(
"cmts"
);
poi
.
exportExcel
(
"C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx"
,
"评论采集"
,
headList
,
list
);
}
}
src/test/java/com/zhiwei/crawler/AiqiyiByWordExample.java
View file @
2a35dd02
package
com
.
zhiwei
.
crawler
;
//
package com.zhiwei.crawler;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.common.config.GroupType
;
//
import com.zhiwei.common.config.GroupType;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Aiqiyi
;
//
import com.zhiwei.parse.Aiqiyi;
import
com.zhiwei.util.WordReadFile
;
//
import com.zhiwei.util.WordReadFile;
//
public
class
AiqiyiByWordExample
{
//
public class AiqiyiByWordExample {
//
//
@Test
//
@Test
public
void
aiqiyiByWordTest
()
{
//
public void aiqiyiByWordTest() {
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
//
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
//
List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
//
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for
(
String
w
:
wordList
)
{
//
for(String w : wordList) {
List
<
Map
<
String
,
Object
>>
dataList
=
Aiqiyi
.
getAiqiyiByWordData
(
w
,
null
);
//
List<Map<String,Object>> dataList = Aiqiyi.getAiqiyiByWordData(w,null);
if
(
dataList
!=
null
&&
dataList
.
size
()
>=
1
)
{
//
if(dataList != null && dataList.size() >= 1) {
bodyList
.
addAll
(
dataList
);
//
bodyList.addAll(dataList);
}
//
}
}
//
}
List
<
String
>
headList
=
new
ArrayList
<
String
>();
//
List<String> headList = new ArrayList<String>();
headList
.
add
(
"count"
);
//
headList.add("count");
headList
.
add
(
"time"
);
//
headList.add("time");
headList
.
add
(
"source"
);
//
headList.add("source");
headList
.
add
(
"content"
);
//
headList.add("content");
headList
.
add
(
"url"
);
//
headList.add("url");
headList
.
add
(
"title"
);
//
headList.add("title");
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi
.
exportExcel
(
"D://crawlerdata/爱奇艺关键词采集.xlsx"
,
"数据"
,
headList
,
bodyList
);
//
poi.exportExcel("D://crawlerdata/爱奇艺关键词采集.xlsx", "数据", headList, bodyList);
//
//
//
}
//
}
//
//
//
}
//
}
src/test/java/com/zhiwei/crawler/MaimaiBywordExample.java
View file @
2a35dd02
package
com
.
zhiwei
.
crawler
;
package
com
.
zhiwei
.
crawler
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Arrays
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -11,8 +12,8 @@ public class MaimaiBywordExample {
...
@@ -11,8 +12,8 @@ public class MaimaiBywordExample {
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
String
word
=
"美团|某团|MT|大众点评|新美大|美团点评"
;
String
word
=
"美团|某团|MT|大众点评|新美大|美团点评"
;
String
cookie
=
"_buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; sessionid=
3oatshv55and4kwcz9gdpie7qdpj27yt; guid=GxsfBBgZGwQYGx4EGBkeVgcYGxkdHxwdGRMcVhwZBB0ZHwVDWEtMS3kKEhMEEh0fGQQaBBsdBU9HRVhCaQoDRUFJT20KT0FDRgoGZmd+YmECChwZBB0ZHwVeQ2FIT31PRlpaawoDHhx9ZX0KERkEHAp+ZApZXUVOREN9AgoaBB8FS0ZGQ1BFZw==; seid=s1548984672861; token=\"Ap1u6QzIdn8FCrohEAEPI86n9mNSKk1qJWlauQ8KeSbn7fDKTu6bN2Yv6B9V19nO8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoibVVlSlRTUW1NdVdUTUUtRjV0SjBZbExtIiwibWlkNDU2ODc2MCI6ZmFsc2UsInN0YXR1cyI6dHJ1ZSwiX2V4cGlyZSI6MTU0OTA3MTEzOTA2NywiX21heEFnZSI6ODY0MDAwMDB9; session.sig=UOz44C2rF-uJFxFvSwHyII5aJxM
"
;
String
cookie
=
"_buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; sessionid=
lejfy3gdu5tf9x9zowxfhtq5o73dubc5; guid=GxsfBBgZGwQYGx4EGBkeVgcYGxkdHhMeHhkcVhwZBB0ZHwVDWEtMS3kKEhMEEh0fGQQaBBsdBU9HRVhCaQoDRUFJT20KT0FDRgoGZmd+YmECChwZBB0ZHwVeQ2FIT31PRlpaawoDHhx9ZX0KERkEHAp+ZApZXUVOREN9AgoaBB8FS0ZGQ1BFZw==; seid=s1550814253444; token=\"G8eNNNylPoi3oIPLUr/d/RDaMgtnpZCskxT7wu1pRRrkiy3J8G7StHgTx9DQBq4O8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiVjJuNHdCVDBncVNacTRxVllGM29jRUVwIiwic3RhdHVzIjp0cnVlLCJfZXhwaXJlIjoxNTUwOTAwNjY1Njg4LCJfbWF4QWdlIjo4NjQwMDAwMH0=; session.sig=b_tga85tZskxsgKX8YIM_JKByi0
"
;
String
time
=
"2019-0
1-24
00:00:00"
;
String
time
=
"2019-0
2-15
00:00:00"
;
String
[]
words
=
word
.
split
(
"\\|"
);
String
[]
words
=
word
.
split
(
"\\|"
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
for
(
String
w
:
words
)
{
for
(
String
w
:
words
)
{
...
@@ -23,16 +24,9 @@ public class MaimaiBywordExample {
...
@@ -23,16 +24,9 @@ public class MaimaiBywordExample {
// bodyList.addAll(c);
// bodyList.addAll(c);
bodyList
.
addAll
(
c2
);
bodyList
.
addAll
(
c2
);
}
}
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
Arrays
.
asList
(
"time"
,
"url"
,
"text"
,
"name"
,
"like"
,
"comment_count"
,
"spreads"
,
"word"
);
headList
.
add
(
"time"
);
headList
.
add
(
"url"
);
headList
.
add
(
"text"
);
headList
.
add
(
"name"
);
headList
.
add
(
"like"
);
headList
.
add
(
"comment_count"
);
headList
.
add
(
"spreads"
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\脉脉关键词采集-美团-02
01
.xlsx"
,
"脉脉关键词"
,
headList
,
bodyList
);
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\脉脉关键词采集-美团-02
22
.xlsx"
,
"脉脉关键词"
,
headList
,
bodyList
);
}
}
}
}
src/test/java/com/zhiwei/crawler/SouhuCommentCountExample.java
View file @
2a35dd02
package
com
.
zhiwei
.
crawler
;
package
com
.
zhiwei
.
crawler
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -8,13 +7,14 @@ import org.junit.Test;
...
@@ -8,13 +7,14 @@ import org.junit.Test;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.Souhu
;
import
com.zhiwei.parse.Souhu
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
SouhuCommentCountExample
{
public
class
SouhuCommentCountExample
{
@SuppressWarnings
(
"unchecked"
)
@Test
@Test
public
void
souhuCommentCountTest
()
{
public
void
souhuCommentCountTest
()
{
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
...
@@ -28,7 +28,7 @@ public class SouhuCommentCountExample {
...
@@ -28,7 +28,7 @@ public class SouhuCommentCountExample {
try
{
try
{
url
=
map1
.
get
(
"url"
)+
""
;
url
=
map1
.
get
(
"url"
)+
""
;
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
int
i
=
Souhu
.
getSouhuCommentCount
(
url
,
Proxy
Factory
.
getNatProxy
()
);
int
i
=
Souhu
.
getSouhuCommentCount
(
url
,
Proxy
Holder
.
NAT_PROXY
);
map1
.
put
(
"count"
,
i
);
map1
.
put
(
"count"
,
i
);
System
.
out
.
println
(
map1
.
toString
());
System
.
out
.
println
(
map1
.
toString
());
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/test/java/com/zhiwei/keyword/YoukuKeyWordTest.java
deleted
100644 → 0
View file @
b3d545a3
package
com
.
zhiwei
.
keyword
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Map
;
import
org.testng.annotations.Test
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.Youku
;
public
class
YoukuKeyWordTest
{
@Test
public
void
f
()
{
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
String
word
=
"帮宝适 二噁英,"
+
"帮宝适 二恶英,"
+
"帮宝适 甲醛,"
+
"帮宝适 荧光,"
+
"帮宝适 致癌,"
+
"帮宝适 有毒,"
+
"帮宝适 超标,"
+
"帮宝适 防腐剂,"
+
"帮宝适 起诉,"
+
"帮宝适 伤害,"
+
"帮宝适 气味,"
+
"帮宝适 异味,"
+
"帮宝适 起坨,"
+
"帮宝适 异物,"
+
"帮宝适 漏,"
+
"帮宝适 刺鼻,"
+
"帮宝适 勒,"
+
"帮宝适 脱皮,"
+
"帮宝适 划伤,"
+
"绿帮 二噁英,"
+
"绿帮 二恶英,"
+
"绿帮 甲醛,"
+
"绿帮 荧光,"
+
"绿帮 致癌,"
+
"绿帮 有毒,"
+
"绿帮 超标,"
+
"绿帮 起诉,"
+
"绿帮 气味,"
+
"绿帮 异味,"
+
"绿帮 异物,"
+
"绿帮 漏,"
+
"绿帮 刺鼻,"
+
"绿帮 勒,"
+
"绿帮 脱皮"
;
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
String
[]
words
=
word
.
split
(
","
);
for
(
String
w
:
words
)
{
System
.
out
.
println
(
w
);
bodyList
.
addAll
(
Youku
.
getDataList
(
w
));
}
List
<
String
>
headList
=
new
ArrayList
<>();
headList
.
add
(
"title"
);
headList
.
add
(
"time"
);
headList
.
add
(
"url"
);
headList
.
add
(
"uper"
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
poi
.
exportExcel
(
"C:\\Users\\byte-zbs\\Desktop\\tx\\优酷数据-txh-0121.xlsx"
,
"数据"
,
headList
,
bodyList
);
}
}
src/test/java/com/zhiwei/shipin/BilibiliTest.java
View file @
2a35dd02
...
@@ -13,7 +13,7 @@ import com.zhiwei.util.WordReadFile;
...
@@ -13,7 +13,7 @@ import com.zhiwei.util.WordReadFile;
public
class
BilibiliTest
{
public
class
BilibiliTest
{
@Test
@Test
public
void
f
()
{
public
void
f
()
{
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词
-2
.txt"
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
String
cookie
=
"LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274"
;
String
cookie
=
"LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274"
;
for
(
String
word
:
wordList
)
{
for
(
String
word
:
wordList
)
{
...
@@ -32,7 +32,7 @@ public class BilibiliTest {
...
@@ -32,7 +32,7 @@ public class BilibiliTest {
headlist
.
add
(
"title"
);
headlist
.
add
(
"title"
);
headlist
.
add
(
"url"
);
headlist
.
add
(
"url"
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
poi
.
exportExcel
(
"D://crawlerdata//bilibili关键词采集数据-txh-0
130
.xlsx"
,
"B站数据"
,
headlist
,
bodyList
);
poi
.
exportExcel
(
"D://crawlerdata//bilibili关键词采集数据-txh-0
219-农药
.xlsx"
,
"B站数据"
,
headlist
,
bodyList
);
}
}
}
}
src/test/java/com/zhiwei/shipin/DouyinHotExample.java
View file @
2a35dd02
package
com
.
zhiwei
.
shipin
;
//
package com.zhiwei.shipin;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Douyin
;
//
import com.zhiwei.parse.Douyin;
//
public
class
DouyinHotExample
{
//
public class DouyinHotExample {
//
public
static
void
main
(
String
[]
args
)
{
//
public static void main(String[] args) {
//
List
<
Map
<
String
,
Object
>>
bodyList
=
Douyin
.
getDouyinHotData
(
"https://www.iesdouyin.com/share/challenge/1604239741363223?utm_campaign=client_share&app=aweme&utm_medium=ios&tt_from=qq&utm_source=qq&iid=36454376501"
,
null
);
//
List<Map<String,Object>> bodyList = Douyin.getDouyinHotData("https://www.iesdouyin.com/share/challenge/1604239741363223?utm_campaign=client_share&app=aweme&utm_medium=ios&tt_from=qq&utm_source=qq&iid=36454376501",null);
List
<
String
>
headList
=
new
ArrayList
<
String
>();
//
List<String> headList = new ArrayList<String>();
headList
.
add
(
"text"
);
//
headList.add("text");
headList
.
add
(
"url"
);
//
headList.add("url");
headList
.
add
(
"time"
);
//
headList.add("time");
headList
.
add
(
"author"
);
//
headList.add("author");
headList
.
add
(
"comment_count"
);
//
headList.add("comment_count");
headList
.
add
(
"like_count"
);
//
headList.add("like_count");
headList
.
add
(
"share_count"
);
//
headList.add("share_count");
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi
.
exportExcel
(
"D:\\crawlerdata\\抖音热门采集测试.xlsx"
,
"asd"
,
headList
,
bodyList
);
//
poi.exportExcel("D:\\crawlerdata\\抖音热门采集测试.xlsx", "asd", headList, bodyList);
}
//
}
//
}
//
}
src/test/java/com/zhiwei/shipin/QQTVTest.java
View file @
2a35dd02
...
@@ -11,7 +11,6 @@ import com.zhiwei.crawler.proxy.ProxyFactory;
...
@@ -11,7 +11,6 @@ import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.shipin.QQTV
;
import
com.zhiwei.parse.shipin.QQTV
;
import
com.zhiwei.parse.shipin.SohuTV
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.util.WordReadFile
;
import
com.zhiwei.util.WordReadFile
;
...
@@ -19,7 +18,7 @@ public class QQTVTest {
...
@@ -19,7 +18,7 @@ public class QQTVTest {
@Test
@Test
public
void
f
()
{
public
void
f
()
{
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
String
time
=
"
2018
-07-01 00:00:00"
;
String
time
=
"
1970
-07-01 00:00:00"
;
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
for
(
String
word
:
wordList
)
{
for
(
String
word
:
wordList
)
{
...
...
src/test/java/com/zhiwei/shipin/SohuTVTest.java
View file @
2a35dd02
package
com
.
zhiwei
.
shipin
;
//package com.zhiwei.shipin;
//
import
java.util.ArrayList
;
//import java.util.ArrayList;
import
java.util.List
;
//import java.util.List;
import
java.util.Map
;
//import java.util.Map;
//
import
org.testng.annotations.Test
;
//import org.testng.annotations.Test;
//
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.BiliBili
;
//import com.zhiwei.parse.shipin.SohuTV;
import
com.zhiwei.parse.shipin.SohuTV
;
//import com.zhiwei.tools.tools.ZhiWeiTools;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
//import com.zhiwei.util.WordReadFile;
import
com.zhiwei.util.WordReadFile
;
//
//public class SohuTVTest {
public
class
SohuTVTest
{
// @Test
@Test
// public void f() {
public
void
f
()
{
// List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
// List<Map<String, Object>> bodyList = new ArrayList<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
// String cookie = "SUV=1901101134139015; IPLOC=CN3301; gidinf=x099980109ee0f08567b42835000336ade2ef3762611; fuid=15474616189304048886; newpuid=15474616191372936893; beans_mz_userid=UBThg01XRPg8; pmai=dad35c1c318bdd22; ifoxinstalled=false; beans_freq=1; beans_dmp=%7B%22admaster%22%3A1547461620%2C%22shunfei%22%3A1547461620%2C%22reachmax%22%3A1548816807%2C%22lingji%22%3A1547461620%2C%22yoyi%22%3A1547461620%2C%22ipinyou%22%3A1547461620%2C%22ipinyou_admaster%22%3A1547461620%2C%22miaozhen%22%3A1548816807%2C%22diantong%22%3A1547461620%2C%22huayang%22%3A1547461620%7D; beans_dmp_done=1; reqtype=pc; sokey=%5B%7B%22key%22%3A%22%E7%BE%8E%E5%9B%A2%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%20%E4%BA%8C%E5%99%81%E8%8B%B1%22%7D%5D; t=1548817812321";
String
cookie
=
"SUV=1901101134139015; IPLOC=CN3301; gidinf=x099980109ee0f08567b42835000336ade2ef3762611; fuid=15474616189304048886; newpuid=15474616191372936893; beans_mz_userid=UBThg01XRPg8; pmai=dad35c1c318bdd22; ifoxinstalled=false; beans_freq=1; beans_dmp=%7B%22admaster%22%3A1547461620%2C%22shunfei%22%3A1547461620%2C%22reachmax%22%3A1548816807%2C%22lingji%22%3A1547461620%2C%22yoyi%22%3A1547461620%2C%22ipinyou%22%3A1547461620%2C%22ipinyou_admaster%22%3A1547461620%2C%22miaozhen%22%3A1548816807%2C%22diantong%22%3A1547461620%2C%22huayang%22%3A1547461620%7D; beans_dmp_done=1; reqtype=pc; sokey=%5B%7B%22key%22%3A%22%E7%BE%8E%E5%9B%A2%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%20%E4%BA%8C%E5%99%81%E8%8B%B1%22%7D%5D; t=1548817812321"
;
// for (String word : wordList) {
for
(
String
word
:
wordList
)
{
// List<Map<String, Object>> dataList = SohuTV.sohuTVData(word, cookie, null);
List
<
Map
<
String
,
Object
>>
dataList
=
SohuTV
.
sohuTVData
(
word
,
cookie
,
null
);
// if (dataList != null) {
if
(
dataList
!=
null
)
{
// System.out.println(word + " ----- " + dataList.size());
System
.
out
.
println
(
word
+
" ----- "
+
dataList
.
size
());
// bodyList.addAll(dataList);
bodyList
.
addAll
(
dataList
);
// }
}
// ZhiWeiTools.sleep(1000);
ZhiWeiTools
.
sleep
(
1000
);
// }
}
// List<String> headlist = new ArrayList<>();
List
<
String
>
headlist
=
new
ArrayList
<>();
// headlist.add("playCount");
headlist
.
add
(
"playCount"
);
// headlist.add("time");
headlist
.
add
(
"time"
);
// headlist.add("source");
headlist
.
add
(
"source"
);
// headlist.add("title");
headlist
.
add
(
"title"
);
// headlist.add("url");
headlist
.
add
(
"url"
);
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
// poi.exportExcel("D://crawlerdata//搜狐视频关键词采集数据-txh-0219.xlsx", "B站数据", headlist, bodyList);
poi
.
exportExcel
(
"D://crawlerdata//搜狐视频关键词采集数据-txh-0130.xlsx"
,
"B站数据"
,
headlist
,
bodyList
);
//
// }
}
//}
}
src/test/java/com/zhiwei/shipin/YoukuKeyWordTest.java
0 → 100644
View file @
2a35dd02
//package com.zhiwei.shipin;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Youku;
//import com.zhiwei.util.WordReadFile;
//
//public class YoukuKeyWordTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
// GroupType.PROVIDER);
// List<String> words = WordReadFile.getWords("D://crawlerdata//关键词.txt");
// List<Map<String,Object>> bodyList = new ArrayList<>();
// for(String w : words) {
// System.out.println(w);
// bodyList.addAll(Youku.getDataList(w));
// }
// List<String> headList = new ArrayList<>();
// headList.add("title");
// headList.add("time");
// headList.add("url");
// headList.add("uper");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("D://crawlerdata//优酷数据-txh-0219.xlsx", "数据", headList, bodyList);
//
// }
//}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment