Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
articlenewscrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
articlenewscrawler
Commits
d979d793
Commit
d979d793
authored
Jan 24, 2019
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
脉脉 评论采集 和部分视频采集
parent
1116d3c5
Hide whitespace changes
Inline
Side-by-side
Showing
51 changed files
with
1126 additions
and
622 deletions
+1126
-622
pom.xml
+1
-1
src/main/java/com/zhiwei/httpclient/HeadGet.java
+7
-27
src/main/java/com/zhiwei/httpclient/HttpClient.java
+12
-9
src/main/java/com/zhiwei/parse/Aiqiyi.java
+1
-1
src/main/java/com/zhiwei/parse/BiliBili.java
+8
-7
src/main/java/com/zhiwei/parse/Chejia.java
+111
-0
src/main/java/com/zhiwei/parse/Douyin.java
+1
-0
src/main/java/com/zhiwei/parse/Gftai.java
+0
-1
src/main/java/com/zhiwei/parse/Maimai.java
+113
-5
src/main/java/com/zhiwei/parse/PearVideo.java
+17
-10
src/main/java/com/zhiwei/parse/QQKB.java
+20
-11
src/main/java/com/zhiwei/parse/QQKandian.java
+0
-3
src/main/java/com/zhiwei/parse/QicheHome.java
+0
-1
src/main/java/com/zhiwei/parse/SinaTousu.java
+0
-1
src/main/java/com/zhiwei/parse/SouBao.java
+1
-0
src/main/java/com/zhiwei/parse/Souhu.java
+4
-3
src/main/java/com/zhiwei/parse/Toutiao.java
+0
-4
src/main/java/com/zhiwei/parse/Xueqiu.java
+23
-2
src/main/java/com/zhiwei/parse/Yiche.java
+1
-0
src/main/java/com/zhiwei/parse/Youku.java
+67
-0
src/main/java/com/zhiwei/parse/analysis/AiqiyiByWordAnalysis.java
+10
-4
src/main/java/com/zhiwei/parse/analysis/BaijiaAccountAnalysis.java
+7
-3
src/main/java/com/zhiwei/parse/analysis/DayuByWordAnalysis.java
+19
-18
src/main/java/com/zhiwei/parse/analysis/DayuCommentAnalysis.java
+64
-68
src/main/java/com/zhiwei/parse/analysis/DoubanCommentAnalysis.java
+0
-2
src/main/java/com/zhiwei/parse/analysis/DouyinHotDataAnalysis.java
+0
-4
src/main/java/com/zhiwei/parse/analysis/FenghuangAccountAnalysis.java
+8
-3
src/main/java/com/zhiwei/parse/analysis/FenghuangCommentAnalysis.java
+24
-31
src/main/java/com/zhiwei/parse/analysis/MaimaiBywordAnalysis.java
+2
-6
src/main/java/com/zhiwei/parse/analysis/MeipaiByWordAnalysis.java
+0
-1
src/main/java/com/zhiwei/parse/analysis/QQKBCommentAnalysis.java
+13
-16
src/main/java/com/zhiwei/parse/analysis/QicheHomeKwyWordAnalysis.java
+0
-4
src/main/java/com/zhiwei/parse/analysis/WangyiHistoryAnalysis.java
+3
-2
src/main/resources/log4j.properties
+2
-1
src/test/java/com/zhiwei/Comment/ChejiaCommentCountTest.java
+42
-0
src/test/java/com/zhiwei/Comment/MaimaiCommentCountTest.java
+40
-0
src/test/java/com/zhiwei/Comment/XueqiuCommentCountTest.java
+48
-0
src/test/java/com/zhiwei/crawler/AiqiyiByWordExample.java
+42
-42
src/test/java/com/zhiwei/crawler/BaijiaAccountExample.java
+89
-89
src/test/java/com/zhiwei/crawler/DayuAccountExample.java
+50
-50
src/test/java/com/zhiwei/crawler/DayuByWordExample.java
+25
-25
src/test/java/com/zhiwei/crawler/MaimaiBywordExample.java
+10
-7
src/test/java/com/zhiwei/crawler/PearVideoByWordExample.java
+1
-1
src/test/java/com/zhiwei/crawler/QQKBCommentExample.java
+5
-2
src/test/java/com/zhiwei/crawler/SouhuCommentCountExample.java
+30
-4
src/test/java/com/zhiwei/crawler/SouhuCommentExample.java
+5
-2
src/test/java/com/zhiwei/hsitory/QQkandianHistoryExample.java
+42
-42
src/test/java/com/zhiwei/keyword/GftaiTest.java
+33
-33
src/test/java/com/zhiwei/keyword/KuaiTousuTest.java
+38
-38
src/test/java/com/zhiwei/keyword/SinaTousuTest.java
+38
-38
src/test/java/com/zhiwei/keyword/YoukuKeyWordTest.java
+49
-0
No files found.
pom.xml
View file @
d979d793
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
<modelVersion>
4.0.0
</modelVersion>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
<artifactId>
articlenewscrawler
</artifactId>
<artifactId>
articlenewscrawler
</artifactId>
<version>
0.0.
4
-SNAPSHOT
</version>
<version>
0.0.
8
-SNAPSHOT
</version>
<name>
articlenewscrawler
</name>
<name>
articlenewscrawler
</name>
<description>
采集凤凰,一点资讯,搜狐历时文章和文章评论
</description>
<description>
采集凤凰,一点资讯,搜狐历时文章和文章评论
</description>
...
...
src/main/java/com/zhiwei/httpclient/HeadGet.java
View file @
d979d793
package
com
.
zhiwei
.
httpclient
;
package
com
.
zhiwei
.
httpclient
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.InetSocketAddress
;
import
java.net.Proxy
;
import
java.net.SocketAddress
;
import
java.net.URLEncoder
;
import
java.net.Proxy.Type
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.Map
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Document
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
public
class
HeadGet
{
public
class
HeadGet
{
/**
/**
...
@@ -409,12 +401,10 @@ public class HeadGet {
...
@@ -409,12 +401,10 @@ public class HeadGet {
* @return
* @return
*/
*/
public
static
Map
<
String
,
String
>
getPearVideoByWordHeaderMap
(
String
cookie
)
{
public
static
Map
<
String
,
String
>
getPearVideoByWordHeaderMap
(
String
cookie
)
{
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
headerMap
.
put
(
"Host"
,
"www.pearvideo.com"
);
headerMap
.
put
(
"Accept-Language"
,
"zh-CN,zh;q=0.9,en;q=0.8"
);
headerMap
.
put
(
"Connection"
,
"keep-alive"
);
headerMap
.
put
(
"Accept"
,
"text/html, */*; q=0.01"
);
headerMap
.
put
(
"Accept-Language"
,
"zh-CN,zh;q=0.9"
);
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
);
headerMap
.
put
(
"Accept"
,
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
);
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"
);
if
(
cookie
!=
null
)
{
if
(
cookie
!=
null
)
{
headerMap
.
put
(
"Cookie"
,
cookie
);
headerMap
.
put
(
"Cookie"
,
cookie
);
}
}
...
@@ -492,8 +482,8 @@ public class HeadGet {
...
@@ -492,8 +482,8 @@ public class HeadGet {
*/
*/
public
static
Map
<
String
,
String
>
getQQKBCommentHeaderMap
(
String
cookie
)
{
public
static
Map
<
String
,
String
>
getQQKBCommentHeaderMap
(
String
cookie
)
{
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
headerMap
.
put
(
"User-Agent"
,
//
headerMap.put("User-Agent",
"天天快报 4.6.2 qnreading (iPhone8,1; iOS 11.2.1; zh_CN; 4.6.2.89)"
);
//
"天天快报 4.6.2 qnreading (iPhone8,1; iOS 11.2.1; zh_CN; 4.6.2.89)");
headerMap
.
put
(
"Accept"
,
headerMap
.
put
(
"Accept"
,
"*/*"
);
"*/*"
);
headerMap
.
put
(
"Accept-Language"
,
"zh-Hans-CN;q=1"
);
headerMap
.
put
(
"Accept-Language"
,
"zh-Hans-CN;q=1"
);
...
@@ -514,7 +504,7 @@ public class HeadGet {
...
@@ -514,7 +504,7 @@ public class HeadGet {
* @return
* @return
*/
*/
public
static
Map
<
String
,
Object
>
getQQKBCommentParamMap
(
String
comment_id
,
String
article_id
){
public
static
Map
<
String
,
Object
>
getQQKBCommentParamMap
(
String
comment_id
,
String
article_id
){
Map
<
String
,
Object
>
param
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
param
=
new
HashMap
<>();
param
.
put
(
"chlid"
,
"daily_timeline"
);
param
.
put
(
"chlid"
,
"daily_timeline"
);
param
.
put
(
"comment_id"
,
comment_id
);
param
.
put
(
"comment_id"
,
comment_id
);
param
.
put
(
"page"
,
1
);
param
.
put
(
"page"
,
1
);
...
@@ -944,15 +934,5 @@ public class HeadGet {
...
@@ -944,15 +934,5 @@ public class HeadGet {
return
paramMap
;
return
paramMap
;
}
}
public
static
void
main
(
String
[]
args
)
throws
UnsupportedEncodingException
{
String
url
=
"http://180.186.38.200/rest/n/feed/profile2"
;
System
.
out
.
println
(
url
);
String
cookie
=
""
;
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getKuaishouParamMap
();
String
result
=
HttpClient
.
executeHttpRequestPost
(
url
,
null
,
headerMap
,
paramMap
);
System
.
out
.
println
(
result
);
System
.
out
.
println
(
result
.
length
());
}
}
}
src/main/java/com/zhiwei/httpclient/HttpClient.java
View file @
d979d793
...
@@ -7,11 +7,16 @@ import java.util.Map;
...
@@ -7,11 +7,16 @@ import java.util.Map;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.tools.httpclient.HttpClientTemplateOK
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
okhttp3.Response
;
public
class
HttpClient
{
public
class
HttpClient
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
HttpClient
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
HttpClient
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
/**
/**
*
*
* @Description (TODO这里用一句话描述这个方法的作用)
* @Description (TODO这里用一句话描述这个方法的作用)
...
@@ -21,22 +26,20 @@ public class HttpClient {
...
@@ -21,22 +26,20 @@ public class HttpClient {
* @throws IOException
* @throws IOException
*/
*/
public
static
String
executeHttpRequestGet
(
String
url
,
Proxy
proxy
,
Map
<
String
,
String
>
headerMap
)
{
public
static
String
executeHttpRequestGet
(
String
url
,
Proxy
proxy
,
Map
<
String
,
String
>
headerMap
)
{
try
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
)){
String
result
=
HttpClientTemplateOK
.
get
(
url
,
proxy
,
headerMap
);
return
response
.
body
().
string
();
return
result
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
.
getMessage
()
);
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
);
return
null
;
return
null
;
}
}
}
}
public
static
String
executeHttpRequestPost
(
String
url
,
Proxy
proxy
,
Map
<
String
,
String
>
headerMap
,
Map
<
String
,
Object
>
paramMap
)
{
public
static
String
executeHttpRequestPost
(
String
url
,
Proxy
proxy
,
Map
<
String
,
String
>
headerMap
,
Map
<
String
,
Object
>
paramMap
)
{
try
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
url
,
headerMap
,
paramMap
),
proxy
)){
String
result
=
HttpClientTemplateOK
.
post
(
url
,
proxy
,
headerMap
,
paramMap
);
return
response
.
body
().
string
();
return
result
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
.
getMessage
()
);
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
);
return
null
;
return
null
;
}
}
...
...
src/main/java/com/zhiwei/parse/Aiqiyi.java
View file @
d979d793
...
@@ -28,7 +28,7 @@ public class Aiqiyi {
...
@@ -28,7 +28,7 @@ public class Aiqiyi {
public
static
List
<
Map
<
String
,
Object
>>
getAiqiyiByWordData
(
String
word
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getAiqiyiByWordData
(
String
word
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getAiqiyiBywordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getAiqiyiBywordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap1
=
HeadGet
.
getAiqiyiHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap1
=
HeadGet
.
getAiqiyiHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
try
{
try
{
for
(
int
i
=
1
;
i
<=
20
;
i
++)
{
for
(
int
i
=
1
;
i
<=
20
;
i
++)
{
String
url
=
"http://so.iqiyi.com/so/q_"
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"_ctg_%E7%94%9F%E6%B4%BB_t_0_page_"
+
i
+
"_p_1_qc_0_rd__site__m_11_bitrate_?af=true"
;
String
url
=
"http://so.iqiyi.com/so/q_"
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"_ctg_%E7%94%9F%E6%B4%BB_t_0_page_"
+
i
+
"_p_1_qc_0_rd__site__m_11_bitrate_?af=true"
;
...
...
src/main/java/com/zhiwei/parse/BiliBili.java
View file @
d979d793
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.io.IOException
;
import
java.io.UnsupportedEncodingException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -23,7 +23,7 @@ import okhttp3.Request;
...
@@ -23,7 +23,7 @@ import okhttp3.Request;
public
class
BiliBili
{
public
class
BiliBili
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
BiliBili
.
class
);
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
BiliBili
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
@SuppressWarnings
(
"unchecked"
)
@SuppressWarnings
(
"unchecked"
)
...
@@ -46,6 +46,7 @@ public class BiliBili {
...
@@ -46,6 +46,7 @@ public class BiliBili {
while
(
more
)
{
while
(
more
)
{
map
.
clear
();
map
.
clear
();
String
ur
=
url
+
"&page="
+
n
;
String
ur
=
url
+
"&page="
+
n
;
System
.
out
.
println
(
ur
);
request
=
HttpRequestBuilder
.
newGetRequest
(
ur
,
header
);
request
=
HttpRequestBuilder
.
newGetRequest
(
ur
,
header
);
String
result2
=
httpBoot
.
syncCall
(
request
,
proxy
).
body
().
string
();
String
result2
=
httpBoot
.
syncCall
(
request
,
proxy
).
body
().
string
();
map
=
BilibilikeyWordAnalysis
.
getData
(
result2
);
map
=
BilibilikeyWordAnalysis
.
getData
(
result2
);
...
@@ -60,13 +61,13 @@ public class BiliBili {
...
@@ -60,13 +61,13 @@ public class BiliBili {
}
}
return
bodyList
;
return
bodyList
;
}
catch
(
UnsupportedEncodingException
e
)
{
}
catch
(
UnsupportedEncodingException
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
"e "
,
e
);
}
catch
(
IO
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
(
);
logger
.
error
(
"e "
,
e
);
}
}
return
null
;
return
Collections
.
emptyList
()
;
}
}
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
...
@@ -88,7 +89,7 @@ public class BiliBili {
...
@@ -88,7 +89,7 @@ public class BiliBili {
headlist
.
add
(
"title"
);
headlist
.
add
(
"title"
);
headlist
.
add
(
"url"
);
headlist
.
add
(
"url"
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
poi
.
exportExcel
(
"D://crawlerdata//bilibili关键词采集数据-
竹鼠
.xlsx"
,
"B站数据"
,
headlist
,
bodyList
);
poi
.
exportExcel
(
"D://crawlerdata//bilibili关键词采集数据-
txh
.xlsx"
,
"B站数据"
,
headlist
,
bodyList
);
}
}
...
...
src/main/java/com/zhiwei/parse/Chejia.java
0 → 100644
View file @
d979d793
package
com
.
zhiwei
.
parse
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
/**
 * Crawler for Autohome "Chejia" articles.
 *
 * <p>The article page embeds an object id ({@code pvTrack.object = ...;}); that id is used to
 * build the URL of the comment API, from which either the comment count or the full, paged
 * comment list is read.
 */
public class Chejia {

    private static final Logger logger = LoggerFactory.getLogger(Chejia.class);

    private static final HttpBoot httpBoot = new HttpBoot();

    /** Number of comments requested per page; also drives the paging stop condition. */
    private static final int PAGE_SIZE = 50;

    /** Comment API endpoint; the article's object id is appended to it. */
    private static final String COMMENT_API =
            "https://reply.autohome.com.cn/api/comments/show.json?appid=21&count=" + PAGE_SIZE + "&id=";

    /** Literal text that precedes the object id inside the article page. */
    private static final String OBJECT_MARKER = "pvTrack.object = ";

    /** Prefix of the {@code RReplyDate} value, e.g. {@code /Date(1548297295000+0800)/}. */
    private static final String DATE_MARKER = "/Date(";

    private static final String TIME_FORMAT = "yyyy-MM-dd HH:mm:ss";

    /**
     * Returns the number of comments of a Chejia article.
     *
     * @param url   article page URL
     * @param proxy proxy to route the requests through
     * @return the value of {@code commentcount} reported by the comment API, or {@code -1} when
     *         the comment URL cannot be resolved, the request fails, or the field is absent
     */
    public static int getChejiaCommentCount(String url, Proxy proxy) {
        String commentUrl = getCommentUrl(url, proxy);
        if (nonNull(commentUrl)) {
            try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(commentUrl), proxy)) {
                JSONObject json = JSONObject.parseObject(response.body().string());
                Integer count = json == null ? null : json.getInteger("commentcount");
                if (count != null) {
                    return count;
                }
                logger.warn("Chejia comment API returned no commentcount, url={}", url);
            } catch (Exception e) {
                logger.error("Failed to read Chejia comment count, url={}", url, e);
            }
        }
        return -1;
    }

    /**
     * Collects all comments of a Chejia article, page by page.
     *
     * <p>Paging stops when a page comes back empty, when {@code page * PAGE_SIZE} reaches the
     * reported {@code commentcount}, or on the first request/parse failure. Comments gathered
     * before a failure are still returned.
     *
     * @param url   article page URL
     * @param proxy proxy to route the requests through
     * @return one map per comment with keys {@code source}, {@code time}, {@code content},
     *         {@code like} and {@code id}; an empty list when the comment URL cannot be resolved
     */
    public static List<Map<String, Object>> getChejiaComment(String url, Proxy proxy) {
        String commentUrl = getCommentUrl(url, proxy);
        if (commentUrl == null) {
            return Collections.emptyList();
        }

        List<Map<String, Object>> bodyList = new ArrayList<>();
        int page = 1;
        while (true) {
            String pageUrl = commentUrl + "&page=" + page;
            try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl), proxy)) {
                JSONObject json = JSONObject.parseObject(response.body().string());
                JSONArray comments = json == null ? null : json.getJSONArray("commentlist");
                if (comments == null || comments.isEmpty()) {
                    // Nothing (more) to read; do not keep paging on an empty answer.
                    break;
                }
                for (int i = 0; i < comments.size(); i++) {
                    bodyList.add(toCommentMap(comments.getJSONObject(i)));
                }
                int total = json.getIntValue("commentcount");
                logger.info(" 一共采集 了 {} 条 采集到 {} 页 一共有 {} 条", bodyList.size(), page, total);
                if (page * PAGE_SIZE >= total) {
                    break;
                }
            } catch (Exception e) {
                logger.error("Failed to read Chejia comments, url={}, page={}", url, page, e);
                break;
            }
            // Throttle only between pages, not after the last one or after a failure.
            ZhiWeiTools.sleep(2000);
            page++;
        }
        return bodyList;
    }

    /** Maps one entry of {@code commentlist} to the flat structure returned to callers. */
    private static Map<String, Object> toCommentMap(JSONObject data) {
        Map<String, Object> map = new HashMap<>();
        map.put("source", data.getString("RMemberName"));
        map.put("time", formatReplyDate(data.getString("RReplyDate")));
        map.put("content", data.getString("RContent"));
        map.put("like", data.get("RUp"));
        map.put("id", data.getString("ReplyId"));
        return map;
    }

    /**
     * Converts a {@code /Date(<millis>[+zone])/} value into a formatted timestamp.
     *
     * @return the formatted time, the raw value when it does not have the expected shape, or
     *         {@code null} when the field is missing, so a single odd comment no longer aborts
     *         the whole crawl
     */
    private static Object formatReplyDate(String raw) {
        if (raw == null) {
            return null;
        }
        int start = raw.indexOf(DATE_MARKER);
        if (start < 0) {
            return raw;
        }
        start += DATE_MARKER.length();
        int end = start;
        if (end < raw.length() && raw.charAt(end) == '-') {
            end++;
        }
        while (end < raw.length() && Character.isDigit(raw.charAt(end))) {
            end++;
        }
        try {
            long millis = Long.parseLong(raw.substring(start, end));
            return TimeParse.dateFormartString(new Date(millis), TIME_FORMAT);
        } catch (NumberFormatException e) {
            logger.warn("Unparseable RReplyDate '{}'", raw);
            return raw;
        }
    }

    /**
     * Downloads the article page and builds the comment API URL from the embedded object id.
     *
     * @return the comment API URL (without the {@code page} parameter), or {@code null} when the
     *         page cannot be fetched or carries no object id
     */
    private static String getCommentUrl(String url, Proxy proxy) {
        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
            String html = response.body().string();
            // Literal search: the marker contains '.', which String.split would treat as a wildcard.
            int start = html.indexOf(OBJECT_MARKER);
            if (start < 0) {
                logger.warn("No pvTrack.object found in Chejia page, url={}", url);
                return null;
            }
            start += OBJECT_MARKER.length();
            int end = html.indexOf(';', start);
            String objectId = (end < 0 ? html.substring(start) : html.substring(start, end))
                    .replace("\"", "")
                    .trim();
            if (objectId.isEmpty()) {
                logger.warn("Empty pvTrack.object in Chejia page, url={}", url);
                return null;
            }
            return COMMENT_API + objectId;
        } catch (Exception e) {
            logger.error("Failed to resolve Chejia comment URL, url={}", url, e);
        }
        return null;
    }
}
src/main/java/com/zhiwei/parse/Douyin.java
View file @
d979d793
...
@@ -25,6 +25,7 @@ public class Douyin {
...
@@ -25,6 +25,7 @@ public class Douyin {
* @param url
* @param url
* @return
* @return
*/
*/
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
Map
<
String
,
Object
>>
getDouyinHotData
(
String
url
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getDouyinHotData
(
String
url
,
Proxy
proxy
)
{
String
iid
=
url
.
split
(
"iid="
)[
1
].
split
(
"&"
)[
0
];
String
iid
=
url
.
split
(
"iid="
)[
1
].
split
(
"&"
)[
0
];
String
ch_id
=
url
.
split
(
"challenge/"
)[
1
].
split
(
"\\?"
)[
0
];
String
ch_id
=
url
.
split
(
"challenge/"
)[
1
].
split
(
"\\?"
)[
0
];
...
...
src/main/java/com/zhiwei/parse/Gftai.java
View file @
d979d793
...
@@ -3,7 +3,6 @@ package com.zhiwei.parse;
...
@@ -3,7 +3,6 @@ package com.zhiwei.parse;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
...
src/main/java/com/zhiwei/parse/Maimai.java
View file @
d979d793
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
static
com
.
alibaba
.
fastjson
.
JSON
.
toJavaObject
;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.MaimaiBywordAnalysis
;
import
com.zhiwei.parse.analysis.MaimaiBywordAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
Maimai
{
public
class
Maimai
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Maimai
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Maimai
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
MaimaiBywordAnalysis
maimaiBywordAnalysis
=
new
MaimaiBywordAnalysis
();
private
static
MaimaiBywordAnalysis
maimaiBywordAnalysis
=
new
MaimaiBywordAnalysis
();
/**
*
* @Description 实名动态
* @param key
* @param cookie
* @param time
* @param proxy
* @return
*/
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
key
,
String
cookie
,
String
time
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
key
,
String
cookie
,
String
time
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getMaimaiKeywordHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getMaimaiKeywordHeaderMap
(
cookie
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
boolean
f
=
true
;
boolean
f
=
true
;
try
{
try
{
String
url
=
"https://maimai.cn/search/feeds?query="
+
URLEncoder
.
encode
(
key
,
"utf-8"
)+
"&limit=20&offset=0&highlight=true&sortby=time&jsononly=1"
;
String
url
=
"https://maimai.cn/search/feeds?query="
+
URLEncoder
.
encode
(
key
,
"utf-8"
)+
"&limit=20&offset=0&highlight=true&sortby=time&jsononly=1"
;
...
@@ -32,11 +54,11 @@ public class Maimai {
...
@@ -32,11 +54,11 @@ public class Maimai {
Map
<
String
,
Object
>
map
=
maimaiBywordAnalysis
.
getData
(
result
,
time
);
Map
<
String
,
Object
>
map
=
maimaiBywordAnalysis
.
getData
(
result
,
time
);
f
=
(
boolean
)
map
.
get
(
"hasMore"
);
f
=
(
boolean
)
map
.
get
(
"hasMore"
);
List
<
Map
<
String
,
Object
>>
daList
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
List
<
Map
<
String
,
Object
>>
daList
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
if
(
daList
!=
null
&&
daList
.
size
()
>
0
)
{
if
(
daList
!=
null
&&
!
daList
.
isEmpty
()
)
{
dataList
.
addAll
(
daList
);
dataList
.
addAll
(
daList
);
url
=
"https://maimai.cn/search/feeds?query="
+
URLEncoder
.
encode
(
key
,
"utf-8"
)+
"&limit=20&offset="
+
i
+
"&highlight=true&sortby=time&jsononly=1"
;
url
=
"https://maimai.cn/search/feeds?query="
+
URLEncoder
.
encode
(
key
,
"utf-8"
)+
"&limit=20&offset="
+
i
+
"&highlight=true&sortby=time&jsononly=1"
;
i
+=
20
;
i
+=
20
;
logger
.
info
(
"{}
==采集到的数据量=="
+
dataList
.
size
(),
key
);
logger
.
info
(
"{}
==采集到的数据量== {}"
,
dataList
.
size
(),
key
);
ZhiWeiTools
.
sleep
(
2000
);
ZhiWeiTools
.
sleep
(
2000
);
}
else
{
}
else
{
break
;
break
;
...
@@ -48,9 +70,19 @@ public class Maimai {
...
@@ -48,9 +70,19 @@ public class Maimai {
return
dataList
;
return
dataList
;
}
}
/**
*
* @Description 职言交流
* @param key
* @param cookie
* @param time
* @param proxy
* @return
*/
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
Map
<
String
,
Object
>>
getDataByNoName
(
String
key
,
String
cookie
,
String
time
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getDataByNoName
(
String
key
,
String
cookie
,
String
time
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getMaimaiKeywordHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getMaimaiKeywordHeaderMap
(
cookie
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
boolean
f
=
true
;
boolean
f
=
true
;
try
{
try
{
String
url
=
"https://maimai.cn/search/gossips?query="
+
URLEncoder
.
encode
(
key
,
"utf-8"
)+
"&limit=20&offset=0&highlight=true&sortby=time&jsononly=1"
;
String
url
=
"https://maimai.cn/search/gossips?query="
+
URLEncoder
.
encode
(
key
,
"utf-8"
)+
"&limit=20&offset=0&highlight=true&sortby=time&jsononly=1"
;
...
@@ -64,7 +96,7 @@ public class Maimai {
...
@@ -64,7 +96,7 @@ public class Maimai {
dataList
.
addAll
(
daList
);
dataList
.
addAll
(
daList
);
url
=
"https://maimai.cn/search/gossips?query="
+
URLEncoder
.
encode
(
key
,
"utf-8"
)+
"&limit=20&offset="
+
i
+
"highlight=true&sortby=time&jsononly=1"
;
url
=
"https://maimai.cn/search/gossips?query="
+
URLEncoder
.
encode
(
key
,
"utf-8"
)+
"&limit=20&offset="
+
i
+
"highlight=true&sortby=time&jsononly=1"
;
i
+=
20
;
i
+=
20
;
logger
.
info
(
"{}
==采集到的数据量=="
+
dataList
.
size
(),
key
);
logger
.
info
(
"{}
==采集到的数据量== {} "
,
dataList
.
size
(),
key
);
ZhiWeiTools
.
sleep
(
2000
);
ZhiWeiTools
.
sleep
(
2000
);
}
else
{
}
else
{
break
;
break
;
...
@@ -76,4 +108,80 @@ public class Maimai {
...
@@ -76,4 +108,80 @@ public class Maimai {
return
dataList
;
return
dataList
;
}
}
/**
 * Reads the like / spread / comment counters of a Maimai gossip from its detail page.
 *
 * <p>Example URL:
 * https://maimai.cn/web/gossip_detail?encode_id=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MTk2MzEyNjYsImlhdCI6MTU0ODI5NzI5NX0.N6SPmcf-fyitLNomzY-a8BEY31eseYnvG7RTUQ3jxYY
 *
 * <p>The page embeds its data as {@code JSON.parse("...");</script>}; that payload is cut out,
 * unicode-unescaped and parsed, and the counters are taken from {@code data.gossip}.
 *
 * @param url   gossip detail page URL
 * @param proxy proxy holder used for the request
 * @return a map with keys {@code like}, {@code spreads}, {@code cmts}, {@code gid},
 *         {@code title} and {@code author}; an empty, unmodifiable map when the page cannot be
 *         fetched or does not contain the expected payload
 */
public static Map<String, Object> getMaiaiCount(String url, ProxyHolder proxy) {
    try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
        String html = response.body().string();

        String[] afterMarker = html.split("JSON.parse\\(\"");
        if (afterMarker.length < 2) {
            logger.warn("No embedded JSON payload in Maimai page, url={}", url);
            return Collections.emptyMap();
        }
        String payload = afterMarker[1].split("\"\\);\\</script\\>")[0];
        payload = ZhiWeiTools.decodeUnicode(payload);

        JSONObject json = JSONObject.parseObject(payload);
        JSONObject dataNode = json == null ? null : json.getJSONObject("data");
        JSONObject gossip = dataNode == null ? null : dataNode.getJSONObject("gossip");
        if (gossip == null) {
            logger.warn("Maimai payload has no data.gossip node, url={}", url);
            return Collections.emptyMap();
        }

        Map<String, Object> map = new HashMap<>();
        map.put("like", gossip.getInteger("likes"));
        map.put("spreads", gossip.getInteger("spreads"));
        map.put("cmts", gossip.getInteger("cmts"));
        map.put("gid", gossip.getLong("id"));
        map.put("title", gossip.getString("text"));
        map.put("author", gossip.getString("author"));
        return map;
    } catch (Exception e) {
        logger.error("Failed to read Maimai counters, url={}", url, e);
    }
    return Collections.emptyMap();
}
/**
 * Collects all comments of a Maimai gossip.
 *
 * <p>Example URL:
 * https://maimai.cn/web/gossip_detail?encode_id=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MTk2MzEyNjYsImlhdCI6MTU0ODI5NzI5NX0.N6SPmcf-fyitLNomzY-a8BEY31eseYnvG7RTUQ3jxYY
 *
 * <p>The gossip id is resolved through {@link #getMaiaiCount(String, ProxyHolder)}; comments are
 * then read page by page (starting at page 0) until the API reports {@code more == 0}, returns
 * no comments, returns an empty body, or a request fails. Every comment map additionally
 * carries {@code fromUrl} and the counters returned by {@code getMaiaiCount}.
 *
 * @param url   gossip detail page URL
 * @param proxy proxy holder used for the requests
 * @return the comments gathered so far (possibly empty); an empty, unmodifiable list when the
 *         gossip id cannot be resolved
 */
public static List<Map<String, Object>> getMaimaiCommentList(String url, ProxyHolder proxy) {
    Map<String, Object> mmid = getMaiaiCount(url, proxy);
    // getMaiaiCount signals failure with an empty map, so a null check alone would let
    // "gid=null" requests through.
    if (mmid == null || mmid.get("gid") == null) {
        logger.warn("Cannot resolve Maimai gossip id, url={}", url);
        return Collections.emptyList();
    }

    String gid = String.valueOf(mmid.get("gid"));
    List<Map<String, Object>> dataList = new ArrayList<>();
    int page = 0;
    boolean more = true;
    while (more) {
        String link = "https://maimai.cn/sdk/web/gossip/getcmts?gid=" + gid + "&page=" + page
                + "&count=50&hotcmts_limit_count=100";
        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(link), proxy)) {
            String htmlBody = response.body().string();
            JSONObject dataJson = (htmlBody == null || htmlBody.isEmpty())
                    ? null
                    : JSONObject.parseObject(htmlBody);
            if (dataJson == null) {
                // An empty answer never changes; retrying the same page would loop forever.
                break;
            }

            JSONArray commentJson = dataJson.getJSONArray("comments");
            if (commentJson == null || commentJson.isEmpty()) {
                break;
            }
            for (int i = 0; i < commentJson.size(); i++) {
                JSONObject json = commentJson.getJSONObject(i);
                @SuppressWarnings("unchecked")
                Map<String, Object> dataMap = toJavaObject(json, Map.class);
                dataMap.put("fromUrl", url);
                dataMap.putAll(mmid);
                dataList.add(dataMap);
            }
            page++;

            more = dataJson.getIntValue("more") != 0;
        } catch (Exception e) {
            // Stop instead of re-requesting the same page in a tight loop.
            logger.error("Failed to read Maimai comments, url={}, page={}", url, page, e);
            break;
        }
    }
    return dataList;
}
}
}
src/main/java/com/zhiwei/parse/PearVideo.java
View file @
d979d793
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.io.UnsupportedEncodingException
;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.PearVideoByWordAnalysis
;
import
com.zhiwei.parse.analysis.PearVideoByWordAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -26,24 +26,31 @@ public class PearVideo {
...
@@ -26,24 +26,31 @@ public class PearVideo {
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getPearVideoData
(
String
word
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getPearVideoData
(
String
word
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getPearVideoByWordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
headerMap
.
put
(
"Accept-Language"
,
"zh-CN,zh;q=0.9,en;q=0.8"
);
headerMap
.
put
(
"Accept"
,
"text/html, */*; q=0.01"
);
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
);
headerMap
.
put
(
":authority"
,
"www.pearvideo.com"
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
try
{
try
{
headerMap
.
put
(
"referer"
,
"https://www.pearvideo.com/search.jsp?start=0&k="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
));
for
(
int
i
=
0
;
i
<=
9000
;
i
+=
10
)
{
for
(
int
i
=
0
;
i
<=
9000
;
i
+=
10
)
{
String
url
=
"http
://www.pearvideo.com/search_loading.jsp?start="
+
i
+
"&k="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)
;
String
url
=
"http
s://www.pearvideo.com/search_loading.jsp?start="
+
i
+
"&k="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)
+
"&sort=first_publish_time"
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
dataList1
=
pearVideoByWordAnalysis
.
getPearVideoData
(
result
);
List
<
Map
<
String
,
Object
>>
dataList1
=
pearVideoByWordAnalysis
.
getPearVideoData
(
result
);
if
(
dataList1
!=
null
&&
dataList1
.
size
()
>
0
)
{
if
(
dataList1
!=
null
&&
!
dataList1
.
isEmpty
()
)
{
dataList
.
addAll
(
dataList1
);
dataList
.
addAll
(
dataList1
);
}
}
System
.
out
.
println
(
i
+
"=========="
+
dataList
.
size
());
System
.
out
.
println
(
i
+
"=========="
+
dataList
.
size
());
ZhiWeiTools
.
sleep
(
4000
);
ZhiWeiTools
.
sleep
(
4000
);
}
}
return
dataList
;
return
dataList
;
}
catch
(
UnsupportedEncodingException
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"获取数据出错"
,
e
.
getMessage
());
logger
.
error
(
"获取数据出错 {}"
,
e
);
e
.
printStackTrace
();
return
Collections
.
emptyList
();
return
null
;
}
}
}
}
...
...
src/main/java/com/zhiwei/parse/QQKB.java
View file @
d979d793
...
@@ -11,16 +11,21 @@ import org.slf4j.LoggerFactory;
...
@@ -11,16 +11,21 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.bean.QQkbUser
;
import
com.zhiwei.bean.QQkbUser
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.QQKBAccountAnalysis
;
import
com.zhiwei.parse.analysis.QQKBAccountAnalysis
;
import
com.zhiwei.parse.analysis.QQKBCommentAnalysis
;
import
com.zhiwei.parse.analysis.QQKBCommentAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
QQKB
{
public
class
QQKB
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
QQKB
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
QQKB
.
class
);
private
static
QQKBAccountAnalysis
qqAccountAnalysis
=
new
QQKBAccountAnalysis
();
private
static
QQKBAccountAnalysis
qqAccountAnalysis
=
new
QQKBAccountAnalysis
();
private
static
QQKBCommentAnalysis
qqkbCommentAnalysis
=
new
QQKBCommentAnalysis
();
private
static
QQKBCommentAnalysis
qqkbCommentAnalysis
=
new
QQKBCommentAnalysis
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
/**
/**
*
*
...
@@ -113,18 +118,22 @@ public class QQKB {
...
@@ -113,18 +118,22 @@ public class QQKB {
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQKBCommentParamMap
(
comment_id
,
article_id
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQKBCommentParamMap
(
comment_id
,
article_id
);
int
i
=
1
;
int
i
=
1
;
while
(
true
)
{
while
(
true
)
{
try
{
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.cnews.qq.com/getQQNewsComment"
,
proxy
,
headerMap
,
paramMap
);
paramMap
.
clear
();
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.cnews.qq.com/getQQNewsComment"
,
ProxyFactory
.
getNatProxy
(),
headerMap
,
paramMap
);
List
<
Map
<
String
,
Object
>>
lists
=
qqkbCommentAnalysis
.
getCommentData
(
result
,
null
,
comment_id
,
article_id
,
proxy
);
paramMap
.
clear
();
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
List
<
Map
<
String
,
Object
>>
lists
=
qqkbCommentAnalysis
.
getCommentData
(
result
,
null
,
comment_id
,
article_id
,
proxy
);
break
;
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
break
;
}
dataList
.
addAll
(
lists
);
paramMap
=
qqkbCommentAnalysis
.
getParamMap
(
result
,
i
,
comment_id
,
article_id
);
i
++;
ZhiWeiTools
.
sleep
(
300
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
}
dataList
.
addAll
(
lists
);
paramMap
=
qqkbCommentAnalysis
.
getParamMap
(
result
,
i
,
comment_id
,
article_id
);
i
++;
ZhiWeiTools
.
sleep
(
5000
);
}
}
return
dataList
;
return
dataList
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/main/java/com/zhiwei/parse/QQKandian.java
View file @
d979d793
...
@@ -13,8 +13,6 @@ import java.util.regex.Pattern;
...
@@ -13,8 +13,6 @@ import java.util.regex.Pattern;
import
org.jsoup.Jsoup
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
...
@@ -31,7 +29,6 @@ import okhttp3.Request;
...
@@ -31,7 +29,6 @@ import okhttp3.Request;
public
class
QQKandian
{
public
class
QQKandian
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
QQKandian
.
class
);
public
List
<
QQKandianUser
>
getUser
(
String
name
,
Proxy
proxy
)
{
public
List
<
QQKandianUser
>
getUser
(
String
name
,
Proxy
proxy
)
{
if
(
name
!=
null
&&
name
.
length
()
>
0
)
{
if
(
name
!=
null
&&
name
.
length
()
>
0
)
{
...
...
src/main/java/com/zhiwei/parse/QicheHome.java
View file @
d979d793
...
@@ -3,7 +3,6 @@ package com.zhiwei.parse;
...
@@ -3,7 +3,6 @@ package com.zhiwei.parse;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
...
src/main/java/com/zhiwei/parse/SinaTousu.java
View file @
d979d793
...
@@ -5,7 +5,6 @@ import java.io.UnsupportedEncodingException;
...
@@ -5,7 +5,6 @@ import java.io.UnsupportedEncodingException;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
...
src/main/java/com/zhiwei/parse/SouBao.java
View file @
d979d793
...
@@ -68,6 +68,7 @@ public class SouBao {
...
@@ -68,6 +68,7 @@ public class SouBao {
poi
.
exportExcel
(
"D:\\crawlerdata\\搜报网-EA 品牌 关键词-06.11-06.12.xlsx"
,
"sa"
,
headList
,
bodyList
);
poi
.
exportExcel
(
"D:\\crawlerdata\\搜报网-EA 品牌 关键词-06.11-06.12.xlsx"
,
"sa"
,
headList
,
bodyList
);
}
}
@SuppressWarnings
(
"unchecked"
)
public
static
Map
<
String
,
String
>
getdata
()
{
public
static
Map
<
String
,
String
>
getdata
()
{
Map
<
String
,
String
>
map
=
new
HashMap
<
String
,
String
>();
Map
<
String
,
String
>
map
=
new
HashMap
<
String
,
String
>();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
...
...
src/main/java/com/zhiwei/parse/Souhu.java
View file @
d979d793
...
@@ -13,6 +13,7 @@ import org.slf4j.LoggerFactory;
...
@@ -13,6 +13,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.SouhuAccountAnalysis
;
import
com.zhiwei.parse.analysis.SouhuAccountAnalysis
;
...
@@ -144,8 +145,8 @@ public class Souhu {
...
@@ -144,8 +145,8 @@ public class Souhu {
int
j
=
1
;
int
j
=
1
;
try
{
try
{
while
(
true
)
{
while
(
true
)
{
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
,
proxy
)
+
"&page_no="
+
j
;
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
,
ProxyFactory
.
getNatProxy
()
)
+
"&page_no="
+
j
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
newurl
,
proxy
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
newurl
,
ProxyFactory
.
getNatProxy
()
,
headerMap
);
System
.
out
.
println
(
newurl
);
System
.
out
.
println
(
newurl
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"jsonObject"
).
getJSONArray
(
"comments"
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"jsonObject"
).
getJSONArray
(
"comments"
);
...
@@ -158,7 +159,7 @@ public class Souhu {
...
@@ -158,7 +159,7 @@ public class Souhu {
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
j
++;
j
++;
ZhiWeiTools
.
sleep
(
300
0
);
ZhiWeiTools
.
sleep
(
300
);
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/main/java/com/zhiwei/parse/Toutiao.java
View file @
d979d793
...
@@ -7,9 +7,6 @@ import java.util.ArrayList;
...
@@ -7,9 +7,6 @@ import java.util.ArrayList;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.ToutiaoKeyWordAnalysis
;
import
com.zhiwei.parse.analysis.ToutiaoKeyWordAnalysis
;
...
@@ -17,7 +14,6 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
...
@@ -17,7 +14,6 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
public
class
Toutiao
{
public
class
Toutiao
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Toutiao
.
class
);
private
static
ToutiaoKeyWordAnalysis
toutiaoKeyWordAnalysis
=
new
ToutiaoKeyWordAnalysis
();
private
static
ToutiaoKeyWordAnalysis
toutiaoKeyWordAnalysis
=
new
ToutiaoKeyWordAnalysis
();
...
...
src/main/java/com/zhiwei/parse/Xueqiu.java
View file @
d979d793
...
@@ -5,6 +5,8 @@ import java.io.UnsupportedEncodingException;
...
@@ -5,6 +5,8 @@ import java.io.UnsupportedEncodingException;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collection
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -12,13 +14,14 @@ import java.util.Map;
...
@@ -12,13 +14,14 @@ import java.util.Map;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.parse.analysis.XueqiuKeyWordAnalysis
;
import
com.zhiwei.parse.analysis.XueqiuKeyWordAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Request
;
import
okhttp3.Request
;
import
okhttp3.Response
;
public
class
Xueqiu
{
public
class
Xueqiu
{
...
@@ -60,8 +63,26 @@ public class Xueqiu {
...
@@ -60,8 +63,26 @@ public class Xueqiu {
break
;
break
;
}
}
}
}
return
bodyList
;
return
bodyList
;
}
}
/**
 * Fetches a Xueqiu status page and extracts its like / repost / comment counters.
 *
 * <p>The counters are read from the JSON object that the page assigns to
 * {@code window.SNOWMAN_STATUS}; the text between that assignment and the following
 * {@code window.SNOWMAN_TARGET} marker is parsed as JSON.
 *
 * @param url   address of the status page to download
 * @param proxy proxy handed to the HTTP call
 * @return a map with the keys {@code like}, {@code repostCount} and {@code commentCount};
 *         an empty immutable map when the page cannot be fetched or parsed
 */
public static Map<String, Object> getUrlData(String url, Proxy proxy) {
    try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
        String result = response.body().string();

        // Check for the marker explicitly instead of relying on an
        // ArrayIndexOutOfBoundsException when the page layout is different.
        String[] afterStatus = result.split("window.SNOWMAN_STATUS = ");
        if (afterStatus.length < 2) {
            logger.warn(" 雪球 数据转评赞获取失败 SNOWMAN_STATUS not found url = {}", url);
            return Collections.emptyMap();
        }
        String jsondata = afterStatus[1].split("window.SNOWMAN_TARGET")[0];

        // Drop the trailing ';' that terminates the JavaScript assignment, if there is one.
        int terminator = jsondata.lastIndexOf(";");
        if (terminator >= 0) {
            jsondata = jsondata.substring(0, terminator);
        }

        JSONObject json = JSONObject.parseObject(jsondata);
        Map<String, Object> map = new HashMap<>();
        map.put("like", json.getInteger("like_count"));
        map.put("repostCount", json.getInteger("retweet_count"));
        map.put("commentCount", json.getInteger("reply_count"));
        return map;
    } catch (Exception e) {
        // The throwable must be the LAST argument so that SLF4J logs its stack trace
        // instead of formatting it into a placeholder with toString().
        logger.error(" 雪球 数据转评赞获取失败 url = {}", url, e);
    }
    return Collections.emptyMap();
}
}
}
src/main/java/com/zhiwei/parse/Yiche.java
View file @
d979d793
...
@@ -86,6 +86,7 @@ public class Yiche {
...
@@ -86,6 +86,7 @@ public class Yiche {
ZhiWeiTools
.
sleep
(
2000
);
ZhiWeiTools
.
sleep
(
2000
);
page
++;
page
++;
}
}
return
bodyList
;
}
}
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
...
...
src/main/java/com/zhiwei/parse/Youku.java
0 → 100644
View file @
d979d793
package
com
.
zhiwei
.
parse
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
okhttp3.Response
;
/**
 * Keyword search crawler for the Youku video search pages ({@code so.youku.com}).
 *
 * <p>Each result page embeds its markup as the {@code html} field of a JSON object passed to
 * {@code bigview.view(...)}; that fragment is parsed with jsoup and one record is produced
 * per result entry that carries an upload-time label.
 */
public class Youku {

    private static final Logger logger = LoggerFactory.getLogger(Youku.class);

    private static final HttpBoot httpBoot = new HttpBoot();

    /** Value sent as the {@code aaid} query parameter of every search request. */
    private static final String AAID = "9cae49f0e031664b00d8f9c108e586ab";

    /** Number of result pages requested per keyword (pages 1..MAX_PAGE). */
    private static final int MAX_PAGE = 20;

    private static final String TITLE_LINK_SELECTOR = "div.mod-main > div.mod-header > h2 > a";

    private static final String INFO_SELECTOR = "div.mod-main > div.mod-info > p";

    /**
     * Collects search results for a keyword, ordered by creation time on the Youku side.
     *
     * <p>A page that fails to download or parse is logged and skipped; the remaining pages
     * are still requested.
     *
     * @param word the search keyword; it is URL-encoded as UTF-8 before being sent
     * @return one map per result with the keys {@code title}, {@code url}, {@code time} and
     *         {@code uper}; never {@code null}, possibly empty
     */
    public static List<Map<String, Object>> getDataList(String word) {
        List<Map<String, Object>> list = new ArrayList<>();
        // Everything except the page number is identical for all requests, so build it once.
        String urlPrefix = "https://so.youku.com/search_video/q_" + URLCodeUtil.getURLEncode(word, "UTF-8")
                + "?spm=a2h0k.11417342.filter.dnew&orderfield=createtime&aaid=" + AAID + "&pg=";
        for (int i = 1; i <= MAX_PAGE; i++) {
            String url = urlPrefix + i;
            logger.info(" request url = {} ", url);
            try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), ProxyFactory.getNatProxy())) {
                String jsondata = extractViewJson(response.body().string());
                if (jsondata == null) {
                    logger.warn(" bigview.view payload not found, page = {} url = {} ", i, url);
                    continue;
                }
                JSONObject json = JSONObject.parseObject(jsondata);
                Document doc = Jsoup.parse(json.getString("html"));
                Elements elements = doc.select("div.sk-result-list").select("div.sk-mod");
                for (Element element : elements) {
                    Map<String, Object> record = toRecord(element);
                    if (record != null) {
                        list.add(record);
                    }
                }
                logger.info(" i = {} dataSize = {} ", i, list.size());
            } catch (Exception e) {
                // Throwable goes last so SLF4J prints the stack trace; page and url give context.
                logger.error(" Youku search failed, page = {} url = {} ", i, url, e);
            }
        }
        return list;
    }

    /**
     * Cuts the JSON argument of the first {@code bigview.view(...)} call out of the page source.
     *
     * @param html raw page source
     * @return the text between {@code bigview.view(} and the next {@code )</script>}, or
     *         {@code null} when the opening marker is missing
     */
    private static String extractViewJson(String html) {
        String[] afterMarker = html.split("bigview.view\\(");
        if (afterMarker.length < 2) {
            return null;
        }
        String[] beforeClose = afterMarker[1].split("\\)\\</script\\>");
        return beforeClose.length == 0 ? null : beforeClose[0];
    }

    /**
     * Converts one search result node into a record.
     *
     * @param element a {@code div.sk-mod} node of the result list
     * @return the record, or {@code null} when the node's info line has no upload-time label
     */
    private static Map<String, Object> toRecord(Element element) {
        Elements titleLink = element.select(TITLE_LINK_SELECTOR);
        String info = element.select(INFO_SELECTOR).text();
        if (!info.contains("上传时间:")) {
            return null;
        }
        Map<String, Object> map = new HashMap<>();
        map.put("title", titleLink.text());
        // NOTE(review): presumably the href is protocol-relative ("//v.youku.com/...") - confirm.
        map.put("url", "https:" + titleLink.attr("href"));
        // First space-separated token that remains once the upload-time label is removed.
        map.put("time", info.replaceAll("上传时间:", "").split(" ")[0]);
        // Removes whatever precedes the uploader label; the label itself stays in the value.
        map.put("uper", info.replace(info.split("上传者:")[0], ""));
        return map;
    }
}
src/main/java/com/zhiwei/parse/analysis/AiqiyiByWordAnalysis.java
View file @
d979d793
...
@@ -13,13 +13,19 @@ import org.jsoup.select.Elements;
...
@@ -13,13 +13,19 @@ import org.jsoup.select.Elements;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
AiqiyiByWordAnalysis
{
public
class
AiqiyiByWordAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
AiqiyiByWordAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
AiqiyiByWordAnalysis
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
/**
/**
*
*
* @Description 解析出所有有用链接
* @Description 解析出所有有用链接
...
@@ -45,9 +51,9 @@ public class AiqiyiByWordAnalysis {
...
@@ -45,9 +51,9 @@ public class AiqiyiByWordAnalysis {
}
}
public
Map
<
String
,
Object
>
getAiqiyiData
(
String
url
,
Map
<
String
,
String
>
headerMap
,
Proxy
proxy
)
{
public
Map
<
String
,
Object
>
getAiqiyiData
(
String
url
,
Map
<
String
,
String
>
headerMap
,
Proxy
proxy
)
{
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<>();
try
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
))
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
result
=
response
.
body
().
string
(
);
Document
doc
=
Jsoup
.
parse
(
result
);
Document
doc
=
Jsoup
.
parse
(
result
);
String
time
=
doc
.
select
(
"#widget-vshort-ptime"
).
text
();
String
time
=
doc
.
select
(
"#widget-vshort-ptime"
).
text
();
if
(!
time
.
contains
(
"2017"
))
{
if
(!
time
.
contains
(
"2017"
))
{
...
@@ -68,7 +74,7 @@ public class AiqiyiByWordAnalysis {
...
@@ -68,7 +74,7 @@ public class AiqiyiByWordAnalysis {
System
.
out
.
println
(
dataMap
.
toString
());
System
.
out
.
println
(
dataMap
.
toString
());
return
dataMap
;
return
dataMap
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析出错
"
,
e
.
getMessage
()
);
logger
.
error
(
"解析出错
{}"
,
e
);
return
dataMap
;
return
dataMap
;
}
}
}
}
...
...
src/main/java/com/zhiwei/parse/analysis/BaijiaAccountAnalysis.java
View file @
d979d793
...
@@ -14,13 +14,17 @@ import org.slf4j.LoggerFactory;
...
@@ -14,13 +14,17 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
BaijiaAccountAnalysis
{
public
class
BaijiaAccountAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
BaijiaAccountAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
BaijiaAccountAnalysis
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
public
Map
<
String
,
Object
>
getBaijiaAccount2Data
(
JSONObject
data
)
{
public
Map
<
String
,
Object
>
getBaijiaAccount2Data
(
JSONObject
data
)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
...
@@ -159,8 +163,8 @@ public class BaijiaAccountAnalysis {
...
@@ -159,8 +163,8 @@ public class BaijiaAccountAnalysis {
public
String
getBaijiaContent
(
String
url
,
Proxy
proxy
)
{
public
String
getBaijiaContent
(
String
url
,
Proxy
proxy
)
{
ZhiWeiTools
.
sleep
(
2000
);
ZhiWeiTools
.
sleep
(
2000
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getBaijiaAccountHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getBaijiaAccountHeaderMap
(
null
);
try
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
))
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
result
=
response
.
body
().
string
(
);
Document
document
=
Jsoup
.
parse
(
result
);
Document
document
=
Jsoup
.
parse
(
result
);
return
document
.
select
(
"section.news-content"
).
text
();
return
document
.
select
(
"section.news-content"
).
text
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/main/java/com/zhiwei/parse/analysis/DayuByWordAnalysis.java
View file @
d979d793
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
...
@@ -14,13 +14,17 @@ import org.slf4j.LoggerFactory;
...
@@ -14,13 +14,17 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
DayuByWordAnalysis
{
public
class
DayuByWordAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
DayuByWordAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
DayuByWordAnalysis
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
public
List
<
Map
<
String
,
Object
>>
getDayuByWordData
(
String
result
,
Proxy
proxy
)
{
public
List
<
Map
<
String
,
Object
>>
getDayuByWordData
(
String
result
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
...
@@ -28,7 +32,7 @@ public class DayuByWordAnalysis {
...
@@ -28,7 +32,7 @@ public class DayuByWordAnalysis {
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"iflowItems"
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"iflowItems"
);
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
map
.
put
(
"title"
,
data
.
getString
(
"title"
).
replaceAll
(
"<.*?>"
,
""
));
map
.
put
(
"title"
,
data
.
getString
(
"title"
).
replaceAll
(
"<.*?>"
,
""
));
String
url
=
data
.
getString
(
"zzd_url"
);
String
url
=
data
.
getString
(
"zzd_url"
);
...
@@ -42,7 +46,7 @@ public class DayuByWordAnalysis {
...
@@ -42,7 +46,7 @@ public class DayuByWordAnalysis {
}
}
return
dataList
;
return
dataList
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析出错
"
,
e
.
getMessage
()
);
logger
.
error
(
"解析出错
{}"
,
e
);
return
dataList
;
return
dataList
;
}
}
...
@@ -51,22 +55,19 @@ public class DayuByWordAnalysis {
...
@@ -51,22 +55,19 @@ public class DayuByWordAnalysis {
public
String
getContent
(
String
url
,
Proxy
proxy
)
{
public
String
getContent
(
String
url
,
Proxy
proxy
)
{
ZhiWeiTools
.
sleep
(
2000
);
ZhiWeiTools
.
sleep
(
2000
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
)){
Pattern
pat
=
Pattern
.
compile
(
"xissJsonData = (.*);"
);
String
result
=
response
.
body
().
string
();
Matcher
matcher
=
pat
.
matcher
(
result
);
Pattern
pat
=
Pattern
.
compile
(
"xissJsonData = (.*);"
);
try
{
Matcher
matcher
=
pat
.
matcher
(
result
);
if
(
matcher
.
find
())
{
if
(
matcher
.
find
())
{
String
s
=
matcher
.
group
(
0
);
String
s
=
matcher
.
group
(
0
);
JSONObject
json
=
JSONObject
.
parseObject
(
s
.
substring
(
15
,
s
.
length
()
-
1
));
JSONObject
json
=
JSONObject
.
parseObject
(
s
.
substring
(
15
,
s
.
length
()
-
1
));
String
content
=
json
.
getString
(
"content"
).
replaceAll
(
"<.*?>"
,
""
);
return
json
.
getString
(
"content"
).
replaceAll
(
"<.*?>"
,
""
);
return
content
;
}
}
return
null
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析文本出错"
,
e
.
getMessage
());
e
.
printStackTrace
();
System
.
out
.
println
(
result
);
return
null
;
}
}
return
null
;
}
}
}
}
src/main/java/com/zhiwei/parse/analysis/DayuCommentAnalysis.java
View file @
d979d793
...
@@ -10,12 +10,8 @@ import java.util.Map;
...
@@ -10,12 +10,8 @@ import java.util.Map;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
DayuCommentAnalysis
{
public
class
DayuCommentAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
DayuCommentAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
DayuCommentAnalysis
.
class
);
...
@@ -33,7 +29,7 @@ public class DayuCommentAnalysis {
...
@@ -33,7 +29,7 @@ public class DayuCommentAnalysis {
JSONObject
json
=
JSONObject
.
parseObject
(
result
).
getJSONObject
(
"data"
).
getJSONObject
(
"comments_map"
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
).
getJSONObject
(
"data"
).
getJSONObject
(
"comments_map"
);
Map
<
String
,
Object
>
map
=
(
Map
<
String
,
Object
>)
json
;
Map
<
String
,
Object
>
map
=
(
Map
<
String
,
Object
>)
json
;
for
(
Map
.
Entry
<
String
,
Object
>
entry
:
map
.
entrySet
()
)
{
for
(
Map
.
Entry
<
String
,
Object
>
entry
:
map
.
entrySet
()
)
{
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<>();
JSONObject
data
=
JSONObject
.
parseObject
(
entry
.
getValue
().
toString
());
JSONObject
data
=
JSONObject
.
parseObject
(
entry
.
getValue
().
toString
());
dataMap
.
put
(
"content"
,
data
.
getString
(
"content"
));
dataMap
.
put
(
"content"
,
data
.
getString
(
"content"
));
dataMap
.
put
(
"nickname"
,
data
.
getJSONObject
(
"user"
).
getString
(
"nickname"
));
dataMap
.
put
(
"nickname"
,
data
.
getJSONObject
(
"user"
).
getString
(
"nickname"
));
...
@@ -45,78 +41,78 @@ public class DayuCommentAnalysis {
...
@@ -45,78 +41,78 @@ public class DayuCommentAnalysis {
dataMap
.
put
(
"time"
,
TimeParse
.
dateFormartString
(
new
Date
(
time
),
"yyyy-MM-dd HH:mm:ss"
));
dataMap
.
put
(
"time"
,
TimeParse
.
dateFormartString
(
new
Date
(
time
),
"yyyy-MM-dd HH:mm:ss"
));
int
i
=
data
.
getInteger
(
"reply_cnt"
);
int
i
=
data
.
getInteger
(
"reply_cnt"
);
dataMap
.
put
(
"replay_count"
,
i
);
dataMap
.
put
(
"replay_count"
,
i
);
if
(
i
>
0
)
{
//
if(i > 0) {
dataList
.
addAll
(
getReplayData
(
id
,
articleId
,
proxy
));
//
dataList.addAll(getReplayData(id,articleId,proxy));
}
//
}
dataList
.
add
(
dataMap
);
dataList
.
add
(
dataMap
);
}
}
return
dataList
;
return
dataList
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析出错
"
,
e
.
getMessage
()
);
logger
.
error
(
"解析出错
{}"
,
e
);
return
dataList
;
return
dataList
;
}
}
}
}
/**
//
/**
*
//
*
* @Description 解析
//
* @Description 解析
* @param id
//
* @param id
* @param articleId
//
* @param articleId
* @return
//
* @return
*/
//
*/
private
List
<
Map
<
String
,
Object
>>
getReplayData
(
String
id
,
String
articleId
,
Proxy
proxy
)
{
//
private List<Map<String,Object>> getReplayData(String id,String articleId,Proxy proxy) {
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
//
Map<String,String> headerMap = HeadGet.getDayuCommentHeaderMap(null);
String
url
=
"http://m.uczzd.cn/iflow/api/v2/cmt/detail/"
+
id
+
"/comments?articleId="
+
articleId
+
"&count=10&ts="
;
//
String url = "http://m.uczzd.cn/iflow/api/v2/cmt/detail/"+id+"/comments?articleId="+articleId+"&count=10&ts=";
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
"-1"
,
proxy
,
headerMap
);
//
String result = HttpClient.executeHttpRequestGet(url+"-1",proxy, headerMap);
List
<
Map
<
String
,
Object
>>
data
=
new
ArrayList
<
Map
<
String
,
Object
>>();
//
List<Map<String,Object>> data = new ArrayList<Map<String,Object>>();
List
<
String
>
timeList
=
new
ArrayList
<
String
>();
//
List<String> timeList = new ArrayList<String>();
while
(
true
)
{
//
while(true) {
ZhiWeiTools
.
sleep
(
2000
);
//
ZhiWeiTools.sleep(2000);
long
time
=
analysisReplayData
(
result
,
data
);
//
long time = analysisReplayData(result,data);
if
(
timeList
.
contains
(
String
.
valueOf
(
time
))){
//
if(timeList.contains(String.valueOf(time))){
break
;
//
break;
}
//
}
timeList
.
add
(
String
.
valueOf
(
time
));
//
timeList.add(String.valueOf(time));
if
(
time
==
0
)
{
//
if(time == 0) {
break
;
//
break;
}
//
}
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
time
,
proxy
,
headerMap
);
//
result = HttpClient.executeHttpRequestGet(url+time,proxy, headerMap);
}
//
}
System
.
out
.
println
(
"=====================评论下回复获取数=="
+
data
.
size
());
//
System.out.println("=====================评论下回复获取数=="+data.size());
return
data
;
//
return data;
}
//
}
/**
//
/**
*
//
*
* @Description 解析
//
* @Description 解析
* @param result
//
* @param result
* @param dataList
//
* @param dataList
* @return
//
* @return
*/
//
*/
private
long
analysisReplayData
(
String
result
,
List
<
Map
<
String
,
Object
>>
dataList
)
{
//
private long analysisReplayData(String result,List<Map<String,Object>> dataList) {
long
time
=
0
;
//
long time = 0;
try
{
//
try {
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
//
JSONObject json = JSONObject.parseObject(result);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"replies"
);
//
JSONArray jsonArry = json.getJSONObject("data").getJSONArray("replies");
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
//
for(int i = 0; i < jsonArry.size();i++) {
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
//
Map<String,Object> map = new HashMap<String, Object>();
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
//
JSONObject data = jsonArry.getJSONObject(i);
map
.
put
(
"content"
,
data
.
getString
(
"content"
));
//
map.put("content", data.getString("content"));
map
.
put
(
"nickname"
,
data
.
getString
(
"nickname"
));
//
map.put("nickname", data.getString("nickname"));
map
.
put
(
"like"
,
data
.
getString
(
"up_cnt"
));
//
map.put("like", data.getString("up_cnt"));
map
.
put
(
"id"
,
data
.
getString
(
"commentId"
));
//
map.put("id", data.getString("commentId"));
map
.
put
(
"url"
,
data
.
getString
(
"shareUrl"
));
//
map.put("url", data.getString("shareUrl"));
time
=
data
.
getLong
(
"timeShow"
);
//
time = data.getLong("timeShow");
map
.
put
(
"time"
,
TimeParse
.
dateFormartString
(
new
Date
(
time
),
"yyyy-MM-dd HH:mm:ss"
));
//
map.put("time", TimeParse.dateFormartString(new Date(time), "yyyy-MM-dd HH:mm:ss"));
map
.
put
(
"replay_count"
,
data
.
getInteger
(
"replyCnt"
));
//
map.put("replay_count", data.getInteger("replyCnt"));
dataList
.
add
(
map
);
//
dataList.add(map);
}
//
}
return
time
;
//
return time;
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
logger
.
error
(
"获取大鱼号评论出错--回复的"
,
e
.
getMessage
());
//
logger.error("获取大鱼号评论出错--回复的",e.getMessage());
return
0
;
//
return 0;
}
//
}
}
//
}
...
...
src/main/java/com/zhiwei/parse/analysis/DoubanCommentAnalysis.java
View file @
d979d793
...
@@ -10,8 +10,6 @@ import java.util.Map;
...
@@ -10,8 +10,6 @@ import java.util.Map;
import
java.util.regex.Matcher
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
java.util.regex.Pattern
;
import
javax.swing.plaf.synth.SynthSpinnerUI
;
import
org.apache.commons.lang3.math.NumberUtils
;
import
org.apache.commons.lang3.math.NumberUtils
;
import
org.jsoup.Jsoup
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Document
;
...
...
src/main/java/com/zhiwei/parse/analysis/DouyinHotDataAnalysis.java
View file @
d979d793
...
@@ -6,16 +6,12 @@ import java.util.HashMap;
...
@@ -6,16 +6,12 @@ import java.util.HashMap;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
public
class
DouyinHotDataAnalysis
{
public
class
DouyinHotDataAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
DouyinHotDataAnalysis
.
class
);
public
Map
<
String
,
Object
>
getData
(
String
result
)
{
public
Map
<
String
,
Object
>
getData
(
String
result
)
{
try
{
try
{
...
...
src/main/java/com/zhiwei/parse/analysis/FenghuangAccountAnalysis.java
View file @
d979d793
...
@@ -11,12 +11,17 @@ import org.slf4j.LoggerFactory;
...
@@ -11,12 +11,17 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
FenghuangAccountAnalysis
{
public
class
FenghuangAccountAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
FenghuangAccountAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
FenghuangAccountAnalysis
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
/**
/**
*
*
...
@@ -31,8 +36,8 @@ public class FenghuangAccountAnalysis {
...
@@ -31,8 +36,8 @@ public class FenghuangAccountAnalysis {
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangAccountHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangAccountHeaderMap
(
null
);
JSONArray
jsonArry
=
null
;
JSONArray
jsonArry
=
null
;
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
))
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
result
=
response
.
body
().
string
(
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONObject
(
"feeds"
).
getJSONArray
(
"list"
);
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONObject
(
"feeds"
).
getJSONArray
(
"list"
);
if
(
jsonArry
==
null
||
jsonArry
.
size
()
<
1
)
{
if
(
jsonArry
==
null
||
jsonArry
.
size
()
<
1
)
{
...
@@ -83,7 +88,7 @@ public class FenghuangAccountAnalysis {
...
@@ -83,7 +88,7 @@ public class FenghuangAccountAnalysis {
map
.
put
(
"url"
,
json
.
getString
(
"shareurl"
));
map
.
put
(
"url"
,
json
.
getString
(
"shareurl"
));
map
.
put
(
"id"
,
json
.
getString
(
"aid"
));
map
.
put
(
"id"
,
json
.
getString
(
"aid"
));
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析具体文章的时候出错
"
,
e
.
getMessage
()
);
logger
.
error
(
"解析具体文章的时候出错
{}"
,
e
);
return
null
;
return
null
;
}
}
return
map
;
return
map
;
...
...
src/main/java/com/zhiwei/parse/analysis/FenghuangCommentAnalysis.java
View file @
d979d793
...
@@ -13,17 +13,22 @@ import org.slf4j.LoggerFactory;
...
@@ -13,17 +13,22 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
okhttp3.Response
;
public
class
FenghuangCommentAnalysis
{
public
class
FenghuangCommentAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
FenghuangCommentAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
FenghuangCommentAnalysis
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
public
Map
<
String
,
Object
>
getFenghuangCommentCount
(
String
url
,
Proxy
proxy
)
{
public
Map
<
String
,
Object
>
getFenghuangCommentCount
(
String
url
,
Proxy
proxy
)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
try
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
))
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
null
);
String
result
=
response
.
body
().
string
(
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
map
.
put
(
"real_count"
,
json
.
getInteger
(
"count"
));
map
.
put
(
"real_count"
,
json
.
getInteger
(
"count"
));
map
.
put
(
"comment_num"
,
json
.
getInteger
(
"join_count"
));
map
.
put
(
"comment_num"
,
json
.
getInteger
(
"join_count"
));
...
@@ -44,8 +49,8 @@ public class FenghuangCommentAnalysis {
...
@@ -44,8 +49,8 @@ public class FenghuangCommentAnalysis {
public
String
getdocUrl
(
String
url
,
Proxy
proxy
)
{
public
String
getdocUrl
(
String
url
,
Proxy
proxy
)
{
String
docUrl
=
null
;
String
docUrl
=
null
;
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
))
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
null
);
String
result
=
response
.
body
().
string
(
);
if
(
result
.
contains
(
"commentUrl\": \""
))
{
if
(
result
.
contains
(
"commentUrl\": \""
))
{
docUrl
=
result
.
split
(
"commentUrl\": \""
)[
1
].
split
(
"\","
)[
0
];
docUrl
=
result
.
split
(
"commentUrl\": \""
)[
1
].
split
(
"\","
)[
0
];
break
;
break
;
...
@@ -76,24 +81,18 @@ public class FenghuangCommentAnalysis {
...
@@ -76,24 +81,18 @@ public class FenghuangCommentAnalysis {
*/
*/
public
List
<
Map
<
String
,
Object
>>
getData
(
String
url
,
Proxy
proxy
)
{
public
List
<
Map
<
String
,
Object
>>
getData
(
String
url
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangCommentHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
String
result
;
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
)){
try
{
String
result
=
response
.
body
().
string
();
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
}
catch
(
Exception
e
)
{
JSONArray
jsonArry
=
json
.
getJSONArray
(
"data"
);
logger
.
error
(
"链接获取信息失败"
,
e
.
getMessage
());
return
null
;
}
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"data"
);
try
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
Map
<
String
,
Object
>
map
=
getcommentData
(
jsonArry
.
getJSONObject
(
i
));
Map
<
String
,
Object
>
map
=
getcommentData
(
jsonArry
.
getJSONObject
(
i
));
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"
获取信息出错"
,
e
.
getMessage
()
);
logger
.
error
(
"
链接获取信息失败"
,
e
);
return
null
;
return
Collections
.
emptyList
()
;
}
}
return
dataList
;
return
dataList
;
...
@@ -109,22 +108,16 @@ public class FenghuangCommentAnalysis {
...
@@ -109,22 +108,16 @@ public class FenghuangCommentAnalysis {
*/
*/
public
List
<
Map
<
String
,
Object
>>
getData2
(
String
url
,
Proxy
proxy
)
{
public
List
<
Map
<
String
,
Object
>>
getData2
(
String
url
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
String
result
;
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
try
{
String
result
=
response
.
body
().
string
();
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
null
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
}
catch
(
Exception
e
)
{
JSONArray
jsonArry
=
json
.
getJSONArray
(
"comments"
);
logger
.
error
(
"链接获取信息失败 {}"
,
e
);
return
Collections
.
emptyList
();
}
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"comments"
);
try
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
Map
<
String
,
Object
>
map
=
getcommentData2
(
jsonArry
.
getJSONObject
(
i
));
Map
<
String
,
Object
>
map
=
getcommentData2
(
jsonArry
.
getJSONObject
(
i
));
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"
获取信息出错
{}"
,
e
);
logger
.
error
(
"
链接获取信息失败
{}"
,
e
);
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
return
dataList
;
return
dataList
;
...
@@ -154,7 +147,7 @@ public class FenghuangCommentAnalysis {
...
@@ -154,7 +147,7 @@ public class FenghuangCommentAnalysis {
* @return
* @return
*/
*/
private
Map
<
String
,
Object
>
getcommentData
(
JSONObject
json
)
{
private
Map
<
String
,
Object
>
getcommentData
(
JSONObject
json
)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
try
{
try
{
JSONObject
data
=
json
.
getJSONObject
(
"data"
);
JSONObject
data
=
json
.
getJSONObject
(
"data"
);
map
.
put
(
"nickname"
,
json
.
getString
(
"nickname"
));
map
.
put
(
"nickname"
,
json
.
getString
(
"nickname"
));
...
@@ -169,7 +162,7 @@ public class FenghuangCommentAnalysis {
...
@@ -169,7 +162,7 @@ public class FenghuangCommentAnalysis {
long
time
=
data
.
getLong
(
"add_time"
)
*
1000
;
long
time
=
data
.
getLong
(
"add_time"
)
*
1000
;
map
.
put
(
"time"
,
TimeParse
.
dateFormartString
(
new
Date
(
time
),
"yyyy-MM-dd HH:mm:ss"
));
map
.
put
(
"time"
,
TimeParse
.
dateFormartString
(
new
Date
(
time
),
"yyyy-MM-dd HH:mm:ss"
));
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"具体解析一条数据出错
"
,
e
.
getMessage
()
);
logger
.
error
(
"具体解析一条数据出错
{}"
,
e
);
return
null
;
return
null
;
}
}
return
map
;
return
map
;
...
...
src/main/java/com/zhiwei/parse/analysis/MaimaiBywordAnalysis.java
View file @
d979d793
...
@@ -5,15 +5,11 @@ import java.util.HashMap;
...
@@ -5,15 +5,11 @@ import java.util.HashMap;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
public
class
MaimaiBywordAnalysis
{
public
class
MaimaiBywordAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
MaimaiBywordAnalysis
.
class
);
public
Map
<
String
,
Object
>
getData
(
String
result
,
String
time
)
{
public
Map
<
String
,
Object
>
getData
(
String
result
,
String
time
)
{
Map
<
String
,
Object
>
map1
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map1
=
new
HashMap
<
String
,
Object
>();
...
@@ -38,7 +34,7 @@ public class MaimaiBywordAnalysis {
...
@@ -38,7 +34,7 @@ public class MaimaiBywordAnalysis {
map
.
put
(
"like"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"likes"
));
map
.
put
(
"like"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"likes"
));
map
.
put
(
"comment_count"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"total_cnt"
));
map
.
put
(
"comment_count"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"total_cnt"
));
map
.
put
(
"spreads"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"spreads"
));
//传播数
map
.
put
(
"spreads"
,
data
.
getJSONObject
(
"feed"
).
getInteger
(
"spreads"
));
//传播数
System
.
out
.
println
(
map
.
toString
());
//
System.out.println(map.toString());
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
map1
.
put
(
"data"
,
dataList
);
map1
.
put
(
"data"
,
dataList
);
...
@@ -69,7 +65,7 @@ public class MaimaiBywordAnalysis {
...
@@ -69,7 +65,7 @@ public class MaimaiBywordAnalysis {
map
.
put
(
"like"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"likes"
));
map
.
put
(
"like"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"likes"
));
map
.
put
(
"comment_count"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"total_cnt"
));
map
.
put
(
"comment_count"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"total_cnt"
));
map
.
put
(
"spreads"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"search_order"
));
//传播数
map
.
put
(
"spreads"
,
data
.
getJSONObject
(
"gossip"
).
getInteger
(
"search_order"
));
//传播数
System
.
out
.
println
(
map
.
toString
());
//
System.out.println(map.toString());
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
map1
.
put
(
"data"
,
dataList
);
map1
.
put
(
"data"
,
dataList
);
...
...
src/main/java/com/zhiwei/parse/analysis/MeipaiByWordAnalysis.java
View file @
d979d793
...
@@ -19,7 +19,6 @@ import com.zhiwei.util.TimeUtil;
...
@@ -19,7 +19,6 @@ import com.zhiwei.util.TimeUtil;
public
class
MeipaiByWordAnalysis
{
public
class
MeipaiByWordAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
MeipaiByWordAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
MeipaiByWordAnalysis
.
class
);
/**
/**
*
*
* @Description 解析此页
* @Description 解析此页
...
...
src/main/java/com/zhiwei/parse/analysis/QQKBCommentAnalysis.java
View file @
d979d793
...
@@ -19,7 +19,6 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
...
@@ -19,7 +19,6 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
public
class
QQKBCommentAnalysis
{
public
class
QQKBCommentAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
QQKBCommentAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
QQKBCommentAnalysis
.
class
);
/**
/**
*
*
...
@@ -37,10 +36,9 @@ public class QQKBCommentAnalysis {
...
@@ -37,10 +36,9 @@ public class QQKBCommentAnalysis {
JSONObject
data
=
jsonArry
.
getJSONArray
(
jsonArry
.
size
()-
1
).
getJSONObject
(
0
);
JSONObject
data
=
jsonArry
.
getJSONArray
(
jsonArry
.
size
()-
1
).
getJSONObject
(
0
);
String
coral_scorem
=
data
.
getString
(
"coral_score"
);
String
coral_scorem
=
data
.
getString
(
"coral_score"
);
String
reply_id
=
data
.
getString
(
"reply_id"
);
String
reply_id
=
data
.
getString
(
"reply_id"
);
Map
<
String
,
Object
>
paMap
=
HeadGet
.
getQQKBCommentParamMap2
(
comment_id
,
page
,
coral_scorem
,
article_id
,
reply_id
);
return
HeadGet
.
getQQKBCommentParamMap2
(
comment_id
,
page
,
coral_scorem
,
article_id
,
reply_id
);
return
paMap
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"构造post请求信息失败
"
,
e
.
getMessage
()
);
logger
.
error
(
"构造post请求信息失败
{}"
,
e
);
return
null
;
return
null
;
}
}
}
}
...
@@ -52,13 +50,13 @@ public class QQKBCommentAnalysis {
...
@@ -52,13 +50,13 @@ public class QQKBCommentAnalysis {
* @return
* @return
*/
*/
public
List
<
Map
<
String
,
Object
>>
getCommentData
(
String
result
,
String
cookie
,
String
comment_id
,
String
article_id
,
Proxy
proxy
)
{
public
List
<
Map
<
String
,
Object
>>
getCommentData
(
String
result
,
String
cookie
,
String
comment_id
,
String
article_id
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
try
{
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"comments"
).
getJSONArray
(
"hot"
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"comments"
).
getJSONArray
(
"hot"
);
for
(
int
i
=
0
;
i
<
jsonArry
.
size
()
;
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
()
;
i
++)
{
JSONObject
data
=
jsonArry
.
getJSONArray
(
i
).
getJSONObject
(
0
);
JSONObject
data
=
jsonArry
.
getJSONArray
(
i
).
getJSONObject
(
0
);
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
map
.
put
(
"content"
,
data
.
getString
(
"reply_content"
));
map
.
put
(
"content"
,
data
.
getString
(
"reply_content"
));
map
.
put
(
"time"
,
TimeParse
.
dateFormartString
(
new
Date
(
Long
.
valueOf
(
data
.
getString
(
"tipstime"
))
*
1000L
),
"yyyy-MM-dd HH:mm:ss"
));
map
.
put
(
"time"
,
TimeParse
.
dateFormartString
(
new
Date
(
Long
.
valueOf
(
data
.
getString
(
"tipstime"
))
*
1000L
),
"yyyy-MM-dd HH:mm:ss"
));
map
.
put
(
"name"
,
data
.
getString
(
"nick"
));
map
.
put
(
"name"
,
data
.
getString
(
"nick"
));
...
@@ -66,12 +64,11 @@ public class QQKBCommentAnalysis {
...
@@ -66,12 +64,11 @@ public class QQKBCommentAnalysis {
int
replay_num
=
0
;
int
replay_num
=
0
;
String
reply_id
=
data
.
getString
(
"reply_id"
);
String
reply_id
=
data
.
getString
(
"reply_id"
);
if
(
data
.
toString
().
contains
(
"reply_num"
))
{
if
(
data
.
toString
().
contains
(
"reply_num"
))
{
replay_num
=
data
.
getInteger
(
"reply_num"
);
// replay_num = data.getInteger("reply_num");
List
<
Map
<
String
,
Object
>>
lists
=
getReplyCommentData
(
cookie
,
reply_id
,
comment_id
,
article_id
,
proxy
);
// List<Map<String,Object>> lists = getReplyCommentData(cookie,reply_id,comment_id, article_id,proxy);
if
(
lists
!=
null
&&
lists
.
size
()
>
0
)
{
// if(lists != null && lists.size() > 0) {
dataList
.
addAll
(
lists
);
// dataList.addAll(lists);
}
// }
map
.
put
(
"reply_num"
,
replay_num
);
}
}
map
.
put
(
"reply_id"
,
reply_id
);
map
.
put
(
"reply_id"
,
reply_id
);
map
.
put
(
"reply_num"
,
replay_num
);
map
.
put
(
"reply_num"
,
replay_num
);
...
@@ -80,7 +77,7 @@ public class QQKBCommentAnalysis {
...
@@ -80,7 +77,7 @@ public class QQKBCommentAnalysis {
}
}
return
dataList
;
return
dataList
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析数据出错
"
,
e
.
getMessage
()
);
logger
.
error
(
"解析数据出错
{}"
,
e
);
return
dataList
;
return
dataList
;
}
}
...
@@ -93,7 +90,7 @@ public class QQKBCommentAnalysis {
...
@@ -93,7 +90,7 @@ public class QQKBCommentAnalysis {
* @return
* @return
*/
*/
public
Map
<
String
,
Object
>
getOneReplyComment
(
JSONObject
data
)
{
public
Map
<
String
,
Object
>
getOneReplyComment
(
JSONObject
data
)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
try
{
try
{
map
.
put
(
"content"
,
data
.
getString
(
"reply_content"
));
map
.
put
(
"content"
,
data
.
getString
(
"reply_content"
));
map
.
put
(
"time"
,
TimeParse
.
dateFormartString
(
new
Date
(
Long
.
valueOf
(
data
.
getString
(
"tipstime"
))
*
1000L
),
"yyyy-MM-dd HH:mm:ss"
));
map
.
put
(
"time"
,
TimeParse
.
dateFormartString
(
new
Date
(
Long
.
valueOf
(
data
.
getString
(
"tipstime"
))
*
1000L
),
"yyyy-MM-dd HH:mm:ss"
));
...
@@ -103,13 +100,13 @@ public class QQKBCommentAnalysis {
...
@@ -103,13 +100,13 @@ public class QQKBCommentAnalysis {
System
.
out
.
println
(
map
.
toString
());
System
.
out
.
println
(
map
.
toString
());
return
map
;
return
map
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"获取单个回复评论出错
"
,
e
.
getMessage
()
);
logger
.
error
(
"获取单个回复评论出错
{}"
,
e
);
return
null
;
return
null
;
}
}
}
}
public
List
<
Map
<
String
,
Object
>>
getReplyCommentData
(
String
cookie
,
String
reply_id
,
String
comment_id
,
String
article_id
,
Proxy
proxy
)
{
public
List
<
Map
<
String
,
Object
>>
getReplyCommentData
(
String
cookie
,
String
reply_id
,
String
comment_id
,
String
article_id
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
cookie
);
try
{
try
{
String
old_reply_id
=
""
;
String
old_reply_id
=
""
;
...
...
src/main/java/com/zhiwei/parse/analysis/QicheHomeKwyWordAnalysis.java
View file @
d979d793
...
@@ -6,16 +6,12 @@ import java.util.HashMap;
...
@@ -6,16 +6,12 @@ import java.util.HashMap;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
public
class
QicheHomeKwyWordAnalysis
{
public
class
QicheHomeKwyWordAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
QicheHomeKwyWordAnalysis
.
class
);
public
List
<
Map
<
String
,
Object
>>
getData
(
String
result
)
{
public
List
<
Map
<
String
,
Object
>>
getData
(
String
result
)
{
try
{
try
{
...
...
src/main/java/com/zhiwei/parse/analysis/WangyiHistoryAnalysis.java
View file @
d979d793
...
@@ -21,15 +21,16 @@ public class WangyiHistoryAnalysis {
...
@@ -21,15 +21,16 @@ public class WangyiHistoryAnalysis {
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WangyiHistoryAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WangyiHistoryAnalysis
.
class
);
public
List
<
Map
<
String
,
Object
>>
getData
(
String
result
,
Proxy
proxy
,
String
endTime
,
String
source
)
{
public
List
<
Map
<
String
,
Object
>>
getData
(
String
result
,
Proxy
proxy
,
String
endTime
,
String
source
)
{
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"list"
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"list"
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getWangyiHistoryHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getWangyiHistoryHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
try
{
try
{
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
map
.
put
(
"title"
,
data
.
getString
(
"title"
));
map
.
put
(
"title"
,
data
.
getString
(
"title"
));
if
(
endTime
!=
null
&&
endTime
.
length
()
>
1
)
{
if
(
endTime
!=
null
&&
endTime
.
length
()
>
1
)
{
if
(
data
.
getString
(
"ptime"
).
compareTo
(
endTime
)
<=
0
)
{
if
(
data
.
getString
(
"ptime"
).
compareTo
(
endTime
)
<=
0
)
{
...
...
src/main/resources/log4j.properties
View file @
d979d793
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
log4j.appender.stdout.layout.ConversionPattern
=
<%d>[%5p] %c - %m%n
log4j.appender.stdout.layout.ConversionPattern
=
<%d>[%5p] %c - %m%n
log4j.appender.ROLLING_FILE
=
org.apache.log4j.DailyRollingFileAppender
log4j.appender.ROLLING_FILE
=
org.apache.log4j.DailyRollingFileAppender
log4j.appender.ROLLING_FILE.Threshold
=
stdout
log4j.appender.ROLLING_FILE.Threshold
=
stdout
log4j.appender.ROLLING_FILE.File
=
./Log/
wechatcrawler
.log
log4j.appender.ROLLING_FILE.File
=
./Log/
artivleData
.log
log4j.appender.ROLLING_FILE.Append
=
true
log4j.appender.ROLLING_FILE.Append
=
true
log4j.appender.ROLLING_FILE.layout
=
org.apache.log4j.PatternLayout
log4j.appender.ROLLING_FILE.layout
=
org.apache.log4j.PatternLayout
log4j.appender.ROLLING_FILE.layout.ConversionPattern
=
<%d>[%5p] %c - %m%n
log4j.appender.ROLLING_FILE.layout.ConversionPattern
=
<%d>[%5p] %c - %m%n
\ No newline at end of file
src/test/java/com/zhiwei/Comment/ChejiaCommentCountTest.java
0 → 100644
View file @
d979d793
//package com.zhiwei.Comment;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Chejia;
//import com.zhiwei.tools.tools.ZhiWeiTools;
//
//public class ChejiaCommentCountTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
// GroupType.PROVIDER);
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
//
// Map<String, Object> map = poi
// .importExcel("D://crawlerdata//自媒体/车家号.xlsx", 0);
// List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body");
// List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>();
// List<String> headList = (List<String>) map.get("head");
// for (Map<String, Object> map1 : list) {
// String url = map1.get("地址") + "";
//// url = "https://chejiahao.autohome.com.cn/info/3073188#reply";
// System.out.println(url);
// Chejia.getChejiaComment(url, ProxyFactory.getNatProxy());
//// int i = Chejia.getChejiaCommentCount(url, ProxyFactory.getNatProxy());
//// System.out.println(i);
//// map1.put("count", i);
// ZhiWeiTools.sleep(100);
// }
// headList.add("count");
// poi.exportExcel("D://crawlerdata//自媒体/车家号.xlsx", "评论采集", headList,
// list);
//
// }
//}
src/test/java/com/zhiwei/Comment/MaimaiCommentCountTest.java
0 → 100644
View file @
d979d793
//package com.zhiwei.Comment;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Maimai;
//import com.zhiwei.parse.Yiche;
//import com.zhiwei.tools.tools.ZhiWeiTools;
//
//public class MaimaiCommentCountTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
// GroupType.PROVIDER);
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
//
// Map<String, Object> map = poi
// .importExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉#美团 裁员#汇总截至12月20日10点30分.xlsx(1).xlsx", 0);
// List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body");
// List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>();
// List<String> headList = (List<String>) map.get("head");
// for (Map<String, Object> map1 : list) {
// String url = map1.get("地址") + "";
// Map<String,Object> map3 = Maimai.getMaiaiCount(url, ProxyFactory.getNatProxy());
// map1.putAll(map3);
// ZhiWeiTools.sleep(100);
// }
// headList.add("like");
// headList.add("spreads");
// headList.add("cmts");
// poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉#美团 裁员#汇总截至12月20日10点30分.xlsx(1).xlsx", "评论采集", headList,
// list);
// }
//}
src/test/java/com/zhiwei/Comment/XueqiuCommentCountTest.java
0 → 100644
View file @
d979d793
//package com.zhiwei.Comment;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//import java.util.Objects;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Xueqiu;
//import com.zhiwei.tools.tools.ZhiWeiTools;
//
//public class XueqiuCommentCountTest {
// @Test
// public void f() {
//
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
// GroupType.PROVIDER);
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
//
// Map<String, Object> map = poi
// .importExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\雪球-腾讯.xlsx", 0);
// List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body");
// List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>();
// List<String> headList = (List<String>) map.get("head");
// for (Map<String, Object> map1 : list) {
// for(int i = 1;i < 5;i++) {
// String url = map1.get("地址") + "";
// Map<String,Object> map3 = Xueqiu.getUrlData(url, ProxyFactory.getNatProxy());
// ZhiWeiTools.sleep(100);
// if(Objects.nonNull(map3)) {
// System.out.println(map3.toString());
// map1.putAll(map3);
// break;
// }
// }
// }
// headList.add("like");
// headList.add("repostCount");
// headList.add("commentCount");
// poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\雪球-腾讯.xlsx", "评论数采集", headList,
// list);
//
// }
//}
src/test/java/com/zhiwei/crawler/AiqiyiByWordExample.java
View file @
d979d793
package
com
.
zhiwei
.
crawler
;
//
package com.zhiwei.crawler;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Aiqiyi
;
//
import com.zhiwei.parse.Aiqiyi;
//
public
class
AiqiyiByWordExample
{
//
public class AiqiyiByWordExample {
//
//
@Test
//
@Test
public
void
aiqiyiByWordTest
()
{
//
public void aiqiyiByWordTest() {
String
word
=
"美食,味道,菜"
;
//
String word = "美食,味道,菜";
String
[]
words
=
word
.
split
(
","
);
//
String[] words = word.split(",");
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
//
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for
(
String
w
:
words
)
{
//
for(String w : words) {
List
<
Map
<
String
,
Object
>>
dataList
=
Aiqiyi
.
getAiqiyiByWordData
(
w
,
null
);
//
List<Map<String,Object>> dataList = Aiqiyi.getAiqiyiByWordData(w,null);
if
(
dataList
!=
null
&&
dataList
.
size
()
>=
1
)
{
//
if(dataList != null && dataList.size() >= 1) {
bodyList
.
addAll
(
dataList
);
//
bodyList.addAll(dataList);
}
//
}
}
//
}
List
<
String
>
headList
=
new
ArrayList
<
String
>();
//
List<String> headList = new ArrayList<String>();
headList
.
add
(
"count"
);
//
headList.add("count");
headList
.
add
(
"time"
);
//
headList.add("time");
headList
.
add
(
"source"
);
//
headList.add("source");
headList
.
add
(
"content"
);
//
headList.add("content");
headList
.
add
(
"url"
);
//
headList.add("url");
headList
.
add
(
"title"
);
//
headList.add("title");
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi
.
exportExcel
(
"D://crawlerdata/爱奇艺关键词采集.xlsx"
,
"数据"
,
headList
,
bodyList
);
//
poi.exportExcel("D://crawlerdata/爱奇艺关键词采集.xlsx", "数据", headList, bodyList);
//
//
//
}
//
}
//
//
//
}
//
}
src/test/java/com/zhiwei/crawler/BaijiaAccountExample.java
View file @
d979d793
package
com
.
zhiwei
.
crawler
;
//package com.zhiwei.crawler;
//
import
java.util.ArrayList
;
//import java.util.ArrayList;
import
java.util.List
;
//import java.util.List;
import
java.util.Map
;
//import java.util.Map;
//
import
org.junit.Test
;
//import org.junit.Test;
//
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Baijia
;
//import com.zhiwei.parse.Baijia;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
//import com.zhiwei.tools.tools.ZhiWeiTools;
//
public
class
BaijiaAccountExample
{
//public class BaijiaAccountExample {
//
//// @Test
// public void baijiaAccountTest() {
// String app_id = "1536766276004443";
// String startTime = "2015-01-01 00:00:00";
// //2017-11-30 17:48:17
// List<Map<String,Object>> lists = Baijia.getBaijiaAccountData(app_id,startTime,null);
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// List<String> headList = new ArrayList<String>();
// headList.add("title");
// headList.add("time");
// headList.add("read_amount");
// headList.add("app_id");
// headList.add("source");
// headList.add("url");
// headList.add("content");
// poi.exportExcel("D://crawlerdata/百家号-马继华.xlsx", "马继华", headList, lists);
// }
//
//// @Test
// public void baijiaAccount2Test() {
// String app_id = "b_1548519002063358";
// String startTime = "2018-01-01 00:00:00";
// //2017-11-30 17:48:17
// List<String> idList = new ArrayList<>();
// idList.add("b_1548519002063358");
// idList.add("b_1536766292852334");
// idList.add("b_1536766781763274");
// idList.add("b_1536766200338498");
// List<Map<String,Object>> bodyList = new ArrayList<>();
// for(String id : idList) {
// ZhiWeiTools.sleep(5000);
// List<Map<String,Object>> lists = Baijia.getBaijiaAccount2Data(id,startTime,null);
// bodyList.addAll(lists);
// }
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// List<String> headList = new ArrayList<String>();
// headList.add("title");
// headList.add("time");
// headList.add("source");
// headList.add("url");
// headList.add("content");
// poi.exportExcel("D://crawlerdata//自媒体/百家号-all.xlsx", "科学的fan", headList, bodyList);
// }
//
// @Test
// @Test
public
void
baijiaAccountTest
()
{
// public void test3() {
String
app_id
=
"1536766276004443"
;
// String path = "D://crawlerdata//自媒体/百家号采集.xlsx";
String
startTime
=
"2015-01-01 00:00:00"
;
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
//2017-11-30 17:48:17
// String startTime = "2018-05-01 00:00:00";
List
<
Map
<
String
,
Object
>>
lists
=
Baijia
.
getBaijiaAccountData
(
app_id
,
startTime
,
null
);
// Map<String,Object> map = poi.importExcel(path, 0);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
// List<Map<String,Object>> list = (List<Map<String, Object>>) map.get("body");
List
<
String
>
headList
=
new
ArrayList
<
String
>();
// List<Map<String,Object>> bodyList = new ArrayList<>();
headList
.
add
(
"title"
);
// for(Map<String,Object> m : list) {
headList
.
add
(
"time"
);
// try {
headList
.
add
(
"read_amount"
);
// String app_id = m.get("id").toString();
headList
.
add
(
"app_id"
);
// app_id = "1594158489045754";
headList
.
add
(
"source"
);
// String name = m.get("name").toString();
headList
.
add
(
"url"
);
// String cookie = "__cfduid=d847baca85b97d1967b3da02ebb345b831535524251; BAIDUID=C0F0F81EF770C5219AB9C178654135EC:FG=1; PSTM=1536376257; BIDUPSID=250CCE0442BEBCB3568D8EC515953434; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; locale=zh; delPer=0; H_PS_PSSID=1447_21117_20930; PSINO=5";
headList
.
add
(
"content"
);
// List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id,name, startTime,cookie, null);
poi
.
exportExcel
(
"D://crawlerdata/百家号-马继华.xlsx"
,
"马继华"
,
headList
,
lists
);
// if(lists != null) {
}
// bodyList.addAll(lists);
// }
// @Test
// } catch (Exception e) {
public
void
baijiaAccount2Test
()
{
// }
String
app_id
=
"b_1548519002063358"
;
// }
String
startTime
=
"2018-01-01 00:00:00"
;
// List<String> headList = new ArrayList<String>();
//2017-11-30 17:48:17
// headList.add("title");
List
<
String
>
idList
=
new
ArrayList
<>();
// headList.add("time");
idList
.
add
(
"b_1548519002063358"
);
// headList.add("source");
idList
.
add
(
"b_1536766292852334"
);
// headList.add("url");
idList
.
add
(
"b_1536766781763274"
);
// headList.add("content");
idList
.
add
(
"b_1536766200338498"
);
// headList.add("read_amount");
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
// poi.exportExcel("D://crawlerdata//自媒体/百家号-lxj-2.xlsx", "娱乐资本论", headList, bodyList);
for
(
String
id
:
idList
)
{
// }
ZhiWeiTools
.
sleep
(
5000
);
//
List
<
Map
<
String
,
Object
>>
lists
=
Baijia
.
getBaijiaAccount2Data
(
id
,
startTime
,
null
);
//}
bodyList
.
addAll
(
lists
);
}
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"title"
);
headList
.
add
(
"time"
);
headList
.
add
(
"source"
);
headList
.
add
(
"url"
);
headList
.
add
(
"content"
);
poi
.
exportExcel
(
"D://crawlerdata//自媒体/百家号-all.xlsx"
,
"科学的fan"
,
headList
,
bodyList
);
}
@Test
public
void
test3
()
{
String
path
=
"D://crawlerdata//自媒体/百家号采集.xlsx"
;
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
String
startTime
=
"2018-05-01 00:00:00"
;
Map
<
String
,
Object
>
map
=
poi
.
importExcel
(
path
,
0
);
List
<
Map
<
String
,
Object
>>
list
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
for
(
Map
<
String
,
Object
>
m
:
list
)
{
try
{
String
app_id
=
m
.
get
(
"id"
).
toString
();
app_id
=
"1594158489045754"
;
String
name
=
m
.
get
(
"name"
).
toString
();
String
cookie
=
"__cfduid=d847baca85b97d1967b3da02ebb345b831535524251; BAIDUID=C0F0F81EF770C5219AB9C178654135EC:FG=1; PSTM=1536376257; BIDUPSID=250CCE0442BEBCB3568D8EC515953434; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; locale=zh; delPer=0; H_PS_PSSID=1447_21117_20930; PSINO=5"
;
List
<
Map
<
String
,
Object
>>
lists
=
Baijia
.
getBaijiaAccountByBaiduData
(
app_id
,
name
,
startTime
,
cookie
,
null
);
if
(
lists
!=
null
)
{
bodyList
.
addAll
(
lists
);
}
}
catch
(
Exception
e
)
{
}
}
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"title"
);
headList
.
add
(
"time"
);
headList
.
add
(
"source"
);
headList
.
add
(
"url"
);
headList
.
add
(
"content"
);
headList
.
add
(
"read_amount"
);
poi
.
exportExcel
(
"D://crawlerdata//自媒体/百家号-lxj-2.xlsx"
,
"娱乐资本论"
,
headList
,
bodyList
);
}
}
src/test/java/com/zhiwei/crawler/DayuAccountExample.java
View file @
d979d793
package
com
.
zhiwei
.
crawler
;
//
package com.zhiwei.crawler;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Dayu
;
//
import com.zhiwei.parse.Dayu;
//
public
class
DayuAccountExample
{
//
public class DayuAccountExample {
//
//
@Test
//
@Test
public
void
dayuAccountTest
()
{
//
public void dayuAccountTest() {
//https://api.m.sm.cn/rest?method=Subscribe.list&format=html&from=wh10331&uc_biz_str=S:custom%7CC:search%7CN:true
//
//https://api.m.sm.cn/rest?method=Subscribe.list&format=html&from=wh10331&uc_biz_str=S:custom%7CC:search%7CN:true
//
//
// String mid = "d7300311c1504d24a229c3da345785c6";
//
//
String mid = "d7300311c1504d24a229c3da345785c6";
// String name = "大鱼海棠雨";
//
//
String name = "大鱼海棠雨";
String
startTime
=
"2017-01-01 00:00:00"
;
//
String startTime = "2017-01-01 00:00:00";
String
path
=
"D:\\crawlerdata\\自媒体\\大鱼历史文章.xlsx"
;
//
String path = "D:\\crawlerdata\\自媒体\\大鱼历史文章.xlsx";
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
Map
<
String
,
Object
>
map
=
poi
.
importExcel
(
path
,
0
);
//
Map<String,Object> map = poi.importExcel(path, 0);
List
<
Map
<
String
,
Object
>>
lists
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
//
List<Map<String,Object>> lists = (List<Map<String, Object>>) map.get("body");
List
<
String
>
headList
=
new
ArrayList
<
String
>();
//
List<String> headList = new ArrayList<String>();
headList
.
add
(
"title"
);
//
headList.add("title");
headList
.
add
(
"time"
);
//
headList.add("time");
headList
.
add
(
"content"
);
//
headList.add("content");
headList
.
add
(
"source"
);
//
headList.add("source");
headList
.
add
(
"url"
);
//
headList.add("url");
// headList.add("content_id");
//
//
headList.add("content_id");
// headList.add("origin_id");
//
//
headList.add("origin_id");
// headList.add("xss_item_id");
//
//
headList.add("xss_item_id");
for
(
Map
<
String
,
Object
>
data
:
lists
)
{
//
for(Map<String,Object> data : lists) {
String
mid
=
data
.
get
(
"mid"
)+
""
;
//
String mid = data.get("mid")+"";
String
name
=
data
.
get
(
"name"
)+
""
;
//
String name = data.get("name")+"";
if
(
mid
.
length
()
<
1
&&
name
.
length
()
<
1
)
{
//
if(mid.length() < 1 && name.length() < 1) {
continue
;
//
continue;
}
//
}
List
<
Map
<
String
,
Object
>>
dataList
=
Dayu
.
getDayuAccountData
(
mid
,
name
,
null
,
null
);
//
List<Map<String,Object>> dataList = Dayu.getDayuAccountData(mid,name,null,null);
poi
.
exportExcel
(
path
,
name
,
headList
,
dataList
);
//
poi.exportExcel(path, name, headList, dataList);
}
//
}
//
//
}
//
}
//
//
}
//
}
src/test/java/com/zhiwei/crawler/DayuByWordExample.java
View file @
d979d793
package
com
.
zhiwei
.
crawler
;
//
package com.zhiwei.crawler;
//
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.parse.Dayu
;
//
import com.zhiwei.parse.Dayu;
//
public
class
DayuByWordExample
{
//
public class DayuByWordExample {
//
//
@Test
//
@Test
public
void
dayuByWordTest
()
{
//
public void dayuByWordTest() {
String
word
=
"11"
;
//
String word = "11";
//
List
<
Map
<
String
,
Object
>>
dataList
=
Dayu
.
getDayuByWordData
(
word
,
null
);
//
List<Map<String,Object>> dataList = Dayu.getDayuByWordData(word,null);
//
System
.
out
.
println
(
dataList
.
size
());
//
System.out.println(dataList.size());
//
//
}
//
}
//
//
}
//
}
src/test/java/com/zhiwei/crawler/MaimaiBywordExample.java
View file @
d979d793
...
@@ -10,15 +10,18 @@ import com.zhiwei.parse.Maimai;
...
@@ -10,15 +10,18 @@ import com.zhiwei.parse.Maimai;
public
class
MaimaiBywordExample
{
public
class
MaimaiBywordExample
{
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
String
word
=
"美团
晋升
"
;
String
word
=
"美团
|某团|MT|大众点评|新美大|美团点评
"
;
String
cookie
=
"
sessionid=y87knknqrc3fi6xto2zv0s4kugmleepk; guid=GxsfBBgZGwQYGx4EGBkeVgcYGx4fHhwcGhgbVhwZBB0ZHwVDWEtMS3kKGhobBB0THhkEGgQTHAVPR0VYQmkKA0VBSU9tCk9BQ0YKBmZnfmJhAgocGQQdGR8FXkNhSE99T0ZaWmsKAx4cfWV9ChEZBBwKfmQKWV1FTkRDfQIKGgQfBUtGRkNQRWc=; seid=s1539933372113; token=\"ZTjnEij9jsL4ZCdnKF2CaUAwcJHgcem/zHvAbXp3MXdY+uSPva8scjbe2zHl2gE98CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiSFVMLVhKb2g5TkJGNHRJanljUW5Qa1V5IiwiX2V4cGlyZSI6MTU0MDAxOTc5MTUwNSwiX21heEFnZSI6ODY0MDAwMDB9; session.sig=dJmy52LHX-stqroAbm66u2zJaZA
"
;
String
cookie
=
"
guid=GxsfBBgZGwQYGx4EGBkeVhsfGB4aHBpWHBkEHRkfBUNYS0xLeQoSEwQSHR8ZBBoEGx0FT0dFWEJpCgNFQUlPbQpPQUNGCgZmZ35iYQIKHBkEHRkfBV5DYUhPfU9GWlprCgMeHH1lfQoRGQQcCn5kClldRU5EQ30CChoEHwVLRkZDUEVn; token=\"7IGuqjEwgJ2gXX5PZ0UYSxvn81Aws6v5OFrwpSErsbctlSd1e/7+AzYEMMMeeFJJ8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; _buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiOGtDSnF6VG5QcFk0R3ZmVFB4MThIMW1ZIiwiX2V4cGlyZSI6MTU0ODMwODU0MTMyNCwiX21heEFnZSI6ODY0MDAwMDB9; session.sig=cnQ0i1LwYxhjO3_BvQ4Coh0f9PQ
"
;
String
time
=
"201
8-10-15
00:00:00"
;
String
time
=
"201
9-01-17
00:00:00"
;
String
[]
words
=
word
.
split
(
"\\|"
);
String
[]
words
=
word
.
split
(
"\\|"
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
for
(
String
w
:
words
)
{
for
(
String
w
:
words
)
{
List
<
Map
<
String
,
Object
>>
c
=
Maimai
.
getData
(
w
,
cookie
,
time
,
null
);
//实名动态
// List<Map<String,Object>> c = Maimai.getDataByNoName(w, cookie, time, null);
// List<Map<String,Object>> c = Maimai.getData(w, cookie, time, null);
bodyList
.
addAll
(
c
);
//职言交流
List
<
Map
<
String
,
Object
>>
c2
=
Maimai
.
getDataByNoName
(
w
,
cookie
,
time
,
null
);
// bodyList.addAll(c);
bodyList
.
addAll
(
c2
);
}
}
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"time"
);
headList
.
add
(
"time"
);
...
@@ -29,7 +32,7 @@ public class MaimaiBywordExample {
...
@@ -29,7 +32,7 @@ public class MaimaiBywordExample {
headList
.
add
(
"comment_count"
);
headList
.
add
(
"comment_count"
);
headList
.
add
(
"spreads"
);
headList
.
add
(
"spreads"
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\脉脉关键词采集-美团
晋升-1015
.xlsx"
,
"脉脉关键词"
,
headList
,
bodyList
);
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\脉脉关键词采集-美团
-0123
.xlsx"
,
"脉脉关键词"
,
headList
,
bodyList
);
}
}
}
}
src/test/java/com/zhiwei/crawler/PearVideoByWordExample.java
View file @
d979d793
...
@@ -13,7 +13,7 @@ public class PearVideoByWordExample {
...
@@ -13,7 +13,7 @@ public class PearVideoByWordExample {
@Test
@Test
public
void
pearVideoByWordTest
()
{
public
void
pearVideoByWordTest
()
{
String
word
=
"
美食
"
;
String
word
=
"
大宝 甲醛
"
;
List
<
Map
<
String
,
Object
>>
bodyList
=
PearVideo
.
getPearVideoData
(
word
,
null
);
List
<
Map
<
String
,
Object
>>
bodyList
=
PearVideo
.
getPearVideoData
(
word
,
null
);
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
...
...
src/test/java/com/zhiwei/crawler/QQKBCommentExample.java
View file @
d979d793
...
@@ -6,6 +6,8 @@ import java.util.Map;
...
@@ -6,6 +6,8 @@ import java.util.Map;
import
org.junit.Test
;
import
org.junit.Test
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.QQKB
;
import
com.zhiwei.parse.QQKB
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -18,7 +20,8 @@ public class QQKBCommentExample {
...
@@ -18,7 +20,8 @@ public class QQKBCommentExample {
String
url
=
"https://kuaibao.qq.com/s/20181122A11WQB00"
;
String
url
=
"https://kuaibao.qq.com/s/20181122A11WQB00"
;
//https://kuaibao.qq.com/s/20180423A1PI7400?refer=kb_news
//https://kuaibao.qq.com/s/20180423A1PI7400?refer=kb_news
// https://kuaibao.qq.com/s/20180423A0L60800?refer=kb_news
// https://kuaibao.qq.com/s/20180423A0L60800?refer=kb_news
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
Map
<
String
,
Object
>
map
=
poi
.
importExcel
(
"D://crawlerdata//自媒体/快报评论采集.xlsx"
,
0
);
Map
<
String
,
Object
>
map
=
poi
.
importExcel
(
"D://crawlerdata//自媒体/快报评论采集.xlsx"
,
0
);
List
<
Map
<
String
,
Object
>>
list
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
List
<
Map
<
String
,
Object
>>
list
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
...
@@ -40,7 +43,7 @@ public class QQKBCommentExample {
...
@@ -40,7 +43,7 @@ public class QQKBCommentExample {
headList
.
add
(
"time"
);
//时间
headList
.
add
(
"time"
);
//时间
headList
.
add
(
"content"
);
//内容
headList
.
add
(
"content"
);
//内容
System
.
out
.
println
(
bodyList
.
size
());
System
.
out
.
println
(
bodyList
.
size
());
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\快报评论采集
-zhj
.xlsx"
,
"sada"
,
headList
,
bodyList
);
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\快报评论采集.xlsx"
,
"sada"
,
headList
,
bodyList
);
}
}
...
...
src/test/java/com/zhiwei/crawler/SouhuCommentCountExample.java
View file @
d979d793
package
com
.
zhiwei
.
crawler
;
package
com
.
zhiwei
.
crawler
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Map
;
import
org.junit.Test
;
import
org.junit.Test
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.Souhu
;
import
com.zhiwei.parse.Souhu
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
SouhuCommentCountExample
{
public
class
SouhuCommentCountExample
{
@Test
@Test
public
void
souhuCommentCountTest
()
{
public
void
souhuCommentCountTest
()
{
String
url
=
"http://www.sohu.com/a/281414426_133392"
;
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
int
i
=
Souhu
.
getSouhuCommentCount
(
url
,
null
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
System
.
out
.
println
(
i
);
Map
<
String
,
Object
>
map
=
poi
.
importExcel
(
"D://crawlerdata//自媒体//搜狐评论采集.xlsx"
,
0
);
List
<
Map
<
String
,
Object
>>
list
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
List
<
String
>
headList
=
(
List
<
String
>)
map
.
get
(
"head"
);
for
(
Map
<
String
,
Object
>
map1
:
list
)
{
String
url
=
""
;
try
{
url
=
map1
.
get
(
"url"
)+
""
;
System
.
out
.
println
(
url
);
int
i
=
Souhu
.
getSouhuCommentCount
(
url
,
ProxyFactory
.
getNatProxy
());
map1
.
put
(
"count"
,
i
);
System
.
out
.
println
(
map1
.
toString
());
}
catch
(
Exception
e
)
{
System
.
out
.
println
(
url
);
e
.
printStackTrace
();
continue
;
}
}
headList
.
add
(
"count"
);
poi
.
exportExcel
(
"D://crawlerdata//自媒体//搜狐评论采集.xlsx"
,
"sheet2"
,
headList
,
list
);
}
}
...
...
src/test/java/com/zhiwei/crawler/SouhuCommentExample.java
View file @
d979d793
...
@@ -6,6 +6,8 @@ import java.util.Map;
...
@@ -6,6 +6,8 @@ import java.util.Map;
import
org.junit.Test
;
import
org.junit.Test
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.Fenghuang
;
import
com.zhiwei.parse.Fenghuang
;
import
com.zhiwei.parse.Souhu
;
import
com.zhiwei.parse.Souhu
;
...
@@ -16,7 +18,8 @@ public class SouhuCommentExample {
...
@@ -16,7 +18,8 @@ public class SouhuCommentExample {
@Test
@Test
public
void
souhuCommentTest
()
{
public
void
souhuCommentTest
()
{
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
Map
<
String
,
Object
>
map
=
poi
.
importExcel
(
"D://crawlerdata//自媒体//搜狐评论采集.xlsx"
,
0
);
Map
<
String
,
Object
>
map
=
poi
.
importExcel
(
"D://crawlerdata//自媒体//搜狐评论采集.xlsx"
,
0
);
List
<
Map
<
String
,
Object
>>
list
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
List
<
Map
<
String
,
Object
>>
list
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
...
@@ -30,7 +33,7 @@ public class SouhuCommentExample {
...
@@ -30,7 +33,7 @@ public class SouhuCommentExample {
if
(
dataList
.
size
()
<=
0
)
{
if
(
dataList
.
size
()
<=
0
)
{
urlList
.
add
(
url
);
urlList
.
add
(
url
);
}
}
ZhiWeiTools
.
sleep
(
20
00
);
ZhiWeiTools
.
sleep
(
1
00
);
if
(
dataList
!=
null
)
{
if
(
dataList
!=
null
)
{
bodyList
.
addAll
(
dataList
);
bodyList
.
addAll
(
dataList
);
}
}
...
...
src/test/java/com/zhiwei/hsitory/QQkandianHistoryExample.java
View file @
d979d793
package
com
.
zhiwei
.
hsitory
;
//
package com.zhiwei.hsitory;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.HashMap
;
//
import java.util.HashMap;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.testng.annotations.Test
;
//
import org.testng.annotations.Test;
//
import
com.zhiwei.bean.HistortyBean
;
//
import com.zhiwei.bean.HistortyBean;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.QQKandian
;
//
import com.zhiwei.parse.QQKandian;
//
public
class
QQkandianHistoryExample
{
//
public class QQkandianHistoryExample {
@Test
//
@Test
public
void
f
()
{
//
public void f() {
String
uid
=
"2661642386"
;
//
String uid = "2661642386";
//
QQKandian
qqKandian
=
new
QQKandian
();
//
QQKandian qqKandian = new QQKandian();
List
<
HistortyBean
>
dataList
=
qqKandian
.
getHistoryData
(
uid
,
null
);
//
List<HistortyBean> dataList = qqKandian.getHistoryData(uid, null);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
//
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for
(
HistortyBean
h
:
dataList
)
{
//
for(HistortyBean h : dataList) {
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
//
Map<String, Object> map = new HashMap<String,Object>();
map
.
put
(
"标题"
,
h
.
getTitle
());
//
map.put("标题", h.getTitle());
map
.
put
(
"时间"
,
h
.
getTime
());
//
map.put("时间", h.getTime());
map
.
put
(
"来源"
,
h
.
getSource
());
//
map.put("来源", h.getSource());
map
.
put
(
"正文"
,
h
.
getContent
());
//
map.put("正文", h.getContent());
map
.
put
(
"链接"
,
h
.
getUrl
());
//
map.put("链接", h.getUrl());
bodyList
.
add
(
map
);
//
bodyList.add(map);
}
//
}
List
<
String
>
headList
=
new
ArrayList
<
String
>();
//
List<String> headList = new ArrayList<String>();
headList
.
add
(
"标题"
);
//
headList.add("标题");
headList
.
add
(
"来源"
);
//
headList.add("来源");
headList
.
add
(
"链接"
);
//
headList.add("链接");
headList
.
add
(
"正文"
);
//
headList.add("正文");
headList
.
add
(
"时间"
);
//
headList.add("时间");
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\qq看点-数据-2661642386.xlsx"
,
"数据"
,
headList
,
bodyList
);
//
poi.exportExcel("D:\\crawlerdata\\自媒体\\qq看点-数据-2661642386.xlsx", "数据", headList, bodyList);
//
//
}
//
}
}
//
}
src/test/java/com/zhiwei/keyword/GftaiTest.java
View file @
d979d793
//
package com.zhiwei.keyword;
package
com
.
zhiwei
.
keyword
;
//
//
import java.util.ArrayList;
import
java.util.ArrayList
;
//
import java.util.List;
import
java.util.List
;
//
import java.util.Map;
import
java.util.Map
;
//
//
import org.testng.annotations.Test;
import
org.testng.annotations.Test
;
//
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.parse.Gftai;
import
com.zhiwei.parse.Gftai
;
//
//
public class GftaiTest {
public
class
GftaiTest
{
//
@Test
@Test
//
public void f() {
public
void
f
()
{
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
// String words = "民宿|短租|住宿|途家|爱彼迎|小猪短租|榛果民宿|Airbnb
";
String
words
=
"美团 催收|美团 借款|美团 还钱|三快 借钱|三快 生活费|三快 借款|美团 征信
"
;
//
String[] ws = words.split("\\|");
String
[]
ws
=
words
.
split
(
"\\|"
);
//
List<Map<String,Object>> bodyList = new ArrayList<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
//
for(String word : ws) {
for
(
String
word
:
ws
)
{
//
List<Map<String,Object>> list = Gftai.getData(word, null);
List
<
Map
<
String
,
Object
>>
list
=
Gftai
.
getData
(
word
,
null
);
//
bodyList.addAll(list);
bodyList
.
addAll
(
list
);
//
System.out.println(word + " --------- " + bodyList.size());
System
.
out
.
println
(
word
+
" --------- "
+
bodyList
.
size
());
//
}
}
//
List<String> headList = new ArrayList<>();
List
<
String
>
headList
=
new
ArrayList
<>();
//
headList.add("title");
headList
.
add
(
"title"
);
//
headList.add("time");
headList
.
add
(
"time"
);
//
headList.add("content");
headList
.
add
(
"content"
);
//
headList.add("source");
headList
.
add
(
"source"
);
//
headList.add("url");
headList
.
add
(
"url"
);
//
// poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\国富泰信用
.xlsx", "数据", headList, bodyList);
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\投诉\\国富泰信用-美团-2
.xlsx"
,
"数据"
,
headList
,
bodyList
);
//
}
}
//
}
}
src/test/java/com/zhiwei/keyword/KuaiTousuTest.java
View file @
d979d793
//
package com.zhiwei.keyword;
package
com
.
zhiwei
.
keyword
;
//
//
import java.util.ArrayList;
import
java.util.ArrayList
;
//
import java.util.List;
import
java.util.List
;
//
import java.util.Map;
import
java.util.Map
;
//
//
import org.testng.annotations.Test;
import
org.testng.annotations.Test
;
//
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.parse.Gftai;
import
com.zhiwei.parse.Gftai
;
//
import com.zhiwei.parse.KuaiTousu;
import
com.zhiwei.parse.KuaiTousu
;
//
//
public class KuaiTousuTest {
public
class
KuaiTousuTest
{
//
@Test
@Test
//
public void f() {
public
void
f
()
{
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
// String words = "民宿|短租|住宿|途家|爱彼迎|小猪短租|榛果民宿|Airbnb
";
String
words
=
"美团 催收|美团 借款|美团 还钱|三快 借钱|三快 生活费|三快 借款|美团 征信
"
;
//
String[] ws = words.split("\\|");
String
[]
ws
=
words
.
split
(
"\\|"
);
//
List<Map<String,Object>> bodyList = new ArrayList<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
//
for(String word : ws) {
for
(
String
word
:
ws
)
{
//
List<Map<String,Object>> list = KuaiTousu.getData(word, null);
List
<
Map
<
String
,
Object
>>
list
=
KuaiTousu
.
getData
(
word
,
null
);
//
bodyList.addAll(list);
bodyList
.
addAll
(
list
);
//
System.out.println(word + " --------- " + bodyList.size());
System
.
out
.
println
(
word
+
" --------- "
+
bodyList
.
size
());
//
}
}
//
List<String> headList = new ArrayList<>();
List
<
String
>
headList
=
new
ArrayList
<>();
//
headList.add("title");
headList
.
add
(
"title"
);
//
headList.add("time");
headList
.
add
(
"time"
);
//
headList.add("content");
headList
.
add
(
"content"
);
//
headList.add("source");
headList
.
add
(
"source"
);
//
headList.add("url");
headList
.
add
(
"url"
);
//
// poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\新浪广东快投诉
.xlsx", "数据", headList, bodyList);
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\投诉\\新浪广东快投诉-美团-2
.xlsx"
,
"数据"
,
headList
,
bodyList
);
//
//
//
//
//
}
}
//
}
}
src/test/java/com/zhiwei/keyword/SinaTousuTest.java
View file @
d979d793
//
package com.zhiwei.keyword;
package
com
.
zhiwei
.
keyword
;
//
//
import java.util.ArrayList;
import
java.util.ArrayList
;
//
import java.util.List;
import
java.util.List
;
//
import java.util.Map;
import
java.util.Map
;
//
//
import org.testng.annotations.Test;
import
org.testng.annotations.Test
;
//
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.parse.KuaiTousu;
import
com.zhiwei.parse.KuaiTousu
;
//
import com.zhiwei.parse.SinaTousu;
import
com.zhiwei.parse.SinaTousu
;
//
//
public class SinaTousuTest {
public
class
SinaTousuTest
{
//
//
@Test
@Test
//
public void getSinaTousuData() {
public
void
getSinaTousuData
()
{
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
// String words = "民宿|短租|住宿|途家|爱彼迎|小猪短租|榛果民宿|Airbnb
";
String
words
=
"美团 催收|美团 借款|美团 还钱|三快 借钱|三快 生活费|三快 借款|美团 征信
"
;
//
String[] ws = words.split("\\|");
String
[]
ws
=
words
.
split
(
"\\|"
);
//
List<Map<String,Object>> bodyList = new ArrayList<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
//
for(String word : ws) {
for
(
String
word
:
ws
)
{
// List<Map<String,Object>> list = SinaTousu.getSinaTousuData(word, null, "2018-01
-01 00:00:00");
List
<
Map
<
String
,
Object
>>
list
=
SinaTousu
.
getSinaTousuData
(
word
,
null
,
"2018-07
-01 00:00:00"
);
//
bodyList.addAll(list);
bodyList
.
addAll
(
list
);
//
System.out.println(word + " --------- " + bodyList.size());
System
.
out
.
println
(
word
+
" --------- "
+
bodyList
.
size
());
//
}
}
//
List<String> headList = new ArrayList<>();
List
<
String
>
headList
=
new
ArrayList
<>();
//
headList.add("title");
headList
.
add
(
"title"
);
//
headList.add("time");
headList
.
add
(
"time"
);
//
headList.add("content");
headList
.
add
(
"content"
);
//
headList.add("source");
headList
.
add
(
"source"
);
//
headList.add("url");
headList
.
add
(
"url"
);
//
// poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\黑猫投诉
.xlsx", "数据", headList, bodyList);
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\投诉\\黑猫投诉-美团-2
.xlsx"
,
"数据"
,
headList
,
bodyList
);
//
//
//
//
}
}
//
}
}
src/test/java/com/zhiwei/keyword/YoukuKeyWordTest.java
0 → 100644
View file @
d979d793
//package com.zhiwei.keyword;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Youku;
//
//public class YoukuKeyWordTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
// GroupType.PROVIDER);
// String word = "帮宝适 二噁英," +
// "帮宝适 二恶英," +
// "帮宝适 有毒," +
// "帮宝适 剧毒," +
// "帮宝适 致癌," +
// "宝洁 二噁英," +
// "宝洁 二恶英," +
// "宝洁 有毒," +
// "宝洁 剧毒," +
// "宝洁 致癌," +
// "纸尿裤 二噁英," +
// "纸尿裤 二恶英," +
// "纸尿裤 有毒," +
// "纸尿裤 剧毒," +
// "纸尿裤 致癌";
// List<Map<String,Object>> bodyList = new ArrayList<>();
// String[] words = word.split(",");
// for(String w : words) {
// System.out.println(w);
// bodyList.addAll(Youku.getDataList(w));
// }
// List<String> headList = new ArrayList<>();
// headList.add("title");
// headList.add("time");
// headList.add("url");
// headList.add("uper");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\优酷数据-txh-0121.xlsx", "数据", headList, bodyList);
//
// }
//}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment