Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
articlenewscrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
articlenewscrawler
Commits
6018f0b3
Commit
6018f0b3
authored
Nov 18, 2019
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
提交修改
parent
3e350f8b
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
28 changed files
with
310 additions
and
141 deletions
+310
-141
pom.xml
+2
-2
src/main/java/com/zhiwei/httpclient/HeadGet.java
+4
-5
src/main/java/com/zhiwei/httpclient/HttpClient.java
+66
-5
src/main/java/com/zhiwei/parse/Baijia.java
+9
-7
src/main/java/com/zhiwei/parse/BiliBili.java
+6
-7
src/main/java/com/zhiwei/parse/Dayu.java
+34
-1
src/main/java/com/zhiwei/parse/Douban.java
+2
-3
src/main/java/com/zhiwei/parse/Maimai.java
+40
-0
src/main/java/com/zhiwei/parse/QQKB.java
+2
-3
src/main/java/com/zhiwei/parse/QQKandian.java
+0
-0
src/main/java/com/zhiwei/parse/SinaKeji.java
+19
-8
src/main/java/com/zhiwei/parse/SinaTousu.java
+1
-1
src/main/java/com/zhiwei/parse/Souhu.java
+2
-8
src/main/java/com/zhiwei/parse/TXNews.java
+3
-1
src/main/java/com/zhiwei/parse/Wangyi.java
+17
-1
src/main/java/com/zhiwei/parse/analysis/AiqiyiByWordAnalysis.java
+5
-7
src/main/java/com/zhiwei/parse/analysis/BaijiaAccountAnalysis.java
+7
-9
src/main/java/com/zhiwei/parse/analysis/BilibilikeyWordAnalysis.java
+2
-2
src/main/java/com/zhiwei/parse/analysis/SouhuCommentAnalysis.java
+20
-12
src/test/java/com/zhiwei/Comment/MaimaiCommentCountTest.java
+2
-2
src/test/java/com/zhiwei/Comment/SinaKejiComment.java
+9
-3
src/test/java/com/zhiwei/crawler/QQAccountExample.java
+1
-1
src/test/java/com/zhiwei/hsitory/BaijiaAccountExample.java
+8
-4
src/test/java/com/zhiwei/shipin/AiqiyiTest.java
+2
-2
src/test/java/com/zhiwei/shipin/BilibiliTest.java
+42
-42
src/test/java/com/zhiwei/shipin/QQTVTest.java
+3
-3
src/test/java/com/zhiwei/shipin/SohuTVTest.java
+1
-1
src/test/java/com/zhiwei/shipin/YoukuKeyWordTest.java
+1
-1
No files found.
pom.xml
View file @
6018f0b3
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
<modelVersion>
4.0.0
</modelVersion>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
<artifactId>
articlenewscrawler
</artifactId>
<artifactId>
articlenewscrawler
</artifactId>
<version>
0.
1.7
-SNAPSHOT
</version>
<version>
0.
2.2
-SNAPSHOT
</version>
<name>
articlenewscrawler
</name>
<name>
articlenewscrawler
</name>
<description>
采集凤凰,一点资讯,搜狐历时文章和文章评论
</description>
<description>
采集凤凰,一点资讯,搜狐历时文章和文章评论
</description>
...
@@ -21,7 +21,7 @@
...
@@ -21,7 +21,7 @@
<dependency>
<dependency>
<groupId>
com.zhiwei.crawler
</groupId>
<groupId>
com.zhiwei.crawler
</groupId>
<artifactId>
crawler-core
</artifactId>
<artifactId>
crawler-core
</artifactId>
<version>
0.
3.6-RELEASE
</version>
<version>
0.
5.5.6-SNAPSHOT
</version>
<scope>
provided
</scope>
<scope>
provided
</scope>
</dependency>
</dependency>
</dependencies>
</dependencies>
...
...
src/main/java/com/zhiwei/httpclient/HeadGet.java
View file @
6018f0b3
...
@@ -67,7 +67,7 @@ public class HeadGet {
...
@@ -67,7 +67,7 @@ public class HeadGet {
public
static
Map
<
String
,
String
>
getYidianzixunAccountHeaderMap
(
String
cookie
,
String
referer
)
{
public
static
Map
<
String
,
String
>
getYidianzixunAccountHeaderMap
(
String
cookie
,
String
referer
)
{
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
headerMap
.
put
(
"User-Agent"
,
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (
Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36
"
);
"Mozilla/5.0 (
iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1
"
);
headerMap
.
put
(
"Accept"
,
headerMap
.
put
(
"Accept"
,
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
);
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
);
headerMap
.
put
(
"Accept-Language"
,
"zh-CN,zh;q=0.9"
);
headerMap
.
put
(
"Accept-Language"
,
"zh-CN,zh;q=0.9"
);
...
@@ -254,14 +254,13 @@ public class HeadGet {
...
@@ -254,14 +254,13 @@ public class HeadGet {
* @throws IOException
* @throws IOException
*/
*/
public
static
Map
<
String
,
String
>
getDayuCommentHeaderMap
(
String
cookie
)
{
public
static
Map
<
String
,
String
>
getDayuCommentHeaderMap
(
String
cookie
)
{
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
headerMap
.
put
(
"User-Agent"
,
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"
);
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"
);
headerMap
.
put
(
"Accept"
,
headerMap
.
put
(
"Accept"
,
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
);
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
);
headerMap
.
put
(
"Accept-Language"
,
"zh-CN,zh;q=0.9"
);
headerMap
.
put
(
"Accept-Language"
,
"zh-CN,zh;q=0.9"
);
headerMap
.
put
(
"Connection"
,
"keep-alive"
);
headerMap
.
put
(
"Connection"
,
"keep-alive"
);
headerMap
.
put
(
"Host"
,
"m.uczzd.cn"
);
if
(
cookie
!=
null
)
{
if
(
cookie
!=
null
)
{
headerMap
.
put
(
"Cookie"
,
cookie
);
headerMap
.
put
(
"Cookie"
,
cookie
);
}
}
...
@@ -293,13 +292,13 @@ public class HeadGet {
...
@@ -293,13 +292,13 @@ public class HeadGet {
}
}
public
static
Map
<
String
,
Object
>
getQQAccountOneParamMap
(
String
chlid
)
{
public
static
Map
<
String
,
Object
>
getQQAccountOneParamMap
(
String
chlid
)
{
Map
<
String
,
Object
>
paramMap
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
paramMap
=
new
HashMap
<>();
paramMap
.
put
(
"chlid"
,
chlid
);
paramMap
.
put
(
"chlid"
,
chlid
);
return
paramMap
;
return
paramMap
;
}
}
public
static
Map
<
String
,
Object
>
getQQAccountOtherParamMap
(
String
ids
)
{
public
static
Map
<
String
,
Object
>
getQQAccountOtherParamMap
(
String
ids
)
{
Map
<
String
,
Object
>
paramMap
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
paramMap
=
new
HashMap
<>();
paramMap
.
put
(
"ids"
,
ids
);
paramMap
.
put
(
"ids"
,
ids
);
return
paramMap
;
return
paramMap
;
}
}
...
...
src/main/java/com/zhiwei/httpclient/HttpClient.java
View file @
6018f0b3
...
@@ -3,6 +3,7 @@ package com.zhiwei.httpclient;
...
@@ -3,6 +3,7 @@ package com.zhiwei.httpclient;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Objects
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
...
@@ -11,12 +12,14 @@ import com.zhiwei.crawler.core.HttpBoot;
...
@@ -11,12 +12,14 @@ import com.zhiwei.crawler.core.HttpBoot;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
okhttp3.FormBody
;
import
okhttp3.Headers
;
import
okhttp3.Response
;
import
okhttp3.Response
;
public
class
HttpClient
{
public
class
HttpClient
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
HttpClient
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
HttpClient
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
2
).
build
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
2
).
throwException
(
false
).
build
();
/**
/**
*
*
...
@@ -43,6 +46,25 @@ public class HttpClient {
...
@@ -43,6 +46,25 @@ public class HttpClient {
* @return
* @return
* @throws IOException
* @throws IOException
*/
*/
/**
 * Performs an HTTP GET without custom headers and returns the response body as text.
 *
 * <p>The request is attempted up to three times. Any exception raised while calling
 * or reading the response (including a missing response or body) is logged and the
 * next attempt starts immediately.
 *
 * @param url   address to fetch
 * @param proxy proxy holder handed to {@code httpBoot.syncCall}
 * @return the response body, or {@code null} when all three attempts failed
 */
public static String executeHttpRequestGet(String url, ProxyHolder proxy) {
    for (int attempt = 1; attempt <= 3; attempt++) {
        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
            return response.body().string();
        } catch (Exception e) {
            // SLF4J treats a trailing Throwable as the exception to print, not as a
            // placeholder value: the URL fills "{}" and the stack trace is still logged.
            logger.error("httpClient 获取数据出现问题:{}", url, e);
        }
    }
    return null;
}
/**
*
* @Description (TODO这里用一句话描述这个方法的作用)
* @param url
* @param cookie
* @return
* @throws IOException
*/
public
static
String
executeHttpRequestGet
(
String
url
,
ProxyHolder
proxy
,
Map
<
String
,
String
>
headerMap
)
{
public
static
String
executeHttpRequestGet
(
String
url
,
ProxyHolder
proxy
,
Map
<
String
,
String
>
headerMap
)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
)){
...
@@ -54,8 +76,37 @@ public class HttpClient {
...
@@ -54,8 +76,37 @@ public class HttpClient {
return
null
;
return
null
;
}
}
public
static
String
executeHttpRequestPost
(
String
url
,
Proxy
proxy
,
Map
<
String
,
String
>
headerMap
,
Map
<
String
,
Object
>
paramMap
)
{
/**
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
url
,
headerMap
,
paramMap
),
proxy
)){
*
* @Description (TODO这里用一句话描述这个方法的作用)
* @param url
* @param cookie
* @return
* @throws IOException
*/
/**
 * Performs an HTTP GET with the given OkHttp headers and returns the response body as text.
 *
 * <p>The request is attempted up to three times. Any exception raised while calling
 * or reading the response (including a missing response or body) is logged and the
 * next attempt starts immediately.
 *
 * @param url    address to fetch
 * @param proxy  proxy holder handed to {@code httpBoot.syncCall}
 * @param header request headers passed to {@code RequestUtils.wrapGet}
 * @return the response body, or {@code null} when all three attempts failed
 */
public static String executeHttpRequestGet(String url, ProxyHolder proxy, Headers header) {
    for (int attempt = 1; attempt <= 3; attempt++) {
        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url, header), proxy)) {
            return response.body().string();
        } catch (Exception e) {
            // SLF4J treats a trailing Throwable as the exception to print, not as a
            // placeholder value: the URL fills "{}" and the stack trace is still logged.
            logger.error("httpClient 获取数据出现问题:{}", url, e);
        }
    }
    return null;
}
public
static
String
executeHttpRequestPost
(
String
url
,
Proxy
proxy
,
Map
<
String
,
String
>
headerMap
,
Map
<
String
,
Object
>
params
)
{
FormBody
body
=
null
;
if
(
Objects
.
nonNull
(
params
)
&&
!
params
.
isEmpty
())
{
FormBody
.
Builder
builder
=
new
FormBody
.
Builder
();
params
.
forEach
((
lt
,
rt
)
->
{
if
(
Objects
.
nonNull
(
lt
))
{
builder
.
add
(
String
.
valueOf
(
lt
),
Objects
.
isNull
(
rt
)
?
""
:
String
.
valueOf
(
rt
));
}
});
body
=
builder
.
build
();
}
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
url
,
headerMap
,
body
),
proxy
)){
return
response
.
body
().
string
();
return
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
);
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
);
...
@@ -64,8 +115,18 @@ public class HttpClient {
...
@@ -64,8 +115,18 @@ public class HttpClient {
}
}
public
static
String
executeHttpRequestPost
(
String
url
,
ProxyHolder
proxy
,
Map
<
String
,
String
>
headerMap
,
Map
<
String
,
Object
>
paramMap
)
{
public
static
String
executeHttpRequestPost
(
String
url
,
ProxyHolder
proxy
,
Map
<
String
,
String
>
headerMap
,
Map
<
String
,
Object
>
params
)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
url
,
headerMap
,
paramMap
),
proxy
)){
FormBody
body
=
null
;
if
(
Objects
.
nonNull
(
params
)
&&
!
params
.
isEmpty
())
{
FormBody
.
Builder
builder
=
new
FormBody
.
Builder
();
params
.
forEach
((
lt
,
rt
)
->
{
if
(
Objects
.
nonNull
(
lt
))
{
builder
.
add
(
String
.
valueOf
(
lt
),
Objects
.
isNull
(
rt
)
?
""
:
String
.
valueOf
(
rt
));
}
});
body
=
builder
.
build
();
}
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
url
,
headerMap
,
body
),
proxy
)){
return
response
.
body
().
string
();
return
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
);
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
);
...
...
src/main/java/com/zhiwei/parse/Baijia.java
View file @
6018f0b3
...
@@ -89,20 +89,21 @@ public class Baijia {
...
@@ -89,20 +89,21 @@ public class Baijia {
while
(
f
)
{
while
(
f
)
{
for
(
int
i
=
1
;
i
<
3
;
i
++)
{
for
(
int
i
=
1
;
i
<
3
;
i
++)
{
try
{
try
{
String
url
=
"https://
author.baidu.com/list?type=article&tab=2&uk="
+
uk
+
"&ctime="
+
ctime
+
"&num=50
"
;
String
url
=
"https://
mbd.baidu.com/webpage?tab=article&num=10&uk="
+
uk
+
"&ctime="
+
ctime
+
"&type=newhome&action=dynamic&format=json
"
;
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
Map
<
String
,
Object
>
dMap
=
baijiaAccountAnalysis
.
getBaijiaAccountData3
(
result
,
name
,
startTime
);
Map
<
String
,
Object
>
dMap
=
baijiaAccountAnalysis
.
getBaijiaAccountData3
(
result
,
name
,
startTime
);
List
<
Map
<
String
,
Object
>>
dList
=
(
List
<
Map
<
String
,
Object
>>)
dMap
.
get
(
"data"
);
List
<
Map
<
String
,
Object
>>
dList
=
(
List
<
Map
<
String
,
Object
>>)
dMap
.
get
(
"data"
);
dataList
.
addAll
(
dList
);
if
(
Objects
.
nonNull
(
dList
))
dataList
.
addAll
(
dList
);
logger
.
info
(
"{} 数据采集结果 {}"
,
appId
,
dataList
.
size
());
logger
.
info
(
"{} 数据采集结果 {}"
,
appId
,
dataList
.
size
());
if
(!(
boolean
)
dMap
.
get
(
"more"
))
{
if
(!(
boolean
)
dMap
.
get
(
"more"
))
{
f
=
false
;
f
=
false
;
}
}
ctime
=
String
.
valueOf
(
dMap
.
get
(
"ctime"
));
ctime
=
String
.
valueOf
(
dMap
.
get
(
"ctime"
));
ZhiWeiTools
.
sleep
(
3
000
);
ZhiWeiTools
.
sleep
(
1
000
);
break
;
break
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
ZhiWeiTools
.
sleep
(
3
000
);
ZhiWeiTools
.
sleep
(
2
000
);
}
}
}
}
}
}
...
@@ -111,15 +112,16 @@ public class Baijia {
...
@@ -111,15 +112,16 @@ public class Baijia {
}
}
private
static
String
getUkData
(
String
appId
,
ProxyHolder
proxy
,
String
cookie
)
{
private
static
String
getUkData
(
String
appId
,
ProxyHolder
proxy
,
String
cookie
)
{
String
url
=
"https://author.baidu.com/profile?context={\"from\":0,\"app_id\":\""
// String url = "https://author.baidu.com/profile?context={\"from\":0,\"app_id\":\""
+
appId
+
"\"}&cmdType=&pagelets[]=root&reqID=0&ispeed=1#"
;
// +appId+"\"}&cmdType=&pagelets[]=root&reqID=0&ispeed=1#";
String
url
=
"https://author.baidu.com/home/"
+
appId
;
Map
<
String
,
Object
>
headers
=
new
HashMap
<>();
Map
<
String
,
Object
>
headers
=
new
HashMap
<>();
headers
.
put
(
"Host"
,
"author.baidu.com"
);
headers
.
put
(
"Host"
,
"author.baidu.com"
);
headers
.
put
(
"cookie"
,
cookie
);
headers
.
put
(
"cookie"
,
cookie
);
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
{
try
{
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headers
),
proxy
).
body
().
string
();
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headers
),
proxy
).
body
().
string
();
return
result
.
split
(
"uk\
\\\\":\\\\\""
)[
1
].
split
(
"\\\\
\","
)[
0
];
return
result
.
split
(
"uk\
":\""
)[
1
].
split
(
"
\","
)[
0
];
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"百家号uk 获取失败"
);
logger
.
error
(
"百家号uk 获取失败"
);
}
}
...
...
src/main/java/com/zhiwei/parse/BiliBili.java
View file @
6018f0b3
...
@@ -4,7 +4,6 @@ import java.io.UnsupportedEncodingException;
...
@@ -4,7 +4,6 @@ import java.io.UnsupportedEncodingException;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -13,7 +12,7 @@ import org.slf4j.LoggerFactory;
...
@@ -13,7 +12,7 @@ import org.slf4j.LoggerFactory;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.
crawler.utils.RequestUtils
;
import
com.zhiwei.
httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.BilibilikeyWordAnalysis
;
import
com.zhiwei.parse.analysis.BilibilikeyWordAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -22,16 +21,16 @@ import okhttp3.Headers;
...
@@ -22,16 +21,16 @@ import okhttp3.Headers;
public
class
BiliBili
{
public
class
BiliBili
{
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
BiliBili
.
class
);
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
BiliBili
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
2
).
useCookieJar
(
true
).
build
();
@SuppressWarnings
(
"unchecked"
)
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
word
,
Proxy
proxy
,
String
endTime
,
String
cookie
)
{
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
word
,
Proxy
proxy
,
String
endTime
,
String
cookie
)
{
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
try
{
try
{
//
//
String
url
=
"https://search.bilibili.com/all?keyword="
+
URLEncoder
.
encode
(
word
,
"utf-8"
)+
"&order=pubdate&duration=0&tids_1=0"
;
String
url
=
"https://search.bilibili.com/all?keyword="
+
URLEncoder
.
encode
(
word
,
"utf-8"
)+
"&single_column=1&order=stow&duration=0&tids_1=0"
;
System
.
out
.
println
(
url
);
Headers
header
=
Headers
.
of
(
"cookie"
,
cookie
,
"Referer"
,
"https://www.bilibili.com/"
,
"Host"
,
"search.bilibili.com"
);
Headers
header
=
Headers
.
of
(
"cookie"
,
cookie
,
"Referer"
,
"https://www.bilibili.com/"
,
"Host"
,
"search.bilibili.com"
);
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
header
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
(
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
ProxyHolder
.
NAT_HEAVY_PROXY
,
header
);
ZhiWeiTools
.
sleep
(
100
);
ZhiWeiTools
.
sleep
(
100
);
Map
<
String
,
Object
>
map
=
BilibilikeyWordAnalysis
.
getData
(
result
,
word
,
endTime
);
Map
<
String
,
Object
>
map
=
BilibilikeyWordAnalysis
.
getData
(
result
,
word
,
endTime
);
boolean
more
=
(
boolean
)
map
.
get
(
"more"
);
boolean
more
=
(
boolean
)
map
.
get
(
"more"
);
...
@@ -43,7 +42,7 @@ public class BiliBili {
...
@@ -43,7 +42,7 @@ public class BiliBili {
while
(
more
)
{
while
(
more
)
{
map
.
clear
();
map
.
clear
();
String
ur
=
url
+
"&page="
+
n
;
String
ur
=
url
+
"&page="
+
n
;
String
result2
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
ur
,
header
),
ProxyHolder
.
NAT_HEAVY_PROXY
).
body
().
string
(
);
String
result2
=
HttpClient
.
executeHttpRequestGet
(
ur
,
ProxyHolder
.
NAT_HEAVY_PROXY
,
header
);
map
=
BilibilikeyWordAnalysis
.
getData
(
result2
,
word
,
endTime
);
map
=
BilibilikeyWordAnalysis
.
getData
(
result2
,
word
,
endTime
);
List
<
Map
<
String
,
Object
>>
dataList2
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
List
<
Map
<
String
,
Object
>>
dataList2
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
if
(
dataList2
!=
null
)
{
if
(
dataList2
!=
null
)
{
...
@@ -60,7 +59,7 @@ public class BiliBili {
...
@@ -60,7 +59,7 @@ public class BiliBili {
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"e {}"
,
e
);
logger
.
error
(
"e {}"
,
e
);
}
}
return
Collections
.
emptyList
()
;
return
bodyList
;
}
}
...
...
src/main/java/com/zhiwei/parse/Dayu.java
View file @
6018f0b3
...
@@ -102,7 +102,7 @@ public class Dayu {
...
@@ -102,7 +102,7 @@ public class Dayu {
* @param articleId
* @param articleId
* @return
* @return
*/
*/
public
static
int
getDayuCommentCount
(
String
articleId
,
Proxy
proxy
)
{
public
static
int
getDayuCommentCount
(
String
articleId
,
Proxy
Holder
proxy
)
{
String
url
=
"http://m.uczzd.cn/iflow/api/v2/cmt/article/"
+
articleId
+
"/comments/byhot"
;
String
url
=
"http://m.uczzd.cn/iflow/api/v2/cmt/article/"
+
articleId
+
"/comments/byhot"
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
...
@@ -110,6 +110,39 @@ public class Dayu {
...
@@ -110,6 +110,39 @@ public class Dayu {
return
json
.
getJSONObject
(
"data"
).
getInteger
(
"comment_cnt"
);
return
json
.
getJSONObject
(
"data"
).
getInteger
(
"comment_cnt"
);
}
}
/**
 * Returns the read count of a Dayu article.
 *
 * <p>Two URL shapes are recognised:
 * <ul>
 *   <li>URLs containing {@code !wm_aid=} are looked up under
 *       {@code https://ff.dayu.com/contents/origin/};</li>
 *   <li>URLs containing {@code wm_cid=} are looked up under
 *       {@code https://ff.dayu.com/contents/}.</li>
 * </ul>
 * Everything after the marker is used as the article id.
 * NOTE(review): this assumes the marker is the last parameter in the URL - confirm
 * against real article links.
 *
 * <p>The result is {@code click1 + click2} taken from {@code data._incrs} of the
 * JSON answer.
 *
 * @param url   article URL
 * @param proxy proxy holder forwarded to {@code HttpClient.executeHttpRequestGet}
 * @return the summed counters, or {@code -1} when the URL is {@code null} or of an
 *         unrecognised shape, the request produced no body, or the answer could not
 *         be parsed
 */
public static int getDayuReadCount(String url, ProxyHolder proxy) {
    if (url == null) {
        return -1;
    }
    try {
        // The two supported URL shapes differ only in the API path prefix.
        String endpoint;
        if (url.contains("!wm_aid=")) {
            endpoint = "https://ff.dayu.com/contents/origin/" + url.split("wm_aid=")[1];
        } else if (url.contains("wm_cid=")) {
            endpoint = "https://ff.dayu.com/contents/" + url.split("wm_cid=")[1];
        } else {
            return -1;
        }
        String eUrl = endpoint
                + "?biz_id=1002&_fetch_author=1&_incr_fields=click1,click2,click3,click_total,play,like";
        Map<String, String> headerMap = HeadGet.getDayuCommentHeaderMap(null);
        String result = HttpClient.executeHttpRequestGet(eUrl, proxy, headerMap);
        if (result == null) {
            // No body was obtained; report "unknown" instead of failing on the parse below.
            return -1;
        }
        JSONObject incrs = JSONObject.parseObject(result).getJSONObject("data").getJSONObject("_incrs");
        return incrs.getIntValue("click2") + incrs.getIntValue("click1");
    } catch (Exception e) {
        // NOTE(review): kept as in the original; switch to the class logger if one exists.
        e.printStackTrace();
    }
    return -1;
}
/**
/**
*
*
* @Description 大鱼号依据关键词采集
* @Description 大鱼号依据关键词采集
...
...
src/main/java/com/zhiwei/parse/Douban.java
View file @
6018f0b3
...
@@ -38,7 +38,7 @@ public class Douban {
...
@@ -38,7 +38,7 @@ public class Douban {
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
doubanTopicGetByWord
(
String
word
,
ProxyHolder
proxy
,
String
cookie
,
String
stime
)
{
public
static
List
<
Map
<
String
,
Object
>>
doubanTopicGetByWord
(
String
word
,
ProxyHolder
proxy
,
String
cookie
,
String
stime
)
{
int
page
=
0
;
int
page
=
0
;
int
count
=
2
0
;
int
count
=
5
0
;
boolean
more
=
true
;
boolean
more
=
true
;
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
headerMap
.
put
(
"Host"
,
"www.douban.com"
);
headerMap
.
put
(
"Host"
,
"www.douban.com"
);
...
@@ -77,10 +77,9 @@ public class Douban {
...
@@ -77,10 +77,9 @@ public class Douban {
map
.
put
(
"time"
,
time
);
map
.
put
(
"time"
,
time
);
map
.
put
(
"reply_count"
,
replyCount
);
map
.
put
(
"reply_count"
,
replyCount
);
bodyList
.
add
(
map
);
bodyList
.
add
(
map
);
// System.out.println(map.toString());
}
}
}
}
if
(
bodyList
.
size
()
-
cou
<
3
0
){
if
(
bodyList
.
size
()
-
cou
<
10
||
page
>
50
0
){
more
=
false
;
more
=
false
;
}
}
logger
.
info
(
"采集到 第 {} 页 ,一共采集到 {} 条 是否有下一页 {}"
,
page
,
bodyList
.
size
(),
more
);
logger
.
info
(
"采集到 第 {} 页 ,一共采集到 {} 条 是否有下一页 {}"
,
page
,
bodyList
.
size
(),
more
);
...
...
src/main/java/com/zhiwei/parse/Maimai.java
View file @
6018f0b3
...
@@ -16,7 +16,9 @@ import org.slf4j.LoggerFactory;
...
@@ -16,7 +16,9 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
...
@@ -139,6 +141,44 @@ public class Maimai {
...
@@ -139,6 +141,44 @@ public class Maimai {
return
Collections
.
emptyMap
();
return
Collections
.
emptyMap
();
}
}
/**
 * Manual smoke test: calls {@code ProxyFactory.init} with a fixed ZooKeeper address,
 * fetches the counters of one sample Maimai feed URL through {@code getMaiaiCount2}
 * and prints the resulting map to standard output.
 *
 * @param args unused
 */
public static void main(String[] args) {
    ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER, 10000002L);
    String sampleFeedUrl = "https://maimai.cn/web/feed_detail?fid=1353566056&efid=QTa45Y1e-oQzyn1dZ5ozlQ";
    Map<String, Object> counters = getMaiaiCount2(sampleFeedUrl, ProxyHolder.NAT_HEAVY_PROXY);
    System.out.println(counters);
}
/**
 * Fetches the like / share / comment counters and author details of a Maimai feed post.
 *
 * <p>Example URL:
 * https://maimai.cn/web/feed_detail?fid=1304191535&efid=0CQbJXhoYLXdC87NFIkRMA
 *
 * <p>The page is downloaded (up to two attempts), the text between
 * {@code JSON.parse("} and {@code ");</script>} is cut out, passed through
 * {@code ZhiWeiTools.decodeUnicode} and parsed as JSON. The values are read from
 * {@code data.feed}.
 *
 * @param url   feed detail page URL
 * @param proxy proxy holder handed to {@code httpBoot.syncCall}
 * @return a map with the keys {@code like}, {@code spreads}, {@code cmts}, {@code gid},
 *         {@code title}, {@code author}, {@code userId} and {@code company}; an empty
 *         map when both attempts failed
 */
public static Map<String, Object> getMaiaiCount2(String url, ProxyHolder proxy) {
    for (int i = 1; i < 3; i++) {
        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
            String html = response.body().string();
            // Cut the escaped JSON payload out of the inline <script> block.
            String escaped = html.split("JSON.parse\\(\"")[1].split("\"\\);\\</script\\>")[0];
            JSONObject feed = JSONObject.parseObject(ZhiWeiTools.decodeUnicode(escaped))
                    .getJSONObject("data")
                    .getJSONObject("feed");
            // Look up the nested objects once instead of once per field.
            JSONObject main = feed.getJSONObject("main");
            JSONObject author = main.getJSONObject("u");

            Map<String, Object> map = new HashMap<>();
            map.put("like", feed.getJSONObject("likes").getInteger("n"));
            map.put("spreads", feed.getJSONObject("spreads").getInteger("n"));
            map.put("cmts", feed.getJSONObject("comments").getInteger("n"));
            map.put("gid", feed.getLong("id"));
            map.put("title", main.getString("text"));
            map.put("author", author.getString("name"));
            map.put("userId", author.getString("mmid"));
            map.put("company", author.getString("career_str"));
            return map;
        } catch (Exception e) {
            // SLF4J treats a trailing Throwable as the exception to print, not as a
            // placeholder value: the URL fills "{}" and the stack trace is still logged.
            logger.error(" 脉脉 转评攒 获取失败 {}", url, e);
        }
    }
    return Collections.emptyMap();
}
/**
/**
* //https://maimai.cn/web/gossip_detail?encode_id=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MTk2MzEyNjYsImlhdCI6MTU0ODI5NzI5NX0.N6SPmcf-fyitLNomzY-a8BEY31eseYnvG7RTUQ3jxYY
* //https://maimai.cn/web/gossip_detail?encode_id=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MTk2MzEyNjYsImlhdCI6MTU0ODI5NzI5NX0.N6SPmcf-fyitLNomzY-a8BEY31eseYnvG7RTUQ3jxYY
* @Description 获取脉脉转评赞
* @Description 获取脉脉转评赞
...
...
src/main/java/com/zhiwei/parse/QQKB.java
View file @
6018f0b3
...
@@ -60,8 +60,8 @@ public class QQKB {
...
@@ -60,8 +60,8 @@ public class QQKB {
try
{
try
{
for
(
int
j
=
1
;
j
<
3
;
j
++)
{
for
(
int
j
=
1
;
j
<
3
;
j
++)
{
ids
=
ids
.
substring
(
0
,
ids
.
length
()-
1
);
ids
=
ids
.
substring
(
0
,
ids
.
length
()-
1
);
System
.
out
.
println
(
ids
);
logger
.
info
(
"data {}"
,
ids
);
ZhiWeiTools
.
sleep
(
7
000
);
ZhiWeiTools
.
sleep
(
1
000
);
paramMap
.
clear
();
paramMap
.
clear
();
paramMap
=
HeadGet
.
getQQAccountOtherParamMap
(
ids
);
paramMap
=
HeadGet
.
getQQAccountOtherParamMap
(
ids
);
result
=
HttpClient
.
executeHttpRequestPost
(
url
,
proxy
,
headerMap
,
paramMap
);
result
=
HttpClient
.
executeHttpRequestPost
(
url
,
proxy
,
headerMap
,
paramMap
);
...
@@ -76,7 +76,6 @@ public class QQKB {
...
@@ -76,7 +76,6 @@ public class QQKB {
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
ids
=
""
;
ids
=
""
;
paramMap
.
clear
();
paramMap
.
clear
();
continue
;
}
}
}
}
}
}
...
...
src/main/java/com/zhiwei/parse/QQKandian.java
View file @
6018f0b3
This diff is collapsed.
Click to expand it.
src/main/java/com/zhiwei/parse/SinaKeji.java
View file @
6018f0b3
...
@@ -34,15 +34,15 @@ public class SinaKeji {
...
@@ -34,15 +34,15 @@ public class SinaKeji {
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getSinaKejiComment
(
String
url
,
ProxyHolder
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getSinaKejiComment
(
String
url
,
ProxyHolder
proxy
)
{
String
com
mentId
=
getCommentId
(
url
,
proxy
);
String
com
Url
=
getCommentId
(
url
,
proxy
);
if
(
nonNull
(
com
mentId
))
{
if
(
nonNull
(
com
Url
))
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
int
page
=
1
;
int
page
=
1
;
int
count
=
1
;
int
count
=
1
;
while
(
true
)
{
while
(
true
)
{
try
{
try
{
ZhiWeiTools
.
sleep
(
3
000
);
ZhiWeiTools
.
sleep
(
1
000
);
String
newUrl
=
"http://comment.sina.com.cn/page/info?version=1&format=json&channel=kj&newsid="
+
commentId
+
"&compress=0&ie=utf-8&oe=utf-8&page_size=20&t_size=3&h_size=3&thread=1&page="
+
page
;
String
newUrl
=
comUrl
+
page
;
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
newUrl
),
proxy
).
body
().
string
();
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
newUrl
),
proxy
).
body
().
string
();
List
<
Map
<
String
,
Object
>>
list
=
sinaKejiCommentAnalysis
.
getSinaCommet
(
result
);
List
<
Map
<
String
,
Object
>>
list
=
sinaKejiCommentAnalysis
.
getSinaCommet
(
result
);
dataList
.
addAll
(
list
);
dataList
.
addAll
(
list
);
...
@@ -63,16 +63,27 @@ public class SinaKeji {
...
@@ -63,16 +63,27 @@ public class SinaKeji {
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
/**
** 获取新浪评论链接
* @param url
* @param proxy
* @return
* @return String
*/
private
static
String
getCommentId
(
String
url
,
ProxyHolder
proxy
)
{
private
static
String
getCommentId
(
String
url
,
ProxyHolder
proxy
)
{
String
commentId
=
null
;
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
{
try
{
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
).
body
().
string
();
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
).
body
().
string
();
//list?channel=
if
(
result
.
contains
(
"newsid:"
))
{
if
(
result
.
contains
(
"newsid:"
))
{
commentId
=
result
.
split
(
"newsid: '"
)[
1
].
split
(
"'"
)[
0
];
String
commentId
=
result
.
split
(
"newsid: '"
)[
1
].
split
(
"'"
)[
0
];
if
(
nonNull
(
commentId
))
{
String
channel
=
result
.
split
(
"channel: '"
)[
1
].
split
(
"'"
)[
0
];
return
commentId
;
if
(
nonNull
(
commentId
)
&&
nonNull
(
channel
))
{
return
"http://comment.sina.com.cn/page/info?version=1&format=json&channel="
+
channel
+
"&newsid="
+
commentId
+
"&compress=0&ie=utf-8&oe=utf-8&page_size=20&t_size=3&h_size=3&thread=1&page="
;
}
}
}
else
if
(
result
.
contains
(
"__cmntId"
)){
String
key
=
result
.
split
(
"__cmntId\":\""
)[
1
].
split
(
"\","
)[
0
];
return
"http://comment.sina.com.cn/page/info?version=1&format=json&channel="
+
key
.
split
(
":"
)[
0
]+
"&newsid="
+
key
.
split
(
":"
)[
1
]+
"&compress=0&ie=utf-8&oe=utf-8&page_size=20&t_size=3&h_size=3&thread=1&page="
;
}
}
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
logger
.
error
(
"获取 文章评论 id 失败"
);
logger
.
error
(
"获取 文章评论 id 失败"
);
...
...
src/main/java/com/zhiwei/parse/SinaTousu.java
View file @
6018f0b3
...
@@ -29,7 +29,7 @@ public class SinaTousu {
...
@@ -29,7 +29,7 @@ public class SinaTousu {
int
count
=
1
;
int
count
=
1
;
while
(
true
)
{
while
(
true
)
{
try
{
try
{
if
(
count
>
3
)
{
if
(
count
>
3
||
page
>
200
)
{
break
;
break
;
}
}
String
url
=
"https://tousu.sina.com.cn/api/index/s?keywords="
+
URLEncoder
.
encode
(
word
,
"utf-8"
)+
"&page_size=100&page="
;
String
url
=
"https://tousu.sina.com.cn/api/index/s?keywords="
+
URLEncoder
.
encode
(
word
,
"utf-8"
)+
"&page_size=100&page="
;
...
...
src/main/java/com/zhiwei/parse/Souhu.java
View file @
6018f0b3
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
...
@@ -12,7 +10,6 @@ import org.slf4j.LoggerFactory;
...
@@ -12,7 +10,6 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
...
@@ -35,10 +32,7 @@ public class Souhu {
...
@@ -35,10 +32,7 @@ public class Souhu {
*/
*/
public
static
int
getSouhuCommentCount
(
String
url
,
ProxyHolder
proxy
)
{
public
static
int
getSouhuCommentCount
(
String
url
,
ProxyHolder
proxy
)
{
try
{
try
{
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
,
proxy
);
return
souhuCommentAnalysis
.
getSouhuCommentCount
(
url
,
proxy
);
if
(
nonNull
(
newurl
))
{
return
souhuCommentAnalysis
.
getSouhuCommentCount
(
newurl
,
proxy
);
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"搜狐获取评论数出错了 {}"
,
e
);
logger
.
error
(
"搜狐获取评论数出错了 {}"
,
e
);
}
}
...
@@ -83,7 +77,7 @@ public class Souhu {
...
@@ -83,7 +77,7 @@ public class Souhu {
if
(
isCulling
)
{
if
(
isCulling
)
{
url
=
url
+
"&columnId=-1"
;
url
=
url
+
"&columnId=-1"
;
}
}
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArray
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"pcArticleVOS"
);
JSONArray
jsonArray
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"pcArticleVOS"
);
List
<
Map
<
String
,
Object
>>
dataList1
=
souhuAccountAnalysis
.
analysisData
(
jsonArray
,
name
);
List
<
Map
<
String
,
Object
>>
dataList1
=
souhuAccountAnalysis
.
analysisData
(
jsonArray
,
name
);
...
...
src/main/java/com/zhiwei/parse/TXNews.java
View file @
6018f0b3
...
@@ -21,6 +21,8 @@ import com.zhiwei.parse.analysis.TXNewsByWordAnalysis;
...
@@ -21,6 +21,8 @@ import com.zhiwei.parse.analysis.TXNewsByWordAnalysis;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.MediaType
;
import
okhttp3.RequestBody
;
import
okhttp3.Response
;
import
okhttp3.Response
;
public
class
TXNews
{
public
class
TXNews
{
...
@@ -71,7 +73,7 @@ public class TXNews {
...
@@ -71,7 +73,7 @@ public class TXNews {
String
content
=
StringUtils
.
join
(
"coral_uin="
,
coralUin
,
"&coral_uid="
,
coralUid
,
"&reply_id="
,
replayId
);
String
content
=
StringUtils
.
join
(
"coral_uin="
,
coralUin
,
"&coral_uid="
,
coralUid
,
"&reply_id="
,
replayId
);
//eca55388bbbb596e632bca03a2378efe94b83142fd046f1f70 876579532
//eca55388bbbb596e632bca03a2378efe94b83142fd046f1f70 876579532
System
.
out
.
println
(
content
);
System
.
out
.
println
(
content
);
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
"https://r.inews.qq.com/getMyComments"
,
"application/json"
,
content
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapPost
(
"https://r.inews.qq.com/getMyComments"
,
RequestBody
.
create
(
MediaType
.
get
(
"application/json"
),
content
)
),
proxy
)){
JSONObject
json
=
JSONObject
.
parseObject
(
response
.
body
().
string
());
JSONObject
json
=
JSONObject
.
parseObject
(
response
.
body
().
string
());
JSONArray
jsonArray
=
json
.
getJSONObject
(
"comments"
).
getJSONArray
(
"new"
);
JSONArray
jsonArray
=
json
.
getJSONObject
(
"comments"
).
getJSONArray
(
"new"
);
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
...
...
src/main/java/com/zhiwei/parse/Wangyi.java
View file @
6018f0b3
...
@@ -2,6 +2,7 @@ package com.zhiwei.parse;
...
@@ -2,6 +2,7 @@ package com.zhiwei.parse;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -27,7 +28,7 @@ public class Wangyi {
...
@@ -27,7 +28,7 @@ public class Wangyi {
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Wangyi
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Wangyi
.
class
);
private
static
WangyiCommentAnalysis
wangyiCommentAnalysis
=
new
WangyiCommentAnalysis
();
private
static
WangyiCommentAnalysis
wangyiCommentAnalysis
=
new
WangyiCommentAnalysis
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
2
).
build
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
2
).
throwException
(
false
).
build
();
private
static
WangyiHistoryAnalysis
wangyiHistoryAnalysis
=
new
WangyiHistoryAnalysis
();
private
static
WangyiHistoryAnalysis
wangyiHistoryAnalysis
=
new
WangyiHistoryAnalysis
();
/**
/**
...
@@ -81,6 +82,21 @@ public class Wangyi {
...
@@ -81,6 +82,21 @@ public class Wangyi {
}
}
}
}
public
static
Map
<
String
,
Object
>
getReadAndLikeCount
(
String
url
,
ProxyHolder
proxy
)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
String
cCount
=
result
.
split
(
"data-count=\""
)[
1
].
split
(
"\" data-hidead"
)[
0
];
String
lCount
=
result
.
split
(
"data-like=\""
)[
1
].
split
(
"\"><"
)[
0
];
Map
<
String
,
Object
>
rMap
=
new
HashMap
<>();
rMap
.
put
(
"commentCount"
,
cCount
);
rMap
.
put
(
"likes"
,
lCount
);
return
rMap
;
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
return
Collections
.
emptyMap
();
}
/**
/**
*
*
* @Description 网易网页版数据
* @Description 网易网页版数据
...
...
src/main/java/com/zhiwei/parse/analysis/AiqiyiByWordAnalysis.java
View file @
6018f0b3
...
@@ -22,15 +22,13 @@ public class AiqiyiByWordAnalysis {
...
@@ -22,15 +22,13 @@ public class AiqiyiByWordAnalysis {
List
<
Map
<
String
,
Object
>>
dataMap
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataMap
=
new
ArrayList
<>();
try
{
try
{
Document
doc
=
Jsoup
.
parse
(
result
);
Document
doc
=
Jsoup
.
parse
(
result
);
Elements
elements
=
doc
.
select
(
"
li.list_item
"
);
Elements
elements
=
doc
.
select
(
"
div.layout-main > div
"
);
for
(
Element
element
:
elements
)
{
for
(
Element
element
:
elements
)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
String
title
=
element
.
select
(
"li"
).
attr
(
"data-widget-searchlist-tvname"
);
String
title
=
element
.
select
(
"a.main-tit"
).
attr
(
"title"
);
String
time
=
element
.
select
(
"em.result_info_desc"
).
text
().
split
(
" "
)[
0
];
String
time
=
element
.
select
(
"span.info-des"
).
text
().
split
(
" "
)[
0
];
if
(
element
.
select
(
"label.result_info_lbl"
).
text
().
contains
(
"上传者"
))
{
String
uurl
=
element
.
select
(
"a.main-tit"
).
attr
(
"href"
);
map
.
put
(
"source"
,
element
.
select
(
"a.result_info_link"
).
text
());
map
.
put
(
"source"
,
element
.
select
(
"a.uploader-name"
).
text
());
}
String
uurl
=
element
.
select
(
"h3.result_title > a"
).
attr
(
"href"
);
map
.
put
(
"time"
,
TimeParse
.
stringFormartDate
(
time
));
map
.
put
(
"time"
,
TimeParse
.
stringFormartDate
(
time
));
map
.
put
(
"url"
,
uurl
);
map
.
put
(
"url"
,
uurl
);
map
.
put
(
"title"
,
title
);
map
.
put
(
"title"
,
title
);
...
...
src/main/java/com/zhiwei/parse/analysis/BaijiaAccountAnalysis.java
View file @
6018f0b3
...
@@ -58,24 +58,22 @@ public class BaijiaAccountAnalysis {
...
@@ -58,24 +58,22 @@ public class BaijiaAccountAnalysis {
try
{
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"list"
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"list"
);
if
(
json
.
getJSONObject
(
"data"
).
getBoolean
(
"has
_m
ore"
)
!=
null
&&
if
(
json
.
getJSONObject
(
"data"
).
getBoolean
(
"has
M
ore"
)
!=
null
&&
json
.
getJSONObject
(
"data"
).
getBoolean
(
"has
_m
ore"
)
)
{
json
.
getJSONObject
(
"data"
).
getBoolean
(
"has
M
ore"
)
)
{
more
=
true
;
more
=
true
;
rmap
.
put
(
"ctime"
,
json
.
getJSONObject
(
"data"
).
getString
(
"ctime"
));
rmap
.
put
(
"ctime"
,
json
.
getJSONObject
(
"data"
).
get
JSONObject
(
"query"
).
get
String
(
"ctime"
));
}
}
// String name = json.getJSONObject("data").getJSONObject("author").getString("display_name");
// String name = json.getJSONObject("data").getJSONObject("author").getString("display_name");
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
)
.
getJSONObject
(
"itemData"
)
;
String
id
=
data
.
getString
(
"article_id"
);
String
id
=
data
.
getString
(
"article_id"
);
int
t
=
data
.
getInteger
(
"updated_at"
);
int
t
=
data
.
getInteger
(
"updated_at"
);
String
time
=
TimeParse
.
dateFormartString
(
new
Date
(
t
*
1000L
),
"yyyy-MM-dd HH:mm:ss"
);
String
time
=
TimeParse
.
dateFormartString
(
new
Date
(
t
*
1000L
),
"yyyy-MM-dd HH:mm:ss"
);
System
.
out
.
println
(
time
);
System
.
out
.
println
(
time
);
if
(
startTime
!=
null
&&
startTime
.
length
()
>
1
)
{
if
(
startTime
!=
null
&&
startTime
.
length
()
>
1
&&
time
.
compareTo
(
startTime
)
<
1
)
{
if
(
time
.
compareTo
(
startTime
)
<
1
)
{
more
=
false
;
more
=
false
;
continue
;
continue
;
}
}
}
map
.
put
(
"title"
,
data
.
getString
(
"title"
));
map
.
put
(
"title"
,
data
.
getString
(
"title"
));
String
url
=
"http://baijiahao.baidu.com/s?id="
+
id
;
String
url
=
"http://baijiahao.baidu.com/s?id="
+
id
;
...
...
src/main/java/com/zhiwei/parse/analysis/BilibilikeyWordAnalysis.java
View file @
6018f0b3
...
@@ -18,7 +18,7 @@ public class BilibilikeyWordAnalysis {
...
@@ -18,7 +18,7 @@ public class BilibilikeyWordAnalysis {
try
{
try
{
Document
doc
=
Jsoup
.
parse
(
result
);
Document
doc
=
Jsoup
.
parse
(
result
);
boolean
more
=
false
;
boolean
more
=
false
;
if
(
doc
.
select
(
"#
server-search-app > div.contain > div.body-contain > div
> div.page-wrap > div > ul > li.page-item.next > button"
).
text
().
contains
(
"下一页"
))
{
if
(
doc
.
select
(
"#
all-list > div.flow-loader
> div.page-wrap > div > ul > li.page-item.next > button"
).
text
().
contains
(
"下一页"
))
{
more
=
true
;
more
=
true
;
}
}
...
@@ -28,7 +28,7 @@ public class BilibilikeyWordAnalysis {
...
@@ -28,7 +28,7 @@ public class BilibilikeyWordAnalysis {
String
playcount
=
null
;
String
playcount
=
null
;
String
source
=
null
;
String
source
=
null
;
String
submitcount
=
null
;
String
submitcount
=
null
;
Elements
elements
=
doc
.
select
(
"ul.video-
contain
.clearfix"
).
select
(
"li"
);
Elements
elements
=
doc
.
select
(
"ul.video-
list
.clearfix"
).
select
(
"li"
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
for
(
Element
element
:
elements
)
{
for
(
Element
element
:
elements
)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
...
...
src/main/java/com/zhiwei/parse/analysis/SouhuCommentAnalysis.java
View file @
6018f0b3
...
@@ -77,9 +77,10 @@ public class SouhuCommentAnalysis {
...
@@ -77,9 +77,10 @@ public class SouhuCommentAnalysis {
public
int
getSouhuCommentCount
(
String
url
,
ProxyHolder
proxy
)
{
public
int
getSouhuCommentCount
(
String
url
,
ProxyHolder
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
try
{
try
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
id
=
getUrlId
(
url
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
"https://apiv2.sohu.com/api/comment/list?callback=&page_size=10&topic_id=&page_no=1&source_id=mp_"
+
id
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
if
(
json
.
getInteger
(
"code"
)
==
500
)
{
if
(
Objects
.
nonNull
(
json
.
get
(
"code"
))
&&
json
.
getInteger
(
"code"
)
==
500
)
{
return
0
;
return
0
;
}
}
return
json
.
getJSONObject
(
"jsonObject"
).
getInteger
(
"cmt_sum"
);
return
json
.
getJSONObject
(
"jsonObject"
).
getInteger
(
"cmt_sum"
);
...
@@ -116,21 +117,28 @@ public class SouhuCommentAnalysis {
...
@@ -116,21 +117,28 @@ public class SouhuCommentAnalysis {
}
}
public
int
getReadNum
(
String
url
,
ProxyHolder
proxy
)
{
public
int
getReadNum
(
String
url
,
ProxyHolder
proxy
)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
id
=
getUrlId
(
url
);
String
result
=
response
.
body
().
string
();
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
"http://v2.sohu.com/public-api/articles/"
+
id
+
"/pv"
),
proxy
)){
String
sourceId
=
getNewsId
(
result
);
return
Integer
.
parseInt
(
response
.
body
().
string
());
url
=
"http://v2.sohu.com/public-api/articles/pv?articleIds="
+
sourceId
;
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
).
body
().
string
();
return
JSONObject
.
parseObject
(
result
).
getInteger
(
sourceId
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"Exception {} "
,
e
);
logger
.
error
(
"Exception {} "
,
e
);
}
}
return
-
1
;
return
-
1
;
}
}
/**
** 获取搜狐文章id
* @param url
* @return
* @return String
*/
private
String
getUrlId
(
String
url
)
{
try
{
return
url
.
split
(
"/a/"
)[
1
].
split
(
"_"
)[
0
];
}
catch
(
Exception
e
)
{
logger
.
error
(
"搜狐链接解析失败"
);
}
return
null
;
}
}
}
src/test/java/com/zhiwei/Comment/MaimaiCommentCountTest.java
View file @
6018f0b3
...
@@ -27,9 +27,9 @@
...
@@ -27,9 +27,9 @@
//// List<String> headList = (List<String>) map.get("head");
//// List<String> headList = (List<String>) map.get("head");
//// for (Map<String, Object> map1 : list) {
//// for (Map<String, Object> map1 : list) {
//// String url = map1.get("地址") + "";
//// String url = map1.get("地址") + "";
// String cookie = "
_buuid=0668b664-13b3-4bd0-aa37-99d747432e85; guid=HBoEGxgEGBscBBsZGlYHGBsZHRsYExwZHFYcGQQdGR8FQ1hLTEt5ChITBBIdHxkEGgQbHQVPR0VYQmkKA0VBSU9tCk9BQ0YKBmZnfmJhAgocGQQdGR8FXkNhSE99T0ZaWmsKAx4cfWV9ChEZBBwKfmQKWV1FTkRDfQIKGgQfBUtGRkNQRWc=; token=\"ou+mv1hjxjm0uOOTss1vgck9+h6OCS/lYQUeFnJgSK70FHprmw6GmjBGwk2qPQH88CKuzcDfAvoCmBm7+jVysA==\"; uid=\"A8ELjewCDRgHnZ5bX0Vy0/Airs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMjI3NjU0NTI0Iiwic2VjcmV0IjoiV0wyZmEtZDZxbkx2TEkzZHF2dTN4UG5SIiwiX2V4cGlyZSI6MTU2MDU5Mzg4Mjc5NCwiX21heEFnZSI6ODY0MDAwMDB9; session.sig=ujhqvC3wPAn-WlCPXfB6C5ZJIgY
";
// String cookie = "";
// String url = "https://maimai.cn/web/gossip_detail?src=app&webid=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJlZ2lkIjoiMjU5OTg4YmE4YzI3MTFlOTllMjEyNDZlOTZiNDgwODgiLCJ1IjoxNzY2NDQ3NjUsImlkIjoyMjIwMDM4NH0.gBdp3i99L0xUKLglaVIJf07OrrPk6yoG9HAo-3Yftqk";
// String url = "https://maimai.cn/web/gossip_detail?src=app&webid=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJlZ2lkIjoiMjU5OTg4YmE4YzI3MTFlOTllMjEyNDZlOTZiNDgwODgiLCJ1IjoxNzY2NDQ3NjUsImlkIjoyMjIwMDM4NH0.gBdp3i99L0xUKLglaVIJf07OrrPk6yoG9HAo-3Yftqk";
//
Map<String,Object> map3 = Maimai.getMaiaiCount(url,
cookie, ProxyHolder.NAT_HEAVY_PROXY);
//
List<Map<String, Object>> map3 = Maimai.getMaimaiCommentList(url,
cookie, ProxyHolder.NAT_HEAVY_PROXY);
// System.out.println(map3.toString());
// System.out.println(map3.toString());
// System.out.println(url);
// System.out.println(url);
//// map1.putAll(map3);
//// map1.putAll(map3);
...
...
src/test/java/com/zhiwei/Comment/SinaKejiComment.java
View file @
6018f0b3
//package com.zhiwei.Comment;
//package com.zhiwei.Comment;
//
//
//import org.
testng.annotations
.Test;
//import org.
junit
.Test;
//
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.crawler.proxy.ProxyHolder;
//import com.zhiwei.parse.SinaKeji;
//import com.zhiwei.parse.SinaKeji;
//
//
//public class SinaKejiComment {
//public class SinaKejiComment {
//
// @Test
// @Test
// public void f() {
// public void f() {
// String url = "https://tech.sina.com.cn/it/2018-07-17/doc-ihfkffam0632649.shtml";
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
// String url = "https://tech.sina.com.cn/d/v/2019-08-19/doc-ihytcitn0207512.shtml";
//
//
// SinaKeji.getSinaKejiComment(url,
null
);
// SinaKeji.getSinaKejiComment(url,
ProxyHolder.NAT_HEAVY_PROXY
);
//
//
// }
// }
//
//}
//}
src/test/java/com/zhiwei/crawler/QQAccountExample.java
View file @
6018f0b3
...
@@ -25,7 +25,7 @@
...
@@ -25,7 +25,7 @@
//// System.out.println(child.split("chlid=")[1]);
//// System.out.println(child.split("chlid=")[1]);
// System.out.println(child.split("=")[1]);
// System.out.println(child.split("=")[1]);
//
//
// List<Map<String,Object>> lists = QQKB.getQQAccountData("5
456950
", cookie,null);
// List<Map<String,Object>> lists = QQKB.getQQAccountData("5
060059
", cookie,null);
// if(lists != null) {
// if(lists != null) {
// for(Map<String,Object> map1 : lists) {
// for(Map<String,Object> map1 : lists) {
// map1.put("name", map.get("呢称"));
// map1.put("name", map.get("呢称"));
...
...
src/test/java/com/zhiwei/hsitory/BaijiaAccountExample.java
View file @
6018f0b3
...
@@ -6,14 +6,17 @@
...
@@ -6,14 +6,17 @@
//
//
//import org.junit.Test;
//import org.junit.Test;
//
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.crawler.proxy.ProxyHolder;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Baijia;
//import com.zhiwei.parse.Baijia;
//import com.zhiwei.tools.tools.ZhiWeiTools;
//
//
//public class BaijiaAccountExample {
//public class BaijiaAccountExample {
//
//
// @Test
// @Test
// public void test3() {
// public void test3() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
// String path = "D://crawlerdata//自媒体/百家号采集.xlsx";
// String path = "D://crawlerdata//自媒体/百家号采集.xlsx";
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// String startTime = "2018-05-01 00:00:00";
// String startTime = "2018-05-01 00:00:00";
...
@@ -23,9 +26,10 @@
...
@@ -23,9 +26,10 @@
// for(Map<String,Object> m : list) {
// for(Map<String,Object> m : list) {
// try {
// try {
// String app_id = m.get("id").toString();
// String app_id = m.get("id").toString();
// app_id = "1594158489045754";
// app_id = "1602674438508810";
// String cookie = "__cfduid=d847baca85b97d1967b3da02ebb345b831535524251; BAIDUID=C0F0F81EF770C5219AB9C178654135EC:FG=1; PSTM=1536376257; BIDUPSID=250CCE0442BEBCB3568D8EC515953434; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; locale=zh; delPer=0; H_PS_PSSID=1447_21117_20930; PSINO=5";
// String cookie = "BAIDUID=7D453C932433A93F7AD1F3B8ABC8B0E1:FG=1; BIDUPSID=7D453C932433A93F7AD1F3B8ABC8B0E1; PSTM=1570766401; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BDSFRCVID=eH-OJeCmH6VwoRJwCdmehrB7leKK0gOTHllvCh8hmwLadLIVJeC6EG0Ptf8g0KubFTPRogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tJkD_I_hJKt3qn7I5KToh4Athxob2bbXHDo-LIvHWT6cOR5JhfA-3R-e046f3-3L5CbH5D3s5lvvhb3O3M7ShbKdMa732RbrKCnraxQF5l8-sq0x0bOte-bQypoa0q3TLDOMahkM5h7xOKQoQlPK5JkgMx6MqpQJQeQ-5KQN3KJmfbL9bT3YjjISKx-_J5LJJxK; H_PS_PSSID=1442_21103_29567_29699_29220_22158; delPer=0; PSINO=5; ZD_ENTRY=baidu; yjs_js_security_passport=9687699d4b0965c0be1e6e312fc59ff5cf3d03a2_1571106914_js; Hmery-Time=1215393878";
// List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id, startTime,cookie, null);
// System.out.println(app_id);
// List<Map<String,Object>> lists = Baijia.getBaijiaAccountByBaiduData(app_id,"聚富财经", startTime,cookie, ProxyHolder.NAT_HEAVY_PROXY);
// if(lists != null) {
// if(lists != null) {
// bodyList.addAll(lists);
// bodyList.addAll(lists);
// }
// }
...
...
src/test/java/com/zhiwei/shipin/AiqiyiTest.java
View file @
6018f0b3
...
@@ -17,7 +17,7 @@
...
@@ -17,7 +17,7 @@
// @Test
// @Test
// public void aiqiyiTest() {
// public void aiqiyiTest() {
//
//
// ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
// ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER
,10000002
);
// List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
// List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
// List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
// List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
// for(String w : wordList) {
// for(String w : wordList) {
...
@@ -34,7 +34,7 @@
...
@@ -34,7 +34,7 @@
// headList.add("title");
// headList.add("title");
// headList.add("word");
// headList.add("word");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("D://crawlerdata//视频/爱奇艺关键词采集-
毓婷-0716
.xlsx", "数据", headList, bodyList);
// poi.exportExcel("D://crawlerdata//视频/爱奇艺关键词采集-
精装修
.xlsx", "数据", headList, bodyList);
//
//
//
//
//
//
...
...
src/test/java/com/zhiwei/shipin/BilibiliTest.java
View file @
6018f0b3
//
package com.zhiwei.shipin;
package
com
.
zhiwei
.
shipin
;
//
//
import java.util.ArrayList;
import
java.util.ArrayList
;
//
import java.util.List;
import
java.util.List
;
//
import java.util.Map;
import
java.util.Map
;
//
//
import org.junit.Test;
import
org.junit.Test
;
//
//
import com.zhiwei.common.config.GroupType;
import
com.zhiwei.common.config.GroupType
;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.parse.BiliBili;
import
com.zhiwei.parse.BiliBili
;
//
import com.zhiwei.util.WordReadFile;
import
com.zhiwei.util.WordReadFile
;
//
//
public class BilibiliTest {
public
class
BilibiliTest
{
//
@Test
@Test
//
public void f() {
public
void
f
()
{
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER
);
ProxyFactory
.
init
(
"zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181"
,
"local"
,
GroupType
.
PROVIDER
,
10000002L
);
// List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词-1
.txt");
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词
.txt"
);
//
List<Map<String, Object>> bodyList = new ArrayList<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
//
String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274";
String
cookie
=
"LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274"
;
//
for (String word : wordList) {
for
(
String
word
:
wordList
)
{
// List<Map<String, Object>> dataList = BiliBili.getData(word, null, "2019-07-18
00:00:00",
List
<
Map
<
String
,
Object
>>
dataList
=
BiliBili
.
getData
(
word
,
null
,
"2001-01-14
00:00:00"
,
//
cookie);
cookie
);
//
if (dataList != null) {
if
(
dataList
!=
null
)
{
//
System.out.println(word + " ----- " + dataList.size());
System
.
out
.
println
(
word
+
" ----- "
+
dataList
.
size
());
//
bodyList.addAll(dataList);
bodyList
.
addAll
(
dataList
);
//
}
}
//
}
}
//
List<String> headlist = new ArrayList<>();
List
<
String
>
headlist
=
new
ArrayList
<>();
//
headlist.add("submitcount");
headlist
.
add
(
"submitcount"
);
//
headlist.add("playcount");
headlist
.
add
(
"playcount"
);
//
headlist.add("time");
headlist
.
add
(
"time"
);
//
headlist.add("source");
headlist
.
add
(
"source"
);
//
headlist.add("title");
headlist
.
add
(
"title"
);
//
headlist.add("url");
headlist
.
add
(
"url"
);
//
headlist.add("word");
headlist
.
add
(
"word"
);
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
// poi.exportExcel("D://crawlerdata//视频//bilibili关键词采集数据-吃鸡否-0722
.xlsx", "B站数据", headlist, bodyList);
poi
.
exportExcel
(
"D://crawlerdata//视频//bilibili关键词采集数据-封神神话-收藏
.xlsx"
,
"B站数据"
,
headlist
,
bodyList
);
//
//
}
}
//
}
}
src/test/java/com/zhiwei/shipin/QQTVTest.java
View file @
6018f0b3
...
@@ -17,8 +17,8 @@
...
@@ -17,8 +17,8 @@
//public class QQTVTest {
//public class QQTVTest {
// @Test
// @Test
// public void f() {
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",GroupType.PROVIDER);
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",GroupType.PROVIDER
, 10000002
);
// String time = "2019-0
4
-11 00:00:00";
// String time = "2019-0
1
-11 00:00:00";
// List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
// List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
// List<Map<String, Object>> bodyList = new ArrayList<>();
// List<Map<String, Object>> bodyList = new ArrayList<>();
// for (String word : wordList) {
// for (String word : wordList) {
...
@@ -37,7 +37,7 @@
...
@@ -37,7 +37,7 @@
// headlist.add("url");
// headlist.add("url");
// headlist.add("word");
// headlist.add("word");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("D://crawlerdata//视频//腾讯视频关键词采集数据-
毓婷-0716
.xlsx", "腾讯视频数据", headlist, bodyList);
// poi.exportExcel("D://crawlerdata//视频//腾讯视频关键词采集数据-
精装修
.xlsx", "腾讯视频数据", headlist, bodyList);
//
//
//
//
//
//
...
...
src/test/java/com/zhiwei/shipin/SohuTVTest.java
View file @
6018f0b3
...
@@ -33,7 +33,7 @@
...
@@ -33,7 +33,7 @@
// headlist.add("url");
// headlist.add("url");
// headlist.add("word");
// headlist.add("word");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("D://crawlerdata//视频//搜狐视频关键词采集数据-
毓婷-0716
.xlsx", "搜狐数据", headlist, bodyList);
// poi.exportExcel("D://crawlerdata//视频//搜狐视频关键词采集数据-
华为-0812
.xlsx", "搜狐数据", headlist, bodyList);
//
//
// }
// }
//}
//}
src/test/java/com/zhiwei/shipin/YoukuKeyWordTest.java
View file @
6018f0b3
...
@@ -30,7 +30,7 @@
...
@@ -30,7 +30,7 @@
// headList.add("uper");
// headList.add("uper");
// headList.add("word");
// headList.add("word");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("D://crawlerdata//视频//优酷数据-
毓婷-0716
.xlsx", "数据", headList, bodyList);
// poi.exportExcel("D://crawlerdata//视频//优酷数据-
华为-0812
.xlsx", "数据", headList, bodyList);
//
//
// }
// }
//}
//}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment