Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
articlenewscrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
articlenewscrawler
Commits
cb5516a0
Commit
cb5516a0
authored
Mar 21, 2019
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
采集修改
parent
2a35dd02
Hide whitespace changes
Inline
Side-by-side
Showing
62 changed files
with
1002 additions
and
369 deletions
+1002
-369
pom.xml
+3
-3
src/main/java/com/zhiwei/httpclient/HeadGet.java
+7
-1
src/main/java/com/zhiwei/httpclient/HttpClient.java
+2
-2
src/main/java/com/zhiwei/parse/Aika.java
+1
-1
src/main/java/com/zhiwei/parse/Aiqiyi.java
+30
-5
src/main/java/com/zhiwei/parse/Baijia.java
+1
-1
src/main/java/com/zhiwei/parse/BiliBili.java
+2
-2
src/main/java/com/zhiwei/parse/Chejia.java
+1
-1
src/main/java/com/zhiwei/parse/Douban.java
+1
-1
src/main/java/com/zhiwei/parse/Gftai.java
+1
-1
src/main/java/com/zhiwei/parse/KuaiTousu.java
+1
-1
src/main/java/com/zhiwei/parse/Maimai.java
+1
-1
src/main/java/com/zhiwei/parse/Pcauto.java
+1
-1
src/main/java/com/zhiwei/parse/QQKB.java
+11
-6
src/main/java/com/zhiwei/parse/QQNews.java
+70
-0
src/main/java/com/zhiwei/parse/QicheHome.java
+1
-1
src/main/java/com/zhiwei/parse/SinaKeji.java
+26
-1
src/main/java/com/zhiwei/parse/SinaTousu.java
+4
-4
src/main/java/com/zhiwei/parse/Souhu.java
+18
-4
src/main/java/com/zhiwei/parse/TXNews.java
+70
-3
src/main/java/com/zhiwei/parse/TechTx.java
+2
-1
src/main/java/com/zhiwei/parse/Xueqiu.java
+69
-8
src/main/java/com/zhiwei/parse/Yiche.java
+1
-1
src/main/java/com/zhiwei/parse/Yidianzixun.java
+65
-12
src/main/java/com/zhiwei/parse/Youku.java
+28
-2
src/main/java/com/zhiwei/parse/analysis/AiqiyiByWordAnalysis.java
+5
-4
src/main/java/com/zhiwei/parse/analysis/BaijiaAccountAnalysis.java
+1
-1
src/main/java/com/zhiwei/parse/analysis/BilibilikeyWordAnalysis.java
+5
-4
src/main/java/com/zhiwei/parse/analysis/DayuByWordAnalysis.java
+2
-2
src/main/java/com/zhiwei/parse/analysis/FenghuangAccountAnalysis.java
+1
-1
src/main/java/com/zhiwei/parse/analysis/FenghuangCommentAnalysis.java
+1
-1
src/main/java/com/zhiwei/parse/analysis/GftaiAnalysis.java
+3
-4
src/main/java/com/zhiwei/parse/analysis/KuaiTousuAnalysis.java
+3
-3
src/main/java/com/zhiwei/parse/analysis/QQKBAccountAnalysis.java
+14
-10
src/main/java/com/zhiwei/parse/analysis/SinaTousuAnalysis.java
+4
-3
src/main/java/com/zhiwei/parse/analysis/SouhuCommentAnalysis.java
+57
-11
src/main/java/com/zhiwei/parse/analysis/YidianzixunAccountAnalysis.java
+14
-11
src/main/java/com/zhiwei/parse/shipin/QQTV.java
+2
-2
src/main/java/com/zhiwei/parse/shipin/SohuTV.java
+2
-1
src/test/java/com/zhiwei/Comment/AiqiyiHotCountTest.java
+33
-0
src/test/java/com/zhiwei/Comment/MaimaiCommentCountTest.java
+44
-44
src/test/java/com/zhiwei/Comment/QQNewCommentCountTest.java
+19
-0
src/test/java/com/zhiwei/Comment/SinkeCommentCountTest.java
+19
-0
src/test/java/com/zhiwei/Comment/TxNewsCommentCountTest.java
+18
-0
src/test/java/com/zhiwei/Comment/YidianzixunCommentCountTest.java
+18
-0
src/test/java/com/zhiwei/Comment/YoukuHotCountTest.java
+37
-0
src/test/java/com/zhiwei/TestHttpBoot.java
+38
-38
src/test/java/com/zhiwei/crawler/AiqiyiByWordExample.java
+0
-45
src/test/java/com/zhiwei/crawler/MaimaiBywordExample.java
+32
-32
src/test/java/com/zhiwei/crawler/QQAccountExample.java
+2
-1
src/test/java/com/zhiwei/crawler/QQKBCommentCountExample.java
+3
-3
src/test/java/com/zhiwei/crawler/SouhuCommentCountExample.java
+3
-0
src/test/java/com/zhiwei/crawler/WangyiCommentCountExample.java
+2
-0
src/test/java/com/zhiwei/crawler/YidianzixunAccountExample.java
+4
-4
src/test/java/com/zhiwei/hsitory/TxNewsTest.java
+39
-0
src/test/java/com/zhiwei/keyword/SinaTousuTest.java
+7
-3
src/test/java/com/zhiwei/shipin/AiqiyiTest.java
+44
-0
src/test/java/com/zhiwei/shipin/BilibiliTest.java
+3
-2
src/test/java/com/zhiwei/shipin/QQTVTest.java
+3
-2
src/test/java/com/zhiwei/shipin/SohuTVTest.java
+39
-38
src/test/java/com/zhiwei/shipin/YoukuKeyWordTest.java
+36
-35
src/test/java/com/zhiwei/user/MaimaiTest.java
+28
-0
No files found.
pom.xml
View file @
cb5516a0
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
<modelVersion>
4.0.0
</modelVersion>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
<artifactId>
articlenewscrawler
</artifactId>
<artifactId>
articlenewscrawler
</artifactId>
<version>
0.
0.9
-SNAPSHOT
</version>
<version>
0.
1.3
-SNAPSHOT
</version>
<name>
articlenewscrawler
</name>
<name>
articlenewscrawler
</name>
<description>
采集凤凰,一点资讯,搜狐历时文章和文章评论
</description>
<description>
采集凤凰,一点资讯,搜狐历时文章和文章评论
</description>
...
@@ -31,12 +31,12 @@
...
@@ -31,12 +31,12 @@
<dependency>
<dependency>
<groupId>
com.zhiwei.tools
</groupId>
<groupId>
com.zhiwei.tools
</groupId>
<artifactId>
zhiwei-tools
</artifactId>
<artifactId>
zhiwei-tools
</artifactId>
<version>
0.1.
1
-SNAPSHOT
</version>
<version>
0.1.
2
-SNAPSHOT
</version>
</dependency>
</dependency>
<dependency>
<dependency>
<groupId>
com.zhiwei.crawler
</groupId>
<groupId>
com.zhiwei.crawler
</groupId>
<artifactId>
crawler-core
</artifactId>
<artifactId>
crawler-core
</artifactId>
<version>
0.
1.1
-RELEASE
</version>
<version>
0.
3.0
-RELEASE
</version>
<scope>
provided
</scope>
<scope>
provided
</scope>
</dependency>
</dependency>
</dependencies>
</dependencies>
...
...
src/main/java/com/zhiwei/httpclient/HeadGet.java
View file @
cb5516a0
package
com
.
zhiwei
.
httpclient
;
package
com
.
zhiwei
.
httpclient
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.URLEncoder
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -278,7 +280,11 @@ public class HeadGet {
...
@@ -278,7 +280,11 @@ public class HeadGet {
headerMap
.
put
(
"Connection"
,
"keep-alive"
);
headerMap
.
put
(
"Connection"
,
"keep-alive"
);
headerMap
.
put
(
"Accept"
,
"*/*"
);
headerMap
.
put
(
"Accept"
,
"*/*"
);
headerMap
.
put
(
"Accept-Language"
,
"zh-Hans-CN;q=1"
);
headerMap
.
put
(
"Accept-Language"
,
"zh-Hans-CN;q=1"
);
headerMap
.
put
(
"User-Agent"
,
"天天快报 4.6.0 qnreading (iPhone8,1; iOS 10.3.3; zh_CN; 4.6.0.81)"
);
try
{
headerMap
.
put
(
"User-Agent"
,
URLEncoder
.
encode
(
"天天快报 4.6.0 qnreading (iPhone8,1; iOS 10.3.3; zh_CN; 4.6.0.81)"
,
"utf-8"
));
}
catch
(
UnsupportedEncodingException
e
)
{
e
.
printStackTrace
();
}
if
(
cookie
!=
null
)
{
if
(
cookie
!=
null
)
{
headerMap
.
put
(
"Cookie"
,
cookie
);
headerMap
.
put
(
"Cookie"
,
cookie
);
...
...
src/main/java/com/zhiwei/httpclient/HttpClient.java
View file @
cb5516a0
...
@@ -8,15 +8,15 @@ import org.slf4j.Logger;
...
@@ -8,15 +8,15 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
okhttp3.Response
;
import
okhttp3.Response
;
public
class
HttpClient
{
public
class
HttpClient
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
HttpClient
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
HttpClient
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
(
false
,
2
);
/**
/**
*
*
...
...
src/main/java/com/zhiwei/parse/Aika.java
View file @
cb5516a0
...
@@ -11,8 +11,8 @@ import org.slf4j.Logger;
...
@@ -11,8 +11,8 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.parse.analysis.AikaCommentAnalysis
;
import
com.zhiwei.parse.analysis.AikaCommentAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/parse/Aiqiyi.java
View file @
cb5516a0
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.io.UnsupportedEncodingException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
...
@@ -10,14 +9,21 @@ import java.util.Map;
...
@@ -10,14 +9,21 @@ import java.util.Map;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.AiqiyiByWordAnalysis
;
import
com.zhiwei.parse.analysis.AiqiyiByWordAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
Aiqiyi
{
public
class
Aiqiyi
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Aiqiyi
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Aiqiyi
.
class
);
private
static
AiqiyiByWordAnalysis
aiqiyiByWordAnalysis
=
new
AiqiyiByWordAnalysis
();
private
static
AiqiyiByWordAnalysis
aiqiyiByWordAnalysis
=
new
AiqiyiByWordAnalysis
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
(
false
,
2
);
/**
/**
*
*
...
@@ -25,16 +31,16 @@ public class Aiqiyi {
...
@@ -25,16 +31,16 @@ public class Aiqiyi {
* @param word
* @param word
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getAiqiyiByWordData
(
String
word
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getAiqiyiByWordData
(
String
word
,
Proxy
Holder
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getAiqiyiBywordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getAiqiyiBywordHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
try
{
try
{
for
(
int
i
=
1
;
i
<=
5
;
i
++)
{
for
(
int
i
=
1
;
i
<=
20
;
i
++)
{
int
count
=
dataList
.
size
();
int
count
=
dataList
.
size
();
String
url
=
"https://so.iqiyi.com/so/q_"
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"_ctg__t_0_page_"
+
i
+
"_p_1_qc_0_rd__site__m_4_bitrate_"
;
String
url
=
"https://so.iqiyi.com/so/q_"
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"_ctg__t_0_page_"
+
i
+
"_p_1_qc_0_rd__site__m_4_bitrate_"
;
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
map
=
aiqiyiByWordAnalysis
.
getAiqiyiData
(
result
);
List
<
Map
<
String
,
Object
>>
map
=
aiqiyiByWordAnalysis
.
getAiqiyiData
(
result
,
word
);
if
(
map
!=
null
)
{
if
(
map
!=
null
)
{
dataList
.
addAll
(
map
);
dataList
.
addAll
(
map
);
}
}
...
@@ -42,7 +48,7 @@ public class Aiqiyi {
...
@@ -42,7 +48,7 @@ public class Aiqiyi {
break
;
break
;
}
}
System
.
out
.
println
(
"=============="
+
dataList
.
size
());
System
.
out
.
println
(
"=============="
+
dataList
.
size
());
ZhiWeiTools
.
sleep
(
200
0
);
ZhiWeiTools
.
sleep
(
200
);
}
}
return
dataList
;
return
dataList
;
}
catch
(
UnsupportedEncodingException
e
)
{
}
catch
(
UnsupportedEncodingException
e
)
{
...
@@ -51,5 +57,24 @@ public class Aiqiyi {
...
@@ -51,5 +57,24 @@ public class Aiqiyi {
}
}
}
}
/**
*
* @Description 获取 爱奇艺视频热度
* @param url
* @param proxy
* @return
*/
public
static
int
aiqiyiHotCount
(
String
url
,
ProxyHolder
proxy
)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
String
id
=
result
.
split
(
"tvId\":"
)[
1
].
split
(
","
)[
0
];
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
"https://pcw-api.iqiyi.com/video/video/hotplaytimes/"
+
id
)).
body
().
string
();
return
JSONObject
.
parseObject
(
result
).
getJSONArray
(
"data"
).
getJSONObject
(
0
).
getInteger
(
"hot"
);
}
catch
(
Exception
e
)
{
logger
.
error
(
" 爱奇艺 热度采集出错 {} "
,
e
);
return
-
1
;
}
}
}
}
src/main/java/com/zhiwei/parse/Baijia.java
View file @
cb5516a0
...
@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
...
@@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.BaijiaAccountAnalysis
;
import
com.zhiwei.parse.analysis.BaijiaAccountAnalysis
;
...
...
src/main/java/com/zhiwei/parse/BiliBili.java
View file @
cb5516a0
...
@@ -33,7 +33,7 @@ public class BiliBili {
...
@@ -33,7 +33,7 @@ public class BiliBili {
Request
request
=
HttpRequestBuilder
.
newGetRequest
(
url
,
header
);
Request
request
=
HttpRequestBuilder
.
newGetRequest
(
url
,
header
);
String
result
=
httpBoot
.
syncCall
(
request
,
proxy
).
body
().
string
();
String
result
=
httpBoot
.
syncCall
(
request
,
proxy
).
body
().
string
();
ZhiWeiTools
.
sleep
(
3000
);
ZhiWeiTools
.
sleep
(
3000
);
Map
<
String
,
Object
>
map
=
BilibilikeyWordAnalysis
.
getData
(
result
);
Map
<
String
,
Object
>
map
=
BilibilikeyWordAnalysis
.
getData
(
result
,
word
);
boolean
more
=
(
boolean
)
map
.
get
(
"more"
);
boolean
more
=
(
boolean
)
map
.
get
(
"more"
);
List
<
Map
<
String
,
Object
>>
dataList
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
List
<
Map
<
String
,
Object
>>
dataList
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
if
(
dataList
!=
null
)
{
if
(
dataList
!=
null
)
{
...
@@ -46,7 +46,7 @@ public class BiliBili {
...
@@ -46,7 +46,7 @@ public class BiliBili {
System
.
out
.
println
(
ur
);
System
.
out
.
println
(
ur
);
request
=
HttpRequestBuilder
.
newGetRequest
(
ur
,
header
);
request
=
HttpRequestBuilder
.
newGetRequest
(
ur
,
header
);
String
result2
=
httpBoot
.
syncCall
(
request
,
proxy
).
body
().
string
();
String
result2
=
httpBoot
.
syncCall
(
request
,
proxy
).
body
().
string
();
map
=
BilibilikeyWordAnalysis
.
getData
(
result2
);
map
=
BilibilikeyWordAnalysis
.
getData
(
result2
,
word
);
List
<
Map
<
String
,
Object
>>
dataList2
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
List
<
Map
<
String
,
Object
>>
dataList2
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
if
(
dataList2
!=
null
)
{
if
(
dataList2
!=
null
)
{
bodyList
.
addAll
(
dataList2
);
bodyList
.
addAll
(
dataList2
);
...
...
src/main/java/com/zhiwei/parse/Chejia.java
View file @
cb5516a0
...
@@ -15,8 +15,8 @@ import org.slf4j.LoggerFactory;
...
@@ -15,8 +15,8 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/parse/Douban.java
View file @
cb5516a0
...
@@ -16,7 +16,7 @@ import org.slf4j.Logger;
...
@@ -16,7 +16,7 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.parse.analysis.DoubanCommentAnalysis
;
import
com.zhiwei.parse.analysis.DoubanCommentAnalysis
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/parse/Gftai.java
View file @
cb5516a0
...
@@ -10,7 +10,7 @@ import org.slf4j.Logger;
...
@@ -10,7 +10,7 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.parse.analysis.GftaiAnalysis
;
import
com.zhiwei.parse.analysis.GftaiAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/parse/KuaiTousu.java
View file @
cb5516a0
...
@@ -11,7 +11,7 @@ import org.slf4j.Logger;
...
@@ -11,7 +11,7 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.parse.analysis.KuaiTousuAnalysis
;
import
com.zhiwei.parse.analysis.KuaiTousuAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/parse/Maimai.java
View file @
cb5516a0
...
@@ -17,8 +17,8 @@ import org.slf4j.LoggerFactory;
...
@@ -17,8 +17,8 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.MaimaiBywordAnalysis
;
import
com.zhiwei.parse.analysis.MaimaiBywordAnalysis
;
...
...
src/main/java/com/zhiwei/parse/Pcauto.java
View file @
cb5516a0
...
@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
...
@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.parse.analysis.PcautoCommentAnalysis
;
import
com.zhiwei.parse.analysis.PcautoCommentAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/parse/QQKB.java
View file @
cb5516a0
...
@@ -2,6 +2,7 @@ package com.zhiwei.parse;
...
@@ -2,6 +2,7 @@ package com.zhiwei.parse;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -34,10 +35,10 @@ public class QQKB {
...
@@ -34,10 +35,10 @@ public class QQKB {
String
url
=
"http://r.cnews.qq.com/getSubNewsIndex"
;
String
url
=
"http://r.cnews.qq.com/getSubNewsIndex"
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQAccountHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQAccountHeaderMap
(
cookie
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQAccountOneParamMap
(
child
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQAccountOneParamMap
(
child
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
try
{
try
{
String
result
=
""
;
String
result
=
""
;
List
<
String
>
idsList
=
new
ArrayList
<
String
>();
List
<
String
>
idsList
=
new
ArrayList
<>();
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
result
=
HttpClient
.
executeHttpRequestPost
(
url
,
proxy
,
headerMap
,
paramMap
);
result
=
HttpClient
.
executeHttpRequestPost
(
url
,
proxy
,
headerMap
,
paramMap
);
idsList
=
qqAccountAnalysis
.
getQQAllIds
(
result
);
idsList
=
qqAccountAnalysis
.
getQQAllIds
(
result
);
...
@@ -45,6 +46,9 @@ public class QQKB {
...
@@ -45,6 +46,9 @@ public class QQKB {
break
;
break
;
}
}
}
}
if
(
idsList
.
isEmpty
())
{
return
Collections
.
emptyList
();
}
System
.
out
.
println
(
"此帐号可采集的历史文章数==============="
+
idsList
.
size
());
System
.
out
.
println
(
"此帐号可采集的历史文章数==============="
+
idsList
.
size
());
url
=
"http://r.cnews.qq.com/getSubNewsListItems"
;
url
=
"http://r.cnews.qq.com/getSubNewsListItems"
;
String
ids
=
""
;
String
ids
=
""
;
...
@@ -106,7 +110,7 @@ public class QQKB {
...
@@ -106,7 +110,7 @@ public class QQKB {
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getQQKBCommentData
(
String
url
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getQQKBCommentData
(
String
url
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
String
comment_id
=
getCid
(
url
,
proxy
);
String
comment_id
=
getCid
(
url
,
proxy
);
String
article_id
=
url
.
split
(
"/"
)[
4
].
split
(
"\\?"
)[
0
];
String
article_id
=
url
.
split
(
"/"
)[
4
].
split
(
"\\?"
)[
0
];
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
null
);
...
@@ -133,7 +137,7 @@ public class QQKB {
...
@@ -133,7 +137,7 @@ public class QQKB {
}
}
return
dataList
;
return
dataList
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析天天快报评论出错
"
,
e
.
getMessage
()
);
logger
.
error
(
"解析天天快报评论出错
{}"
,
e
);
return
dataList
;
return
dataList
;
}
}
}
}
...
@@ -208,9 +212,10 @@ public class QQKB {
...
@@ -208,9 +212,10 @@ public class QQKB {
* @param article_id
* @param article_id
* @return
* @return
*/
*/
public
static
int
getCommentCount
(
String
cookie
,
String
url
,
Proxy
proxy
)
{
public
static
int
getCommentCount
(
String
url
,
Proxy
proxy
)
{
String
comment_id
=
getCid
(
url
,
proxy
);
String
comment_id
=
getCid
(
url
,
proxy
);
String
article_id
=
url
.
split
(
"/"
)[
4
];
String
article_id
=
url
.
split
(
"/"
)[
4
];
String
cookie
=
"phone_id=;%20phone_token=;%20luin=o0497332654;%20lskey=0003000049dd058f533cbebb240223ede63b864224f7eebe0f4aeca6a623572bb290a5800741d191a5768bb0;%20uin=o0497332654;%20skey=MIZmc2Oel3;%20sigA2=4282ABA809551D3534C72F999EE8F2A75219ED9452DEF04E4CBCE6B680C2C893C3E1BA617F5E0F387E558888B2ABEDFE87A4A25B16F9066C1154B2BC7A1133CA7B356AB9D3BA26ED;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwgGT4n96Oq-jHALnMUe8UzpoJghQDouvfSSWdh-JOdgAm3jRJUPbux6fcIPghoNxo24xdED8ennAANksJuHiwdw;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0"
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
cookie
);
try
{
try
{
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQKBCommentParamMap
(
comment_id
,
article_id
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQKBCommentParamMap
(
comment_id
,
article_id
);
...
@@ -218,7 +223,7 @@ public class QQKB {
...
@@ -218,7 +223,7 @@ public class QQKB {
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
return
json
.
getJSONObject
(
"comments"
).
getInteger
(
"count"
);
return
json
.
getJSONObject
(
"comments"
).
getInteger
(
"count"
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析天天快报评论出错
"
,
e
.
getMessage
()
);
logger
.
error
(
"解析天天快报评论出错
{}"
,
e
);
return
0
;
return
0
;
}
}
}
}
...
...
src/main/java/com/zhiwei/parse/QQNews.java
0 → 100644
View file @
cb5516a0
package
com
.
zhiwei
.
parse
;
import
java.util.Objects
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
okhttp3.Response
;
/**
*
* @ClassName QQNews
* @Description 腾讯网相关采集
* @author byte-zbs
* @Date 2019年3月6日 下午1:54:26
* @version 1.0.0
*/
public
class
QQNews
{
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
QQNews
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
/**
* .
* @Description 获取腾讯网评论数 (https://new.qq.com/cmsn/20190305/TEC2019030500050000)
* @param id TEC2019030500050000
* @param proxy
* @return
*/
public
static
int
getQQNewsCommentCount
(
String
id
,
ProxyHolder
proxy
)
{
String
cid
=
getCid
(
id
,
proxy
);
if
(
Objects
.
nonNull
(
cid
))
{
String
url
=
"https://coral.qq.com/article/"
+
cid
+
"/commentnum"
;
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
return
Integer
.
parseInt
(
json
.
getJSONObject
(
"data"
).
getString
(
"commentnum"
));
}
catch
(
Exception
e
)
{
logger
.
error
(
"腾讯网评论采集出错 {}"
,
e
);
}
}
return
-
1
;
}
/**
*
* @Description 依据网站获取文章cid
* @param id
* @param proxy
* @return
*/
private
static
String
getCid
(
String
id
,
ProxyHolder
proxy
)
{
String
url
=
"https://openapi.inews.qq.com/getQQNewsNormalContent?id="
+
id
+
"&refer=mobilewwwqqcom"
;
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
return
json
.
getString
(
"cid"
);
}
catch
(
Exception
e
)
{
logger
.
error
(
"获取文章cid失败{}"
,
e
);
}
return
null
;
}
}
src/main/java/com/zhiwei/parse/QicheHome.java
View file @
cb5516a0
...
@@ -9,8 +9,8 @@ import org.slf4j.Logger;
...
@@ -9,8 +9,8 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.parse.analysis.QicheHomeKwyWordAnalysis
;
import
com.zhiwei.parse.analysis.QicheHomeKwyWordAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/parse/SinaKeji.java
View file @
cb5516a0
...
@@ -11,12 +11,15 @@ import java.util.Map;
...
@@ -11,12 +11,15 @@ import java.util.Map;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.parse.analysis.SinaKejiCommentAnalysis
;
import
com.zhiwei.parse.analysis.SinaKejiCommentAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
SinaKeji
{
public
class
SinaKeji
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
SinaKeji
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
SinaKeji
.
class
);
...
@@ -78,5 +81,27 @@ public class SinaKeji {
...
@@ -78,5 +81,27 @@ public class SinaKeji {
return
null
;
return
null
;
}
}
/**
*
* @Description 新浪科技评论数获取
* @param url
* @param proxy
* @return
*/
public
static
int
getCommentCount
(
String
url
,
ProxyHolder
proxy
)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
if
(
result
.
contains
(
"getcomments:'"
))
{
url
=
result
.
split
(
"getcomments:'"
)[
1
].
split
(
"',"
)[
0
];
}
else
{
url
=
result
.
split
(
"getcomments\":\""
)[
1
].
split
(
"\""
)[
0
];
}
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
"https:"
+
url
),
proxy
).
body
().
string
();
return
JSONObject
.
parseObject
(
result
).
getJSONObject
(
"data"
).
getJSONObject
(
"cmnt"
).
getInteger
(
"total"
);
}
catch
(
Exception
e
)
{
logger
.
error
(
"新浪 文章获取评论数失败 {}"
,
e
);
}
return
-
1
;
}
}
}
src/main/java/com/zhiwei/parse/SinaTousu.java
View file @
cb5516a0
...
@@ -2,7 +2,6 @@ package com.zhiwei.parse;
...
@@ -2,7 +2,6 @@ package com.zhiwei.parse;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.io.UnsupportedEncodingException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
...
@@ -12,7 +11,8 @@ import org.slf4j.Logger;
...
@@ -12,7 +11,8 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.parse.analysis.SinaTousuAnalysis
;
import
com.zhiwei.parse.analysis.SinaTousuAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -23,7 +23,7 @@ public class SinaTousu {
...
@@ -23,7 +23,7 @@ public class SinaTousu {
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
public
static
List
<
Map
<
String
,
Object
>>
getSinaTousuData
(
String
word
,
Proxy
proxy
,
String
time
)
{
public
static
List
<
Map
<
String
,
Object
>>
getSinaTousuData
(
String
word
,
Proxy
Holder
proxy
,
String
time
)
{
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
int
page
=
1
;
int
page
=
1
;
int
count
=
1
;
int
count
=
1
;
...
@@ -43,7 +43,7 @@ public class SinaTousu {
...
@@ -43,7 +43,7 @@ public class SinaTousu {
logger
.
info
(
"黑猫投诉 关键词采集 第{}页 ,一共采集到数据 {} "
,
page
,
bodyList
.
size
());
logger
.
info
(
"黑猫投诉 关键词采集 第{}页 ,一共采集到数据 {} "
,
page
,
bodyList
.
size
());
page
++;
page
++;
ZhiWeiTools
.
sleep
(
30
00
);
ZhiWeiTools
.
sleep
(
1
00
);
}
catch
(
UnsupportedEncodingException
e
)
{
}
catch
(
UnsupportedEncodingException
e
)
{
count
++;
count
++;
logger
.
error
(
"UnsupportedEncodingException {}"
,
e
);
logger
.
error
(
"UnsupportedEncodingException {}"
,
e
);
...
...
src/main/java/com/zhiwei/parse/Souhu.java
View file @
cb5516a0
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
...
@@ -21,7 +23,6 @@ import com.zhiwei.parse.analysis.SouhuAccountAnalysis;
...
@@ -21,7 +23,6 @@ import com.zhiwei.parse.analysis.SouhuAccountAnalysis;
import
com.zhiwei.parse.analysis.SouhuCommentAnalysis
;
import
com.zhiwei.parse.analysis.SouhuCommentAnalysis
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
static
java
.
util
.
Objects
.
nonNull
;
public
class
Souhu
{
public
class
Souhu
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Souhu
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Souhu
.
class
);
...
@@ -39,9 +40,7 @@ public class Souhu {
...
@@ -39,9 +40,7 @@ public class Souhu {
try
{
try
{
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
,
proxy
);
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
,
proxy
);
if
(
nonNull
(
newurl
))
{
if
(
nonNull
(
newurl
))
{
int
i
;
return
souhuCommentAnalysis
.
getSouhuCommentCount
(
newurl
,
proxy
);
i
=
souhuCommentAnalysis
.
getSouhuCommentCount
(
newurl
,
proxy
);
return
i
;
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"搜狐获取评论数出错了 {}"
,
e
);
logger
.
error
(
"搜狐获取评论数出错了 {}"
,
e
);
...
@@ -49,6 +48,21 @@ public class Souhu {
...
@@ -49,6 +48,21 @@ public class Souhu {
return
-
1
;
return
-
1
;
}
}
/**
 * Fetches the read count of a Sohu account (搜狐号) article.
 *
 * @param url   article URL, handed unchanged to the Sohu comment analyser
 * @param proxy proxy holder, handed unchanged to the Sohu comment analyser
 * @return the read count reported by the analyser, or -1 when the lookup throws
 */
public static int getSohuReadNum(String url, ProxyHolder proxy) {
    // Start from the failure sentinel; it is only overwritten on success.
    int readNum = -1;
    try {
        readNum = souhuCommentAnalysis.getReadNum(url, proxy);
    } catch (Exception e) {
        logger.error("搜狐获取阅读数出错 {}", e);
    }
    return readNum;
}
/**
/**
*
*
...
...
src/main/java/com/zhiwei/parse/TXNews.java
View file @
cb5516a0
...
@@ -2,25 +2,36 @@ package com.zhiwei.parse;
...
@@ -2,25 +2,36 @@ package com.zhiwei.parse;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
org.apache.commons.lang3.StringUtils
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.TXNewsByWordAnalysis
;
import
com.zhiwei.parse.analysis.TXNewsByWordAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
TXNews
{
public
class
TXNews
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
TXNews
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
TXNews
.
class
);
private
static
TXNewsByWordAnalysis
txNewsByWordAnalysis
=
new
TXNewsByWordAnalysis
();
private
static
TXNewsByWordAnalysis
txNewsByWordAnalysis
=
new
TXNewsByWordAnalysis
();
public
static
boolean
txNewshasMoreData
=
true
;
public
static
boolean
txNewshasMoreData
=
true
;
public
static
HttpBoot
httpBoot
=
new
HttpBoot
();
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
word
,
String
devid
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
word
,
String
devid
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getTxNewspage1HeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getTxNewspage1HeaderMap
(
null
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getTxNewspage1ParamMap
(
word
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getTxNewspage1ParamMap
(
word
);
// b3dd1e7d-9d3c-4e75-bf3e-3a76f326ee34
// b3dd1e7d-9d3c-4e75-bf3e-3a76f326ee34
...
@@ -43,15 +54,71 @@ public class TXNews {
...
@@ -43,15 +54,71 @@ public class TXNews {
logger
.
info
(
"采集到数据======={}"
,
dataList
.
size
());
logger
.
info
(
"采集到数据======={}"
,
dataList
.
size
());
count
=
0
;
count
=
0
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
count
++;
if
(
count
>
2
)
{
if
(
count
>
2
)
{
count
++;
break
;
break
;
}
}
continue
;
}
}
}
}
return
dataList
;
return
dataList
;
}
}
/**
 * Collects the comments a Tencent News user has posted, paging through the
 * {@code getMyComments} endpoint.
 *
 * <p>Paging is cursor based: the {@code reply_id} of the last comment seen is
 * sent back as the cursor for the next request. The loop stops when the
 * response reports {@code bnext == 0}, when a page contains no comments
 * (the cursor could not advance, so the same page would be requested forever),
 * or after more than three consecutive failed attempts.
 *
 * @param coralUin value sent as the {@code coral_uin} form field
 * @param coralUid value sent as the {@code coral_uid} form field
 * @param proxy    proxy holder used for every request
 * @return one map per comment with the keys {@code name}, {@code replayUrl},
 *         {@code content}, {@code time}, {@code replayNum} and {@code agreeNum};
 *         never {@code null}, possibly partial when the loop gave up on errors
 */
public static List<Map<String, Object>> getTxNewsComments(String coralUin, String coralUid, ProxyHolder proxy) {
    String replayId = "";
    int tryCount = 0;
    List<Map<String, Object>> dataList = new ArrayList<>();
    while (true) {
        String content = StringUtils.join("coral_uin=", coralUin, "&coral_uid=", coralUid, "&reply_id=", replayId);
        // Sample values (presumably a coral_uin / coral_uid pair — confirm):
        // eca55388bbbb596e632bca03a2378efe94b83142fd046f1f70 876579532
        logger.debug("TXNews getMyComments request body: {}", content);
        try (Response response = httpBoot.syncCall(
                RequestUtils.wrapPost("https://r.inews.qq.com/getMyComments", "application/json", content), proxy)) {
            JSONObject json = JSONObject.parseObject(response.body().string());
            JSONArray jsonArray = json.getJSONObject("comments").getJSONArray("new");
            for (int i = 0; i < jsonArray.size(); i++) {
                // Each entry is itself an array; the last element is the record that gets stored.
                JSONArray dataJson = jsonArray.getJSONArray(i);
                JSONObject data = dataJson.getJSONObject(dataJson.size() - 1);
                Map<String, Object> replaymap = new HashMap<>();
                replaymap.put("name", data.getString("nick"));
                replaymap.put("replayUrl", data.getString("url"));
                replaymap.put("content", data.getString("reply_content"));
                // pub_time is widened with "000" before being used as epoch milliseconds.
                replaymap.put("time", new Date(Long.parseLong(data.getString("pub_time") + "000")));
                replaymap.put("replayNum", data.getInteger("reply_num"));
                replaymap.put("agreeNum", data.getInteger("agree_count"));
                // Remember the cursor for the next page.
                replayId = data.getString("reply_id");
                dataList.add(replaymap);
                tryCount = 0;
            }
            logger.info(" 采集到 {} 条 采集uid为 {}", dataList.size(), coralUid);
            // An empty page leaves the cursor unchanged, so asking again would loop forever.
            if (jsonArray.size() == 0 || json.getInteger("bnext") == 0) {
                break;
            }
        } catch (Exception e) {
            logger.error("腾讯新闻采集有部分出错 {} ", e);
            tryCount++;
        }
        if (tryCount > 3) {
            break;
        }
    }
    return dataList;
}
/**
 * Reads the comment count of a Tencent News client article
 * (for example https://view.inews.qq.com/a/20190305A0D0MR00).
 *
 * <p>The count is cut out of the raw response text: everything after the first
 * {@code comment_count":} marker up to the next {@code }} is parsed as an integer.
 *
 * @param url   article URL to download
 * @param proxy proxy holder used for the request
 * @return the parsed comment count, or -1 when the request or the parsing fails
 */
public static int getTxNewsCommentCount(String url, ProxyHolder proxy) {
    try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
        String page = response.body().string();
        // Text following the marker, then trimmed at the first closing brace.
        String afterMarker = page.split("comment_count\":")[1];
        String countText = afterMarker.split("}")[0];
        return Integer.parseInt(countText);
    } catch (Exception e) {
        logger.error(" 腾讯 新闻 评论数获取失败 {}", e);
    }
    return -1;
}
}
}
src/main/java/com/zhiwei/parse/TechTx.java
View file @
cb5516a0
...
@@ -12,8 +12,8 @@ import org.slf4j.Logger;
...
@@ -12,8 +12,8 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.parse.analysis.TechTxCommentAnalysis
;
import
com.zhiwei.parse.analysis.TechTxCommentAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -70,4 +70,5 @@ public class TechTx {
...
@@ -70,4 +70,5 @@ public class TechTx {
return
null
;
return
null
;
}
}
}
}
src/main/java/com/zhiwei/parse/Xueqiu.java
View file @
cb5516a0
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.io.IOException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -13,10 +12,12 @@ import java.util.Map;
...
@@ -13,10 +12,12 @@ import java.util.Map;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.parse.analysis.XueqiuKeyWordAnalysis
;
import
com.zhiwei.parse.analysis.XueqiuKeyWordAnalysis
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Request
;
import
okhttp3.Request
;
...
@@ -28,6 +29,15 @@ public class Xueqiu {
...
@@ -28,6 +29,15 @@ public class Xueqiu {
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
XueqiuKeyWordAnalysis
xueqiuKeyWordAnalysis
=
new
XueqiuKeyWordAnalysis
();
private
static
XueqiuKeyWordAnalysis
xueqiuKeyWordAnalysis
=
new
XueqiuKeyWordAnalysis
();
/**
*
* @Description 关键词采集历史文章
* @param word
* @param endTime
* @param proxy
* @param cookie
* @return
*/
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
word
,
String
endTime
,
Proxy
proxy
,
String
cookie
)
{
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
word
,
String
endTime
,
Proxy
proxy
,
String
cookie
)
{
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
int
i
=
0
;
int
i
=
0
;
...
@@ -51,10 +61,7 @@ public class Xueqiu {
...
@@ -51,10 +61,7 @@ public class Xueqiu {
logger
.
info
(
"采集到第{} 页 , 一共采集到 {} 数据"
,
page
,
bodyList
.
size
());
logger
.
info
(
"采集到第{} 页 , 一共采集到 {} 数据"
,
page
,
bodyList
.
size
());
page
++;
page
++;
}
}
}
catch
(
UnsupportedEncodingException
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
i
++;
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
i
++;
i
++;
}
}
...
@@ -65,6 +72,13 @@ public class Xueqiu {
...
@@ -65,6 +72,13 @@ public class Xueqiu {
return
bodyList
;
return
bodyList
;
}
}
/**
*
* @Description 雪球获取点赞评论数
* @param url
* @param proxy
* @return
*/
public
static
Map
<
String
,
Object
>
getUrlData
(
String
url
,
Proxy
proxy
)
{
public
static
Map
<
String
,
Object
>
getUrlData
(
String
url
,
Proxy
proxy
)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
String
result
=
response
.
body
().
string
();
...
@@ -79,9 +93,56 @@ public class Xueqiu {
...
@@ -79,9 +93,56 @@ public class Xueqiu {
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
" 雪球 数据转评赞获取失败 exception {} url = {}"
,
e
,
url
);
logger
.
error
(
" 雪球 数据转评赞获取失败 exception {} url = {}"
,
e
,
url
);
}
}
return
Collections
.
emptyMap
();
return
Collections
.
emptyMap
();
}
}
/**
 * Collects the timeline posts of a Xueqiu account, page by page.
 *
 * <p>Pages are requested from {@code /v4/statuses/user_timeline.json} starting
 * at page 1 and continuing until the page number exceeds the {@code maxPage}
 * value reported by the response. Any request or parsing error ends the
 * collection and returns what was gathered so far.
 *
 * @param userId id of the Xueqiu user whose timeline is fetched
 * @param cookie value sent as the {@code cookie} request header
 * @param proxy  proxy used for every request
 * @return one map per post with the keys {@code name}, {@code time},
 *         {@code source}, {@code content}, {@code repostCount},
 *         {@code commentCount}, {@code likeCount} and {@code url};
 *         never {@code null}
 */
public List<Map<String, Object>> getXueqiuAccountData(String userId, String cookie, Proxy proxy) {
    Map<String, Object> headers = new HashMap<>();
    headers.put("cookie", cookie);
    List<Map<String, Object>> bodyList = new ArrayList<>();
    // The page counter must live outside the loop, otherwise every iteration
    // restarts at page 1 and the loop never terminates when maxPage > 1.
    int page = 1;
    while (true) {
        String url = "https://xueqiu.com/v4/statuses/user_timeline.json?page=" + page
                + "&user_id=" + userId + "&type=0";
        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url, headers), proxy)) {
            String result = response.body().string();
            JSONObject json = JSONObject.parseObject(result);
            JSONArray jsonArray = json.getJSONArray("statuses");
            for (int i = 0; i < jsonArray.size(); i++) {
                // i-th entry of the "statuses" array
                JSONObject ob = jsonArray.getJSONObject(i);
                // publication time as text, converted by the project's TimeParse helper
                String timeBefore = ob.getString("timeBefore");
                Date date = TimeParse.stringFormartDate(timeBefore);
                Map<String, Object> map = new HashMap<>();
                map.put("name", ob.getJSONObject("user").getString("screen_name"));
                map.put("time", date);
                map.put("source", ob.getString("source"));
                // strip markup tags from the description
                map.put("content", ob.getString("description").replaceAll("<.*?>", ""));
                map.put("repostCount", ob.getString("retweet_count"));
                map.put("commentCount", ob.getString("reply_count"));
                map.put("likeCount", ob.getString("like_count"));
                // "target" is appended directly to the site root
                map.put("url", "https://xueqiu.com" + ob.getString("target"));
                bodyList.add(map);
            }
            int maxPage = json.getInteger("maxPage");
            page++;
            if (page > maxPage) {
                break;
            }
        } catch (Exception e) {
            logger.error("采集解析出错 {}", e);
            break;
        }
    }
    return bodyList;
}
}
}
src/main/java/com/zhiwei/parse/Yiche.java
View file @
cb5516a0
...
@@ -14,8 +14,8 @@ import org.slf4j.LoggerFactory;
...
@@ -14,8 +14,8 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
okhttp3.Response
;
...
...
src/main/java/com/zhiwei/parse/Yidianzixun.java
View file @
cb5516a0
...
@@ -6,11 +6,18 @@ import java.util.ArrayList;
...
@@ -6,11 +6,18 @@ import java.util.ArrayList;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
javax.script.Invocable
;
import
javax.script.ScriptEngine
;
import
javax.script.ScriptEngineManager
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.YidianzixunAccountAnalysis
;
import
com.zhiwei.parse.analysis.YidianzixunAccountAnalysis
;
...
@@ -18,12 +25,16 @@ import com.zhiwei.parse.analysis.YidianzixunByWordAnalysis;
...
@@ -18,12 +25,16 @@ import com.zhiwei.parse.analysis.YidianzixunByWordAnalysis;
import
com.zhiwei.parse.analysis.YidianzixunCommentAnalysis
;
import
com.zhiwei.parse.analysis.YidianzixunCommentAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
Yidianzixun
{
public
class
Yidianzixun
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Yidianzixun
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Yidianzixun
.
class
);
private
static
YidianzixunAccountAnalysis
yidianzixunAccountAnalysis
=
new
YidianzixunAccountAnalysis
();
private
static
YidianzixunAccountAnalysis
yidianzixunAccountAnalysis
=
new
YidianzixunAccountAnalysis
();
private
static
YidianzixunCommentAnalysis
yidianzixunCommentAnalysis
=
new
YidianzixunCommentAnalysis
();
private
static
YidianzixunCommentAnalysis
yidianzixunCommentAnalysis
=
new
YidianzixunCommentAnalysis
();
private
static
YidianzixunByWordAnalysis
yidianzixunByWordAnalysis
=
new
YidianzixunByWordAnalysis
();
private
static
YidianzixunByWordAnalysis
yidianzixunByWordAnalysis
=
new
YidianzixunByWordAnalysis
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
/**
/**
*
*
* @Description (获取一点资讯历时文章)
* @Description (获取一点资讯历时文章)
...
@@ -33,13 +44,14 @@ public class Yidianzixun {
...
@@ -33,13 +44,14 @@ public class Yidianzixun {
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getYidianzixunAccountData
(
String
channelid
,
String
startTime
,
Proxy
proxy
,
String
cookie
)
{
public
static
List
<
Map
<
String
,
Object
>>
getYidianzixunAccountData
(
String
channelid
,
String
startTime
,
Proxy
proxy
,
String
cookie
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getYidianzixunAccountHeaderMap
(
cookie
,
"http://www.yidianzixun.com/channel/"
+
channelid
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getYidianzixunAccountHeaderMap
(
cookie
,
"http://www.yidianzixun.com/channel/"
+
channelid
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
int
j
=
0
;
int
j
=
0
;
boolean
f
=
true
;
boolean
f
=
true
;
try
{
try
{
while
(
f
)
{
while
(
f
)
{
String
url
=
"http://www.yidianzixun.com/
home/q/news_list_for_channel?channel_id="
+
channelid
+
"&cstart="
+
j
+
"&cend="
+(
j
+
10
);
String
url
=
"http://www.yidianzixun.com/
"
+
getSpt
(
channelid
,
j
,
j
+
10
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
System
.
out
.
println
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"result"
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"result"
);
if
(
jsonArry
.
size
()
==
0
)
{
if
(
jsonArry
.
size
()
==
0
)
{
...
@@ -47,18 +59,20 @@ public class Yidianzixun {
...
@@ -47,18 +59,20 @@ public class Yidianzixun {
}
}
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
Map
<
String
,
Object
>
map
=
yidianzixunAccountAnalysis
.
parseJsonByAccount
(
jsonArry
.
getJSONObject
(
i
));
Map
<
String
,
Object
>
map
=
yidianzixunAccountAnalysis
.
parseJsonByAccount
(
jsonArry
.
getJSONObject
(
i
));
if
(
startTime
!=
null
)
{
if
(!
map
.
isEmpty
())
{
String
time
=
map
.
get
(
"time"
)+
""
;
if
(
startTime
!=
null
)
{
if
(
startTime
.
compareTo
(
time
)
>
0
)
{
String
time
=
map
.
get
(
"time"
)+
""
;
f
=
false
;
if
(
startTime
.
compareTo
(
time
)
>
0
)
{
break
;
f
=
false
;
break
;
}
}
}
dataList
.
add
(
map
);
}
}
dataList
.
add
(
map
);
}
}
System
.
out
.
println
(
"================================"
+
dataList
.
size
());
System
.
out
.
println
(
"================================"
+
dataList
.
size
());
ZhiWeiTools
.
sleep
(
3000
);
ZhiWeiTools
.
sleep
(
3000
);
j
+=
10
;
j
=
dataList
.
size
()
;
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"数据获取出错"
,
e
.
getMessage
());
logger
.
error
(
"数据获取出错"
,
e
.
getMessage
());
...
@@ -67,6 +81,28 @@ public class Yidianzixun {
...
@@ -67,6 +81,28 @@ public class Yidianzixun {
return
dataList
;
return
dataList
;
}
}
/**
 * Builds the signed path-and-query for the Yidianzixun channel news list.
 *
 * <p>The signature is produced by a small JavaScript function run through
 * {@code javax.script}: it concatenates {@code "sptoken"} with the channel id,
 * {@code cstart} and {@code cend}, XORs every character code with 10,
 * URI-encodes the result and appends it to the query as {@code _spt}.
 *
 * <p>The {@code cend} query parameter is taken from the {@code cend} argument,
 * so the query and the signature always describe the same range.
 *
 * @param channel_id channel whose news list is requested
 * @param cstart     value of the {@code cstart} query parameter
 * @param cend       value of the {@code cend} query parameter
 * @return the path and query including {@code _spt}, or {@code null} when the
 *         script cannot be evaluated or the engine is not {@link Invocable}
 */
private static String getSpt(String channel_id, int cstart, int cend) {
    String n = "/home/q/news_list_for_channel?channel_id=" + channel_id
            + "&cstart=" + cstart
            + "&cend=" + cend
            + "&infinite=true&refresh=1&__from__=pc&multi=5";
    String jsText = "function spt(n, e, i, t) {"
            + "for (var o = \"sptoken\", a = \"\", c = 1; c < arguments.length; c++){o += arguments[c];}"
            + "for (var c = 0; c < o.length; c++) {var r = 10 ^ o.charCodeAt(c); a += String.fromCharCode(r)}return n += (/\\?/.test(n) ? \"&_spt=\" : \"?_spt=\") + encodeURIComponent(a)}";
    ScriptEngineManager manager = new ScriptEngineManager();
    // getEngineByName yields null when no JavaScript engine is registered; the
    // resulting NullPointerException is handled by the catch below.
    ScriptEngine engine = manager.getEngineByName("javascript");
    try {
        engine.eval(jsText);
        if (engine instanceof Invocable) {
            Invocable invoke = (Invocable) engine;
            return invoke.invokeFunction("spt", n, channel_id, cstart, cend).toString();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}
/**
/**
*
*
...
@@ -140,10 +176,27 @@ public class Yidianzixun {
...
@@ -140,10 +176,27 @@ public class Yidianzixun {
}
}
return
dataList
;
return
dataList
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"获取一点资讯数据失败"
,
e
.
getMessage
());
logger
.
error
(
"获取一点资讯数据失败 {}"
,
e
);
e
.
printStackTrace
();
}
return
dataList
;
return
dataList
;
}
/**
*
* @Description 一点资讯评论数获取 (http://www.yidianzixun.com/article/0LQaOacC)
* @param id 0LQaOacC
* @param proxy
* @return
*/
public
static
int
getYidianzixunCommentCount
(
String
id
,
ProxyHolder
proxy
)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
"http://www.yidianzixun.com/home/q/getcomments?&docid="
+
id
+
"&s=&count=30&last_comment_id=&appid=web_yidian"
),
proxy
)){
String
result
=
response
.
body
().
string
();
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
return
json
.
getInteger
(
"total"
);
}
catch
(
Exception
e
)
{
logger
.
error
(
" 一点资讯 评论数解析出错 {}"
,
e
);
}
}
return
-
1
;
}
}
}
}
src/main/java/com/zhiwei/parse/Youku.java
View file @
cb5516a0
...
@@ -14,8 +14,9 @@ import org.slf4j.LoggerFactory;
...
@@ -14,8 +14,9 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
okhttp3.Response
;
import
okhttp3.Response
;
...
@@ -23,7 +24,7 @@ import okhttp3.Response;
...
@@ -23,7 +24,7 @@ import okhttp3.Response;
public
class
Youku
{
public
class
Youku
{
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
Youku
.
class
);
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
Youku
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
(
false
,
2
);
public
static
List
<
Map
<
String
,
Object
>>
getDataList
(
String
word
)
{
public
static
List
<
Map
<
String
,
Object
>>
getDataList
(
String
word
)
{
String
aaid
=
"9cae49f0e031664b00d8f9c108e586ab"
;
String
aaid
=
"9cae49f0e031664b00d8f9c108e586ab"
;
...
@@ -49,6 +50,7 @@ public class Youku {
...
@@ -49,6 +50,7 @@ public class Youku {
map
.
put
(
"url"
,
"https:"
+
surl
);
map
.
put
(
"url"
,
"https:"
+
surl
);
map
.
put
(
"time"
,
time
.
replaceAll
(
"上传时间:"
,
""
).
split
(
" "
)[
0
]);
map
.
put
(
"time"
,
time
.
replaceAll
(
"上传时间:"
,
""
).
split
(
" "
)[
0
]);
map
.
put
(
"uper"
,
time
.
replace
(
time
.
split
(
"上传者:"
)[
0
],
""
));
map
.
put
(
"uper"
,
time
.
replace
(
time
.
split
(
"上传者:"
)[
0
],
""
));
map
.
put
(
"word"
,
word
);
list
.
add
(
map
);
list
.
add
(
map
);
}
}
}
}
...
@@ -61,5 +63,29 @@ public class Youku {
...
@@ -61,5 +63,29 @@ public class Youku {
return
list
;
return
list
;
}
}
/**
 * Collects the hotness ("热度") figure shown for a Youku video.
 *
 * <p>The page title is read from {@code meta[name='title']}; the first entry
 * under {@code div#listitem_page1} whose markup contains that title supplies
 * the figure from its {@code div.status > span} text.
 *
 * <p>A page without a title yields -1: an empty title would otherwise match
 * every entry and return the figure of an unrelated video.
 *
 * @param url   video page URL to download
 * @param proxy proxy holder used for the request
 * @return the parsed hotness, or -1 when the title or a matching entry is
 *         missing, or when the request or the parsing fails
 */
public static int getYoukuHotCount(String url, ProxyHolder proxy) {
    try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
        String result = response.body().string();
        Document doc = Jsoup.parse(result);
        String title = doc.select("meta[name='title']").attr("content");
        if (title.isEmpty()) {
            // attr() returns "" when the meta tag is absent, and contains("") is always true.
            logger.warn("优酷热度采集未找到标题 url = {}", url);
            return -1;
        }
        Elements elements = doc.select("div#listitem_page1").select("div.item.item-cover");
        for (Element element : elements) {
            if (element.toString().contains(title)) {
                String hot = element.select("div.status > span").text().replace("热度 ", "");
                return Integer.parseInt(hot.trim());
            }
        }
    } catch (Exception e) {
        logger.error("优酷热度采集出错{}", e);
    }
    return -1;
}
}
}
src/main/java/com/zhiwei/parse/analysis/AiqiyiByWordAnalysis.java
View file @
cb5516a0
...
@@ -18,8 +18,7 @@ import com.zhiwei.tools.timeparse.TimeParse;
...
@@ -18,8 +18,7 @@ import com.zhiwei.tools.timeparse.TimeParse;
public
class
AiqiyiByWordAnalysis
{
public
class
AiqiyiByWordAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
AiqiyiByWordAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
AiqiyiByWordAnalysis
.
class
);
public
List
<
Map
<
String
,
Object
>>
getAiqiyiData
(
String
result
,
String
word
)
{
public
List
<
Map
<
String
,
Object
>>
getAiqiyiData
(
String
result
)
{
List
<
Map
<
String
,
Object
>>
dataMap
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataMap
=
new
ArrayList
<>();
try
{
try
{
Document
doc
=
Jsoup
.
parse
(
result
);
Document
doc
=
Jsoup
.
parse
(
result
);
...
@@ -28,11 +27,14 @@ public class AiqiyiByWordAnalysis {
...
@@ -28,11 +27,14 @@ public class AiqiyiByWordAnalysis {
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
String
title
=
element
.
select
(
"li"
).
attr
(
"data-widget-searchlist-tvname"
);
String
title
=
element
.
select
(
"li"
).
attr
(
"data-widget-searchlist-tvname"
);
String
time
=
element
.
select
(
"em.result_info_desc"
).
text
().
split
(
" "
)[
0
];
String
time
=
element
.
select
(
"em.result_info_desc"
).
text
().
split
(
" "
)[
0
];
if
(
element
.
select
(
"label.result_info_lbl"
).
text
().
contains
(
"上传者"
))
{
map
.
put
(
"source"
,
element
.
select
(
"a.result_info_link"
).
text
());
}
String
uurl
=
element
.
select
(
"h3.result_title > a"
).
attr
(
"href"
);
String
uurl
=
element
.
select
(
"h3.result_title > a"
).
attr
(
"href"
);
map
.
put
(
"time"
,
TimeParse
.
stringFormartDate
(
time
));
map
.
put
(
"time"
,
TimeParse
.
stringFormartDate
(
time
));
map
.
put
(
"url"
,
uurl
);
map
.
put
(
"url"
,
uurl
);
map
.
put
(
"title"
,
title
);
map
.
put
(
"title"
,
title
);
System
.
out
.
println
(
map
.
toString
()
);
map
.
put
(
"word"
,
word
);
dataMap
.
add
(
map
);
dataMap
.
add
(
map
);
}
}
return
dataMap
;
return
dataMap
;
...
@@ -42,7 +44,6 @@ public class AiqiyiByWordAnalysis {
...
@@ -42,7 +44,6 @@ public class AiqiyiByWordAnalysis {
}
}
}
}
// public String getSource(String url,ProxyHolder proxy) {
// public String getSource(String url,ProxyHolder proxy) {
// Map<String,String> headerMap = HeadGet.getAiqiyiForCountHeaderMap(null);
// Map<String,String> headerMap = HeadGet.getAiqiyiForCountHeaderMap(null);
// System.out.println(url);
// System.out.println(url);
...
...
src/main/java/com/zhiwei/parse/analysis/BaijiaAccountAnalysis.java
View file @
cb5516a0
...
@@ -15,7 +15,7 @@ import org.slf4j.LoggerFactory;
...
@@ -15,7 +15,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/parse/analysis/BilibilikeyWordAnalysis.java
View file @
cb5516a0
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -12,7 +13,7 @@ import org.jsoup.select.Elements;
...
@@ -12,7 +13,7 @@ import org.jsoup.select.Elements;
public
class
BilibilikeyWordAnalysis
{
public
class
BilibilikeyWordAnalysis
{
public
static
Map
<
String
,
Object
>
getData
(
String
result
)
{
public
static
Map
<
String
,
Object
>
getData
(
String
result
,
String
word
)
{
try
{
try
{
Document
doc
=
Jsoup
.
parse
(
result
);
Document
doc
=
Jsoup
.
parse
(
result
);
boolean
more
=
false
;
boolean
more
=
false
;
...
@@ -43,17 +44,17 @@ public class BilibilikeyWordAnalysis {
...
@@ -43,17 +44,17 @@ public class BilibilikeyWordAnalysis {
map
.
put
(
"time"
,
time
);
map
.
put
(
"time"
,
time
);
map
.
put
(
"source"
,
source
);
map
.
put
(
"source"
,
source
);
map
.
put
(
"submitcount"
,
submitcount
);
map
.
put
(
"submitcount"
,
submitcount
);
map
.
put
(
"word"
,
word
);
dataList
.
add
(
map
);
dataList
.
add
(
map
);
System
.
out
.
println
(
map
.
toString
());
}
}
Map
<
String
,
Object
>
rmap
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
rmap
=
new
HashMap
<>();
rmap
.
put
(
"more"
,
more
);
rmap
.
put
(
"more"
,
more
);
rmap
.
put
(
"data"
,
dataList
);
rmap
.
put
(
"data"
,
dataList
);
return
rmap
;
return
rmap
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
return
null
;
return
Collections
.
emptyMap
()
;
}
}
}
}
src/main/java/com/zhiwei/parse/analysis/DayuByWordAnalysis.java
View file @
cb5516a0
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
...
@@ -15,7 +15,7 @@ import org.slf4j.LoggerFactory;
...
@@ -15,7 +15,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/parse/analysis/FenghuangAccountAnalysis.java
View file @
cb5516a0
...
@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory;
...
@@ -12,7 +12,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/parse/analysis/FenghuangCommentAnalysis.java
View file @
cb5516a0
...
@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
...
@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
...
...
src/main/java/com/zhiwei/parse/analysis/GftaiAnalysis.java
View file @
cb5516a0
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
org.jsoup.Jsoup
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.nodes.Element
;
...
...
src/main/java/com/zhiwei/parse/analysis/KuaiTousuAnalysis.java
View file @
cb5516a0
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
org.jsoup.Jsoup
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Document
;
...
...
src/main/java/com/zhiwei/parse/analysis/QQKBAccountAnalysis.java
View file @
cb5516a0
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Objects
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
...
@@ -23,11 +25,11 @@ public class QQKBAccountAnalysis {
...
@@ -23,11 +25,11 @@ public class QQKBAccountAnalysis {
public
List
<
Map
<
String
,
Object
>>
analysisQQAccountData
(
String
result
)
{
public
List
<
Map
<
String
,
Object
>>
analysisQQAccountData
(
String
result
)
{
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"newslist"
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"newslist"
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>
>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
try
{
try
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
map
.
put
(
"url"
,
data
.
getString
(
"url_comment"
));
map
.
put
(
"url"
,
data
.
getString
(
"url_comment"
));
map
.
put
(
"time"
,
data
.
getString
(
"time"
));
map
.
put
(
"time"
,
data
.
getString
(
"time"
));
map
.
put
(
"title"
,
data
.
getString
(
"title"
));
map
.
put
(
"title"
,
data
.
getString
(
"title"
));
...
@@ -38,8 +40,8 @@ public class QQKBAccountAnalysis {
...
@@ -38,8 +40,8 @@ public class QQKBAccountAnalysis {
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析出错
"
,
e
.
getMessage
()
);
logger
.
error
(
"解析出错
{}"
,
e
);
return
null
;
return
Collections
.
emptyList
()
;
}
}
return
dataList
;
return
dataList
;
}
}
...
@@ -51,18 +53,20 @@ public class QQKBAccountAnalysis {
...
@@ -51,18 +53,20 @@ public class QQKBAccountAnalysis {
* @return
* @return
*/
*/
public
List
<
String
>
getQQAllIds
(
String
result
)
{
public
List
<
String
>
getQQAllIds
(
String
result
)
{
List
<
String
>
list
=
new
ArrayList
<
String
>();
List
<
String
>
list
=
new
ArrayList
<>();
try
{
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"ids"
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"ids"
);
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
if
(
Objects
.
nonNull
(
jsonArry
)
)
{
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
list
.
add
(
data
.
getString
(
"id"
));
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
list
.
add
(
data
.
getString
(
"id"
));
}
}
}
return
list
;
return
list
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"获取企鹅号所有id出错
"
,
e
.
getMessage
()
);
logger
.
error
(
"获取企鹅号所有id出错
{}"
,
e
);
return
null
;
return
Collections
.
emptyList
()
;
}
}
}
}
...
...
src/main/java/com/zhiwei/parse/analysis/SinaTousuAnalysis.java
View file @
cb5516a0
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.Date
;
import
java.util.Date
;
...
@@ -28,9 +29,9 @@ public class SinaTousuAnalysis {
...
@@ -28,9 +29,9 @@ public class SinaTousuAnalysis {
for
(
int
i
=
0
;
i
<
jsonArray
.
size
()
;
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArray
.
size
()
;
i
++)
{
JSONObject
data
=
jsonArray
.
getJSONObject
(
i
);
JSONObject
data
=
jsonArray
.
getJSONObject
(
i
);
String
ctime
=
TimeParse
.
dateFormartString
(
new
Date
(
data
.
getJSONObject
(
"main"
).
getLong
(
"timestamp"
)*
1000L
),
"yyyy-MM-dd HH:mm:ss"
);
String
ctime
=
TimeParse
.
dateFormartString
(
new
Date
(
data
.
getJSONObject
(
"main"
).
getLong
(
"timestamp"
)*
1000L
),
"yyyy-MM-dd HH:mm:ss"
);
if
(!
nonNull
(
time
)
||
ctime
.
compareTo
(
time
)
<=
0
)
{
// if(
nonNull(time) || ctime.compareTo(time) <= 0) {
continue
;
//
continue;
}
//
}
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
map
.
put
(
"title"
,
data
.
getJSONObject
(
"main"
).
getString
(
"title"
).
replaceAll
(
"<.*?>"
,
""
));
map
.
put
(
"title"
,
data
.
getJSONObject
(
"main"
).
getString
(
"title"
).
replaceAll
(
"<.*?>"
,
""
));
map
.
put
(
"url"
,
"https:"
+
data
.
getJSONObject
(
"main"
).
getString
(
"url"
));
map
.
put
(
"url"
,
"https:"
+
data
.
getJSONObject
(
"main"
).
getString
(
"url"
));
...
...
src/main/java/com/zhiwei/parse/analysis/SouhuCommentAnalysis.java
View file @
cb5516a0
...
@@ -3,14 +3,15 @@ package com.zhiwei.parse.analysis;
...
@@ -3,14 +3,15 @@ package com.zhiwei.parse.analysis;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Objects
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
...
@@ -30,24 +31,58 @@ public class SouhuCommentAnalysis {
...
@@ -30,24 +31,58 @@ public class SouhuCommentAnalysis {
public
String
getSouhuURL
(
String
url
,
ProxyHolder
proxy
)
{
public
String
getSouhuURL
(
String
url
,
ProxyHolder
proxy
)
{
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
try
(
Response
response
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxy
)){
String
result
=
response
.
body
().
string
();
String
result
=
response
.
body
().
string
();
String
source_id
=
result
.
split
(
"news_id: \""
)[
1
].
split
(
"\","
)[
0
];
String
sourceId
=
getNewsId
(
result
);
String
topic_id
=
result
.
split
(
"media_id: \""
)[
1
].
split
(
"\","
)[
0
];
String
topicId
=
getTopicId
(
result
);
return
"http://apiv2.sohu.com/api/comment/list?page_size=10&topic_id="
+
topic_id
+
"&source_id=mp_"
+
source_id
;
if
(
Objects
.
nonNull
(
topicId
)
&&
Objects
.
nonNull
(
sourceId
))
{
return
"http://apiv2.sohu.com/api/comment/list?page_size=10&topic_id="
+
topicId
+
"&source_id=mp_"
+
sourceId
;
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"Exception {} "
,
e
);
logger
.
error
(
"Exception {} "
,
e
);
}
}
return
null
;
return
null
;
}
}
/**
 * Extracts the topic id (the page's {@code media_id} value) from the raw article page text.
 *
 * <p>Two quoting styles are recognised, keyed on which news-id marker the page contains:
 * <ul>
 *   <li>page contains {@code news_id}: the value between {@code media_id: "} and the next {@code ",}</li>
 *   <li>page contains {@code newsId}: the value between {@code media_id: '} and the next {@code ',}</li>
 * </ul>
 * When both markers are present the single-quoted form wins because it is evaluated last.
 *
 * @param result raw text of the article page
 * @return the extracted id, or {@code null} when neither marker is present or extraction fails
 */
private String getTopicId(String result) {
    try {
        String topicId = null;
        if (result.contains("news_id")) {
            topicId = result.split("media_id: \"")[1].split("\",")[0];
        }
        if (result.contains("newsId")) {
            topicId = result.split("media_id: '")[1].split("',")[0];
        }
        return topicId;
    } catch (Exception e) {
        // A null page or a missing media_id marker ends up here; keep the throwable so the
        // log shows why extraction failed instead of only a fixed message.
        logger.error("获取topicID出错", e);
    }
    return null;
}
/**
 * Extracts the news (source) id from the raw article page text.
 *
 * <p>Two marker styles are recognised:
 * <ul>
 *   <li>page contains {@code news_id}: the value between {@code news_id: "} and the next {@code ",}</li>
 *   <li>page contains {@code newsId}: the value between {@code newsId : '} and the next {@code ',}</li>
 * </ul>
 * When both markers are present the {@code newsId} form wins because it is evaluated last.
 *
 * @param result raw text of the article page
 * @return the extracted id, or {@code null} when neither marker is present or extraction fails
 */
private String getNewsId(String result) {
    try {
        String sourceId = null;
        if (result.contains("news_id")) {
            sourceId = result.split("news_id: \"")[1].split("\",")[0];
        }
        if (result.contains("newsId")) {
            sourceId = result.split("newsId : '")[1].split("',")[0];
        }
        return sourceId;
    } catch (Exception e) {
        // A null page or a marker that is present without the expected "key: value" shape
        // ends up here; keep the throwable so the failure is diagnosable from the log.
        logger.error("获取sourceId出错", e);
    }
    return null;
}
public
int
getSouhuCommentCount
(
String
url
,
ProxyHolder
proxy
)
{
public
int
getSouhuCommentCount
(
String
url
,
ProxyHolder
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
int
i
;
try
{
try
{
System
.
out
.
println
(
url
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
i
=
json
.
getJSONObject
(
"jsonObject"
).
getInteger
(
"cmt_sum"
);
if
(
json
.
getInteger
(
"code"
)
==
500
)
{
return
i
;
return
0
;
}
return
json
.
getJSONObject
(
"jsonObject"
).
getInteger
(
"cmt_sum"
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"获取搜狐评论数信息出错 {}"
,
e
);
logger
.
error
(
"获取搜狐评论数信息出错 {}"
,
e
);
return
-
1
;
return
-
1
;
...
@@ -75,12 +110,23 @@ public class SouhuCommentAnalysis {
...
@@ -75,12 +110,23 @@ public class SouhuCommentAnalysis {
map
.
put
(
"comment_id"
,
data
.
getString
(
"comment_id"
));
map
.
put
(
"comment_id"
,
data
.
getString
(
"comment_id"
));
map
.
put
(
"reply_id"
,
data
.
getString
(
"reply_id"
));
map
.
put
(
"reply_id"
,
data
.
getString
(
"reply_id"
));
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
System
.
out
.
println
(
data
.
toString
());
logger
.
error
(
"解析出错 {}"
,
e
);
System
.
out
.
println
(
map
.
toString
());
logger
.
error
(
"解析出错"
,
e
.
getMessage
());
}
}
return
map
;
return
map
;
}
}
/**
 * Looks up the page-view count of a Sohu article.
 *
 * <p>The article page is fetched first so its news id can be extracted with
 * {@code getNewsId}; the id is then sent to the {@code articles/pv} endpoint and the
 * integer stored under that id in the JSON reply is returned.
 *
 * @param url   article page URL
 * @param proxy proxy passed to both HTTP calls
 * @return the value reported for the article, or {@code -1} when the id cannot be
 *         extracted or any request/parsing step fails
 */
public int getReadNum(String url, ProxyHolder proxy) {
    try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy)) {
        String sourceId = getNewsId(response.body().string());
        if (sourceId == null) {
            // Without an id the pv endpoint would be queried for "null" and the lookup
            // would fail anyway; skip the extra request and report failure directly.
            return -1;
        }
        String pvUrl = "http://v2.sohu.com/public-api/articles/pv?articleIds=" + sourceId;
        // Close the second response as well, mirroring the handling of the first one.
        try (Response pvResponse = httpBoot.syncCall(RequestUtils.wrapGet(pvUrl), proxy)) {
            String pvResult = pvResponse.body().string();
            return JSONObject.parseObject(pvResult).getInteger(sourceId);
        }
    } catch (Exception e) {
        logger.error("Exception {} ", e);
    }
    return -1;
}
...
...
src/main/java/com/zhiwei/parse/analysis/YidianzixunAccountAnalysis.java
View file @
cb5516a0
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -19,19 +20,21 @@ public class YidianzixunAccountAnalysis {
...
@@ -19,19 +20,21 @@ public class YidianzixunAccountAnalysis {
* @return
* @return
*/
*/
public
Map
<
String
,
Object
>
parseJsonByAccount
(
JSONObject
data
)
{
public
Map
<
String
,
Object
>
parseJsonByAccount
(
JSONObject
data
)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<>();
try
{
try
{
map
.
put
(
"title"
,
data
.
getString
(
"title"
));
if
(
data
.
containsKey
(
"url"
))
{
map
.
put
(
"time"
,
data
.
getString
(
"date"
));
map
.
put
(
"title"
,
data
.
getString
(
"title"
));
map
.
put
(
"comment_count"
,
data
.
getString
(
"comment_count"
)==
null
?
0
:
data
.
getString
(
"comment_count"
));
map
.
put
(
"time"
,
data
.
getString
(
"date"
));
map
.
put
(
"ctype"
,
data
.
getString
(
"ctype"
));
map
.
put
(
"comment_count"
,
data
.
getString
(
"comment_count"
)==
null
?
0
:
data
.
getString
(
"comment_count"
));
map
.
put
(
"source"
,
data
.
getString
(
"source"
));
map
.
put
(
"ctype"
,
data
.
getString
(
"ctype"
));
map
.
put
(
"url"
,
data
.
getString
(
"url"
));
map
.
put
(
"source"
,
data
.
getString
(
"source"
));
map
.
put
(
"summary"
,
data
.
getString
(
"summary"
));
map
.
put
(
"url"
,
"http://www.yidianzixun.com/article/"
+
data
.
getString
(
"docid"
));
map
.
put
(
"summary"
,
data
.
getString
(
"summary"
));
}
else
{
return
Collections
.
emptyMap
();
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
System
.
out
.
println
(
data
.
toString
());
logger
.
error
(
"解析此条出错"
,
e
);
System
.
out
.
println
(
map
.
toString
());
logger
.
error
(
"解析此条出错"
,
e
.
getMessage
());
}
}
return
map
;
return
map
;
}
}
...
...
src/main/java/com/zhiwei/parse/shipin/QQTV.java
View file @
cb5516a0
...
@@ -18,9 +18,9 @@ import org.slf4j.LoggerFactory;
...
@@ -18,9 +18,9 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
okhttp3.Response
;
...
@@ -54,7 +54,7 @@ public class QQTV {
...
@@ -54,7 +54,7 @@ public class QQTV {
String
nurl
=
element
.
select
(
"h2.result_title"
).
select
(
"a"
).
attr
(
"href"
);
String
nurl
=
element
.
select
(
"h2.result_title"
).
select
(
"a"
).
attr
(
"href"
);
Map
<
String
,
Object
>
map
=
getUrlData
(
nurl
,
ProxyFactory
.
getNatProxy
());
Map
<
String
,
Object
>
map
=
getUrlData
(
nurl
,
ProxyFactory
.
getNatProxy
());
if
(
Objects
.
nonNull
(
map
)
&&
time
.
compareTo
(
String
.
valueOf
(
map
.
get
(
"time"
)))
<
1
)
{
if
(
Objects
.
nonNull
(
map
)
&&
time
.
compareTo
(
String
.
valueOf
(
map
.
get
(
"time"
)))
<
1
)
{
// System.out.println(map.toString()
);
map
.
put
(
"word"
,
word
);
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
ZhiWeiTools
.
sleep
(
50
);
ZhiWeiTools
.
sleep
(
50
);
...
...
src/main/java/com/zhiwei/parse/shipin/SohuTV.java
View file @
cb5516a0
...
@@ -15,7 +15,7 @@ import org.slf4j.Logger;
...
@@ -15,7 +15,7 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -52,6 +52,7 @@ public class SohuTV {
...
@@ -52,6 +52,7 @@ public class SohuTV {
map
.
put
(
"time"
,
TimeParse
.
stringFormartDate
(
time
));
map
.
put
(
"time"
,
TimeParse
.
stringFormartDate
(
time
));
map
.
put
(
"url"
,
"https://"
+
nurl
);
map
.
put
(
"url"
,
"https://"
+
nurl
);
map
.
put
(
"playCount"
,
amountOfPlay
);
map
.
put
(
"playCount"
,
amountOfPlay
);
map
.
put
(
"word"
,
word
);
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/test/java/com/zhiwei/Comment/AiqiyiHotCountTest.java
0 → 100644
View file @
cb5516a0
package
com
.
zhiwei
.
Comment
;
import
java.util.List
;
import
java.util.Map
;
import
org.testng.annotations.Test
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.Aiqiyi
;
/**
 * Manual harness: reads a spreadsheet from a hard-coded local path, asks
 * {@code Aiqiyi.aiqiyiHotCount} for a value per row (using the row's "链接" cell as the URL),
 * stores it under a new "count" column and writes the rows back to the same path.
 */
public class AiqiyiHotCountTest {

    @Test
    public void f() {
        PoiExcelUtil excel = PoiExcelUtil.getInstance();
        String path = "C:\\Users\\byte-zbs\\Documents\\WXWork\\1688854025129101\\Cache\\File\\2019-03\\爱奇艺.xlsx";

        // Second argument is presumably the sheet index -- TODO confirm against PoiExcelUtil.
        Map<String, Object> sheet = excel.importExcel(path, 0);
        List<Map<String, Object>> rows = (List<Map<String, Object>>) sheet.get("body");

        ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);

        List<String> headers = (List<String>) sheet.get("head");
        headers.add("count");

        for (Map<String, Object> row : rows) {
            String url = String.valueOf(row.get("链接"));
            int count = Aiqiyi.aiqiyiHotCount(url, ProxyHolder.NAT_PROXY);
            System.out.println(url + " -- " + count);
            row.put("count", count);
        }

        excel.exportExcel(path, "data", headers, rows);
    }
}
src/test/java/com/zhiwei/Comment/MaimaiCommentCountTest.java
View file @
cb5516a0
package
com
.
zhiwei
.
Comment
;
//
package com.zhiwei.Comment;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.testng.annotations.Test
;
//
import org.testng.annotations.Test;
//
import
com.zhiwei.common.config.GroupType
;
//
import com.zhiwei.common.config.GroupType;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
//
import com.zhiwei.crawler.proxy.ProxyHolder;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Maimai
;
//
import com.zhiwei.parse.Maimai;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
//
import com.zhiwei.tools.tools.ZhiWeiTools;
//
public
class
MaimaiCommentCountTest
{
//
public class MaimaiCommentCountTest {
@Test
//
@Test
public
void
f
()
{
//
public void f() {
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
//
ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
GroupType
.
PROVIDER
);
//
GroupType.PROVIDER);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
//
Map
<
String
,
Object
>
map
=
poi
//
Map<String, Object> map = poi
.
importExcel
(
"C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx"
,
0
);
//
.importExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx", 0);
List
<
Map
<
String
,
Object
>>
list
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
//
List<Map<String, Object>> list = (List<Map<String, Object>>) map.get("body");
String
cookie
=
"_buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; sessionid=lejfy3gdu5tf9x9zowxfhtq5o73dubc5; guid=GxsfBBgZGwQYGx4EGBkeVgcYGxkdHhMeHhkcVhwZBB0ZHwVDWEtMS3kKEhMEEh0fGQQaBBsdBU9HRVhCaQoDRUFJT20KT0FDRgoGZmd+YmECChwZBB0ZHwVeQ2FIT31PRlpaawoDHhx9ZX0KERkEHAp+ZApZXUVOREN9AgoaBB8FS0ZGQ1BFZw==; seid=s1550814253444; token=\"rhItcea5qkO6WCSnVcczW/NRVLLCTsq3kQbpUCGAwQ0ceLunVJRjT5rgoFVYrIBA8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiVjJuNHdCVDBncVNacTRxVllGM29jRUVwIiwic3RhdHVzIjp0cnVlLCJfZXhwaXJlIjoxNTUwOTAyMTY3MDQ5LCJfbWF4QWdlIjo4NjQwMDAwMH0=; session.sig=zbs4cHtzTcHWvjtkpjAZmoqLXsQ"
;
//
String cookie = "_buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; sessionid=lejfy3gdu5tf9x9zowxfhtq5o73dubc5; guid=GxsfBBgZGwQYGx4EGBkeVgcYGxkdHhMeHhkcVhwZBB0ZHwVDWEtMS3kKEhMEEh0fGQQaBBsdBU9HRVhCaQoDRUFJT20KT0FDRgoGZmd+YmECChwZBB0ZHwVeQ2FIT31PRlpaawoDHhx9ZX0KERkEHAp+ZApZXUVOREN9AgoaBB8FS0ZGQ1BFZw==; seid=s1550814253444; token=\"rhItcea5qkO6WCSnVcczW/NRVLLCTsq3kQbpUCGAwQ0ceLunVJRjT5rgoFVYrIBA8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiVjJuNHdCVDBncVNacTRxVllGM29jRUVwIiwic3RhdHVzIjp0cnVlLCJfZXhwaXJlIjoxNTUwOTAyMTY3MDQ5LCJfbWF4QWdlIjo4NjQwMDAwMH0=; session.sig=zbs4cHtzTcHWvjtkpjAZmoqLXsQ";
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
//
List<Map<String, Object>> bodyList = new ArrayList<Map<String, Object>>();
List
<
String
>
headList
=
(
List
<
String
>)
map
.
get
(
"head"
);
//
List<String> headList = (List<String>) map.get("head");
for
(
Map
<
String
,
Object
>
map1
:
list
)
{
//
for (Map<String, Object> map1 : list) {
String
url
=
map1
.
get
(
"地址"
)
+
""
;
//
String url = map1.get("地址") + "";
Map
<
String
,
Object
>
map3
=
Maimai
.
getMaiaiCount
(
url
,
null
,
ProxyHolder
.
NAT_PROXY
);
//
Map<String,Object> map3 = Maimai.getMaiaiCount(url,null, ProxyHolder.NAT_PROXY);
System
.
out
.
println
(
map3
.
toString
());
//
System.out.println(map3.toString());
System
.
out
.
println
(
url
);
//
System.out.println(url);
map1
.
putAll
(
map3
);
//
map1.putAll(map3);
ZhiWeiTools
.
sleep
(
500
);
//
ZhiWeiTools.sleep(500);
System
.
out
.
println
(
"--------------------------"
);
//
System.out.println("--------------------------");
}
//
}
headList
.
add
(
"like"
);
//
headList.add("like");
headList
.
add
(
"spreads"
);
//
headList.add("spreads");
headList
.
add
(
"cmts"
);
//
headList.add("cmts");
poi
.
exportExcel
(
"C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx"
,
"评论采集"
,
headList
,
//
poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\tx\\脉脉公司圈.xlsx", "评论采集", headList,
list
);
//
list);
}
//
}
}
//
}
src/test/java/com/zhiwei/Comment/QQNewCommentCountTest.java
0 → 100644
View file @
cb5516a0
//package com.zhiwei.Comment;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.crawler.proxy.ProxyHolder;
//import com.zhiwei.parse.QQNews;
//
//public class QQNewCommentCountTest {
// @Test
// public void qqNewCommentCountTest() {
// String id = "TEC2019030500050000";
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
// System.out.println(ProxyFactory.getNatProxy());
// int i = QQNews.getQQNewsCommentCount(id, ProxyHolder.NAT_PROXY);
// System.out.println(i);
// }
//}
src/test/java/com/zhiwei/Comment/SinkeCommentCountTest.java
0 → 100644
View file @
cb5516a0
//package com.zhiwei.Comment;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.crawler.proxy.ProxyHolder;
//import com.zhiwei.parse.SinaKeji;
//
//public class SinkeCommentCountTest {
// @Test
// public void f() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
// String url = "https://k.sina.cn/article_6972257940_19f94369400100fyxr.html?cre=tianyi&mod=nfin&loc=10&r=0&rfunc=24&tj=cxvertical_nfin&tr=12&fromsinago=1&http=fromhttp";
//
// int i = SinaKeji.getCommentCount(url, ProxyHolder.NAT_PROXY);
// System.out.println(i);
// }
//}
src/test/java/com/zhiwei/Comment/TxNewsCommentCountTest.java
0 → 100644
View file @
cb5516a0
//package com.zhiwei.Comment;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.crawler.proxy.ProxyHolder;
//import com.zhiwei.parse.TXNews;
//
//public class TxNewsCommentCountTest {
// @Test
// public void txNewsCommentCountTest() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181","local", GroupType.PROVIDER);
// String url = "https://view.inews.qq.com/a/20190207A0480R00";
//
// System.out.println(TXNews.getTxNewsCommentCount(url, ProxyHolder.NAT_PROXY));
// }
//}
src/test/java/com/zhiwei/Comment/YidianzixunCommentCountTest.java
0 → 100644
View file @
cb5516a0
//package com.zhiwei.Comment;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.crawler.proxy.ProxyHolder;
//import com.zhiwei.parse.Yidianzixun;
//
//public class YidianzixunCommentCountTest {
// @Test
// public void yidianzixunCommentCountTest() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
// String id = "0LQaOacC";
// int i = Yidianzixun.getYidianzixunCommentCount(id, ProxyHolder.NAT_PROXY);
// System.out.println(i);
// }
//}
src/test/java/com/zhiwei/Comment/YoukuHotCountTest.java
0 → 100644
View file @
cb5516a0
package
com
.
zhiwei
.
Comment
;
import
java.util.List
;
import
java.util.Map
;
import
org.testng.annotations.Test
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.Aiqiyi
;
import
com.zhiwei.parse.Youku
;
/**
 * Manual harness: reads a spreadsheet from a hard-coded local path, asks
 * {@code Youku.getYoukuHotCount} for a value per row (using the row's "链接" cell as the URL),
 * stores it under a new "count" column and writes the rows back to the same path.
 */
public class YoukuHotCountTest {

    @Test
    public void f() {
        PoiExcelUtil excel = PoiExcelUtil.getInstance();
        String path = "C:\\Users\\byte-zbs\\Documents\\WXWork\\1688854025129101\\Cache\\File\\2019-03\\优酷.xlsx";

        // Second argument is presumably the sheet index -- TODO confirm against PoiExcelUtil.
        Map<String, Object> sheet = excel.importExcel(path, 0);
        List<Map<String, Object>> rows = (List<Map<String, Object>>) sheet.get("body");

        ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);

        List<String> headers = (List<String>) sheet.get("head");
        headers.add("count");

        for (Map<String, Object> row : rows) {
            String url = String.valueOf(row.get("链接"));
            int count = Youku.getYoukuHotCount(url, ProxyHolder.NAT_PROXY);
            System.out.println(url + " -- " + count);
            row.put("count", count);
        }

        excel.exportExcel(path, "data", headers, rows);
    }
}
src/test/java/com/zhiwei/TestHttpBoot.java
View file @
cb5516a0
package
com
.
zhiwei
;
//
package com.zhiwei;
//
import
java.io.IOException
;
//
import java.io.IOException;
import
java.util.HashMap
;
//
import java.util.HashMap;
import
java.util.Map
;
//
import java.util.Map;
//
import
java.util.HashMap
;
//
import java.util.HashMap;
//
import
org.testng.annotations.Test
;
//
import org.testng.annotations.Test;
//
import
com.zhiwei.crawler.core.HttpBoot
;
//
import com.zhiwei.crawler.core.HttpBoot;
import
com.zhiwei.crawler.core.RequestUtils
;
//
import com.zhiwei.crawler.core.RequestUtils;
//
public
class
TestHttpBoot
{
//
public class TestHttpBoot {
@Test
//
@Test
public
void
f
()
{
//
public void f() {
HttpBoot
httpBoot
=
new
HttpBoot
();
//
HttpBoot httpBoot = new HttpBoot();
String
url
=
"https://www.toutiao.com/c/user/following/?user_id=1034006366&count=20&_signature=wp5wPBAVmXlosTC8Fobui8KecC"
;
//
String url = "https://www.toutiao.com/c/user/following/?user_id=1034006366&count=20&_signature=wp5wPBAVmXlosTC8Fobui8KecC";
Map
<
String
,
Object
>
headers
=
new
HashMap
<>();
//
Map<String,Object> headers = new HashMap<>();
headers
.
put
(
"referer"
,
"https://www.qctt.cn/news/349056"
);
//
headers.put("referer", "https://www.qctt.cn/news/349056");
headers
.
put
(
"cookie"
,
"PHPSESSID=3rd6bvonb4g15t1fp777mjums0; Hm_lvt_70af9ea91e7adc8195f6d49511b9a2f1=1542253722; open_ad=1; Hm_lpvt_70af9ea91e7adc8195f6d49511b9a2f1=1542271394; vcode=sqmm; XSRF-TOKEN=eyJpdiI6IlFTNzkyYWNcLzB2SUwyN2dcL1hhUlpsZz09IiwidmFsdWUiOiJRSUpycjZJNGx3d1hUWkpOQUl1R2psSStuVU0yYW8xT1YxXC9QOFY1NjdyRXNrMWpFVE1kSm9IQ1o5Nm5keXlMTEFnZXdCOHVvWDg0U2picTE1cjZzMkE9PSIsIm1hYyI6IjZlYzk5NDI3ODEzMzA3ZTJjNDc3M2ZjMjBlNDJhZjc2YjU2ODFmYmY3YWRlMzdlMzM1NTBlNWMxNDk3MjFiZDEifQ%3D%3D; laravel_session=eyJpdiI6InJQMnByeFlIbXVhaUVVVVBLK1wvaXlRPT0iLCJ2YWx1ZSI6IlhUOUtIS2ZQZ0ZKNFh1RDVQYjBjSVZkVkpQZTdYRDNpa1wvV0o5QlJPbk8xZE0rQ3dZdnFMdjcya011ejVkdWEwUk1Qa29Zb2Y3OU0yUGkrWDF4Wk5adz09IiwibWFjIjoiZGJiYjlkNWZhNmJhMDFiMjkxYTAyMmUwZTEyMWVmZTQ0NmJiZDQ2ZGU3ZjNjNmUzNTIwZGI0NTc4NDJlZjNiMCJ9"
);
//
headers.put("cookie", "PHPSESSID=3rd6bvonb4g15t1fp777mjums0; Hm_lvt_70af9ea91e7adc8195f6d49511b9a2f1=1542253722; open_ad=1; Hm_lpvt_70af9ea91e7adc8195f6d49511b9a2f1=1542271394; vcode=sqmm; XSRF-TOKEN=eyJpdiI6IlFTNzkyYWNcLzB2SUwyN2dcL1hhUlpsZz09IiwidmFsdWUiOiJRSUpycjZJNGx3d1hUWkpOQUl1R2psSStuVU0yYW8xT1YxXC9QOFY1NjdyRXNrMWpFVE1kSm9IQ1o5Nm5keXlMTEFnZXdCOHVvWDg0U2picTE1cjZzMkE9PSIsIm1hYyI6IjZlYzk5NDI3ODEzMzA3ZTJjNDc3M2ZjMjBlNDJhZjc2YjU2ODFmYmY3YWRlMzdlMzM1NTBlNWMxNDk3MjFiZDEifQ%3D%3D; laravel_session=eyJpdiI6InJQMnByeFlIbXVhaUVVVVBLK1wvaXlRPT0iLCJ2YWx1ZSI6IlhUOUtIS2ZQZ0ZKNFh1RDVQYjBjSVZkVkpQZTdYRDNpa1wvV0o5QlJPbk8xZE0rQ3dZdnFMdjcya011ejVkdWEwUk1Qa29Zb2Y3OU0yUGkrWDF4Wk5adz09IiwibWFjIjoiZGJiYjlkNWZhNmJhMDFiMjkxYTAyMmUwZTEyMWVmZTQ0NmJiZDQ2ZGU3ZjNjNmUzNTIwZGI0NTc4NDJlZjNiMCJ9");
headers
.
put
(
"origin"
,
"https://www.qctt.cn"
);
//
headers.put("origin", "https://www.qctt.cn");
Map
<
String
,
Object
>
params
=
new
HashMap
<>();
//
Map<String,Object> params = new HashMap<>();
params
.
put
(
"id"
,
"349056"
);
//
params.put("id", "349056");
params
.
put
(
"page"
,
"3"
);
//
params.put("page", "3");
params
.
put
(
"_token"
,
"EJ58V0qilRw7P77czp0U6iO9QW2IOS1ZGiBk4wH1"
);
//
params.put("_token", "EJ58V0qilRw7P77czp0U6iO9QW2IOS1ZGiBk4wH1");
try
{
//
try {
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
)).
body
().
string
();
//
String result = httpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
System
.
out
.
println
(
result
);
//
System.out.println(result);
//
}
catch
(
IOException
e
)
{
//
} catch (IOException e) {
// TODO Auto-generated catch block
//
// TODO Auto-generated catch block
e
.
printStackTrace
();
//
e.printStackTrace();
}
//
}
//
//
}
//
}
}
//
}
src/test/java/com/zhiwei/crawler/AiqiyiByWordExample.java
deleted
100644 → 0
View file @
2a35dd02
//package com.zhiwei.crawler;
//
//import java.util.ArrayList;
//import java.util.List;
//import java.util.Map;
//
//import org.junit.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Aiqiyi;
//import com.zhiwei.util.WordReadFile;
//
//public class AiqiyiByWordExample {
//
//
// @Test
// public void aiqiyiByWordTest() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
// List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
// List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
// for(String w : wordList) {
// List<Map<String,Object>> dataList = Aiqiyi.getAiqiyiByWordData(w,null);
// if(dataList != null && dataList.size() >= 1) {
// bodyList.addAll(dataList);
// }
// }
// List<String> headList = new ArrayList<String>();
// headList.add("count");
// headList.add("time");
// headList.add("source");
// headList.add("content");
// headList.add("url");
// headList.add("title");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel("D://crawlerdata/爱奇艺关键词采集.xlsx", "数据", headList, bodyList);
//
//
//
// }
//
//
//
//}
src/test/java/com/zhiwei/crawler/MaimaiBywordExample.java
View file @
cb5516a0
package
com
.
zhiwei
.
crawler
;
//
package com.zhiwei.crawler;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.Arrays
;
//
import java.util.Arrays;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Maimai
;
//
import com.zhiwei.parse.Maimai;
//
public
class
MaimaiBywordExample
{
//
public class MaimaiBywordExample {
//
public
static
void
main
(
String
[]
args
)
{
//
public static void main(String[] args) {
String
word
=
"美团|某团|MT|大众点评|新美大|美团点评"
;
//
String word = "美团|某团|MT|大众点评|新美大|美团点评";
String
cookie
=
"_buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; sessionid=lejfy3gdu5tf9x9zowxfhtq5o73dubc5; guid=GxsfBBgZGwQYGx4EGBkeVgcYGxkdHhMeHhkcVhwZBB0ZHwVDWEtMS3kKEhMEEh0fGQQaBBsdBU9HRVhCaQoDRUFJT20KT0FDRgoGZmd+YmECChwZBB0ZHwVeQ2FIT31PRlpaawoDHhx9ZX0KERkEHAp+ZApZXUVOREN9AgoaBB8FS0ZGQ1BFZw==; seid=s1550814253444; token=\"G8eNNNylPoi3oIPLUr/d/RDaMgtnpZCskxT7wu1pRRrkiy3J8G7StHgTx9DQBq4O8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiVjJuNHdCVDBncVNacTRxVllGM29jRUVwIiwic3RhdHVzIjp0cnVlLCJfZXhwaXJlIjoxNTUwOTAwNjY1Njg4LCJfbWF4QWdlIjo4NjQwMDAwMH0=; session.sig=b_tga85tZskxsgKX8YIM_JKByi0"
;
//
String cookie = "_buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; sessionid=lejfy3gdu5tf9x9zowxfhtq5o73dubc5; guid=GxsfBBgZGwQYGx4EGBkeVgcYGxkdHhMeHhkcVhwZBB0ZHwVDWEtMS3kKEhMEEh0fGQQaBBsdBU9HRVhCaQoDRUFJT20KT0FDRgoGZmd+YmECChwZBB0ZHwVeQ2FIT31PRlpaawoDHhx9ZX0KERkEHAp+ZApZXUVOREN9AgoaBB8FS0ZGQ1BFZw==; seid=s1550814253444; token=\"G8eNNNylPoi3oIPLUr/d/RDaMgtnpZCskxT7wu1pRRrkiy3J8G7StHgTx9DQBq4O8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"aa0Zx+VbwC41ceG8bxvIefAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTczMzMzNTM2Iiwic2VjcmV0IjoiVjJuNHdCVDBncVNacTRxVllGM29jRUVwIiwic3RhdHVzIjp0cnVlLCJfZXhwaXJlIjoxNTUwOTAwNjY1Njg4LCJfbWF4QWdlIjo4NjQwMDAwMH0=; session.sig=b_tga85tZskxsgKX8YIM_JKByi0";
String
time
=
"2019-02-15 00:00:00"
;
//
String time = "2019-02-15 00:00:00";
String
[]
words
=
word
.
split
(
"\\|"
);
//
String[] words = word.split("\\|");
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
//
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for
(
String
w
:
words
)
{
//
for(String w : words) {
//实名动态
//
//实名动态
// List<Map<String,Object>> c = Maimai.getData(w, cookie, time, null);
//
//
List<Map<String,Object>> c = Maimai.getData(w, cookie, time, null);
//职言交流
//
//职言交流
List
<
Map
<
String
,
Object
>>
c2
=
Maimai
.
getDataByNoName
(
w
,
cookie
,
time
,
null
);
//
List<Map<String,Object>> c2 = Maimai.getDataByNoName(w, cookie, time, null);
// bodyList.addAll(c);
//
//
bodyList.addAll(c);
bodyList
.
addAll
(
c2
);
//
bodyList.addAll(c2);
}
//
}
List
<
String
>
headList
=
Arrays
.
asList
(
"time"
,
"url"
,
"text"
,
"name"
,
"like"
,
"comment_count"
,
"spreads"
,
"word"
);
//
List<String> headList = Arrays.asList("time","url","text","name","like","comment_count","spreads","word");
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\脉脉关键词采集-美团-0222.xlsx"
,
"脉脉关键词"
,
headList
,
bodyList
);
//
poi.exportExcel("D:\\crawlerdata\\自媒体\\脉脉关键词采集-美团-0222.xlsx", "脉脉关键词", headList, bodyList);
}
//
}
//
}
//
}
src/test/java/com/zhiwei/crawler/QQAccountExample.java
View file @
cb5516a0
...
@@ -24,7 +24,8 @@ public class QQAccountExample {
...
@@ -24,7 +24,8 @@ public class QQAccountExample {
String
child
=
map
.
get
(
"帐号链接"
)+
""
;
String
child
=
map
.
get
(
"帐号链接"
)+
""
;
// System.out.println(child.split("chlid=")[1]);
// System.out.println(child.split("chlid=")[1]);
System
.
out
.
println
(
child
.
split
(
"="
)[
1
]);
System
.
out
.
println
(
child
.
split
(
"="
)[
1
]);
List
<
Map
<
String
,
Object
>>
lists
=
QQKB
.
getQQAccountData
(
child
.
split
(
"="
)[
1
],
cookie
,
null
);
List
<
Map
<
String
,
Object
>>
lists
=
QQKB
.
getQQAccountData
(
"5001789"
,
cookie
,
null
);
if
(
lists
!=
null
)
{
if
(
lists
!=
null
)
{
for
(
Map
<
String
,
Object
>
map1
:
lists
)
{
for
(
Map
<
String
,
Object
>
map1
:
lists
)
{
map1
.
put
(
"name"
,
map
.
get
(
"呢称"
));
map1
.
put
(
"name"
,
map
.
get
(
"呢称"
));
...
...
src/test/java/com/zhiwei/crawler/QQKBCommentCountExample.java
View file @
cb5516a0
...
@@ -9,10 +9,10 @@ public class QQKBCommentCountExample {
...
@@ -9,10 +9,10 @@ public class QQKBCommentCountExample {
@Test
@Test
public
void
qqkbCommentCountTest
()
{
public
void
qqkbCommentCountTest
()
{
String
cookie
=
"
phone_id=;%20phone_token=;%20luin=o0497332654;%20lskey=0003000049dd058f533cbebb240223ede63b864224f7eebe0f4aeca6a623572bb290a5800741d191a5768bb0;%20uin=o0497332654;%20skey=MIZmc2Oel3;%20sigA2=4282ABA809551D3534C72F999EE8F2A75219ED9452DEF04E4CBCE6B680C2C893C3E1BA617F5E0F387E558888B2ABEDFE87A4A25B16F9066C1154B2BC7A1133CA7B356AB9D3BA26ED;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwgGT4n96Oq-jHALnMUe8UzpoJghQDouvfSSWdh-JOdgAm3jRJUPbux6fcIPghoNxo24xdED8ennAANksJuHiwdw;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0
"
;
String
cookie
=
""
;
String
url
=
"https://
tech.sina.cn/i/gn/2018-04-26/detail-ifztkpin4282154.d.html?pos=18
"
;
String
url
=
"https://
kuaibao.qq.com/s/20190305A16P6L00
"
;
int
i
=
QQKB
.
getCommentCount
(
cookie
,
url
,
null
);
int
i
=
QQKB
.
getCommentCount
(
url
,
null
);
System
.
out
.
println
(
i
);
System
.
out
.
println
(
i
);
}
}
...
...
src/test/java/com/zhiwei/crawler/SouhuCommentCountExample.java
View file @
cb5516a0
...
@@ -28,8 +28,11 @@ public class SouhuCommentCountExample {
...
@@ -28,8 +28,11 @@ public class SouhuCommentCountExample {
try
{
try
{
url
=
map1
.
get
(
"url"
)+
""
;
url
=
map1
.
get
(
"url"
)+
""
;
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
url
=
"http://m.sohu.com/a/299389309_114988"
;
int
i
=
Souhu
.
getSouhuCommentCount
(
url
,
ProxyHolder
.
NAT_PROXY
);
int
i
=
Souhu
.
getSouhuCommentCount
(
url
,
ProxyHolder
.
NAT_PROXY
);
int
j
=
Souhu
.
getSohuReadNum
(
url
,
ProxyHolder
.
NAT_PROXY
);
map1
.
put
(
"count"
,
i
);
map1
.
put
(
"count"
,
i
);
map1
.
put
(
"redNum"
,
j
);
System
.
out
.
println
(
map1
.
toString
());
System
.
out
.
println
(
map1
.
toString
());
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
...
...
src/test/java/com/zhiwei/crawler/WangyiCommentCountExample.java
View file @
cb5516a0
...
@@ -27,9 +27,11 @@ public class WangyiCommentCountExample {
...
@@ -27,9 +27,11 @@ public class WangyiCommentCountExample {
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
for
(
String
url
:
urlList
)
{
for
(
String
url
:
urlList
)
{
url
=
"https://3g.163.com/all/article/E9GAO0PK051188EC.html"
;
String
id
=
url
.
split
(
"/"
)[
url
.
split
(
"/"
).
length
-
1
].
split
(
".ht"
)[
0
];
String
id
=
url
.
split
(
"/"
)[
url
.
split
(
"/"
).
length
-
1
].
split
(
".ht"
)[
0
];
System
.
out
.
println
(
id
);
System
.
out
.
println
(
id
);
int
lists
=
Wangyi
.
getWangyiCommentCount
(
id
,
null
);
int
lists
=
Wangyi
.
getWangyiCommentCount
(
id
,
null
);
System
.
out
.
println
(
lists
);
ZhiWeiTools
.
sleep
(
3000
);
ZhiWeiTools
.
sleep
(
3000
);
}
}
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
...
...
src/test/java/com/zhiwei/crawler/YidianzixunAccountExample.java
View file @
cb5516a0
...
@@ -14,9 +14,9 @@ public class YidianzixunAccountExample {
...
@@ -14,9 +14,9 @@ public class YidianzixunAccountExample {
@Test
@Test
public
void
yidianzixunAccountTest
()
{
public
void
yidianzixunAccountTest
()
{
String
channelid
=
"m
13369
5"
;
String
channelid
=
"m
2331
5"
;
String
startTime
=
"20
1
7-01-01 00:00:00"
;
String
startTime
=
"20
0
7-01-01 00:00:00"
;
String
cookie
=
"wuid=
257912989774746; wuid_createAt=2018-04-21 12:26:54; UM_distinctid=162e674783dc4e-030ed894a4953b-4446042d-1fa400-162e674783e34a; JSESSIONID=8ee0cee7a49e812492917a669074974b9a004e7b28ed41bc99e96793df734961; weather_auth=2; Hm_lvt_15fafbae2b9b11d280c79eff3b840e45=1527148836,1527213305,1527752112; CNZZDATA1255169715=542587606-1524284730-null%7C1527749514; sptoken=Ug%3B99%3C3%3FU%3AU%3B%3AU48261efeced332cc9f20413132c69381bcc921bb210c93b90058b318eec23117; captcha=s%3A7c9d6bca395d270e3a4774968531f470.e1IzHNmf94UVpZlGYHYmDUnUk6sA1s7sPYj7RA932lo; Hm_lpvt_15fafbae2b9b11d280c79eff3b840e45=1527752125; cn_1255169715_dplus=%7B%22distinct_id%22%3A%20%22162e674783dc4e-030ed894a4953b-4446042d-1fa400-162e674783e34a%22%2C%22sp%22%3A%20%7B%22%24_sessionid%22%3A%200%2C%22%24_sessionTime%22%3A%201527752148%2C%22%24dp%22%3A%200%2C%22%24_sessionPVTime%22%3A%201527752148%7D
%7D"
;
String
cookie
=
"wuid=
90742539356820; wuid_createAt=2019-01-10 11:45:41; UM_distinctid=16835dd9ba11cb-0ef8d17063d93f-671b197c-1fa400-16835dd9ba2243; JSESSIONID=174b8df350cb5400283abedf2c26076357b0b7af0581024f2e39e90532b4edc9; weather_auth=2; DID=node82eee6d174caf2d4; Hm_lvt_15fafbae2b9b11d280c79eff3b840e45=1551686450,1551686458; CNZZDATA1255169715=931563543-1547087800-%7C1551761063; captcha=s%3A6e56492ffceaf88d9f131fa79435464a.TLAhZ1cfwj0vBTjKTO9Qf5qc6QLuipitrEMZjiqm8BM; Hm_lpvt_15fafbae2b9b11d280c79eff3b840e45=1551764582; cn_1255169715_dplus=%7B%22distinct_id%22%3A%20%2216835dd9ba11cb-0ef8d17063d93f-671b197c-1fa400-16835dd9ba2243%22%2C%22sp%22%3A%20%7B%22%24_sessionid%22%3A%200%2C%22%24_sessionTime%22%3A%201547544080%2C%22%24dp%22%3A%200%2C%22%24_sessionPVTime%22%3A%201547544080%7D%2C%22%24_sessionid%22%3A%200%2C%22%24_sessionTime%22%3A%201551765057%2C%22%24dp%22%3A%200%2C%22%24_sessionPVTime%22%3A%201551765057
%7D"
;
List
<
Map
<
String
,
Object
>>
dataList
=
Yidianzixun
.
getYidianzixunAccountData
(
channelid
,
startTime
,
null
,
cookie
);
List
<
Map
<
String
,
Object
>>
dataList
=
Yidianzixun
.
getYidianzixunAccountData
(
channelid
,
startTime
,
null
,
cookie
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
...
@@ -27,7 +27,7 @@ public class YidianzixunAccountExample {
...
@@ -27,7 +27,7 @@ public class YidianzixunAccountExample {
headList
.
add
(
"source"
);
headList
.
add
(
"source"
);
headList
.
add
(
"url"
);
headList
.
add
(
"url"
);
headList
.
add
(
"summary"
);
headList
.
add
(
"summary"
);
poi
.
exportExcel
(
"D://crawlerdata/一点资讯-
虎嗅
.xlsx"
,
"虎嗅"
,
headList
,
dataList
);
poi
.
exportExcel
(
"D://crawlerdata/一点资讯-
m23315
.xlsx"
,
"虎嗅"
,
headList
,
dataList
);
}
}
...
...
src/test/java/com/zhiwei/hsitory/TxNewsTest.java
0 → 100644
View file @
cb5516a0
//package com.zhiwei.hsitory;
//
//import java.util.ArrayList;
//import java.util.Arrays;
//import java.util.List;
//import java.util.Map;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.crawler.proxy.ProxyHolder;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.TXNews;
//
//public class TxNewsTest {
// @Test
// public void txNewsTest() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
//
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// Map<String,Object> map = poi.importExcel("C:\\Users\\byte-zbs\\Desktop\\腾讯.xlsx", 0);
// List<Map<String,Object>> lists = (List<Map<String, Object>>) map.get("body");
// List<Map<String,Object>> bodyList = new ArrayList<>();
// lists.forEach(m -> {
// String url = String.valueOf(m.get("url"));
// System.out.println(url + "start");
// url = url.split("\\?")[0];
// String coralUin = url.split("/")[5];
// String coralUid = url.split("/")[4];
// List<Map<String,Object>> dataList = TXNews.getTxNewsComments(coralUin, coralUid, ProxyHolder.NAT_PROXY);
// bodyList.addAll(dataList);
// System.out.println(url + " end " + dataList.size());
// });
//
// List<String> headList = Arrays.asList("name","replayUrl","content","time","replayNum","agreeNum");
// poi.exportExcel("C:\\Users\\byte-zbs\\Desktop\\腾讯.xlsx", "result", headList, bodyList);
// }
//}
src/test/java/com/zhiwei/keyword/SinaTousuTest.java
View file @
cb5516a0
...
@@ -6,6 +6,9 @@
...
@@ -6,6 +6,9 @@
//
//
//import org.testng.annotations.Test;
//import org.testng.annotations.Test;
//
//
//import com.zhiwei.common.config.GroupType;
//import com.zhiwei.crawler.proxy.ProxyFactory;
//import com.zhiwei.crawler.proxy.ProxyHolder;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.KuaiTousu;
//import com.zhiwei.parse.KuaiTousu;
//import com.zhiwei.parse.SinaTousu;
//import com.zhiwei.parse.SinaTousu;
...
@@ -14,12 +17,13 @@
...
@@ -14,12 +17,13 @@
//
//
// @Test
// @Test
// public void getSinaTousuData() {
// public void getSinaTousuData() {
// ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// String words = "
花呗|借呗|京东白条|京东金条|京东金融
";
// String words = "
美团|三快
";
// String[] ws = words.split("\\|");
// String[] ws = words.split("\\|");
// List<Map<String,Object>> bodyList = new ArrayList<>();
// List<Map<String,Object>> bodyList = new ArrayList<>();
// for(String word : ws) {
// for(String word : ws) {
// List<Map<String,Object>> list = SinaTousu.getSinaTousuData(word,
null, "2018-07-01 00:00:00"
);
// List<Map<String,Object>> list = SinaTousu.getSinaTousuData(word,
ProxyHolder.NAT_PROXY, null
);
// bodyList.addAll(list);
// bodyList.addAll(list);
// System.out.println(word + " --------- " + bodyList.size());
// System.out.println(word + " --------- " + bodyList.size());
// }
// }
...
@@ -30,7 +34,7 @@
...
@@ -30,7 +34,7 @@
// headList.add("source");
// headList.add("source");
// headList.add("url");
// headList.add("url");
//
//
// poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\黑猫投诉-美团-
3
.xlsx", "数据", headList, bodyList);
// poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\黑猫投诉-美团-
2
.xlsx", "数据", headList, bodyList);
//
//
//
//
//
//
...
...
src/test/java/com/zhiwei/shipin/AiqiyiTest.java
0 → 100644
View file @
cb5516a0
package
com
.
zhiwei
.
shipin
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Map
;
import
org.testng.annotations.Test
;
import
com.zhiwei.common.config.GroupType
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.Aiqiyi
;
import
com.zhiwei.util.WordReadFile
;
/**
 * Manual test that collects Aiqiyi keyword-search data and writes it to an Excel file.
 */
public class AiqiyiTest {

    /**
     * Reads the keyword list from disk, queries {@code Aiqiyi.getAiqiyiByWordData} once per
     * keyword through {@code ProxyHolder.NAT_PROXY}, and exports every non-empty result
     * into a single workbook.
     */
    @Test
    public void aiqiyiTest() {
        // Initialise the proxy factory against the ZooKeeper registry before crawling.
        ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);

        List<Map<String, Object>> rows = new ArrayList<>();
        for (String keyword : WordReadFile.getWords("D://crawlerdata//关键词.txt")) {
            List<Map<String, Object>> hits = Aiqiyi.getAiqiyiByWordData(keyword, ProxyHolder.NAT_PROXY);
            // Skip keywords that produced no result list or an empty one.
            if (hits == null || hits.isEmpty()) {
                continue;
            }
            rows.addAll(hits);
        }

        // Column keys passed to the exporter, in the same order as before.
        List<String> columns = new ArrayList<>();
        for (String column : new String[] {"time", "source", "content", "url", "title", "word"}) {
            columns.add(column);
        }

        PoiExcelUtil exporter = PoiExcelUtil.getInstance();
        exporter.exportExcel("D://crawlerdata/爱奇艺关键词采集-txh-0320.xlsx", "数据", columns, rows);
    }
}
src/test/java/com/zhiwei/shipin/BilibiliTest.java
View file @
cb5516a0
...
@@ -13,7 +13,7 @@ import com.zhiwei.util.WordReadFile;
...
@@ -13,7 +13,7 @@ import com.zhiwei.util.WordReadFile;
public
class
BilibiliTest
{
public
class
BilibiliTest
{
@Test
@Test
public
void
f
()
{
public
void
f
()
{
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词
-2
.txt"
);
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
String
cookie
=
"LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274"
;
String
cookie
=
"LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274"
;
for
(
String
word
:
wordList
)
{
for
(
String
word
:
wordList
)
{
...
@@ -31,8 +31,9 @@ public class BilibiliTest {
...
@@ -31,8 +31,9 @@ public class BilibiliTest {
headlist
.
add
(
"source"
);
headlist
.
add
(
"source"
);
headlist
.
add
(
"title"
);
headlist
.
add
(
"title"
);
headlist
.
add
(
"url"
);
headlist
.
add
(
"url"
);
headlist
.
add
(
"word"
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
poi
.
exportExcel
(
"D://crawlerdata//bilibili关键词采集数据-txh-0
219-农药
.xlsx"
,
"B站数据"
,
headlist
,
bodyList
);
poi
.
exportExcel
(
"D://crawlerdata//bilibili关键词采集数据-txh-0
320
.xlsx"
,
"B站数据"
,
headlist
,
bodyList
);
}
}
}
}
src/test/java/com/zhiwei/shipin/QQTVTest.java
View file @
cb5516a0
...
@@ -18,7 +18,7 @@ public class QQTVTest {
...
@@ -18,7 +18,7 @@ public class QQTVTest {
@Test
@Test
public
void
f
()
{
public
void
f
()
{
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
String
time
=
"
1970-07
-01 00:00:00"
;
String
time
=
"
2018-01
-01 00:00:00"
;
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
for
(
String
word
:
wordList
)
{
for
(
String
word
:
wordList
)
{
...
@@ -35,8 +35,9 @@ public class QQTVTest {
...
@@ -35,8 +35,9 @@ public class QQTVTest {
headlist
.
add
(
"source"
);
headlist
.
add
(
"source"
);
headlist
.
add
(
"title"
);
headlist
.
add
(
"title"
);
headlist
.
add
(
"url"
);
headlist
.
add
(
"url"
);
headlist
.
add
(
"word"
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
poi
.
exportExcel
(
"D://crawlerdata//腾讯视频关键词采集数据-txh-0
130.xlsx"
,
"B站
数据"
,
headlist
,
bodyList
);
poi
.
exportExcel
(
"D://crawlerdata//腾讯视频关键词采集数据-txh-0
320.xlsx"
,
"腾讯视频
数据"
,
headlist
,
bodyList
);
...
...
src/test/java/com/zhiwei/shipin/SohuTVTest.java
View file @
cb5516a0
//package com.zhiwei.shipin;
package
com
.
zhiwei
.
shipin
;
//
//import java.util.ArrayList;
import
java.util.ArrayList
;
//import java.util.List;
import
java.util.List
;
//import java.util.Map;
import
java.util.Map
;
//
//import org.testng.annotations.Test;
import
org.testng.annotations.Test
;
//
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//import com.zhiwei.parse.shipin.SohuTV;
import
com.zhiwei.parse.shipin.SohuTV
;
//import com.zhiwei.tools.tools.ZhiWeiTools;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
//import com.zhiwei.util.WordReadFile;
import
com.zhiwei.util.WordReadFile
;
//
//public class SohuTVTest {
/**
 * Manual test that collects SohuTV keyword-search data and writes it to an Excel file.
 */
public class SohuTVTest {

    /**
     * Queries {@code SohuTV.sohuTVData} once per keyword from the keyword file, pausing via
     * {@code ZhiWeiTools.sleep(1000)} after each request, then exports all collected rows.
     */
    @Test
    public void f() {
        // Browser session cookie sent with every search request.
        String cookie = "SUV=1901101134139015; IPLOC=CN3301; gidinf=x099980109ee0f08567b42835000336ade2ef3762611; fuid=15474616189304048886; newpuid=15474616191372936893; beans_mz_userid=UBThg01XRPg8; pmai=dad35c1c318bdd22; ifoxinstalled=false; beans_freq=1; beans_dmp=%7B%22admaster%22%3A1547461620%2C%22shunfei%22%3A1547461620%2C%22reachmax%22%3A1548816807%2C%22lingji%22%3A1547461620%2C%22yoyi%22%3A1547461620%2C%22ipinyou%22%3A1547461620%2C%22ipinyou_admaster%22%3A1547461620%2C%22miaozhen%22%3A1548816807%2C%22diantong%22%3A1547461620%2C%22huayang%22%3A1547461620%7D; beans_dmp_done=1; reqtype=pc; sokey=%5B%7B%22key%22%3A%22%E7%BE%8E%E5%9B%A2%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%20%E4%BA%8C%E5%99%81%E8%8B%B1%22%7D%5D; t=1548817812321";

        List<Map<String, Object>> rows = new ArrayList<>();
        for (String keyword : WordReadFile.getWords("D://crawlerdata//关键词.txt")) {
            List<Map<String, Object>> hits = SohuTV.sohuTVData(keyword, cookie, null);
            if (hits != null) {
                System.out.println(keyword + " ----- " + hits.size());
                rows.addAll(hits);
            }
            // The pause runs after every keyword, whether or not it returned data.
            ZhiWeiTools.sleep(1000);
        }

        // Column keys passed to the exporter, in the same order as before.
        List<String> columns = new ArrayList<>();
        for (String column : new String[] {"playCount", "time", "source", "title", "url", "word"}) {
            columns.add(column);
        }

        PoiExcelUtil exporter = PoiExcelUtil.getInstance();
        exporter.exportExcel("D://crawlerdata//搜狐视频关键词采集数据-txh-0320.xlsx", "搜狐数据", columns, rows);
    }
}
src/test/java/com/zhiwei/shipin/YoukuKeyWordTest.java
View file @
cb5516a0
//package com.zhiwei.shipin;
package
com
.
zhiwei
.
shipin
;
//
//import java.util.ArrayList;
import
java.util.ArrayList
;
//import java.util.List;
import
java.util.List
;
//import java.util.Map;
import
java.util.Map
;
//
//import org.testng.annotations.Test;
import
org.testng.annotations.Test
;
//
//import com.zhiwei.common.config.GroupType;
import
com.zhiwei.common.config.GroupType
;
//import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//import com.zhiwei.parse.Youku;
import
com.zhiwei.parse.Youku
;
//import com.zhiwei.util.WordReadFile;
import
com.zhiwei.util.WordReadFile
;
//
//public class YoukuKeyWordTest {
/**
 * Manual test that collects Youku keyword-search data and writes it to an Excel file.
 */
public class YoukuKeyWordTest {

    /**
     * Queries {@code Youku.getDataList} once per keyword from the keyword file and exports
     * all collected rows into a single workbook.
     */
    @Test
    public void f() {
        // Initialise the proxy factory against the ZooKeeper registry before crawling.
        ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);

        List<String> words = WordReadFile.getWords("D://crawlerdata//关键词.txt");
        List<Map<String, Object>> bodyList = new ArrayList<>();
        for (String w : words) {
            System.out.println(w);
            List<Map<String, Object>> dataList = Youku.getDataList(w);
            // addAll(null) throws NullPointerException and would abort the whole run;
            // skip keywords without a result list, as the sibling keyword tests do.
            if (dataList != null) {
                bodyList.addAll(dataList);
            }
        }

        // Column keys passed to the exporter.
        List<String> headList = new ArrayList<>();
        headList.add("title");
        headList.add("time");
        headList.add("url");
        headList.add("uper");
        headList.add("word");

        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        poi.exportExcel("D://crawlerdata//优酷数据-txh-0320.xlsx", "数据", headList, bodyList);
    }
}
src/test/java/com/zhiwei/user/MaimaiTest.java
0 → 100644
View file @
cb5516a0
//package com.zhiwei.user;
//
//import java.util.ArrayList;
//import java.util.Arrays;
//import java.util.List;
//import java.util.Map;
//
//import org.testng.annotations.Test;
//
//import com.zhiwei.excelpoi.excel.PoiExcelUtil;
//import com.zhiwei.parse.Maimai;
//
//public class MaimaiTest {
// @Test
// public void maimaiUserCrawler() {
// String path = "D:\\crawlerdata\\脉脉用户.xlsx";
// String word = "美团|美团网|大众点评|美团点评|摩拜|猫眼|榛果|三快科技|三快在线";
// String cookie = "_buuid=ba30f54f-57ed-4dd4-af5f-31cb08d2eacf; sessionid=lejfy3gdu5tf9x9zowxfhtq5o73dubc5; guid=GxsfBBgZGwQYGx4EGBkeVgcYGxkdHhMeHhkcVhwZBB0ZHwVDWEtMS3kKEhMEEh0fGQQaBBsdBU9HRVhCaQoDRUFJT20KT0FDRgoGZmd+YmECChwZBB0ZHwVeQ2FIT31PRlpaawoDHhx9ZX0KERkEHAp+ZApZXUVOREN9AgoaBB8FS0ZGQ1BFZw==; seid=s1550629286782; token=\"OCY36EFdeYzGytlQFyKRdM0DcXNdViYI02kT4QbUMpaSk/CqMXrqBOx8EFo5/fQU8CKuzcDfAvoCmBm7+jVysA==\"; uid=\"q1bNxxk8WW3MzjbCfKr/hfAirs3A3wL6ApgZu/o1crA=\"; session=eyJ1IjoiMTc2NjQ0NzY1Iiwic2VjcmV0IjoiLXFsV2c2Ym9feEJqOWxQbWdWTjcwWWg3Iiwic3RhdHVzIjp0cnVlLCJtaWQ0NTY4NzYwIjpmYWxzZSwiX2V4cGlyZSI6MTU1MDcxNTc2NzgwMSwiX21heEFnZSI6ODY0MDAwMDB9; session.sig=lVCTA7DLvo1K_r_bTjbQOH13Alc";
// String[] words = word.split("\\|");
// List<Map<String,Object>> bodyList = new ArrayList<>();
// for(String w : words) {
// bodyList.addAll(Maimai.getUserList(w, cookie, null));
// }
// List<String> headList = Arrays.asList("id","name","gender","url","rank","compos","city");
// PoiExcelUtil poi = PoiExcelUtil.getInstance();
// poi.exportExcel(path, "result", headList, bodyList);
// }
//}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment