Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
articlenewscrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
articlenewscrawler
Commits
3e350f8b
Commit
3e350f8b
authored
Aug 05, 2019
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改部分代理使用方式 并升级版本
parent
096c4f21
Show whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
408 additions
and
412 deletions
+408
-412
pom.xml
+1
-1
src/main/java/com/zhiwei/parse/Baijia.java
+9
-10
src/main/java/com/zhiwei/parse/Douban.java
+3
-3
src/main/java/com/zhiwei/parse/Maimai.java
+3
-3
src/main/java/com/zhiwei/parse/QQKB.java
+1
-2
src/main/java/com/zhiwei/parse/TXNews.java
+1
-2
src/main/java/com/zhiwei/parse/Wangyi.java
+1
-1
src/main/java/com/zhiwei/parse/analysis/Ts21cnAnalysis.java
+0
-1
src/test/java/com/zhiwei/crawler/DayuAccountExample.java
+54
-54
src/test/java/com/zhiwei/crawler/YidainzixunByWordExample.java
+44
-44
src/test/java/com/zhiwei/hsitory/YidianzixunAccountExample.java
+37
-37
src/test/java/com/zhiwei/keyword/XueqiuKeyWord.java
+47
-47
src/test/java/com/zhiwei/shipin/AiqiyiTest.java
+44
-44
src/test/java/com/zhiwei/shipin/BilibiliTest.java
+42
-42
src/test/java/com/zhiwei/shipin/QQTVTest.java
+46
-46
src/test/java/com/zhiwei/shipin/SohuTVTest.java
+39
-39
src/test/java/com/zhiwei/shipin/YoukuKeyWordTest.java
+36
-36
No files found.
pom.xml
View file @
3e350f8b
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
<modelVersion>
4.0.0
</modelVersion>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
<artifactId>
articlenewscrawler
</artifactId>
<artifactId>
articlenewscrawler
</artifactId>
<version>
0.1.
6
-SNAPSHOT
</version>
<version>
0.1.
7
-SNAPSHOT
</version>
<name>
articlenewscrawler
</name>
<name>
articlenewscrawler
</name>
<description>
采集凤凰,一点资讯,搜狐历时文章和文章评论
</description>
<description>
采集凤凰,一点资讯,搜狐历时文章和文章评论
</description>
...
...
src/main/java/com/zhiwei/parse/Baijia.java
View file @
3e350f8b
...
@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory;
...
@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
...
@@ -21,8 +22,6 @@ import com.zhiwei.parse.analysis.BaijiaAccountAnalysis;
...
@@ -21,8 +22,6 @@ import com.zhiwei.parse.analysis.BaijiaAccountAnalysis;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Request
;
public
class
Baijia
{
public
class
Baijia
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Baijia
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Baijia
.
class
);
private
static
BaijiaAccountAnalysis
baijiaAccountAnalysis
=
new
BaijiaAccountAnalysis
();
private
static
BaijiaAccountAnalysis
baijiaAccountAnalysis
=
new
BaijiaAccountAnalysis
();
...
@@ -70,17 +69,18 @@ public class Baijia {
...
@@ -70,17 +69,18 @@ public class Baijia {
/**
/**
*
*
* @Description 获取百家号第三种方法
* @Description 获取百家号第三种方法
* @param app
_i
d
* @param app
I
d
* @param startTime
* @param startTime
* @param proxy
* @param proxy
* @return
* @return
*/
*/
@SuppressWarnings
(
"unchecked"
)
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
Map
<
String
,
Object
>>
getBaijiaAccountByBaiduData
(
String
app_id
,
String
name
,
String
startTime
,
String
cookie
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getBaijiaAccountByBaiduData
(
String
appId
,
String
name
,
String
startTime
,
String
cookie
,
ProxyHolder
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
headerMap
.
put
(
"cookie"
,
cookie
);
headerMap
.
put
(
"cookie"
,
cookie
);
String
uk
=
getUkData
(
app
_i
d
,
proxy
,
cookie
);
String
uk
=
getUkData
(
app
I
d
,
proxy
,
cookie
);
if
(
Objects
.
isNull
(
uk
))
{
if
(
Objects
.
isNull
(
uk
))
{
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
...
@@ -90,12 +90,11 @@ public class Baijia {
...
@@ -90,12 +90,11 @@ public class Baijia {
for
(
int
i
=
1
;
i
<
3
;
i
++)
{
for
(
int
i
=
1
;
i
<
3
;
i
++)
{
try
{
try
{
String
url
=
"https://author.baidu.com/list?type=article&tab=2&uk="
+
uk
+
"&ctime="
+
ctime
+
"&num=50"
;
String
url
=
"https://author.baidu.com/list?type=article&tab=2&uk="
+
uk
+
"&ctime="
+
ctime
+
"&num=50"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
String
result
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
),
proxy
).
body
().
string
();
String
result
=
httpBoot
.
syncCall
(
request
,
proxy
).
body
().
string
();
Map
<
String
,
Object
>
dMap
=
baijiaAccountAnalysis
.
getBaijiaAccountData3
(
result
,
name
,
startTime
);
Map
<
String
,
Object
>
dMap
=
baijiaAccountAnalysis
.
getBaijiaAccountData3
(
result
,
name
,
startTime
);
List
<
Map
<
String
,
Object
>>
dList
=
(
List
<
Map
<
String
,
Object
>>)
dMap
.
get
(
"data"
);
List
<
Map
<
String
,
Object
>>
dList
=
(
List
<
Map
<
String
,
Object
>>)
dMap
.
get
(
"data"
);
dataList
.
addAll
(
dList
);
dataList
.
addAll
(
dList
);
logger
.
info
(
"{} 数据采集结果 {}"
,
app
_i
d
,
dataList
.
size
());
logger
.
info
(
"{} 数据采集结果 {}"
,
app
I
d
,
dataList
.
size
());
if
(!(
boolean
)
dMap
.
get
(
"more"
))
{
if
(!(
boolean
)
dMap
.
get
(
"more"
))
{
f
=
false
;
f
=
false
;
}
}
...
@@ -111,9 +110,9 @@ public class Baijia {
...
@@ -111,9 +110,9 @@ public class Baijia {
return
dataList
;
return
dataList
;
}
}
private
static
String
getUkData
(
String
app
_id
,
Proxy
proxy
,
String
cookie
)
{
private
static
String
getUkData
(
String
app
Id
,
ProxyHolder
proxy
,
String
cookie
)
{
String
url
=
"https://author.baidu.com/profile?context={\"from\":0,\"app_id\":\""
String
url
=
"https://author.baidu.com/profile?context={\"from\":0,\"app_id\":\""
+
app
_i
d
+
"\"}&cmdType=&pagelets[]=root&reqID=0&ispeed=1#"
;
+
app
I
d
+
"\"}&cmdType=&pagelets[]=root&reqID=0&ispeed=1#"
;
Map
<
String
,
Object
>
headers
=
new
HashMap
<>();
Map
<
String
,
Object
>
headers
=
new
HashMap
<>();
headers
.
put
(
"Host"
,
"author.baidu.com"
);
headers
.
put
(
"Host"
,
"author.baidu.com"
);
headers
.
put
(
"cookie"
,
cookie
);
headers
.
put
(
"cookie"
,
cookie
);
...
...
src/main/java/com/zhiwei/parse/Douban.java
View file @
3e350f8b
...
@@ -2,7 +2,6 @@ package com.zhiwei.parse;
...
@@ -2,7 +2,6 @@ package com.zhiwei.parse;
import
static
java
.
util
.
Objects
.
nonNull
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
...
@@ -16,6 +15,7 @@ import org.slf4j.Logger;
...
@@ -16,6 +15,7 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.parse.analysis.DoubanCommentAnalysis
;
import
com.zhiwei.parse.analysis.DoubanCommentAnalysis
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
...
@@ -36,7 +36,7 @@ public class Douban {
...
@@ -36,7 +36,7 @@ public class Douban {
* @param stime
* @param stime
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
doubanTopicGetByWord
(
String
word
,
Proxy
proxy
,
String
cookie
,
String
stime
)
{
public
static
List
<
Map
<
String
,
Object
>>
doubanTopicGetByWord
(
String
word
,
Proxy
Holder
proxy
,
String
cookie
,
String
stime
)
{
int
page
=
0
;
int
page
=
0
;
int
count
=
20
;
int
count
=
20
;
boolean
more
=
true
;
boolean
more
=
true
;
...
@@ -105,7 +105,7 @@ public class Douban {
...
@@ -105,7 +105,7 @@ public class Douban {
* @param cookie
* @param cookie
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getDoubanComment
(
String
url
,
Proxy
proxy
,
String
cookie
)
{
public
static
List
<
Map
<
String
,
Object
>>
getDoubanComment
(
String
url
,
Proxy
Holder
proxy
,
String
cookie
)
{
if
(
url
.
contains
(
"#"
))
{
if
(
url
.
contains
(
"#"
))
{
url
=
url
.
split
(
"#"
)[
0
];
url
=
url
.
split
(
"#"
)[
0
];
}
}
...
...
src/main/java/com/zhiwei/parse/Maimai.java
View file @
3e350f8b
...
@@ -44,7 +44,7 @@ public class Maimai {
...
@@ -44,7 +44,7 @@ public class Maimai {
* @return
* @return
*/
*/
@SuppressWarnings
(
"unchecked"
)
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
key
,
String
cookie
,
String
time
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
key
,
String
cookie
,
String
time
,
Proxy
Holder
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getMaimaiKeywordHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getMaimaiKeywordHeaderMap
(
cookie
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
boolean
f
=
true
;
boolean
f
=
true
;
...
@@ -82,7 +82,7 @@ public class Maimai {
...
@@ -82,7 +82,7 @@ public class Maimai {
* @return
* @return
*/
*/
@SuppressWarnings
(
"unchecked"
)
@SuppressWarnings
(
"unchecked"
)
public
static
List
<
Map
<
String
,
Object
>>
getDataByNoName
(
String
key
,
String
cookie
,
String
time
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getDataByNoName
(
String
key
,
String
cookie
,
String
time
,
Proxy
Holder
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getMaimaiKeywordHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getMaimaiKeywordHeaderMap
(
cookie
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
boolean
f
=
true
;
boolean
f
=
true
;
...
@@ -94,7 +94,7 @@ public class Maimai {
...
@@ -94,7 +94,7 @@ public class Maimai {
Map
<
String
,
Object
>
map
=
maimaiBywordAnalysis
.
getDataByNoName
(
result
,
time
,
key
);
Map
<
String
,
Object
>
map
=
maimaiBywordAnalysis
.
getDataByNoName
(
result
,
time
,
key
);
f
=
(
boolean
)
map
.
get
(
"hasMore"
);
f
=
(
boolean
)
map
.
get
(
"hasMore"
);
List
<
Map
<
String
,
Object
>>
daList
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
List
<
Map
<
String
,
Object
>>
daList
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"data"
);
if
(
daList
!=
null
&&
daList
.
size
()
>
0
)
{
if
(
daList
!=
null
&&
!
daList
.
isEmpty
()
)
{
dataList
.
addAll
(
daList
);
dataList
.
addAll
(
daList
);
url
=
"https://maimai.cn/search/gossips?query="
+
URLEncoder
.
encode
(
key
,
"utf-8"
)+
"&limit=20&offset="
+
i
+
"highlight=true&sortby=time&jsononly=1"
;
url
=
"https://maimai.cn/search/gossips?query="
+
URLEncoder
.
encode
(
key
,
"utf-8"
)+
"&limit=20&offset="
+
i
+
"highlight=true&sortby=time&jsononly=1"
;
i
+=
20
;
i
+=
20
;
...
...
src/main/java/com/zhiwei/parse/QQKB.java
View file @
3e350f8b
...
@@ -12,7 +12,6 @@ import org.slf4j.LoggerFactory;
...
@@ -12,7 +12,6 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.bean.QQkbUser
;
import
com.zhiwei.bean.QQkbUser
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
...
@@ -32,7 +31,7 @@ public class QQKB {
...
@@ -32,7 +31,7 @@ public class QQKB {
* @param cookie
* @param cookie
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getQQAccountData
(
String
child
,
String
cookie
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getQQAccountData
(
String
child
,
String
cookie
,
Proxy
Holder
proxy
)
{
String
url
=
"http://r.cnews.qq.com/getSubNewsIndex"
;
String
url
=
"http://r.cnews.qq.com/getSubNewsIndex"
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQAccountHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQAccountHeaderMap
(
cookie
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQAccountOneParamMap
(
child
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQAccountOneParamMap
(
child
);
...
...
src/main/java/com/zhiwei/parse/TXNews.java
View file @
3e350f8b
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
...
@@ -31,7 +30,7 @@ public class TXNews {
...
@@ -31,7 +30,7 @@ public class TXNews {
public
static
boolean
txNewshasMoreData
=
true
;
public
static
boolean
txNewshasMoreData
=
true
;
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
2
).
build
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
2
).
build
();
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
word
,
String
devid
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
word
,
String
devid
,
Proxy
Holder
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getTxNewspage1HeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getTxNewspage1HeaderMap
(
null
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getTxNewspage1ParamMap
(
word
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getTxNewspage1ParamMap
(
word
);
...
...
src/main/java/com/zhiwei/parse/Wangyi.java
View file @
3e350f8b
...
@@ -36,7 +36,7 @@ public class Wangyi {
...
@@ -36,7 +36,7 @@ public class Wangyi {
* @param id
* @param id
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getWangyiCommentData
(
String
id
,
Proxy
proxy
)
{
public
static
List
<
Map
<
String
,
Object
>>
getWangyiCommentData
(
String
id
,
Proxy
Holder
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getWangyiCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getWangyiCommentHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<>();
int
i
=
0
;
int
i
=
0
;
...
...
src/main/java/com/zhiwei/parse/analysis/Ts21cnAnalysis.java
View file @
3e350f8b
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
static
java
.
util
.
Objects
.
isNull
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
...
...
src/test/java/com/zhiwei/crawler/DayuAccountExample.java
View file @
3e350f8b
package
com
.
zhiwei
.
crawler
;
//
package com.zhiwei.crawler;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.common.config.GroupType
;
//
import com.zhiwei.common.config.GroupType;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Dayu
;
//
import com.zhiwei.parse.Dayu;
//
public
class
DayuAccountExample
{
//
public class DayuAccountExample {
//
//
@Test
//
@Test
public
void
dayuAccountTest
()
{
//
public void dayuAccountTest() {
//https://api.m.sm.cn/rest?method=Subscribe.list&format=html&from=wh10331&uc_biz_str=S:custom%7CC:search%7CN:true
//
//https://api.m.sm.cn/rest?method=Subscribe.list&format=html&from=wh10331&uc_biz_str=S:custom%7CC:search%7CN:true
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
//
ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
//
// String mid = "0a8b2360fd8b4ded971cd324a56d32f0";
//
//
String mid = "0a8b2360fd8b4ded971cd324a56d32f0";
// String name = "大鱼海棠雨";
//
//
String name = "大鱼海棠雨";
String
startTime
=
"2017-01-01 00:00:00"
;
//
String startTime = "2017-01-01 00:00:00";
String
path
=
"D:\\crawlerdata\\自媒体\\大鱼历史文章.xlsx"
;
//
String path = "D:\\crawlerdata\\自媒体\\大鱼历史文章.xlsx";
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
Map
<
String
,
Object
>
map
=
poi
.
importExcel
(
path
,
0
);
//
Map<String,Object> map = poi.importExcel(path, 0);
List
<
Map
<
String
,
Object
>>
lists
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
//
List<Map<String,Object>> lists = (List<Map<String, Object>>) map.get("body");
List
<
String
>
headList
=
new
ArrayList
<
String
>();
//
List<String> headList = new ArrayList<String>();
headList
.
add
(
"title"
);
//
headList.add("title");
headList
.
add
(
"time"
);
//
headList.add("time");
headList
.
add
(
"content"
);
//
headList.add("content");
headList
.
add
(
"source"
);
//
headList.add("source");
headList
.
add
(
"url"
);
//
headList.add("url");
// headList.add("content_id");
//
//
headList.add("content_id");
// headList.add("origin_id");
//
//
headList.add("origin_id");
// headList.add("xss_item_id");
//
//
headList.add("xss_item_id");
for
(
Map
<
String
,
Object
>
data
:
lists
)
{
//
for(Map<String,Object> data : lists) {
String
mid
=
data
.
get
(
"mid"
)+
""
;
//
String mid = data.get("mid")+"";
String
name
=
data
.
get
(
"name"
)+
""
;
//
String name = data.get("name")+"";
mid
=
"7b345070c4124574b9cbcab8c4a1aeb8"
;
//
mid = "7b345070c4124574b9cbcab8c4a1aeb8";
name
=
"国魂"
;
//
name = "国魂";
if
(
mid
.
length
()
<
1
&&
name
.
length
()
<
1
)
{
//
if(mid.length() < 1 && name.length() < 1) {
continue
;
//
continue;
}
//
}
List
<
Map
<
String
,
Object
>>
dataList
=
Dayu
.
getDayuAccountData
(
mid
,
name
,
null
,
null
);
//
List<Map<String,Object>> dataList = Dayu.getDayuAccountData(mid,name,null,null);
poi
.
exportExcel
(
path
,
name
,
headList
,
dataList
);
//
poi.exportExcel(path, name, headList, dataList);
}
//
}
//
//
}
//
}
//
//
}
//
}
src/test/java/com/zhiwei/crawler/YidainzixunByWordExample.java
View file @
3e350f8b
package
com
.
zhiwei
.
crawler
;
//
package com.zhiwei.crawler;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Yidianzixun
;
//
import com.zhiwei.parse.Yidianzixun;
import
com.zhiwei.util.WordReadFile
;
//
import com.zhiwei.util.WordReadFile;
//
public
class
YidainzixunByWordExample
{
//
public class YidainzixunByWordExample {
//
@Test
//
@Test
public
void
yidianzixunByWordTest
()
{
//
public void yidianzixunByWordTest() {
//
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata/关键词.txt"
);
//
List<String> wordList = WordReadFile.getWords("D://crawlerdata/关键词.txt");
List
<
Map
<
String
,
Object
>>
listAll
=
new
ArrayList
<
Map
<
String
,
Object
>>();
//
List<Map<String,Object>> listAll = new ArrayList<Map<String,Object>>();
for
(
String
word
:
wordList
)
{
//
for(String word : wordList) {
List
<
Map
<
String
,
Object
>>
dataList
=
Yidianzixun
.
getYidianzixunDataByWord
(
word
,
null
);
//
List<Map<String,Object>> dataList = Yidianzixun.getYidianzixunDataByWord(word,null);
System
.
out
.
println
(
dataList
.
size
());
//
System.out.println(dataList.size());
listAll
.
addAll
(
dataList
);
//
listAll.addAll(dataList);
System
.
out
.
println
(
listAll
.
size
());
//
System.out.println(listAll.size());
}
//
}
//
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
//
List<String> headList = new ArrayList<String>();
headList
.
add
(
"docid"
);
//
headList.add("docid");
headList
.
add
(
"title"
);
//
headList.add("title");
headList
.
add
(
"comment_count"
);
//
headList.add("comment_count");
headList
.
add
(
"summary"
);
//
headList.add("summary");
headList
.
add
(
"source"
);
//
headList.add("source");
headList
.
add
(
"wm_copyright"
);
//
headList.add("wm_copyright");
headList
.
add
(
"time"
);
//
headList.add("time");
headList
.
add
(
"url"
);
//
headList.add("url");
System
.
out
.
println
(
listAll
.
size
());
//
System.out.println(listAll.size());
poi
.
exportExcel
(
"D://crawlerdata/一点资讯-软博会.xlsx"
,
"asd"
,
headList
,
listAll
);
//
poi.exportExcel("D://crawlerdata/一点资讯-软博会.xlsx", "asd", headList, listAll);
//
//
}
//
}
//
//
}
//
}
src/test/java/com/zhiwei/hsitory/YidianzixunAccountExample.java
View file @
3e350f8b
package
com
.
zhiwei
.
hsitory
;
//
package com.zhiwei.hsitory;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.common.config.GroupType
;
//
import com.zhiwei.common.config.GroupType;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
//
import com.zhiwei.crawler.proxy.ProxyHolder;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Yidianzixun
;
//
import com.zhiwei.parse.Yidianzixun;
//
//
public
class
YidianzixunAccountExample
{
//
public class YidianzixunAccountExample {
//
@Test
//
@Test
public
void
yidianzixunAccountTest
()
{
//
public void yidianzixunAccountTest() {
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
//
ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",GroupType.PROVIDER);
String
channelid
=
"m190159"
;
//
String channelid = "m190159";
String
startTime
=
"2007-01-01 00:00:00"
;
//
String startTime = "2007-01-01 00:00:00";
List
<
Map
<
String
,
Object
>>
dataList
=
Yidianzixun
.
getYidianzixunAccountData
(
channelid
,
startTime
,
ProxyHolder
.
NAT_HEAVY_PROXY
,
null
);
//
List<Map<String,Object>> dataList = Yidianzixun.getYidianzixunAccountData(channelid, startTime,ProxyHolder.NAT_HEAVY_PROXY,null);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
//
List<String> headList = new ArrayList<String>();
headList
.
add
(
"title"
);
//
headList.add("title");
headList
.
add
(
"time"
);
//
headList.add("time");
headList
.
add
(
"comment_count"
);
//
headList.add("comment_count");
headList
.
add
(
"ctype"
);
//
headList.add("ctype");
headList
.
add
(
"source"
);
//
headList.add("source");
headList
.
add
(
"url"
);
//
headList.add("url");
headList
.
add
(
"summary"
);
//
headList.add("summary");
poi
.
exportExcel
(
"D://crawlerdata//历史文章采集/一点资讯-新华社中国新三板.xlsx"
,
"新华社中国新三板"
,
headList
,
dataList
);
//
poi.exportExcel("D://crawlerdata//历史文章采集/一点资讯-新华社中国新三板.xlsx", "新华社中国新三板", headList, dataList);
}
//
}
//
//
}
//
}
src/test/java/com/zhiwei/keyword/XueqiuKeyWord.java
View file @
3e350f8b
package
com
.
zhiwei
.
keyword
;
//
package com.zhiwei.keyword;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.common.config.GroupType
;
//
import com.zhiwei.common.config.GroupType;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Xueqiu
;
//
import com.zhiwei.parse.Xueqiu;
//
public
class
XueqiuKeyWord
{
//
public class XueqiuKeyWord {
//
@Test
//
@Test
public
void
f
()
{
//
public void f() {
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
//
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
String
word
=
"软博会|软件博览会"
;
//
String word = "软博会|软件博览会";
String
endTime
=
"2018-01-01 00:00:00"
;
//
String endTime = "2018-01-01 00:00:00";
String
cookie
=
"aliyungf_tc=AQAAADi3G3I7GAEAgtgMPF/W3FeFauXk; device_id=3221abfd50f9f8be2abca9182c338c9e; Hm_lvt_1db88642e346389874251b5a1eded6e3=1561962809; s=d911zeymzo; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token=b2a8d68c56272cca3b71e1bcaf04b744b16e9497; xq_a_token.sig=hj7OrhYid1GuAaF-AmLYrPuyFDk; xqat=b2a8d68c56272cca3b71e1bcaf04b744b16e9497; xqat.sig=gker8dcZJ2Ez4s58eIGPa5zCd0w; xq_r_token=b0ad7b059411acac8c9e2f0a40336804bb60d047; xq_r_token.sig=DwNSLIigE8xtwQKLygagcM28Dd0; xq_is_login=1; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u=1273068356; u.sig=UyqQW-br8hcsOyT6anFmS_SFpCs; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1561963129"
;
//
String cookie = "aliyungf_tc=AQAAADi3G3I7GAEAgtgMPF/W3FeFauXk; device_id=3221abfd50f9f8be2abca9182c338c9e; Hm_lvt_1db88642e346389874251b5a1eded6e3=1561962809; s=d911zeymzo; remember=1; remember.sig=K4F3faYzmVuqC0iXIERCQf55g2Y; xq_a_token=b2a8d68c56272cca3b71e1bcaf04b744b16e9497; xq_a_token.sig=hj7OrhYid1GuAaF-AmLYrPuyFDk; xqat=b2a8d68c56272cca3b71e1bcaf04b744b16e9497; xqat.sig=gker8dcZJ2Ez4s58eIGPa5zCd0w; xq_r_token=b0ad7b059411acac8c9e2f0a40336804bb60d047; xq_r_token.sig=DwNSLIigE8xtwQKLygagcM28Dd0; xq_is_login=1; xq_is_login.sig=J3LxgPVPUzbBg3Kee_PquUfih7Q; u=1273068356; u.sig=UyqQW-br8hcsOyT6anFmS_SFpCs; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1561963129";
//
//
//
String
[]
words
=
word
.
split
(
"\\|"
);
//
String[] words = word.split("\\|");
//
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
//
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for
(
String
w
:
words
)
{
//
for(String w : words) {
System
.
out
.
println
(
w
);
//
System.out.println(w);
//
List
<
Map
<
String
,
Object
>>
dataList
=
Xueqiu
.
getData
(
w
,
endTime
,
null
,
cookie
);
//
List<Map<String, Object>> dataList = Xueqiu.getData(w, endTime, null, cookie);
System
.
out
.
println
(
w
+
" ---- "
+
dataList
.
size
());
//
System.out.println(w + " ---- " + dataList.size());
bodyList
.
addAll
(
dataList
);
//
bodyList.addAll(dataList);
}
//
}
List
<
String
>
headList
=
new
ArrayList
<
String
>();
//
List<String> headList = new ArrayList<String>();
headList
.
add
(
"title"
);
//
headList.add("title");
headList
.
add
(
"time"
);
//
headList.add("time");
headList
.
add
(
"content"
);
//
headList.add("content");
headList
.
add
(
"uper"
);
//
headList.add("uper");
headList
.
add
(
"url"
);
//
headList.add("url");
headList
.
add
(
"likeCount"
);
//
headList.add("likeCount");
headList
.
add
(
"replyCount"
);
//
headList.add("replyCount");
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\雪球网页采集-软博会-relevance.xlsx"
,
"马化腾"
,
headList
,
bodyList
);
//
poi.exportExcel("D:\\crawlerdata\\自媒体\\雪球网页采集-软博会-relevance.xlsx", "马化腾", headList, bodyList);
//
}
//
}
}
//
}
src/test/java/com/zhiwei/shipin/AiqiyiTest.java
View file @
3e350f8b
package
com
.
zhiwei
.
shipin
;
//
package com.zhiwei.shipin;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.common.config.GroupType
;
//
import com.zhiwei.common.config.GroupType;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
//
import com.zhiwei.crawler.proxy.ProxyHolder;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Aiqiyi
;
//
import com.zhiwei.parse.Aiqiyi;
import
com.zhiwei.util.WordReadFile
;
//
import com.zhiwei.util.WordReadFile;
//
public
class
AiqiyiTest
{
//
public class AiqiyiTest {
@Test
//
@Test
public
void
aiqiyiTest
()
{
//
public void aiqiyiTest() {
//
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
//
ProxyFactory.init("zookeeper://192.168.0.36:2181","local",GroupType.PROVIDER);
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
//
List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
//
List<Map<String,Object>> bodyList = new ArrayList<Map<String,Object>>();
for
(
String
w
:
wordList
)
{
//
for(String w : wordList) {
List
<
Map
<
String
,
Object
>>
dataList
=
Aiqiyi
.
getAiqiyiByWordData
(
w
,
ProxyHolder
.
NAT_HEAVY_PROXY
);
//
List<Map<String,Object>> dataList = Aiqiyi.getAiqiyiByWordData(w,ProxyHolder.NAT_HEAVY_PROXY);
if
(
dataList
!=
null
&&
dataList
.
size
()
>=
1
)
{
//
if(dataList != null && dataList.size() >= 1) {
bodyList
.
addAll
(
dataList
);
//
bodyList.addAll(dataList);
}
//
}
}
//
}
List
<
String
>
headList
=
new
ArrayList
<
String
>();
//
List<String> headList = new ArrayList<String>();
headList
.
add
(
"time"
);
//
headList.add("time");
headList
.
add
(
"source"
);
//
headList.add("source");
headList
.
add
(
"content"
);
//
headList.add("content");
headList
.
add
(
"url"
);
//
headList.add("url");
headList
.
add
(
"title"
);
//
headList.add("title");
headList
.
add
(
"word"
);
//
headList.add("word");
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi
.
exportExcel
(
"D://crawlerdata//视频/爱奇艺关键词采集-毓婷-0716.xlsx"
,
"数据"
,
headList
,
bodyList
);
//
poi.exportExcel("D://crawlerdata//视频/爱奇艺关键词采集-毓婷-0716.xlsx", "数据", headList, bodyList);
//
//
//
//
}
//
}
//
}
//
}
src/test/java/com/zhiwei/shipin/BilibiliTest.java
View file @
3e350f8b
package
com
.
zhiwei
.
shipin
;
//
package com.zhiwei.shipin;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.common.config.GroupType
;
//
import com.zhiwei.common.config.GroupType;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.BiliBili
;
//
import com.zhiwei.parse.BiliBili;
import
com.zhiwei.util.WordReadFile
;
//
import com.zhiwei.util.WordReadFile;
//
public
class
BilibiliTest
{
//
public class BilibiliTest {
@Test
//
@Test
public
void
f
()
{
//
public void f() {
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
//
ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词-1.txt"
);
//
List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词-1.txt");
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
//
List<Map<String, Object>> bodyList = new ArrayList<>();
String
cookie
=
"LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274"
;
//
String cookie = "LIVE_BUVID=AUTO8715300758995538; sid=kp5rluge; fts=1530161621; im_notify_type_35324319=0; buvid3=08ABE6AE-5061-4CE5-B34F-1A8AAB64DB3320712infoc; rpdid=olppsmkxmpdoskwoxiwww; finger=edc6ecda; stardustvideo=1; UM_distinctid=164fe68fb31996-01f161c3523abe-6114167a-1fa400-164fe68fb32274";
for
(
String
word
:
wordList
)
{
//
for (String word : wordList) {
List
<
Map
<
String
,
Object
>>
dataList
=
BiliBili
.
getData
(
word
,
null
,
"2019-07-18 00:00:00"
,
//
List<Map<String, Object>> dataList = BiliBili.getData(word, null, "2019-07-18 00:00:00",
cookie
);
//
cookie);
if
(
dataList
!=
null
)
{
//
if (dataList != null) {
System
.
out
.
println
(
word
+
" ----- "
+
dataList
.
size
());
//
System.out.println(word + " ----- " + dataList.size());
bodyList
.
addAll
(
dataList
);
//
bodyList.addAll(dataList);
}
//
}
}
//
}
List
<
String
>
headlist
=
new
ArrayList
<>();
//
List<String> headlist = new ArrayList<>();
headlist
.
add
(
"submitcount"
);
//
headlist.add("submitcount");
headlist
.
add
(
"playcount"
);
//
headlist.add("playcount");
headlist
.
add
(
"time"
);
//
headlist.add("time");
headlist
.
add
(
"source"
);
//
headlist.add("source");
headlist
.
add
(
"title"
);
//
headlist.add("title");
headlist
.
add
(
"url"
);
//
headlist.add("url");
headlist
.
add
(
"word"
);
//
headlist.add("word");
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi
.
exportExcel
(
"D://crawlerdata//视频//bilibili关键词采集数据-吃鸡否-0722.xlsx"
,
"B站数据"
,
headlist
,
bodyList
);
//
poi.exportExcel("D://crawlerdata//视频//bilibili关键词采集数据-吃鸡否-0722.xlsx", "B站数据", headlist, bodyList);
//
}
//
}
}
//
}
src/test/java/com/zhiwei/shipin/QQTVTest.java
View file @
3e350f8b
package
com
.
zhiwei
.
shipin
;
//
package com.zhiwei.shipin;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.common.config.GroupType
;
//
import com.zhiwei.common.config.GroupType;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
//
import com.zhiwei.crawler.proxy.ProxyHolder;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.shipin.QQTV
;
//
import com.zhiwei.parse.shipin.QQTV;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
//
import com.zhiwei.tools.tools.ZhiWeiTools;
import
com.zhiwei.util.WordReadFile
;
//
import com.zhiwei.util.WordReadFile;
//
public
class
QQTVTest
{
//
public class QQTVTest {
@Test
//
@Test
public
void
f
()
{
//
public void f() {
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
GroupType
.
PROVIDER
);
//
ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",GroupType.PROVIDER);
String
time
=
"2019-04-11 00:00:00"
;
//
String time = "2019-04-11 00:00:00";
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
//
List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
//
List<Map<String, Object>> bodyList = new ArrayList<>();
for
(
String
word
:
wordList
)
{
//
for (String word : wordList) {
List
<
Map
<
String
,
Object
>>
dataList
=
QQTV
.
getData
(
word
,
time
,
ProxyHolder
.
NAT_HEAVY_PROXY
);
//
List<Map<String, Object>> dataList = QQTV.getData(word,time, ProxyHolder.NAT_HEAVY_PROXY);
if
(
dataList
!=
null
)
{
//
if (dataList != null) {
System
.
out
.
println
(
word
+
" ----- "
+
dataList
.
size
());
//
System.out.println(word + " ----- " + dataList.size());
bodyList
.
addAll
(
dataList
);
//
bodyList.addAll(dataList);
}
//
}
ZhiWeiTools
.
sleep
(
1000
);
//
ZhiWeiTools.sleep(1000);
}
//
}
List
<
String
>
headlist
=
new
ArrayList
<>();
//
List<String> headlist = new ArrayList<>();
headlist
.
add
(
"playCount"
);
//
headlist.add("playCount");
headlist
.
add
(
"time"
);
//
headlist.add("time");
headlist
.
add
(
"source"
);
//
headlist.add("source");
headlist
.
add
(
"title"
);
//
headlist.add("title");
headlist
.
add
(
"url"
);
//
headlist.add("url");
headlist
.
add
(
"word"
);
//
headlist.add("word");
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi
.
exportExcel
(
"D://crawlerdata//视频//腾讯视频关键词采集数据-毓婷-0716.xlsx"
,
"腾讯视频数据"
,
headlist
,
bodyList
);
//
poi.exportExcel("D://crawlerdata//视频//腾讯视频关键词采集数据-毓婷-0716.xlsx", "腾讯视频数据", headlist, bodyList);
//
//
//
//
}
//
}
}
//
}
src/test/java/com/zhiwei/shipin/SohuTVTest.java
View file @
3e350f8b
package
com
.
zhiwei
.
shipin
;
//
package com.zhiwei.shipin;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.shipin.SohuTV
;
//
import com.zhiwei.parse.shipin.SohuTV;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
//
import com.zhiwei.tools.tools.ZhiWeiTools;
import
com.zhiwei.util.WordReadFile
;
//
import com.zhiwei.util.WordReadFile;
//
public
class
SohuTVTest
{
//
public class SohuTVTest {
@Test
//
@Test
public
void
f
()
{
//
public void f() {
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
//
List<String> wordList = WordReadFile.getWords("D://crawlerdata//关键词.txt");
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
//
List<Map<String, Object>> bodyList = new ArrayList<>();
String
cookie
=
"SUV=1901101134139015; IPLOC=CN3301; gidinf=x099980109ee0f08567b42835000336ade2ef3762611; fuid=15474616189304048886; newpuid=15474616191372936893; beans_mz_userid=UBThg01XRPg8; pmai=dad35c1c318bdd22; ifoxinstalled=false; beans_freq=1; beans_dmp=%7B%22admaster%22%3A1547461620%2C%22shunfei%22%3A1547461620%2C%22reachmax%22%3A1548816807%2C%22lingji%22%3A1547461620%2C%22yoyi%22%3A1547461620%2C%22ipinyou%22%3A1547461620%2C%22ipinyou_admaster%22%3A1547461620%2C%22miaozhen%22%3A1548816807%2C%22diantong%22%3A1547461620%2C%22huayang%22%3A1547461620%7D; beans_dmp_done=1; reqtype=pc; sokey=%5B%7B%22key%22%3A%22%E7%BE%8E%E5%9B%A2%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%20%E4%BA%8C%E5%99%81%E8%8B%B1%22%7D%5D; t=1548817812321"
;
//
String cookie = "SUV=1901101134139015; IPLOC=CN3301; gidinf=x099980109ee0f08567b42835000336ade2ef3762611; fuid=15474616189304048886; newpuid=15474616191372936893; beans_mz_userid=UBThg01XRPg8; pmai=dad35c1c318bdd22; ifoxinstalled=false; beans_freq=1; beans_dmp=%7B%22admaster%22%3A1547461620%2C%22shunfei%22%3A1547461620%2C%22reachmax%22%3A1548816807%2C%22lingji%22%3A1547461620%2C%22yoyi%22%3A1547461620%2C%22ipinyou%22%3A1547461620%2C%22ipinyou_admaster%22%3A1547461620%2C%22miaozhen%22%3A1548816807%2C%22diantong%22%3A1547461620%2C%22huayang%22%3A1547461620%7D; beans_dmp_done=1; reqtype=pc; sokey=%5B%7B%22key%22%3A%22%E7%BE%8E%E5%9B%A2%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%22%7D%2C%7B%22key%22%3A%22%E5%B8%AE%E5%AE%9D%E9%80%82%20%E4%BA%8C%E5%99%81%E8%8B%B1%22%7D%5D; t=1548817812321";
for
(
String
word
:
wordList
)
{
//
for (String word : wordList) {
List
<
Map
<
String
,
Object
>>
dataList
=
SohuTV
.
sohuTVData
(
word
,
cookie
,
null
);
//
List<Map<String, Object>> dataList = SohuTV.sohuTVData(word, cookie, null);
if
(
dataList
!=
null
)
{
//
if (dataList != null) {
System
.
out
.
println
(
word
+
" ----- "
+
dataList
.
size
());
//
System.out.println(word + " ----- " + dataList.size());
bodyList
.
addAll
(
dataList
);
//
bodyList.addAll(dataList);
}
//
}
ZhiWeiTools
.
sleep
(
1000
);
//
ZhiWeiTools.sleep(1000);
}
//
}
List
<
String
>
headlist
=
new
ArrayList
<>();
//
List<String> headlist = new ArrayList<>();
headlist
.
add
(
"playCount"
);
//
headlist.add("playCount");
headlist
.
add
(
"time"
);
//
headlist.add("time");
headlist
.
add
(
"source"
);
//
headlist.add("source");
headlist
.
add
(
"title"
);
//
headlist.add("title");
headlist
.
add
(
"url"
);
//
headlist.add("url");
headlist
.
add
(
"word"
);
//
headlist.add("word");
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi
.
exportExcel
(
"D://crawlerdata//视频//搜狐视频关键词采集数据-毓婷-0716.xlsx"
,
"搜狐数据"
,
headlist
,
bodyList
);
//
poi.exportExcel("D://crawlerdata//视频//搜狐视频关键词采集数据-毓婷-0716.xlsx", "搜狐数据", headlist, bodyList);
//
}
//
}
}
//
}
src/test/java/com/zhiwei/shipin/YoukuKeyWordTest.java
View file @
3e350f8b
package
com
.
zhiwei
.
shipin
;
//
package com.zhiwei.shipin;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.junit.Test
;
//
import org.junit.Test;
//
import
com.zhiwei.common.config.GroupType
;
//
import com.zhiwei.common.config.GroupType;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
//
import com.zhiwei.crawler.proxy.ProxyFactory;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
//
import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import
com.zhiwei.parse.Youku
;
//
import com.zhiwei.parse.Youku;
import
com.zhiwei.util.WordReadFile
;
//
import com.zhiwei.util.WordReadFile;
//
public
class
YoukuKeyWordTest
{
//
public class YoukuKeyWordTest {
@Test
//
@Test
public
void
f
()
{
//
public void f() {
ProxyFactory
.
init
(
"zookeeper://192.168.0.36:2181"
,
"local"
,
//
ProxyFactory.init("zookeeper://192.168.0.36:2181", "local",
GroupType
.
PROVIDER
);
//
GroupType.PROVIDER);
List
<
String
>
words
=
WordReadFile
.
getWords
(
"D://crawlerdata//关键词.txt"
);
//
List<String> words = WordReadFile.getWords("D://crawlerdata//关键词.txt");
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<>();
//
List<Map<String,Object>> bodyList = new ArrayList<>();
for
(
String
w
:
words
)
{
//
for(String w : words) {
System
.
out
.
println
(
w
);
//
System.out.println(w);
bodyList
.
addAll
(
Youku
.
getDataList
(
w
));
//
bodyList.addAll(Youku.getDataList(w));
}
//
}
List
<
String
>
headList
=
new
ArrayList
<>();
//
List<String> headList = new ArrayList<>();
headList
.
add
(
"title"
);
//
headList.add("title");
headList
.
add
(
"time"
);
//
headList.add("time");
headList
.
add
(
"url"
);
//
headList.add("url");
headList
.
add
(
"uper"
);
//
headList.add("uper");
headList
.
add
(
"word"
);
//
headList.add("word");
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
//
PoiExcelUtil poi = PoiExcelUtil.getInstance();
poi
.
exportExcel
(
"D://crawlerdata//视频//优酷数据-毓婷-0716.xlsx"
,
"数据"
,
headList
,
bodyList
);
//
poi.exportExcel("D://crawlerdata//视频//优酷数据-毓婷-0716.xlsx", "数据", headList, bodyList);
//
}
//
}
}
//
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment