Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
articlenewscrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
articlenewscrawler
Commits
36eb5887
Commit
36eb5887
authored
Nov 30, 2018
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
投诉网站提交
parent
67a6c8f2
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
519 additions
and
10 deletions
+519
-10
src/main/java/com/zhiwei/parse/Gftai.java
+54
-0
src/main/java/com/zhiwei/parse/KuaiTousu.java
+53
-0
src/main/java/com/zhiwei/parse/SinaTousu.java
+60
-0
src/main/java/com/zhiwei/parse/analysis/GftaiAnalysis.java
+52
-0
src/main/java/com/zhiwei/parse/analysis/KuaiTousuAnalysis.java
+55
-0
src/main/java/com/zhiwei/parse/analysis/SinaTousuAnalysis.java
+51
-0
src/test/java/com/zhiwei/Comment/FenghuangCommentExample.java
+61
-0
src/test/java/com/zhiwei/Comment/QicheComment.java
+2
-2
src/test/java/com/zhiwei/crawler/QQKBCommentExample.java
+17
-5
src/test/java/com/zhiwei/crawler/SouhuCommentExample.java
+5
-3
src/test/java/com/zhiwei/keyword/GftaiTest.java
+33
-0
src/test/java/com/zhiwei/keyword/KuaiTousuTest.java
+38
-0
src/test/java/com/zhiwei/keyword/SinaTousuTest.java
+38
-0
No files found.
src/main/java/com/zhiwei/parse/Gftai.java
0 → 100644
View file @
36eb5887
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.parse.analysis.GftaiAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
 * Keyword crawler for the complaint search on www.gftai.com.
 *
 * <p>The class only exposes the static entry point {@link #getData(String, Proxy)}; the HTTP
 * client and the HTML parser are shared static collaborators.
 */
public class Gftai {

    private static final Logger logger = LoggerFactory.getLogger(Gftai.class);

    private static final GftaiAnalysis gftaiAnalysis = new GftaiAnalysis();

    private static final HttpBoot httpBoot = new HttpBoot();

    /**
     * Collects the search hits for {@code word}, walking the result pages from page 1 upwards.
     *
     * <p>Paging stops when a page yields fewer than 5 parsed records, or once the third
     * failure (request or parsing) has been logged. A failed page is retried because the page
     * number is only advanced after a successful fetch; the failure budget is shared by the
     * whole call and is never reset.
     *
     * @param word  keyword to search for; URL-encoded as UTF-8 before it is put into the query
     * @param proxy proxy passed through unchanged to {@code HttpBoot.syncCall}
     * @return the records gathered so far; never {@code null}, possibly empty
     */
    public static List<Map<String, Object>> getData(String word, Proxy proxy) {
        List<Map<String, Object>> dataList = new ArrayList<>();
        int page = 1;
        // Starts at 1 and is bumped on every failure; the loop gives up once it exceeds 3.
        int count = 1;
        while (true) {
            try {
                String url = "http://www.gftai.com/gftso?t=xyts&kd="
                        + URLEncoder.encode(word, "UTF-8")
                        + "&sid=24&rn=10&pn=" + page;
                String result = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy).body().string();
                List<Map<String, Object>> list = gftaiAnalysis.getData(result);
                dataList.addAll(list);
                logger.info("采集第 {} 页 ,一共采集到 {} 条", page, dataList.size());
                // A short page is treated as the last page of results.
                if (list.size() < 5) {
                    break;
                }
                page++;
                // Throttle between pages (presumably milliseconds - confirm against ZhiWeiTools).
                ZhiWeiTools.sleep(2500);
            } catch (Exception e) {
                // The throwable goes last and has no placeholder, so SLF4J logs the stack trace.
                logger.error("Exception while fetching page {} for word {}", page, word, e);
                count++;
                if (count > 3) {
                    break;
                }
            }
        }
        return dataList;
    }
}
src/main/java/com/zhiwei/parse/KuaiTousu.java
0 → 100644
View file @
36eb5887
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.parse.analysis.KuaiTousuAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
 * Keyword crawler for the complaint listing served from ts.gd.sina.com.cn.
 *
 * <p>The class only exposes the static entry point {@link #getData(String, Proxy)}; the HTTP
 * client and the HTML parser are shared static collaborators.
 */
public class KuaiTousu {

    private static final Logger logger = LoggerFactory.getLogger(KuaiTousu.class);

    private static final KuaiTousuAnalysis kuaiTousuAnalysis = new KuaiTousuAnalysis();

    private static final HttpBoot httpBoot = new HttpBoot();

    /**
     * Collects the listing entries for {@code word}, walking the pages from page 1 upwards.
     *
     * <p>Paging stops when a page yields no parsed records, or once the third failure (request
     * or parsing) has been logged. A failed page is retried because the page number is only
     * advanced after a successful fetch.
     *
     * @param word  keyword to search for; URL-encoded as UTF-8 before it is put into the path
     * @param proxy proxy passed through unchanged to {@code HttpBoot.syncCall}
     * @return the records gathered so far; never {@code null}, possibly empty
     */
    public static List<Map<String, Object>> getData(String word, Proxy proxy) {
        int page = 1;
        // Starts at 1 and is bumped on every failure; the loop gives up once it exceeds 3.
        int count = 1;
        List<Map<String, Object>> bodyList = new ArrayList<>();
        while (true) {
            try {
                String url = "http://ts.gd.sina.com.cn/list/latestv1/key/"
                        + URLEncoder.encode(word, "UTF-8")
                        + "/p/" + page + ".html";
                String result = httpBoot.syncCall(RequestUtils.wrapGet(url), proxy).body().string();
                List<Map<String, Object>> dataList = kuaiTousuAnalysis.getData(result);
                bodyList.addAll(dataList);
                logger.info("采集到第{}页,一共 采集到 {}", page, bodyList.size());
                // An empty page marks the end of the listing.
                if (dataList.isEmpty()) {
                    break;
                }
                page++;
                // Throttle between pages (presumably milliseconds - confirm against ZhiWeiTools).
                ZhiWeiTools.sleep(2000);
            } catch (Exception e) {
                // The throwable goes last and has no placeholder, so SLF4J logs the stack trace.
                logger.error("Exception while fetching page {} for word {}", page, word, e);
                count++;
                if (count > 3) {
                    break;
                }
            }
        }
        // Return what was actually collected; the previous version always returned an empty list.
        return bodyList;
    }
}
src/main/java/com/zhiwei/parse/SinaTousu.java
0 → 100644
View file @
36eb5887
package
com
.
zhiwei
.
parse
;
import
java.io.IOException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.parse.analysis.SinaTousuAnalysis
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
 * Keyword crawler for the search API at tousu.sina.com.cn.
 *
 * <p>The class only exposes the static entry point
 * {@link #getSinaTousuData(String, Proxy, String)}; the HTTP client and the JSON parser are
 * shared static collaborators.
 */
public class SinaTousu {

    private static final Logger logger = LoggerFactory.getLogger(SinaTousu.class);

    private static final SinaTousuAnalysis sinaTousuAnalysis = new SinaTousuAnalysis();

    private static final HttpBoot httpBoot = new HttpBoot();

    /**
     * Collects the search results for {@code word}, requesting 100 entries per page starting at
     * page 1.
     *
     * <p>Paging stops when {@code SinaTousuAnalysis.getData} returns an empty list for a page, or
     * once three I/O failures have been logged. A failed page is retried because the page number
     * is only advanced after a successful fetch.
     *
     * @param word  keyword to search for; URL-encoded as UTF-8 before it is put into the query
     * @param proxy proxy passed through unchanged to {@code HttpBoot.syncCall}
     * @param time  cut-off handed to {@code SinaTousuAnalysis.getData}; the test in this project
     *              passes a {@code yyyy-MM-dd HH:mm:ss} string
     * @return the records gathered so far; never {@code null}, possibly empty
     */
    public static List<Map<String, Object>> getSinaTousuData(String word, Proxy proxy, String time) {
        List<Map<String, Object>> bodyList = new ArrayList<>();
        int page = 1;
        // Starts at 1 and is bumped on every failure; the loop gives up once it exceeds 3.
        int count = 1;
        while (true) {
            try {
                if (count > 3) {
                    break;
                }
                String url = "https://tousu.sina.com.cn/api/index/s?keywords="
                        + URLEncoder.encode(word, "utf-8")
                        + "&page_size=100&page=";
                String result = httpBoot.syncCall(RequestUtils.wrapGet(url + page), proxy).body().string();
                List<Map<String, Object>> dataList = sinaTousuAnalysis.getData(result, time);
                if (dataList.isEmpty()) {
                    break;
                }
                bodyList.addAll(dataList);
                logger.info("黑猫投诉 关键词采集 第{}页 ,一共采集到数据 {} ", page, bodyList.size());
                page++;
                // Throttle between pages (presumably milliseconds - confirm against ZhiWeiTools).
                ZhiWeiTools.sleep(3000);
            } catch (IOException e) {
                // UnsupportedEncodingException is an IOException, so one handler covers both
                // cases. The throwable goes last and has no placeholder, so SLF4J logs the stack
                // trace instead of printing a literal "{}".
                count++;
                logger.error("IOException while fetching page {} for word {}", page, word, e);
            }
        }
        return bodyList;
    }
}
src/main/java/com/zhiwei/parse/analysis/GftaiAnalysis.java
0 → 100644
View file @
36eb5887
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
/**
 * Parses one HTML search-result page fetched by the Gftai crawler into flat records.
 */
public class GftaiAnalysis {

    private static final Logger logger = LoggerFactory.getLogger(GftaiAnalysis.class);

    /**
     * Extracts one record per {@code div.se_container_left > ul} element.
     *
     * <p>Each record carries the keys {@code title}, {@code url}, {@code content}, {@code time}
     * and {@code source} (the latter is the fixed label "国富泰信用"). The time is the trimmed
     * text after the first "..." in the {@code p.search_quick} element; when that separator is
     * missing the time is stored as an empty string instead of failing the whole page.
     *
     * @param result raw HTML of a result page
     * @return the parsed records, or an empty list when parsing throws
     */
    public List<Map<String, Object>> getData(String result) {
        try {
            List<Map<String, Object>> dataList = new ArrayList<>();
            Document doc = Jsoup.parse(result);
            Elements elements = doc.select("div.se_container_left > ul");
            if (nonNull(elements)) {
                for (Element element : elements) {
                    Map<String, Object> map = new HashMap<>();
                    // Title and link come from the same anchor selection.
                    Elements link = element.select("li>a");
                    String title = link.text();
                    String url = link.attr("href");
                    String content = element.select("li.se_result_con").text();
                    // Guard the index: one hit without "..." must not discard the whole page.
                    String[] quickParts = element.select("p.search_quick").text().split("\\.\\.\\.");
                    String time = quickParts.length > 1 ? quickParts[1].trim() : "";
                    map.put("title", title);
                    map.put("url", url);
                    map.put("content", content);
                    map.put("time", time);
                    map.put("source", "国富泰信用");
                    dataList.add(map);
                }
            }
            return dataList;
        } catch (Exception e) {
            // The throwable is passed without a placeholder so SLF4J logs the stack trace.
            logger.error("Exception while parsing Gftai result page", e);
        }
        return Collections.emptyList();
    }
}
src/main/java/com/zhiwei/parse/analysis/KuaiTousuAnalysis.java
0 → 100644
View file @
36eb5887
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
/**
 * Parses one HTML listing page fetched by the KuaiTousu crawler into flat records.
 */
public class KuaiTousuAnalysis {

    private static final Logger logger = LoggerFactory.getLogger(KuaiTousuAnalysis.class);

    /**
     * Extracts one record per {@code div.ts-list > div.ts-list-item.haspic} element.
     *
     * <p>Each record carries the keys {@code title}, {@code time}, {@code content}, {@code url}
     * and {@code source}, all taken as text (or the {@code href} attribute for the URL) from the
     * item's child elements.
     *
     * @param result raw HTML of a listing page
     * @return the parsed records, or an empty list when parsing throws
     */
    public List<Map<String, Object>> getData(String result) {
        try {
            List<Map<String, Object>> dataList = new ArrayList<>();
            Document doc = Jsoup.parse(result);
            Elements elements = doc.select("div.ts-list > div.ts-list-item.haspic");
            if (nonNull(elements)) {
                for (Element element : elements) {
                    Map<String, Object> map = new HashMap<>();
                    // Title and link come from the same anchor selection.
                    Elements link = element.select("div.ts-list-item-title.clearfix > div.title.fl > a");
                    String title = link.text();
                    String url = link.attr("href");
                    String time = element.select("div.ts-list-item-date-author > span.date").text();
                    String content = element.select("div.ts-list-item-txt").text();
                    String source = element.select("div.ts-list-item-date-author > span.author").text();
                    map.put("title", title);
                    map.put("time", time);
                    map.put("content", content);
                    map.put("url", url);
                    map.put("source", source);
                    dataList.add(map);
                }
                return dataList;
            }
        } catch (Exception e) {
            // The throwable is passed without a placeholder so SLF4J logs the stack trace.
            logger.error("Exception while parsing KuaiTousu result page", e);
        }
        return Collections.emptyList();
    }
}
src/main/java/com/zhiwei/parse/analysis/SinaTousuAnalysis.java
0 → 100644
View file @
36eb5887
package
com
.
zhiwei
.
parse
.
analysis
;
import
static
java
.
util
.
Objects
.
nonNull
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.tools.timeparse.TimeParse
;
/**
 * Parses one JSON response fetched by the SinaTousu crawler into flat records.
 */
public class SinaTousuAnalysis {

    private static final Logger logger = LoggerFactory.getLogger(SinaTousuAnalysis.class);

    /**
     * Extracts one record per entry of {@code result.data.lists} in the response.
     *
     * <p>Each record carries the keys {@code title}, {@code url}, {@code content}, {@code time}
     * and {@code source}. Markup matched by {@code <.*?>} is stripped from title and summary, and
     * "https:" is prepended to the URL found in the payload.
     *
     * <p>When {@code time} is non-null, entries whose formatted creation time is not strictly
     * greater than it are dropped. The comparison is a plain string comparison, so {@code time}
     * is expected to use the same {@code yyyy-MM-dd HH:mm:ss} layout. A {@code null} cut-off
     * disables the filter; previously it caused every entry to be skipped.
     *
     * @param result raw JSON body of one search response
     * @param time   exclusive lower bound for the entry time, or {@code null} for no filtering
     * @return the parsed records; an empty list when the {@code lists} array is absent or
     *         parsing throws
     */
    public List<Map<String, Object>> getData(String result, String time) {
        try {
            List<Map<String, Object>> bodyList = new ArrayList<>();
            JSONObject json = JSONObject.parseObject(result);
            JSONArray jsonArray = json.getJSONObject("result").getJSONObject("data").getJSONArray("lists");
            if (nonNull(jsonArray)) {
                for (int i = 0; i < jsonArray.size(); i++) {
                    JSONObject data = jsonArray.getJSONObject(i);
                    JSONObject main = data.getJSONObject("main");
                    // The timestamp is scaled by 1000 before building the Date
                    // (presumably epoch seconds - confirm against the API).
                    String ctime = TimeParse.dateFormartString(
                            new Date(main.getLong("timestamp") * 1000L), "yyyy-MM-dd HH:mm:ss");
                    // Only filter when a cut-off was actually supplied.
                    if (nonNull(time) && ctime.compareTo(time) <= 0) {
                        continue;
                    }
                    Map<String, Object> map = new HashMap<>();
                    map.put("title", main.getString("title").replaceAll("<.*?>", ""));
                    map.put("url", "https:" + main.getString("url"));
                    map.put("content", main.getString("summary").replaceAll("<.*?>", ""));
                    map.put("time", ctime);
                    map.put("source", data.getJSONObject("author").getString("title"));
                    bodyList.add(map);
                }
                return bodyList;
            }
        } catch (Exception e) {
            // The throwable is passed without a placeholder so SLF4J logs the stack trace.
            logger.error("用户错误信息", e);
        }
        return Collections.emptyList();
    }
}
src/test/java/com/zhiwei/Comment/FenghuangCommentExample.java
0 → 100644
View file @
36eb5887
package
com
.
zhiwei
.
Comment
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Map
;
import
org.junit.Test
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.Fenghuang
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
/**
 * Manual harness: reads article URLs from a local workbook, fetches the comments for each URL
 * through {@code Fenghuang.getFenghuangCommentData2} and writes them back to the same workbook
 * path.
 */
public class FenghuangCommentExample {

    @Test
    public void fenghuangCommentTest() {
        PoiExcelUtil poi = PoiExcelUtil.getInstance();
        Map<String, Object> sheet = poi.importExcel("D://crawlerdata//自媒体/凤凰评论采集.xlsx", 0);
        List<Map<String, Object>> inputRows = (List<Map<String, Object>>) sheet.get("body");

        List<Map<String, Object>> collected = new ArrayList<>();
        // URLs for which the crawler returned null or nothing; printed at the end.
        List<String> urlsWithoutData = new ArrayList<>();

        for (Map<String, Object> inputRow : inputRows) {
            String url = "";
            try {
                url = inputRow.get("url") + "";
                System.out.println(url);
                List<Map<String, Object>> fetched = Fenghuang.getFenghuangCommentData2(url, null);
                if (fetched == null) {
                    urlsWithoutData.add(url);
                } else {
                    if (fetched.isEmpty()) {
                        urlsWithoutData.add(url);
                    }
                    // Tag every comment with the article it came from.
                    for (Map<String, Object> comment : fetched) {
                        comment.put("from_url", url);
                        collected.add(comment);
                    }
                }
            } catch (Exception e) {
                System.out.println(url);
                e.printStackTrace();
                // A failed URL skips the pause below and moves straight to the next row.
                continue;
            }
            ZhiWeiTools.sleep(1000);
        }

        for (String missing : urlsWithoutData) {
            System.out.println(missing);
        }

        List<String> header = new ArrayList<>();
        for (String column : new String[] {"source", "content", "id", "like", "from", "time", "from_url"}) {
            header.add(column);
        }
        poi.exportExcel("D://crawlerdata//自媒体/凤凰评论采集.xlsx", "评论采集", header, collected);
    }
}
src/test/java/com/zhiwei/
keyword
/QicheComment.java
→
src/test/java/com/zhiwei/
Comment
/QicheComment.java
View file @
36eb5887
package
com
.
zhiwei
.
keyword
;
package
com
.
zhiwei
.
Comment
;
import
org.testng.annotations.Test
;
import
com.zhiwei.parse.QicheHome
;
public
class
Qiche
KeyWord
{
public
class
Qiche
Comment
{
@Test
public
void
f
()
{
String
articleid
=
"922761"
;
...
...
src/test/java/com/zhiwei/crawler/QQKBCommentExample.java
View file @
36eb5887
...
...
@@ -8,17 +8,30 @@ import org.junit.Test;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.QQKB
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
QQKBCommentExample
{
//天天快报与腾讯新闻都可用 不用cookie
@Test
public
void
qqkbCommentTest
()
{
String
url
=
"http
://op.inews.qq.com/m/20180424A0309700?refer=100000355&chl_code=auto&h=
0"
;
String
url
=
"http
s://kuaibao.qq.com/s/20181122A11WQB0
0"
;
//https://kuaibao.qq.com/s/20180423A1PI7400?refer=kb_news
// https://kuaibao.qq.com/s/20180423A0L60800?refer=kb_news
List
<
Map
<
String
,
Object
>>
dataList
=
QQKB
.
getQQKBCommentData
(
url
,
null
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
Map
<
String
,
Object
>
map
=
poi
.
importExcel
(
"D://crawlerdata//自媒体/快报评论采集.xlsx"
,
0
);
List
<
Map
<
String
,
Object
>>
list
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
for
(
Map
<
String
,
Object
>
m
:
list
)
{
String
u
=
m
.
get
(
"地址"
).
toString
();
System
.
out
.
println
(
u
);
ZhiWeiTools
.
sleep
(
2000
);
List
<
Map
<
String
,
Object
>>
dataList
=
QQKB
.
getQQKBCommentData
(
u
,
null
);
if
(
dataList
!=
null
)
{
bodyList
.
addAll
(
dataList
);
}
}
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"reply_id"
);
//id
headList
.
add
(
"like"
);
//点赞数
...
...
@@ -26,9 +39,8 @@ public class QQKBCommentExample {
headList
.
add
(
"reply_num"
);
//回复数
headList
.
add
(
"time"
);
//时间
headList
.
add
(
"content"
);
//内容
System
.
out
.
println
(
dataList
.
size
());
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
poi
.
exportExcel
(
"D:\\crawlerdata\\快报评论采集-2.xlsx"
,
"sada"
,
headList
,
dataList
);
System
.
out
.
println
(
bodyList
.
size
());
poi
.
exportExcel
(
"D:\\crawlerdata\\自媒体\\快报评论采集-zhj.xlsx"
,
"sada"
,
headList
,
bodyList
);
}
...
...
src/test/java/com/zhiwei/crawler/SouhuCommentExample.java
View file @
36eb5887
...
...
@@ -9,6 +9,7 @@ import org.junit.Test;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.Fenghuang
;
import
com.zhiwei.parse.Souhu
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
public
class
SouhuCommentExample
{
...
...
@@ -16,7 +17,7 @@ public class SouhuCommentExample {
public
void
souhuCommentTest
()
{
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
Map
<
String
,
Object
>
map
=
poi
.
importExcel
(
"D://crawlerdata/搜狐评论采集.xlsx"
,
0
);
Map
<
String
,
Object
>
map
=
poi
.
importExcel
(
"D://crawlerdata/
/自媒体//
搜狐评论采集.xlsx"
,
0
);
List
<
Map
<
String
,
Object
>>
list
=
(
List
<
Map
<
String
,
Object
>>)
map
.
get
(
"body"
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
String
>
urlList
=
new
ArrayList
<
String
>();
...
...
@@ -24,11 +25,12 @@ public class SouhuCommentExample {
String
url
=
""
;
try
{
url
=
map1
.
get
(
"url"
)+
""
;
System
.
out
.
println
(
url
);
List
<
Map
<
String
,
Object
>>
dataList
=
Souhu
.
getSouhuCommentData
(
url
,
null
);
if
(
dataList
.
size
()
<=
0
)
{
urlList
.
add
(
url
);
}
ZhiWeiTools
.
sleep
(
2000
);
if
(
dataList
!=
null
)
{
bodyList
.
addAll
(
dataList
);
}
...
...
@@ -50,7 +52,7 @@ public class SouhuCommentExample {
for
(
String
s
:
urlList
)
{
System
.
out
.
println
(
s
);
}
poi
.
exportExcel
(
"D://crawlerdata/搜狐评论采集.xlsx"
,
"搜狐评论"
,
headList
,
bodyList
);
poi
.
exportExcel
(
"D://crawlerdata/
/自媒体//
搜狐评论采集.xlsx"
,
"搜狐评论"
,
headList
,
bodyList
);
}
...
...
src/test/java/com/zhiwei/keyword/GftaiTest.java
0 → 100644
View file @
36eb5887
package
com
.
zhiwei
.
keyword
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Map
;
import
org.testng.annotations.Test
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.Gftai
;
/**
 * Manual harness: runs the Gftai keyword crawler for a fixed keyword list and exports the
 * combined result to a local workbook.
 */
public class GftaiTest {

    @Test
    public void f() {
        PoiExcelUtil poi = PoiExcelUtil.getInstance();

        String words = "民宿|短租|住宿|途家|爱彼迎|小猪短租|榛果民宿|Airbnb";
        List<Map<String, Object>> rows = new ArrayList<>();
        // Keywords are separated by a literal '|'.
        for (String keyword : words.split("\\|")) {
            rows.addAll(Gftai.getData(keyword, null));
            // Running total after each keyword.
            System.out.println(keyword + " --------- " + rows.size());
        }

        List<String> header = new ArrayList<>();
        for (String column : new String[] {"title", "time", "content", "source", "url"}) {
            header.add(column);
        }
        poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\国富泰信用.xlsx", "数据", header, rows);
    }
}
src/test/java/com/zhiwei/keyword/KuaiTousuTest.java
0 → 100644
View file @
36eb5887
package
com
.
zhiwei
.
keyword
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Map
;
import
org.testng.annotations.Test
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.Gftai
;
import
com.zhiwei.parse.KuaiTousu
;
/**
 * Manual harness: runs the KuaiTousu keyword crawler for a fixed keyword list and exports the
 * combined result to a local workbook.
 */
public class KuaiTousuTest {

    @Test
    public void f() {
        PoiExcelUtil poi = PoiExcelUtil.getInstance();

        String words = "民宿|短租|住宿|途家|爱彼迎|小猪短租|榛果民宿|Airbnb";
        List<Map<String, Object>> rows = new ArrayList<>();
        // Keywords are separated by a literal '|'.
        for (String keyword : words.split("\\|")) {
            rows.addAll(KuaiTousu.getData(keyword, null));
            // Running total after each keyword.
            System.out.println(keyword + " --------- " + rows.size());
        }

        List<String> header = new ArrayList<>();
        for (String column : new String[] {"title", "time", "content", "source", "url"}) {
            header.add(column);
        }
        poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\新浪广东快投诉.xlsx", "数据", header, rows);
    }
}
src/test/java/com/zhiwei/keyword/SinaTousuTest.java
0 → 100644
View file @
36eb5887
package
com
.
zhiwei
.
keyword
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Map
;
import
org.testng.annotations.Test
;
import
com.zhiwei.excelpoi.excel.PoiExcelUtil
;
import
com.zhiwei.parse.KuaiTousu
;
import
com.zhiwei.parse.SinaTousu
;
/**
 * Manual harness: runs the SinaTousu keyword crawler for a fixed keyword list with a fixed
 * cut-off time and exports the combined result to a local workbook.
 */
public class SinaTousuTest {

    @Test
    public void getSinaTousuData() {
        PoiExcelUtil poi = PoiExcelUtil.getInstance();

        String words = "民宿|短租|住宿|途家|爱彼迎|小猪短租|榛果民宿|Airbnb";
        List<Map<String, Object>> rows = new ArrayList<>();
        // Keywords are separated by a literal '|'.
        for (String keyword : words.split("\\|")) {
            rows.addAll(SinaTousu.getSinaTousuData(keyword, null, "2018-01-01 00:00:00"));
            // Running total after each keyword.
            System.out.println(keyword + " --------- " + rows.size());
        }

        List<String> header = new ArrayList<>();
        for (String column : new String[] {"title", "time", "content", "source", "url"}) {
            header.add(column);
        }
        poi.exportExcel("D:\\crawlerdata\\自媒体\\投诉\\黑猫投诉.xlsx", "数据", header, rows);
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment