Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
articlenewscrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
articlenewscrawler
Commits
e77ce092
Commit
e77ce092
authored
May 03, 2018
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加代理爬取 搜狐号增加采集来源
parent
89439323
Hide whitespace changes
Inline
Side-by-side
Showing
59 changed files
with
207 additions
and
186 deletions
+207
-186
src/main/java/com/zhiwei/httpclient/HeadGet.java
+6
-15
src/main/java/com/zhiwei/httpclient/HttpClient.java
+5
-4
src/main/java/com/zhiwei/parse/Aiqiyi.java
+6
-4
src/main/java/com/zhiwei/parse/Baijia.java
+5
-6
src/main/java/com/zhiwei/parse/Dayu.java
+13
-12
src/main/java/com/zhiwei/parse/Fenghuang.java
+11
-11
src/main/java/com/zhiwei/parse/Meipai.java
+4
-3
src/main/java/com/zhiwei/parse/Miaopai.java
+3
-5
src/main/java/com/zhiwei/parse/PearVideo.java
+3
-2
src/main/java/com/zhiwei/parse/QQKB.java
+15
-16
src/main/java/com/zhiwei/parse/Soku.java
+3
-2
src/main/java/com/zhiwei/parse/Souhu.java
+26
-7
src/main/java/com/zhiwei/parse/TXNews.java
+4
-3
src/main/java/com/zhiwei/parse/Wangyi.java
+5
-4
src/main/java/com/zhiwei/parse/XiGua.java
+5
-4
src/main/java/com/zhiwei/parse/Xiaomi.java
+4
-2
src/main/java/com/zhiwei/parse/Yidianzixun.java
+8
-7
src/main/java/com/zhiwei/parse/analysis/AiqiyiByWordAnalysis.java
+6
-5
src/main/java/com/zhiwei/parse/analysis/BaijiaAccountAnalysis.java
+3
-2
src/main/java/com/zhiwei/parse/analysis/DayuByWordAnalysis.java
+5
-6
src/main/java/com/zhiwei/parse/analysis/DayuCommentAnalysis.java
+6
-5
src/main/java/com/zhiwei/parse/analysis/FenghuangAccountAnalysis.java
+4
-4
src/main/java/com/zhiwei/parse/analysis/FenghuangCommentAnalysis.java
+7
-6
src/main/java/com/zhiwei/parse/analysis/MeipaiByWordAnalysis.java
+3
-2
src/main/java/com/zhiwei/parse/analysis/QQKBByWordAnalysis.java
+0
-8
src/main/java/com/zhiwei/parse/analysis/QQKBCommentAnalysis.java
+5
-4
src/main/java/com/zhiwei/parse/analysis/SouhuAccountAnalysis.java
+4
-3
src/main/java/com/zhiwei/parse/analysis/SouhuCommentAnalysis.java
+3
-2
src/main/java/com/zhiwei/parse/analysis/WangyiCommentAnalysis.java
+1
-0
src/main/java/com/zhiwei/parse/analysis/YidianzixunCommentAnalysis.java
+3
-2
src/test/java/com/zhiwei/crawler/AiqiyiByWordExample.java
+1
-1
src/test/java/com/zhiwei/crawler/BaijiaAccountExample.java
+2
-2
src/test/java/com/zhiwei/crawler/DayuAccountExample.java
+1
-1
src/test/java/com/zhiwei/crawler/DayuByWordExample.java
+1
-1
src/test/java/com/zhiwei/crawler/DayuCommentCountExample.java
+1
-1
src/test/java/com/zhiwei/crawler/DayuCommentExample.java
+1
-1
src/test/java/com/zhiwei/crawler/FenghuangAccountExample.java
+1
-1
src/test/java/com/zhiwei/crawler/FenghuangByWordExample.java
+1
-1
src/test/java/com/zhiwei/crawler/FenghuangCommentCountExample.java
+1
-1
src/test/java/com/zhiwei/crawler/FenghuangCommentExample.java
+1
-1
src/test/java/com/zhiwei/crawler/MeipaiByWordExample.java
+1
-1
src/test/java/com/zhiwei/crawler/MiaopaiByUrlExample.java
+1
-1
src/test/java/com/zhiwei/crawler/PearVideoByWordExample.java
+1
-1
src/test/java/com/zhiwei/crawler/QQAccountExample.java
+1
-1
src/test/java/com/zhiwei/crawler/QQKBCommentCountExample.java
+1
-1
src/test/java/com/zhiwei/crawler/QQKBCommentExample.java
+1
-1
src/test/java/com/zhiwei/crawler/SoKuByWordExample.java
+1
-1
src/test/java/com/zhiwei/crawler/SouhuAccountExample.java
+2
-1
src/test/java/com/zhiwei/crawler/SouhuCommentCountExample.java
+1
-1
src/test/java/com/zhiwei/crawler/SouhuCommentExample.java
+1
-1
src/test/java/com/zhiwei/crawler/TXNewsByWordExample.java
+1
-1
src/test/java/com/zhiwei/crawler/WangyiCommentCountExample.java
+1
-1
src/test/java/com/zhiwei/crawler/WangyiCommentExample.java
+1
-1
src/test/java/com/zhiwei/crawler/XiaomiShequByWordExample.java
+1
-1
src/test/java/com/zhiwei/crawler/XiguaAccountExample.java
+1
-1
src/test/java/com/zhiwei/crawler/XiguaByWordExample.java
+1
-1
src/test/java/com/zhiwei/crawler/YidainzixunByWordExample.java
+1
-1
src/test/java/com/zhiwei/crawler/YidianzixunAccountExample.java
+1
-1
src/test/java/com/zhiwei/crawler/YidianzixunCommentExample.java
+1
-1
No files found.
src/main/java/com/zhiwei/httpclient/HeadGet.java
View file @
e77ce092
package
com
.
zhiwei
.
httpclient
;
package
com
.
zhiwei
.
httpclient
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.URLEncoder
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.Map
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
com.alibaba.fastjson.JSONObject
;
import
com.sun.net.httpserver.Headers
;
public
class
HeadGet
{
public
class
HeadGet
{
/**
/**
...
@@ -788,13 +779,13 @@ public class HeadGet {
...
@@ -788,13 +779,13 @@ public class HeadGet {
}
}
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
String
url
=
"https://
news.baidu.com/sn/api/homesubcribe?forum_id=b_1560023960896882
&page=1"
;
String
url
=
"https://
a.jiemian.com/index.php?m=user&a=centerArticle&id=100032140
&page=1"
;
String
cookie
=
"
BAIDUID=4DB3FA13736131DBC2094C010E6EBCB0:FG=1; BIDUPSID=250CCE0442BEBCB3568D8EC515953434; PSTM=1522304033; BDUSS=zJEdDI0WFBCUE05M3BVTlhSbnozYkpUflZveW9aaGZ3ODBVTC1WRzVwaUxkZlphQVFBQUFBJCQAAAAAAAAAAAEAAADTCNY9Y3k5MDkyMDk5NTEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIvozlqL6M5ac; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BD_CK_SAM=1; BDSVRTM=98; BDSFRCVID=9g8sJeC62rdtQM7AdMI6hrB7leHy_qbTH6aoIgcaD_KjQB22bioFEG0PDU8g0KubMyQBogKKKgOTHIjP; H_BDCLCKID_SF=tJPOoD-bJI83fP36qRj8hPCsqxby26nQB2ceaJ5nJDoAoqOVWR5N-T-_-f7H3jbQ5RRb3CnvQpP-HJ7TyfCWM5_PhMbhhUcHKaufKl0MLpbYbb0xynoD-lFzLfnMBMni52OnapT_LIFaMII6D5DaejPShMr2aK6KaI58LRu8Kb7VbIOgDbbkbfJBD4QqhR5na26b3R3v2PoIMnRvhbQDD4t7yajK2-bmaN6A3lQ8aI3oD45HDTopQT8rKqAOK5OibCrpaC_Eab3vOpvTXpO1ytIreGLjt5LHJnFOVbD8bRrEDnukhtu_-P4DePjK-nJZ5m7mXp0b04TPjljgqj7jKU_mBpJbW60qXKb7BPF5BDOkbC86D6K5jjjM-f8X-PcKaD70LPI8Kb7VbprDXbbkbfJBDxc4-U_jB26b3tbe2PoIMnRNjl5tQU47yajK2-tfK64qXl5CyPOJftjT3-opQT8rQb_OK5Oib4jZ-fo9ab3vOpvTXpO1ytIreGKJtTF8fnuOV-35b5rtHJrwMtJo5DCHbq8sq4-O-2Q-5KL--JbMVqC6LtOYyjKJK4Kf2PQ7MGOD3fbdJJjoOJ3n-fOryPIuLGKH5tcy3eTxoUJgQCnJhhvG-xcB0fDebPRiB-b9QgbABftLK-oj-DLmD60h3e; PSINO=5; locale=zh; BDRCVFR[feWj1Vr5u3D]=I67x6TjHwwYf0; FP_UID=f9e064a71741aa2e821e58ca2b30c3da; H_PS_PSSID=1433_21104_20882_20927; userId=1524191310247; Hm_lvt_348091a80fe10e213d94a7de762bbd44=1524191312; Hm_lpvt_348091a80fe10e213d94a7de762bbd44=1524191395
"
;
String
cookie
=
"
pgv_pvi=1395917824; pgv_si=s4065829888
"
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
get
BaijiaAccount2HeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
get
AiqiyiBywordHeaderMap
(
cookie
);
// Map<String,Object> paramMap = HeadGet.getTxNewsAccountpageParamMap("1979");
// Map<String,Object> paramMap = HeadGet.getTxNewsAccountpageParamMap("1979");
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
null
,
headerMap
);
//
System.out.println(result);
System
.
out
.
println
(
result
);
System
.
out
.
println
(
result
.
length
());
System
.
out
.
println
(
result
.
length
());
}
}
}
}
src/main/java/com/zhiwei/httpclient/HttpClient.java
View file @
e77ce092
package
com
.
zhiwei
.
httpclient
;
package
com
.
zhiwei
.
httpclient
;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.net.Proxy
;
import
java.util.Map
;
import
java.util.Map
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
...
@@ -19,9 +20,9 @@ public class HttpClient {
...
@@ -19,9 +20,9 @@ public class HttpClient {
* @return
* @return
* @throws IOException
* @throws IOException
*/
*/
public
static
String
executeHttpRequestGet
(
String
url
,
Map
<
String
,
String
>
headerMap
)
{
public
static
String
executeHttpRequestGet
(
String
url
,
Proxy
proxy
,
Map
<
String
,
String
>
headerMap
)
{
try
{
try
{
String
result
=
HttpClientTemplateOK
.
get
(
url
,
null
,
headerMap
);
String
result
=
HttpClientTemplateOK
.
get
(
url
,
proxy
,
headerMap
);
return
result
;
return
result
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
.
getMessage
());
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
.
getMessage
());
...
@@ -30,9 +31,9 @@ public class HttpClient {
...
@@ -30,9 +31,9 @@ public class HttpClient {
}
}
public
static
String
executeHttpRequestPost
(
String
url
,
Map
<
String
,
String
>
headerMap
,
Map
<
String
,
Object
>
paramMap
)
{
public
static
String
executeHttpRequestPost
(
String
url
,
Proxy
proxy
,
Map
<
String
,
String
>
headerMap
,
Map
<
String
,
Object
>
paramMap
)
{
try
{
try
{
String
result
=
HttpClientTemplateOK
.
post
(
url
,
null
,
headerMap
,
paramMap
);
String
result
=
HttpClientTemplateOK
.
post
(
url
,
proxy
,
headerMap
,
paramMap
);
return
result
;
return
result
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
.
getMessage
());
logger
.
error
(
"httpClient 获取数据出现问题:{}"
,
e
.
getMessage
());
...
...
src/main/java/com/zhiwei/parse/Aiqiyi.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.io.UnsupportedEncodingException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
...
@@ -24,18 +25,18 @@ public class Aiqiyi {
...
@@ -24,18 +25,18 @@ public class Aiqiyi {
* @param word
* @param word
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getAiqiyiByWordData
(
String
word
)
{
public
static
List
<
Map
<
String
,
Object
>>
getAiqiyiByWordData
(
String
word
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getAiqiyiBywordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getAiqiyiBywordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap1
=
HeadGet
.
getAiqiyiHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap1
=
HeadGet
.
getAiqiyiHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
for
(
int
i
=
1
;
i
<=
20
;
i
++)
{
for
(
int
i
=
1
;
i
<=
20
;
i
++)
{
String
url
=
"http://so.iqiyi.com/so/q_"
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"_ctg_%E7%94%9F%E6%B4%BB_t_0_page_"
+
i
+
"_p_1_qc_0_rd__site__m_11_bitrate_?af=true"
;
String
url
=
"http://so.iqiyi.com/so/q_"
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"_ctg_%E7%94%9F%E6%B4%BB_t_0_page_"
+
i
+
"_p_1_qc_0_rd__site__m_11_bitrate_?af=true"
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
List
<
String
>
urlList
=
aiqiyiByWordAnalysis
.
getAiqiyiUrlList
(
result
);
List
<
String
>
urlList
=
aiqiyiByWordAnalysis
.
getAiqiyiUrlList
(
result
);
for
(
String
newurl
:
urlList
)
{
for
(
String
newurl
:
urlList
)
{
ZhiWeiTools
.
sleep
(
2000
);
ZhiWeiTools
.
sleep
(
2000
);
Map
<
String
,
Object
>
map
=
aiqiyiByWordAnalysis
.
getAiqiyiData
(
newurl
,
headerMap1
);
Map
<
String
,
Object
>
map
=
aiqiyiByWordAnalysis
.
getAiqiyiData
(
newurl
,
headerMap1
,
proxy
);
if
(
map
!=
null
)
{
if
(
map
!=
null
)
{
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
...
@@ -45,7 +46,8 @@ public class Aiqiyi {
...
@@ -45,7 +46,8 @@ public class Aiqiyi {
return
dataList
;
return
dataList
;
}
catch
(
UnsupportedEncodingException
e
)
{
}
catch
(
UnsupportedEncodingException
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
return
null
;
logger
.
info
(
"采集数据出错:{}"
,
e
.
getMessage
());
return
dataList
;
}
}
}
}
...
...
src/main/java/com/zhiwei/parse/Baijia.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -13,7 +13,6 @@ import com.alibaba.fastjson.JSONObject;
...
@@ -13,7 +13,6 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.BaijiaAccountAnalysis
;
import
com.zhiwei.parse.analysis.BaijiaAccountAnalysis
;
import
com.zhiwei.zhiweiTools.timeParse.TimeParse
;
import
com.zhiwei.zhiweiTools.tools.ZhiWeiTools
;
import
com.zhiwei.zhiweiTools.tools.ZhiWeiTools
;
public
class
Baijia
{
public
class
Baijia
{
...
@@ -27,13 +26,13 @@ public class Baijia {
...
@@ -27,13 +26,13 @@ public class Baijia {
* @param startTime
* @param startTime
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getBaijiaAccount2Data
(
String
app_id
,
String
startTime
)
{
public
static
List
<
Map
<
String
,
Object
>>
getBaijiaAccount2Data
(
String
app_id
,
String
startTime
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getBaijiaAccount2HeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getBaijiaAccount2HeaderMap
(
null
);
String
url
=
"https://news.baidu.com/sn/api/homesubcribe?forum_id="
+
app_id
;
String
url
=
"https://news.baidu.com/sn/api/homesubcribe?forum_id="
+
app_id
;
boolean
f
=
true
;
boolean
f
=
true
;
while
(
f
)
{
while
(
f
)
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"news"
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONArray
(
"news"
);
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArry
.
size
();
i
++)
{
...
@@ -66,7 +65,7 @@ public class Baijia {
...
@@ -66,7 +65,7 @@ public class Baijia {
* @param startTime
* @param startTime
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getBaijiaAccountData
(
String
app_id
,
String
startTime
)
{
public
static
List
<
Map
<
String
,
Object
>>
getBaijiaAccountData
(
String
app_id
,
String
startTime
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
int
i
=
0
;
int
i
=
0
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getBaijiaAccountHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getBaijiaAccountHeaderMap
(
null
);
...
@@ -74,7 +73,7 @@ public class Baijia {
...
@@ -74,7 +73,7 @@ public class Baijia {
while
(
true
)
{
while
(
true
)
{
try
{
try
{
String
url
=
"https://baijia.baidu.com/writerlistarticle?ajax=json&app_id="
+
app_id
+
"&_limit=20&_skip="
;
String
url
=
"https://baijia.baidu.com/writerlistarticle?ajax=json&app_id="
+
app_id
+
"&_limit=20&_skip="
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
i
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
i
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
list
=
baijiaAccountAnalysis
.
getBaijiaAccountData
(
result
,
startTime
);
List
<
Map
<
String
,
Object
>>
list
=
baijiaAccountAnalysis
.
getBaijiaAccountData
(
result
,
startTime
);
if
(
list
==
null
||
list
.
size
()
<
1
){
if
(
list
==
null
||
list
.
size
()
<
1
){
break
;
break
;
...
...
src/main/java/com/zhiwei/parse/Dayu.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
...
@@ -29,14 +30,14 @@ public class Dayu {
...
@@ -29,14 +30,14 @@ public class Dayu {
* @param mid
* @param mid
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getDayuAccountData
(
String
mid
,
String
name
,
String
startTime
)
{
public
static
List
<
Map
<
String
,
Object
>>
getDayuAccountData
(
String
mid
,
String
name
,
String
startTime
,
Proxy
proxy
)
{
int
i
=
1
;
int
i
=
1
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuAccountHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuAccountHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
while
(
true
)
{
while
(
true
)
{
String
url
=
"http://ff.dayu.com/contents/author/"
+
mid
+
"?biz_id=1002&_size=50&_page="
+
i
+
"&_order_type=published_at&status=1&_fetch=1"
;
String
url
=
"http://ff.dayu.com/contents/author/"
+
mid
+
"?biz_id=1002&_size=50&_page="
+
i
+
"&_order_type=published_at&status=1&_fetch=1"
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
List
<
Map
<
String
,
Object
>>
lists
=
dayuAccountAnalysis
.
getDayuAccountData
(
result
,
name
,
startTime
);
List
<
Map
<
String
,
Object
>>
lists
=
dayuAccountAnalysis
.
getDayuAccountData
(
result
,
name
,
startTime
);
if
(
lists
==
null
)
{
if
(
lists
==
null
)
{
...
@@ -65,22 +66,22 @@ public class Dayu {
...
@@ -65,22 +66,22 @@ public class Dayu {
* @param articleId
* @param articleId
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getDayuCommentData
(
String
articleId
)
{
public
static
List
<
Map
<
String
,
Object
>>
getDayuCommentData
(
String
articleId
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
String
url
=
"http://m.uczzd.cn/iflow/api/v2/cmt/article/"
+
articleId
+
"/comments/byhot?sn=0&count=10&ts="
+
new
Date
().
getTime
();
String
url
=
"http://m.uczzd.cn/iflow/api/v2/cmt/article/"
+
articleId
+
"/comments/byhot?sn=0&count=10&ts="
+
new
Date
().
getTime
();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
int
i
=
9991
;
int
i
=
9991
;
try
{
try
{
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
lists
=
dayuCommentAnalysis
.
getDayuCommentData
(
result
,
articleId
);
List
<
Map
<
String
,
Object
>>
lists
=
dayuCommentAnalysis
.
getDayuCommentData
(
result
,
articleId
,
proxy
);
dataList
.
addAll
(
lists
);
dataList
.
addAll
(
lists
);
while
(
true
)
{
while
(
true
)
{
lists
.
clear
();
lists
.
clear
();
ZhiWeiTools
.
sleep
(
3000
);
ZhiWeiTools
.
sleep
(
3000
);
System
.
out
.
println
(
url
+
"&hotValue="
+
i
);
System
.
out
.
println
(
url
+
"&hotValue="
+
i
);
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
"&hotValue="
+
i
,
headerMap
);
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
"&hotValue="
+
i
,
proxy
,
headerMap
);
lists
=
dayuCommentAnalysis
.
getDayuCommentData
(
result
,
articleId
);
lists
=
dayuCommentAnalysis
.
getDayuCommentData
(
result
,
articleId
,
proxy
);
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
break
;
break
;
}
}
...
@@ -103,10 +104,10 @@ public class Dayu {
...
@@ -103,10 +104,10 @@ public class Dayu {
* @param articleId
* @param articleId
* @return
* @return
*/
*/
public
static
int
getDayuCommentCount
(
String
articleId
)
{
public
static
int
getDayuCommentCount
(
String
articleId
,
Proxy
proxy
)
{
String
url
=
"http://m.uczzd.cn/iflow/api/v2/cmt/article/"
+
articleId
+
"/comments/byhot"
;
String
url
=
"http://m.uczzd.cn/iflow/api/v2/cmt/article/"
+
articleId
+
"/comments/byhot"
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
return
json
.
getJSONObject
(
"data"
).
getInteger
(
"comment_cnt"
);
return
json
.
getJSONObject
(
"data"
).
getInteger
(
"comment_cnt"
);
}
}
...
@@ -117,7 +118,7 @@ public class Dayu {
...
@@ -117,7 +118,7 @@ public class Dayu {
* @param word
* @param word
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getDayuByWordData
(
String
word
)
{
public
static
List
<
Map
<
String
,
Object
>>
getDayuByWordData
(
String
word
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuByWordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuByWordHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
int
i
=
1
;
int
i
=
1
;
...
@@ -125,8 +126,8 @@ public class Dayu {
...
@@ -125,8 +126,8 @@ public class Dayu {
while
(
true
)
{
while
(
true
)
{
String
url
=
"http://zzd.sm.cn/iflow/api/v1/article/fsearch?page="
+
i
+
"&size=20&sid=&q="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&scene=0"
;
String
url
=
"http://zzd.sm.cn/iflow/api/v1/article/fsearch?page="
+
i
+
"&size=20&sid=&q="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&scene=0"
;
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
lists
=
dayuByWordAnalysis
.
getDayuByWordData
(
result
);
List
<
Map
<
String
,
Object
>>
lists
=
dayuByWordAnalysis
.
getDayuByWordData
(
result
,
proxy
);
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
break
;
break
;
}
}
...
...
src/main/java/com/zhiwei/parse/Fenghuang.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.
io.UnsupportedEncodingException
;
import
java.
net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
...
@@ -29,7 +29,7 @@ public class Fenghuang {
...
@@ -29,7 +29,7 @@ public class Fenghuang {
* @param startTime 可不传 格式(2017-12-09 17:53:02)
* @param startTime 可不传 格式(2017-12-09 17:53:02)
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getFenghuangAccountData
(
String
id
,
String
startTime
)
{
public
static
List
<
Map
<
String
,
Object
>>
getFenghuangAccountData
(
String
id
,
String
startTime
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
int
i
=
1
;
int
i
=
1
;
boolean
f
=
true
;
boolean
f
=
true
;
...
@@ -38,7 +38,7 @@ public class Fenghuang {
...
@@ -38,7 +38,7 @@ public class Fenghuang {
for
(
int
j
=
0
;
j
<
4
;
j
++){
for
(
int
j
=
0
;
j
<
4
;
j
++){
f
=
true
;
f
=
true
;
String
url
=
"http://api.3g.ifeng.com/api_wemedia_index?followid=weMedia_"
+
id
+
"&page="
+
i
+
"&pagesize=20&tag=article"
;
String
url
=
"http://api.3g.ifeng.com/api_wemedia_index?followid=weMedia_"
+
id
+
"&page="
+
i
+
"&pagesize=20&tag=article"
;
List
<
Map
<
String
,
Object
>>
list
=
fenghuangAccountAnalysis
.
getArticleData
(
url
,
startTime
);
List
<
Map
<
String
,
Object
>>
list
=
fenghuangAccountAnalysis
.
getArticleData
(
url
,
startTime
,
proxy
);
if
(
list
!=
null
&&
list
.
size
()
>
0
)
{
if
(
list
!=
null
&&
list
.
size
()
>
0
)
{
dataList
.
addAll
(
list
);
dataList
.
addAll
(
list
);
System
.
out
.
println
(
"====================采集第"
+
i
+
"页===共获取数据=="
+
dataList
.
size
());
System
.
out
.
println
(
"====================采集第"
+
i
+
"页===共获取数据=="
+
dataList
.
size
());
...
@@ -63,8 +63,8 @@ public class Fenghuang {
...
@@ -63,8 +63,8 @@ public class Fenghuang {
* @param docUrl
* @param docUrl
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getFenghuangCommentData
(
String
url
)
{
public
static
List
<
Map
<
String
,
Object
>>
getFenghuangCommentData
(
String
url
,
Proxy
proxy
)
{
url
=
fenghuangCommentAnalysis
.
getdocUrl
(
url
);
url
=
fenghuangCommentAnalysis
.
getdocUrl
(
url
,
proxy
);
if
(
url
==
null
)
{
if
(
url
==
null
)
{
return
null
;
return
null
;
}
}
...
@@ -73,7 +73,7 @@ public class Fenghuang {
...
@@ -73,7 +73,7 @@ public class Fenghuang {
while
(
true
)
{
while
(
true
)
{
System
.
out
.
println
(
url
+
i
);
System
.
out
.
println
(
url
+
i
);
ZhiWeiTools
.
sleep
(
2000
);
ZhiWeiTools
.
sleep
(
2000
);
List
<
Map
<
String
,
Object
>>
list
=
fenghuangCommentAnalysis
.
getData
(
url
+
i
);
List
<
Map
<
String
,
Object
>>
list
=
fenghuangCommentAnalysis
.
getData
(
url
+
i
,
proxy
);
if
(
list
==
null
||
list
.
size
()
<
1
)
{
if
(
list
==
null
||
list
.
size
()
<
1
)
{
break
;
break
;
}
}
...
@@ -89,12 +89,12 @@ public class Fenghuang {
...
@@ -89,12 +89,12 @@ public class Fenghuang {
* @param url
* @param url
* @return
* @return
*/
*/
public
static
Map
<
String
,
Object
>
getFenghuangCommentCount
(
String
url
)
{
public
static
Map
<
String
,
Object
>
getFenghuangCommentCount
(
String
url
,
Proxy
proxy
)
{
url
=
fenghuangCommentAnalysis
.
getdocUrl
(
url
);
url
=
fenghuangCommentAnalysis
.
getdocUrl
(
url
,
proxy
);
if
(
url
==
null
)
{
if
(
url
==
null
)
{
return
null
;
return
null
;
}
}
Map
<
String
,
Object
>
map
=
fenghuangCommentAnalysis
.
getFenghuangCommentCount
(
url
);
Map
<
String
,
Object
>
map
=
fenghuangCommentAnalysis
.
getFenghuangCommentCount
(
url
,
proxy
);
return
map
;
return
map
;
}
}
...
@@ -104,7 +104,7 @@ public class Fenghuang {
...
@@ -104,7 +104,7 @@ public class Fenghuang {
* @param word
* @param word
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getFenghuangByWord
(
String
word
)
{
public
static
List
<
Map
<
String
,
Object
>>
getFenghuangByWord
(
String
word
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
int
i
=
1
;
int
i
=
1
;
try
{
try
{
...
@@ -112,7 +112,7 @@ public class Fenghuang {
...
@@ -112,7 +112,7 @@ public class Fenghuang {
try
{
try
{
String
url
=
"http://search.ifeng.com/sofeng/search.action?q="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&c=1&p="
;
String
url
=
"http://search.ifeng.com/sofeng/search.action?q="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&c=1&p="
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangWordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangWordHeaderMap
(
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
i
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
i
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
lists
=
fenghuangByWordAnalysis
.
getFenghuangByWord
(
result
);
List
<
Map
<
String
,
Object
>>
lists
=
fenghuangByWordAnalysis
.
getFenghuangByWord
(
result
);
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
break
;
break
;
...
...
src/main/java/com/zhiwei/parse/Meipai.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
...
@@ -23,7 +24,7 @@ public class Meipai {
...
@@ -23,7 +24,7 @@ public class Meipai {
* @param word
* @param word
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getMeipaiByWordData
(
String
word
)
{
public
static
List
<
Map
<
String
,
Object
>>
getMeipaiByWordData
(
String
word
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
String
url
=
"http://www.meipai.com/search/mv?q="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
);
String
url
=
"http://www.meipai.com/search/mv?q="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
);
...
@@ -31,13 +32,13 @@ public class Meipai {
...
@@ -31,13 +32,13 @@ public class Meipai {
int
i
=
1
;
int
i
=
1
;
while
(
true
)
{
while
(
true
)
{
ZhiWeiTools
.
sleep
(
5000
);
ZhiWeiTools
.
sleep
(
5000
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
"&page="
+
i
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
"&page="
+
i
,
proxy
,
headerMap
);
List
<
String
>
urlList
=
meipaiByWordAnalysis
.
getURl
(
result
);
List
<
String
>
urlList
=
meipaiByWordAnalysis
.
getURl
(
result
);
if
(
urlList
.
size
()
<
1
)
{
if
(
urlList
.
size
()
<
1
)
{
break
;
break
;
}
}
for
(
String
newurl
:
urlList
)
{
for
(
String
newurl
:
urlList
)
{
Map
<
String
,
Object
>
map
=
meipaiByWordAnalysis
.
getMeipaiData
(
headerMap
,
newurl
);
Map
<
String
,
Object
>
map
=
meipaiByWordAnalysis
.
getMeipaiData
(
headerMap
,
newurl
,
proxy
);
if
(
map
!=
null
)
{
if
(
map
!=
null
)
{
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
...
...
src/main/java/com/zhiwei/parse/Miaopai.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.Map
;
import
org.jsoup.Jsoup
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HeadGet
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
public
class
Miaopai
{
public
class
Miaopai
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Miaopai
.
class
);
/**
/**
*
*
...
@@ -20,9 +18,9 @@ public class Miaopai {
...
@@ -20,9 +18,9 @@ public class Miaopai {
* @param url
* @param url
* @return
* @return
*/
*/
public
static
Map
<
String
,
Object
>
getMiaopaiDataByURL
(
String
url
)
{
public
static
Map
<
String
,
Object
>
getMiaopaiDataByURL
(
String
url
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getMiaoPaiByURlHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getMiaoPaiByURlHeaderMap
(
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
Document
doc
=
Jsoup
.
parse
(
result
);
Document
doc
=
Jsoup
.
parse
(
result
);
String
time
=
doc
.
select
(
"div.personalData > p.personalDataT > span:nth-child(1)"
).
text
();
String
time
=
doc
.
select
(
"div.personalData > p.personalDataT > span:nth-child(1)"
).
text
();
...
...
src/main/java/com/zhiwei/parse/PearVideo.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.io.UnsupportedEncodingException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
...
@@ -24,13 +25,13 @@ public class PearVideo {
...
@@ -24,13 +25,13 @@ public class PearVideo {
* @param word
* @param word
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getPearVideoData
(
String
word
)
{
public
static
List
<
Map
<
String
,
Object
>>
getPearVideoData
(
String
word
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getPearVideoByWordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getPearVideoByWordHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
for
(
int
i
=
0
;
i
<=
9000
;
i
+=
10
)
{
for
(
int
i
=
0
;
i
<=
9000
;
i
+=
10
)
{
String
url
=
"http://www.pearvideo.com/search_loading.jsp?start="
+
i
+
"&k="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
);
String
url
=
"http://www.pearvideo.com/search_loading.jsp?start="
+
i
+
"&k="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
dataList1
=
pearVideoByWordAnalysis
.
getPearVideoData
(
result
);
List
<
Map
<
String
,
Object
>>
dataList1
=
pearVideoByWordAnalysis
.
getPearVideoData
(
result
);
if
(
dataList1
!=
null
&&
dataList1
.
size
()
>
0
)
{
if
(
dataList1
!=
null
&&
dataList1
.
size
()
>
0
)
{
dataList
.
addAll
(
dataList1
);
dataList
.
addAll
(
dataList1
);
...
...
src/main/java/com/zhiwei/parse/QQKB.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -12,7 +13,6 @@ import com.zhiwei.httpclient.HeadGet;
...
@@ -12,7 +13,6 @@ import com.zhiwei.httpclient.HeadGet;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.httpclient.HttpClient
;
import
com.zhiwei.parse.analysis.QQKBAccountAnalysis
;
import
com.zhiwei.parse.analysis.QQKBAccountAnalysis
;
import
com.zhiwei.parse.analysis.QQKBCommentAnalysis
;
import
com.zhiwei.parse.analysis.QQKBCommentAnalysis
;
import
com.zhiwei.zhiweiTools.httpClient.HttpClientTemplateOK
;
import
com.zhiwei.zhiweiTools.tools.ZhiWeiTools
;
import
com.zhiwei.zhiweiTools.tools.ZhiWeiTools
;
public
class
QQKB
{
public
class
QQKB
{
...
@@ -27,7 +27,7 @@ public class QQKB {
...
@@ -27,7 +27,7 @@ public class QQKB {
* @param cookie
* @param cookie
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getQQAccountData
(
String
child
,
String
cookie
)
{
public
static
List
<
Map
<
String
,
Object
>>
getQQAccountData
(
String
child
,
String
cookie
,
Proxy
proxy
)
{
String
url
=
"http://r.cnews.qq.com/getSubNewsIndex"
;
String
url
=
"http://r.cnews.qq.com/getSubNewsIndex"
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQAccountHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQAccountHeaderMap
(
cookie
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQAccountOneParamMap
(
child
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQAccountOneParamMap
(
child
);
...
@@ -36,7 +36,7 @@ public class QQKB {
...
@@ -36,7 +36,7 @@ public class QQKB {
String
result
=
""
;
String
result
=
""
;
List
<
String
>
idsList
=
new
ArrayList
<
String
>();
List
<
String
>
idsList
=
new
ArrayList
<
String
>();
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
result
=
HttpClient
.
executeHttpRequestPost
(
url
,
headerMap
,
paramMap
);
result
=
HttpClient
.
executeHttpRequestPost
(
url
,
proxy
,
headerMap
,
paramMap
);
idsList
=
qqAccountAnalysis
.
getQQAllIds
(
result
);
idsList
=
qqAccountAnalysis
.
getQQAllIds
(
result
);
if
(
idsList
.
size
()
>
1
)
{
if
(
idsList
.
size
()
>
1
)
{
break
;
break
;
...
@@ -57,7 +57,7 @@ public class QQKB {
...
@@ -57,7 +57,7 @@ public class QQKB {
ZhiWeiTools
.
sleep
(
7000
);
ZhiWeiTools
.
sleep
(
7000
);
paramMap
.
clear
();
paramMap
.
clear
();
paramMap
=
HeadGet
.
getQQAccountOtherParamMap
(
ids
);
paramMap
=
HeadGet
.
getQQAccountOtherParamMap
(
ids
);
result
=
HttpClient
.
executeHttpRequestPost
(
url
,
headerMap
,
paramMap
);
result
=
HttpClient
.
executeHttpRequestPost
(
url
,
proxy
,
headerMap
,
paramMap
);
List
<
Map
<
String
,
Object
>>
list
=
qqAccountAnalysis
.
analysisQQAccountData
(
result
);
List
<
Map
<
String
,
Object
>>
list
=
qqAccountAnalysis
.
analysisQQAccountData
(
result
);
ids
=
""
;
ids
=
""
;
i
=
0
;
i
=
0
;
...
@@ -79,7 +79,7 @@ public class QQKB {
...
@@ -79,7 +79,7 @@ public class QQKB {
ZhiWeiTools
.
sleep
(
8000
);
ZhiWeiTools
.
sleep
(
8000
);
paramMap
.
clear
();
paramMap
.
clear
();
paramMap
=
HeadGet
.
getQQAccountOtherParamMap
(
ids
);
paramMap
=
HeadGet
.
getQQAccountOtherParamMap
(
ids
);
result
=
HttpClient
.
executeHttpRequestPost
(
url
,
headerMap
,
paramMap
);
result
=
HttpClient
.
executeHttpRequestPost
(
url
,
proxy
,
headerMap
,
paramMap
);
List
<
Map
<
String
,
Object
>>
list
=
qqAccountAnalysis
.
analysisQQAccountData
(
result
);
List
<
Map
<
String
,
Object
>>
list
=
qqAccountAnalysis
.
analysisQQAccountData
(
result
);
if
(
list
!=
null
)
{
if
(
list
!=
null
)
{
dataList
.
addAll
(
list
);
dataList
.
addAll
(
list
);
...
@@ -102,9 +102,9 @@ public class QQKB {
...
@@ -102,9 +102,9 @@ public class QQKB {
* @param article_id
* @param article_id
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getQQKBCommentData
(
String
url
)
{
public
static
List
<
Map
<
String
,
Object
>>
getQQKBCommentData
(
String
url
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
String
comment_id
=
getCid
(
url
);
String
comment_id
=
getCid
(
url
,
proxy
);
String
article_id
=
url
.
split
(
"/"
)[
4
];
String
article_id
=
url
.
split
(
"/"
)[
4
];
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
null
);
try
{
try
{
...
@@ -112,10 +112,9 @@ public class QQKB {
...
@@ -112,10 +112,9 @@ public class QQKB {
int
i
=
1
;
int
i
=
1
;
while
(
true
)
{
while
(
true
)
{
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.cnews.qq.com/getQQNewsComment"
,
headerMap
,
paramMap
);
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.cnews.qq.com/getQQNewsComment"
,
proxy
,
headerMap
,
paramMap
);
// System.out.println(result);
paramMap
.
clear
();
paramMap
.
clear
();
List
<
Map
<
String
,
Object
>>
lists
=
qqkbCommentAnalysis
.
getCommentData
(
result
,
null
,
comment_id
,
article_id
);
List
<
Map
<
String
,
Object
>>
lists
=
qqkbCommentAnalysis
.
getCommentData
(
result
,
null
,
comment_id
,
article_id
,
proxy
);
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
break
;
break
;
}
}
...
@@ -138,15 +137,15 @@ public class QQKB {
...
@@ -138,15 +137,15 @@ public class QQKB {
* @param url
* @param url
* @return
* @return
*/
*/
private
static
String
getCid
(
String
url
)
{
private
static
String
getCid
(
String
url
,
Proxy
proxy
)
{
try
{
try
{
if
(
url
.
contains
(
"view.inews.qq.com"
)){
if
(
url
.
contains
(
"view.inews.qq.com"
)){
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
null
);
result
=
result
.
split
(
"window.__initData = "
)[
1
].
split
(
";</script>"
)[
0
];
result
=
result
.
split
(
"window.__initData = "
)[
1
].
split
(
";</script>"
)[
0
];
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
return
json
.
getJSONObject
(
"content"
).
getString
(
"cid"
);
return
json
.
getJSONObject
(
"content"
).
getString
(
"cid"
);
}
else
if
(
url
.
contains
(
"kuaibao.qq.com"
))
{
}
else
if
(
url
.
contains
(
"kuaibao.qq.com"
))
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
null
);
String
cid
=
result
.
split
(
"var commentId = "
)[
1
].
split
(
";"
)[
0
];
String
cid
=
result
.
split
(
"var commentId = "
)[
1
].
split
(
";"
)[
0
];
return
cid
.
substring
(
1
,
cid
.
length
()-
1
);
return
cid
.
substring
(
1
,
cid
.
length
()-
1
);
}
}
...
@@ -164,13 +163,13 @@ public class QQKB {
...
@@ -164,13 +163,13 @@ public class QQKB {
* @param article_id
* @param article_id
* @return
* @return
*/
*/
public
static
int
getCommentCount
(
String
cookie
,
String
url
)
{
public
static
int
getCommentCount
(
String
cookie
,
String
url
,
Proxy
proxy
)
{
String
comment_id
=
getCid
(
url
);
String
comment_id
=
getCid
(
url
,
proxy
);
String
article_id
=
url
.
split
(
"/"
)[
4
];
String
article_id
=
url
.
split
(
"/"
)[
4
];
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
cookie
);
try
{
try
{
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQKBCommentParamMap
(
comment_id
,
article_id
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQKBCommentParamMap
(
comment_id
,
article_id
);
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.cnews.qq.com/getQQNewsComment"
,
headerMap
,
paramMap
);
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.cnews.qq.com/getQQNewsComment"
,
proxy
,
headerMap
,
paramMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
return
json
.
getJSONObject
(
"comments"
).
getInteger
(
"count"
);
return
json
.
getJSONObject
(
"comments"
).
getInteger
(
"count"
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/main/java/com/zhiwei/parse/Soku.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.io.UnsupportedEncodingException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
...
@@ -25,13 +26,13 @@ public class Soku {
...
@@ -25,13 +26,13 @@ public class Soku {
* @param type
* @param type
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getSoKuByWordData
(
String
word
,
String
type
)
{
public
static
List
<
Map
<
String
,
Object
>>
getSoKuByWordData
(
String
word
,
String
type
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSoKuByWordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSoKuByWordHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
for
(
int
i
=
1
;
i
<
14
;
i
++)
{
for
(
int
i
=
1
;
i
<
14
;
i
++)
{
String
url
=
"http://www.soku.com/search_video_ajax/q_"
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"_orderby_3_cateid_"
+
type
+
"_limitdate_365?site=14&_lg=20&page="
;
String
url
=
"http://www.soku.com/search_video_ajax/q_"
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"_orderby_3_cateid_"
+
type
+
"_limitdate_365?site=14&_lg=20&page="
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
i
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
i
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
lists
=
soKuByWordAnalysis
.
getSoKuData
(
result
);
List
<
Map
<
String
,
Object
>>
lists
=
soKuByWordAnalysis
.
getSoKuData
(
result
);
if
(
lists
!=
null
&&
lists
.
size
()
>
0
)
{
if
(
lists
!=
null
&&
lists
.
size
()
>
0
)
{
dataList
.
addAll
(
lists
);
dataList
.
addAll
(
lists
);
...
...
src/main/java/com/zhiwei/parse/Souhu.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
...
@@ -29,11 +32,11 @@ public class Souhu {
...
@@ -29,11 +32,11 @@ public class Souhu {
* @param url
* @param url
* @return
* @return
*/
*/
public
static
int
getSouhuCommentCount
(
String
url
)
{
public
static
int
getSouhuCommentCount
(
String
url
,
Proxy
proxy
)
{
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
);
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
);
int
i
;
int
i
;
try
{
try
{
i
=
souhuCommentAnalysis
.
getSouhuCommentCount
(
newurl
);
i
=
souhuCommentAnalysis
.
getSouhuCommentCount
(
newurl
,
proxy
);
return
i
;
return
i
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"搜狐获取评论数出错了"
,
e
.
getMessage
());
logger
.
error
(
"搜狐获取评论数出错了"
,
e
.
getMessage
());
...
@@ -51,8 +54,10 @@ public class Souhu {
...
@@ -51,8 +54,10 @@ public class Souhu {
* @param isCulling 是否采集精选
* @param isCulling 是否采集精选
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getSouHuAccountData
(
String
xpt
,
String
startTime
,
boolean
isCulling
)
{
public
static
List
<
Map
<
String
,
Object
>>
getSouHuAccountData
(
String
xpt
,
String
startTime
,
boolean
isCulling
,
Proxy
proxy
)
{
int
i
=
1
;
int
i
=
1
;
String
name
=
getName
(
xpt
,
proxy
);
ZhiWeiTools
.
sleep
(
2000
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuAccountHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuAccountHeaderMap
(
null
);
boolean
f
=
true
;
boolean
f
=
true
;
...
@@ -64,7 +69,7 @@ public class Souhu {
...
@@ -64,7 +69,7 @@ public class Souhu {
url
=
url
+
"&categoryId=-1"
;
url
=
url
+
"&categoryId=-1"
;
}
}
try
{
try
{
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
...
@@ -72,7 +77,7 @@ public class Souhu {
...
@@ -72,7 +77,7 @@ public class Souhu {
result
=
result
.
substring
(
1
,
result
.
length
()-
1
);
result
=
result
.
substring
(
1
,
result
.
length
()-
1
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"data"
);
JSONArray
jsonArray
=
json
.
getJSONArray
(
"data"
);
List
<
Map
<
String
,
Object
>>
dataList1
=
souhuAccountAnalysis
.
analysisData
(
jsonArray
);
List
<
Map
<
String
,
Object
>>
dataList1
=
souhuAccountAnalysis
.
analysisData
(
jsonArray
,
name
);
if
(
jsonArray
.
size
()
<
1
)
{
if
(
jsonArray
.
size
()
<
1
)
{
break
;
break
;
}
}
...
@@ -102,6 +107,20 @@ public class Souhu {
...
@@ -102,6 +107,20 @@ public class Souhu {
}
}
/**
 * Fetches the Sohu account profile page for the given {@code xpt} and
 * extracts the text of the {@code p#ff} element, which is used as the
 * account name.
 *
 * @param xpt   account identifier appended to the profile URL as the
 *              {@code xpt} query parameter (sent as-is, not URL-encoded)
 * @param proxy proxy handed through to {@code HttpClient.executeHttpRequestGet}
 * @return the text of {@code p#ff}; an empty string when the element is
 *         absent from the page; {@code null} when the request or the
 *         parsing fails
 */
private static String getName(String xpt, Proxy proxy) {
    Map<String, String> headerMap = HeadGet.getSouhuAccountHeaderMap(null);
    try {
        String result = HttpClient.executeHttpRequestGet(
                "http://mp.sohu.com/profile?xpt=" + xpt, proxy, headerMap);
        Document doc = Jsoup.parse(result);
        return doc.select("p#ff").text();
    } catch (Exception e) {
        // Best-effort lookup: callers still receive null on failure, but the
        // cause is logged instead of being silently discarded.
        logger.error("搜狐获取账号名称出错了, xpt={}", xpt, e);
        return null;
    }
}
/**
/**
*
*
* @Description 传入搜狐文章链接和cookie 可获取此文章所有评论
* @Description 传入搜狐文章链接和cookie 可获取此文章所有评论
...
@@ -109,14 +128,14 @@ public class Souhu {
...
@@ -109,14 +128,14 @@ public class Souhu {
* @param cookie
* @param cookie
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getSouhuCommentData
(
String
url
)
{
public
static
List
<
Map
<
String
,
Object
>>
getSouhuCommentData
(
String
url
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
int
j
=
1
;
int
j
=
1
;
try
{
try
{
while
(
true
)
{
while
(
true
)
{
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
)
+
"&page_no="
+
j
;
String
newurl
=
souhuCommentAnalysis
.
getSouhuURL
(
url
)
+
"&page_no="
+
j
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
newurl
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
newurl
,
proxy
,
headerMap
);
System
.
out
.
println
(
newurl
);
System
.
out
.
println
(
newurl
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"jsonObject"
).
getJSONArray
(
"comments"
);
JSONArray
jsonArry
=
json
.
getJSONObject
(
"jsonObject"
).
getJSONArray
(
"comments"
);
...
...
src/main/java/com/zhiwei/parse/TXNews.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -18,11 +19,11 @@ public class TXNews {
...
@@ -18,11 +19,11 @@ public class TXNews {
private
static
TXNewsByWordAnalysis
txNewsByWordAnalysis
=
new
TXNewsByWordAnalysis
();
private
static
TXNewsByWordAnalysis
txNewsByWordAnalysis
=
new
TXNewsByWordAnalysis
();
public
static
boolean
hasMore
=
true
;
public
static
boolean
hasMore
=
true
;
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
word
)
{
public
static
List
<
Map
<
String
,
Object
>>
getData
(
String
word
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getTxNewspage1HeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getTxNewspage1HeaderMap
(
null
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getTxNewspage1ParamMap
(
word
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getTxNewspage1ParamMap
(
word
);
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.inews.qq.com/search?appver=11.2.1_qqnews_5.5.60&devid=6D33F35F-880D-42A6-A23F-881BEC6960EC"
,
headerMap
,
paramMap
);
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.inews.qq.com/search?appver=11.2.1_qqnews_5.5.60&devid=6D33F35F-880D-42A6-A23F-881BEC6960EC"
,
proxy
,
headerMap
,
paramMap
);
List
<
Map
<
String
,
Object
>>
dList
=
txNewsByWordAnalysis
.
getData
(
result
);
List
<
Map
<
String
,
Object
>>
dList
=
txNewsByWordAnalysis
.
getData
(
result
);
dataList
.
addAll
(
dList
);
dataList
.
addAll
(
dList
);
int
page
=
2
;
int
page
=
2
;
...
@@ -32,7 +33,7 @@ public class TXNews {
...
@@ -32,7 +33,7 @@ public class TXNews {
try
{
try
{
ZhiWeiTools
.
sleep
(
5000
);
ZhiWeiTools
.
sleep
(
5000
);
Map
<
String
,
Object
>
param2Map
=
HeadGet
.
getTxNewspagemoreParamMap
(
word
,
page
);
Map
<
String
,
Object
>
param2Map
=
HeadGet
.
getTxNewspagemoreParamMap
(
word
,
page
);
String
result2
=
HttpClient
.
executeHttpRequestPost
(
"http://r.inews.qq.com/searchMore?appver=11.2.1_qqnews_5.5.60&devid=6D33F35F-880D-42A6-A23F-881BEC6960EC"
,
header2Map
,
param2Map
);
String
result2
=
HttpClient
.
executeHttpRequestPost
(
"http://r.inews.qq.com/searchMore?appver=11.2.1_qqnews_5.5.60&devid=6D33F35F-880D-42A6-A23F-881BEC6960EC"
,
proxy
,
header2Map
,
param2Map
);
page
++;
page
++;
List
<
Map
<
String
,
Object
>>
dList2
=
txNewsByWordAnalysis
.
getData
(
result2
);
List
<
Map
<
String
,
Object
>>
dList2
=
txNewsByWordAnalysis
.
getData
(
result2
);
dataList
.
addAll
(
dList2
);
dataList
.
addAll
(
dList2
);
...
...
src/main/java/com/zhiwei/parse/Wangyi.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -23,7 +24,7 @@ public class Wangyi {
...
@@ -23,7 +24,7 @@ public class Wangyi {
* @param id
* @param id
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getWangyiCommentData
(
String
id
)
{
public
static
List
<
Map
<
String
,
Object
>>
getWangyiCommentData
(
String
id
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getWangyiCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getWangyiCommentHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
int
i
=
0
;
int
i
=
0
;
...
@@ -31,7 +32,7 @@ public class Wangyi {
...
@@ -31,7 +32,7 @@ public class Wangyi {
try
{
try
{
while
(
true
)
{
while
(
true
)
{
String
url
=
"http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"
+
id
+
"/comments/newList?offset="
+
i
+
"&limit=30"
;
String
url
=
"http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"
+
id
+
"/comments/newList?offset="
+
i
+
"&limit=30"
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
List
<
Map
<
String
,
Object
>>
lists
=
wangyiCommentAnalysis
.
getWangyiCommentData
(
result
,
idList
);
List
<
Map
<
String
,
Object
>>
lists
=
wangyiCommentAnalysis
.
getWangyiCommentData
(
result
,
idList
);
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
if
(
lists
==
null
||
lists
.
size
()
<
1
)
{
...
@@ -57,10 +58,10 @@ public class Wangyi {
...
@@ -57,10 +58,10 @@ public class Wangyi {
* @param id
* @param id
* @return
* @return
*/
*/
public
static
int
getWangyiCommentCount
(
String
id
)
{
public
static
int
getWangyiCommentCount
(
String
id
,
Proxy
proxy
)
{
String
url
=
"http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"
+
id
;
String
url
=
"http://comment.dy.163.com/api/v1/products/a2869674571f77b5a0867c3d71db5856/threads/"
+
id
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getWangyiCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getWangyiCommentHeaderMap
(
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
return
json
.
getInteger
(
"tcount"
);
return
json
.
getInteger
(
"tcount"
);
}
}
...
...
src/main/java/com/zhiwei/parse/XiGua.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.io.UnsupportedEncodingException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
...
@@ -28,7 +29,7 @@ public class XiGua {
...
@@ -28,7 +29,7 @@ public class XiGua {
* @param word
* @param word
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getXiguaVideoByWordData
(
String
word
)
{
public
static
List
<
Map
<
String
,
Object
>>
getXiguaVideoByWordData
(
String
word
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getXiguaByWordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getXiguaByWordHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
int
i
=
0
;
int
i
=
0
;
...
@@ -37,7 +38,7 @@ public class XiGua {
...
@@ -37,7 +38,7 @@ public class XiGua {
while
(
true
)
{
while
(
true
)
{
String
url
=
"https://www.ixigua.com/search_content/?format=json&autoload=true&count=20&keyword="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&cur_tab=1&offset="
;
String
url
=
"https://www.ixigua.com/search_content/?format=json&autoload=true&count=20&keyword="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&cur_tab=1&offset="
;
System
.
out
.
println
(
url
+
i
);
System
.
out
.
println
(
url
+
i
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
i
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
i
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
lists
=
xiguaByWordAnalysis
.
getXiguaData
(
result
);
List
<
Map
<
String
,
Object
>>
lists
=
xiguaByWordAnalysis
.
getXiguaData
(
result
);
if
(
lists
!=
null
&&
lists
.
size
()
>
0
)
{
if
(
lists
!=
null
&&
lists
.
size
()
>
0
)
{
dataList
.
addAll
(
lists
);
dataList
.
addAll
(
lists
);
...
@@ -67,7 +68,7 @@ public class XiGua {
...
@@ -67,7 +68,7 @@ public class XiGua {
* @param url
* @param url
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getXiguaAccountData
(
String
url
,
String
startTime
)
{
public
static
List
<
Map
<
String
,
Object
>>
getXiguaAccountData
(
String
url
,
String
startTime
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getXiguaByWordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getXiguaByWordHeaderMap
(
null
);
String
time
=
"0"
;
String
time
=
"0"
;
...
@@ -76,7 +77,7 @@ public class XiGua {
...
@@ -76,7 +77,7 @@ public class XiGua {
while
(
true
)
{
while
(
true
)
{
String
uid
=
xiguaAccountAnalysis
.
getUid
(
url
);
String
uid
=
xiguaAccountAnalysis
.
getUid
(
url
);
String
newurl
=
"https://www.ixigua.com/c/user/article/?user_id="
+
uid
+
"&max_behot_time="
+
time
+
"&count=20"
;
String
newurl
=
"https://www.ixigua.com/c/user/article/?user_id="
+
uid
+
"&max_behot_time="
+
time
+
"&count=20"
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
newurl
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
newurl
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
time
=
json
.
getJSONObject
(
"next"
).
getString
(
"max_behot_time"
);
time
=
json
.
getJSONObject
(
"next"
).
getString
(
"max_behot_time"
);
List
<
Map
<
String
,
Object
>>
list
=
xiguaAccountAnalysis
.
getXiguaAccountData
(
json
,
time1
);
List
<
Map
<
String
,
Object
>>
list
=
xiguaAccountAnalysis
.
getXiguaAccountData
(
json
,
time1
);
...
...
src/main/java/com/zhiwei/parse/Xiaomi.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.io.UnsupportedEncodingException
;
import
java.io.UnsupportedEncodingException
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
...
@@ -18,14 +19,14 @@ public class Xiaomi {
...
@@ -18,14 +19,14 @@ public class Xiaomi {
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Xiaomi
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
Xiaomi
.
class
);
private
static
XiaomiShequByWordAnalysis
xiaomiShequByWordAnalysis
=
new
XiaomiShequByWordAnalysis
();
private
static
XiaomiShequByWordAnalysis
xiaomiShequByWordAnalysis
=
new
XiaomiShequByWordAnalysis
();
public
static
List
<
Map
<
String
,
Object
>>
getXiaomiByWordData
(
String
word
)
{
public
static
List
<
Map
<
String
,
Object
>>
getXiaomiByWordData
(
String
word
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getxiaomiShequByWordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getxiaomiShequByWordHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
int
i
=
1
;
int
i
=
1
;
try
{
try
{
while
(
true
)
{
while
(
true
)
{
String
url
=
"http://so.bbs.xiaomi.cn/?q="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&p="
+
i
+
"&fid=457&time=63072000&order=1"
;
String
url
=
"http://so.bbs.xiaomi.cn/?q="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&p="
+
i
+
"&fid=457&time=63072000&order=1"
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
dataList
=
xiaomiShequByWordAnalysis
.
getdata
(
result
);
List
<
Map
<
String
,
Object
>>
dataList
=
xiaomiShequByWordAnalysis
.
getdata
(
result
);
if
(
dataList
==
null
||
dataList
.
size
()
<
1
)
{
if
(
dataList
==
null
||
dataList
.
size
()
<
1
)
{
break
;
break
;
...
@@ -38,6 +39,7 @@ public class Xiaomi {
...
@@ -38,6 +39,7 @@ public class Xiaomi {
return
bodyList
;
return
bodyList
;
}
catch
(
UnsupportedEncodingException
e
)
{
}
catch
(
UnsupportedEncodingException
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
logger
.
info
(
"数据采集出错:{}"
,
e
.
getMessage
());
return
bodyList
;
return
bodyList
;
}
}
}
}
...
...
src/main/java/com/zhiwei/parse/Yidianzixun.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
;
package
com
.
zhiwei
.
parse
;
import
java.net.Proxy
;
import
java.net.URLEncoder
;
import
java.net.URLEncoder
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.List
;
...
@@ -30,7 +31,7 @@ public class Yidianzixun {
...
@@ -30,7 +31,7 @@ public class Yidianzixun {
* @param startTime
* @param startTime
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getYidianzixunAccountData
(
String
channelid
,
String
startTime
)
{
public
static
List
<
Map
<
String
,
Object
>>
getYidianzixunAccountData
(
String
channelid
,
String
startTime
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getYidianzixunAccountHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getYidianzixunAccountHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
int
j
=
0
;
int
j
=
0
;
...
@@ -38,7 +39,7 @@ public class Yidianzixun {
...
@@ -38,7 +39,7 @@ public class Yidianzixun {
try
{
try
{
while
(
f
)
{
while
(
f
)
{
String
url
=
"http://www.yidianzixun.com/home/q/news_list_for_channel?channel_id="
+
channelid
+
"&cstart="
+
j
+
"&cend="
+(
j
+
10
);
String
url
=
"http://www.yidianzixun.com/home/q/news_list_for_channel?channel_id="
+
channelid
+
"&cstart="
+
j
+
"&cend="
+(
j
+
10
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"result"
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"result"
);
if
(
jsonArry
.
size
()
==
0
)
{
if
(
jsonArry
.
size
()
==
0
)
{
...
@@ -74,14 +75,14 @@ public class Yidianzixun {
...
@@ -74,14 +75,14 @@ public class Yidianzixun {
* @param cookie
* @param cookie
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getYidianzixunCommentData
(
String
url
)
{
public
static
List
<
Map
<
String
,
Object
>>
getYidianzixunCommentData
(
String
url
,
Proxy
proxy
)
{
url
=
yidianzixunCommentAnalysis
.
analysisURL
(
url
);
url
=
yidianzixunCommentAnalysis
.
analysisURL
(
url
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getYidianzixunCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getYidianzixunCommentHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
String
urlb
=
url
;
String
urlb
=
url
;
while
(
true
)
{
while
(
true
)
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"comments"
);
JSONArray
jsonArry
=
json
.
getJSONArray
(
"comments"
);
String
comment_id
=
""
;
String
comment_id
=
""
;
...
@@ -93,7 +94,7 @@ public class Yidianzixun {
...
@@ -93,7 +94,7 @@ public class Yidianzixun {
}
}
if
(
data
.
toString
().
contains
(
"replies"
))
{
if
(
data
.
toString
().
contains
(
"replies"
))
{
ZhiWeiTools
.
sleep
(
2000
);
ZhiWeiTools
.
sleep
(
2000
);
List
<
Map
<
String
,
Object
>>
replyList
=
yidianzixunCommentAnalysis
.
getrepliesData
(
map
,
null
,
headerMap
);
List
<
Map
<
String
,
Object
>>
replyList
=
yidianzixunCommentAnalysis
.
getrepliesData
(
map
,
null
,
headerMap
,
proxy
);
if
(
replyList
!=
null
&&
replyList
.
size
()
>
0
)
{
if
(
replyList
!=
null
&&
replyList
.
size
()
>
0
)
{
dataList
.
addAll
(
replyList
);
dataList
.
addAll
(
replyList
);
}
}
...
@@ -119,14 +120,14 @@ public class Yidianzixun {
...
@@ -119,14 +120,14 @@ public class Yidianzixun {
* @param word
* @param word
* @return
* @return
*/
*/
public
static
List
<
Map
<
String
,
Object
>>
getYidianzixunDataByWord
(
String
word
)
{
public
static
List
<
Map
<
String
,
Object
>>
getYidianzixunDataByWord
(
String
word
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
int
i
=
0
;
int
i
=
0
;
while
(
true
)
{
while
(
true
)
{
String
url
=
"http://www.yidianzixun.com/home/q/news_list_for_keyword?display="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&cstart="
+
i
+
"&cend="
+(
i
+
10
)+
"&word_type=token"
;
String
url
=
"http://www.yidianzixun.com/home/q/news_list_for_keyword?display="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&cstart="
+
i
+
"&cend="
+(
i
+
10
)+
"&word_type=token"
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getYidianzixunWordHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getYidianzixunWordHeaderMap
(
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
list
=
yidianzixunByWordAnalysis
.
getOnePageData
(
result
);
List
<
Map
<
String
,
Object
>>
list
=
yidianzixunByWordAnalysis
.
getOnePageData
(
result
);
if
(
list
==
null
||
list
.
size
()
<
1
)
{
if
(
list
==
null
||
list
.
size
()
<
1
)
{
break
;
break
;
...
...
src/main/java/com/zhiwei/parse/analysis/AiqiyiByWordAnalysis.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
...
@@ -43,10 +44,10 @@ public class AiqiyiByWordAnalysis {
...
@@ -43,10 +44,10 @@ public class AiqiyiByWordAnalysis {
}
}
}
}
public
Map
<
String
,
Object
>
getAiqiyiData
(
String
url
,
Map
<
String
,
String
>
headerMap
)
{
public
Map
<
String
,
Object
>
getAiqiyiData
(
String
url
,
Map
<
String
,
String
>
headerMap
,
Proxy
proxy
)
{
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
try
{
try
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
Document
doc
=
Jsoup
.
parse
(
result
);
Document
doc
=
Jsoup
.
parse
(
result
);
String
time
=
doc
.
select
(
"#widget-vshort-ptime"
).
text
();
String
time
=
doc
.
select
(
"#widget-vshort-ptime"
).
text
();
if
(!
time
.
contains
(
"2017"
))
{
if
(!
time
.
contains
(
"2017"
))
{
...
@@ -61,7 +62,7 @@ public class AiqiyiByWordAnalysis {
...
@@ -61,7 +62,7 @@ public class AiqiyiByWordAnalysis {
String
title
=
doc
.
select
(
"#widget-videotitle"
).
attr
(
"title"
);
String
title
=
doc
.
select
(
"#widget-videotitle"
).
attr
(
"title"
);
String
id
=
result
.
split
(
" tvId: "
)[
1
].
split
(
","
)[
0
];
String
id
=
result
.
split
(
" tvId: "
)[
1
].
split
(
","
)[
0
];
ZhiWeiTools
.
sleep
(
2000
);
ZhiWeiTools
.
sleep
(
2000
);
int
count
=
getVideo_count
(
id
);
int
count
=
getVideo_count
(
id
,
proxy
);
dataMap
.
put
(
"count"
,
count
);
dataMap
.
put
(
"count"
,
count
);
dataMap
.
put
(
"title"
,
title
);
dataMap
.
put
(
"title"
,
title
);
System
.
out
.
println
(
dataMap
.
toString
());
System
.
out
.
println
(
dataMap
.
toString
());
...
@@ -73,11 +74,11 @@ public class AiqiyiByWordAnalysis {
...
@@ -73,11 +74,11 @@ public class AiqiyiByWordAnalysis {
}
}
public
int
getVideo_count
(
String
id
)
{
public
int
getVideo_count
(
String
id
,
Proxy
proxy
)
{
try
{
try
{
String
url
=
"http://cache.video.iqiyi.com/jp/pc/"
+
id
+
"/"
;
String
url
=
"http://cache.video.iqiyi.com/jp/pc/"
+
id
+
"/"
;
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getAiqiyiForCountHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getAiqiyiForCountHeaderMap
(
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
String
count
=
result
.
split
(
":"
)[
1
].
split
(
"\\}"
)[
0
];
String
count
=
result
.
split
(
":"
)[
1
].
split
(
"\\}"
)[
0
];
return
Integer
.
valueOf
(
count
);
return
Integer
.
valueOf
(
count
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/main/java/com/zhiwei/parse/analysis/BaijiaAccountAnalysis.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
...
@@ -83,11 +84,11 @@ public class BaijiaAccountAnalysis {
...
@@ -83,11 +84,11 @@ public class BaijiaAccountAnalysis {
}
}
}
}
public
String
getBaijiaContent
(
String
url
)
{
public
String
getBaijiaContent
(
String
url
,
Proxy
proxy
)
{
ZhiWeiTools
.
sleep
(
2000
);
ZhiWeiTools
.
sleep
(
2000
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getBaijiaAccountHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getBaijiaAccountHeaderMap
(
null
);
try
{
try
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
Document
document
=
Jsoup
.
parse
(
result
);
Document
document
=
Jsoup
.
parse
(
result
);
return
document
.
select
(
"section.news-content"
).
text
();
return
document
.
select
(
"section.news-content"
).
text
();
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
...
src/main/java/com/zhiwei/parse/analysis/DayuByWordAnalysis.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
...
@@ -8,8 +9,6 @@ import java.util.Map;
...
@@ -8,8 +9,6 @@ import java.util.Map;
import
java.util.regex.Matcher
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
java.util.regex.Pattern
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
...
@@ -23,7 +22,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
...
@@ -23,7 +22,7 @@ import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public
class
DayuByWordAnalysis
{
public
class
DayuByWordAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
DayuByWordAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
DayuByWordAnalysis
.
class
);
public
List
<
Map
<
String
,
Object
>>
getDayuByWordData
(
String
result
)
{
public
List
<
Map
<
String
,
Object
>>
getDayuByWordData
(
String
result
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
...
@@ -37,7 +36,7 @@ public class DayuByWordAnalysis {
...
@@ -37,7 +36,7 @@ public class DayuByWordAnalysis {
map
.
put
(
"time"
,
TimeParse
.
dateFormartString
(
new
Date
(
data
.
getLong
(
"publish_time"
)),
"yyyy-MM-dd HH:mm:ss"
));
map
.
put
(
"time"
,
TimeParse
.
dateFormartString
(
new
Date
(
data
.
getLong
(
"publish_time"
)),
"yyyy-MM-dd HH:mm:ss"
));
map
.
put
(
"id"
,
data
.
getString
(
"id"
));
map
.
put
(
"id"
,
data
.
getString
(
"id"
));
map
.
put
(
"source"
,
data
.
getString
(
"source_name"
).
replaceAll
(
"<.*?>"
,
""
));
map
.
put
(
"source"
,
data
.
getString
(
"source_name"
).
replaceAll
(
"<.*?>"
,
""
));
map
.
put
(
"content"
,
getContent
(
url
));
map
.
put
(
"content"
,
getContent
(
url
,
proxy
));
System
.
out
.
println
(
map
.
toString
());
System
.
out
.
println
(
map
.
toString
());
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
...
@@ -49,10 +48,10 @@ public class DayuByWordAnalysis {
...
@@ -49,10 +48,10 @@ public class DayuByWordAnalysis {
}
}
public
String
getContent
(
String
url
)
{
public
String
getContent
(
String
url
,
Proxy
proxy
)
{
ZhiWeiTools
.
sleep
(
2000
);
ZhiWeiTools
.
sleep
(
2000
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
Pattern
pat
=
Pattern
.
compile
(
"xissJsonData = (.*);"
);
Pattern
pat
=
Pattern
.
compile
(
"xissJsonData = (.*);"
);
Matcher
matcher
=
pat
.
matcher
(
result
);
Matcher
matcher
=
pat
.
matcher
(
result
);
try
{
try
{
...
...
src/main/java/com/zhiwei/parse/analysis/DayuCommentAnalysis.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
...
@@ -26,7 +27,7 @@ public class DayuCommentAnalysis {
...
@@ -26,7 +27,7 @@ public class DayuCommentAnalysis {
* @param articleId
* @param articleId
* @return
* @return
*/
*/
public
List
<
Map
<
String
,
Object
>>
getDayuCommentData
(
String
result
,
String
articleId
)
{
public
List
<
Map
<
String
,
Object
>>
getDayuCommentData
(
String
result
,
String
articleId
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
result
).
getJSONObject
(
"data"
).
getJSONObject
(
"comments_map"
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
).
getJSONObject
(
"data"
).
getJSONObject
(
"comments_map"
);
...
@@ -45,7 +46,7 @@ public class DayuCommentAnalysis {
...
@@ -45,7 +46,7 @@ public class DayuCommentAnalysis {
int
i
=
data
.
getInteger
(
"reply_cnt"
);
int
i
=
data
.
getInteger
(
"reply_cnt"
);
dataMap
.
put
(
"replay_count"
,
i
);
dataMap
.
put
(
"replay_count"
,
i
);
if
(
i
>
0
)
{
if
(
i
>
0
)
{
dataList
.
addAll
(
getReplayData
(
id
,
articleId
));
dataList
.
addAll
(
getReplayData
(
id
,
articleId
,
proxy
));
}
}
dataList
.
add
(
dataMap
);
dataList
.
add
(
dataMap
);
}
}
...
@@ -63,10 +64,10 @@ public class DayuCommentAnalysis {
...
@@ -63,10 +64,10 @@ public class DayuCommentAnalysis {
* @param articleId
* @param articleId
* @return
* @return
*/
*/
private
List
<
Map
<
String
,
Object
>>
getReplayData
(
String
id
,
String
articleId
)
{
private
List
<
Map
<
String
,
Object
>>
getReplayData
(
String
id
,
String
articleId
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getDayuCommentHeaderMap
(
null
);
String
url
=
"http://m.uczzd.cn/iflow/api/v2/cmt/detail/"
+
id
+
"/comments?articleId="
+
articleId
+
"&count=10&ts="
;
String
url
=
"http://m.uczzd.cn/iflow/api/v2/cmt/detail/"
+
id
+
"/comments?articleId="
+
articleId
+
"&count=10&ts="
;
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
"-1"
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
"-1"
,
proxy
,
headerMap
);
List
<
Map
<
String
,
Object
>>
data
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
data
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
String
>
timeList
=
new
ArrayList
<
String
>();
List
<
String
>
timeList
=
new
ArrayList
<
String
>();
while
(
true
)
{
while
(
true
)
{
...
@@ -79,7 +80,7 @@ public class DayuCommentAnalysis {
...
@@ -79,7 +80,7 @@ public class DayuCommentAnalysis {
if
(
time
==
0
)
{
if
(
time
==
0
)
{
break
;
break
;
}
}
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
time
,
headerMap
);
result
=
HttpClient
.
executeHttpRequestGet
(
url
+
time
,
proxy
,
headerMap
);
}
}
System
.
out
.
println
(
"=====================评论下回复获取数=="
+
data
.
size
());
System
.
out
.
println
(
"=====================评论下回复获取数=="
+
data
.
size
());
return
data
;
return
data
;
...
...
src/main/java/com/zhiwei/parse/analysis/FenghuangAccountAnalysis.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.
io.IOException
;
import
java.
net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
...
@@ -24,7 +24,7 @@ public class FenghuangAccountAnalysis {
...
@@ -24,7 +24,7 @@ public class FenghuangAccountAnalysis {
* @param result
* @param result
* @return
* @return
*/
*/
public
List
<
Map
<
String
,
Object
>>
getArticleData
(
String
url
,
String
startTime
)
{
public
List
<
Map
<
String
,
Object
>>
getArticleData
(
String
url
,
String
startTime
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
...
@@ -32,7 +32,7 @@ public class FenghuangAccountAnalysis {
...
@@ -32,7 +32,7 @@ public class FenghuangAccountAnalysis {
JSONArray
jsonArry
=
null
;
JSONArray
jsonArry
=
null
;
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
for
(
int
i
=
0
;
i
<
3
;
i
++)
{
try
{
try
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONObject
(
"feeds"
).
getJSONArray
(
"list"
);
jsonArry
=
json
.
getJSONObject
(
"data"
).
getJSONObject
(
"feeds"
).
getJSONArray
(
"list"
);
if
(
jsonArry
==
null
||
jsonArry
.
size
()
<
1
)
{
if
(
jsonArry
==
null
||
jsonArry
.
size
()
<
1
)
{
...
@@ -49,7 +49,7 @@ public class FenghuangAccountAnalysis {
...
@@ -49,7 +49,7 @@ public class FenghuangAccountAnalysis {
try
{
try
{
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
JSONObject
data
=
jsonArry
.
getJSONObject
(
i
);
String
articleurl
=
data
.
getString
(
"id"
);
String
articleurl
=
data
.
getString
(
"id"
);
String
articleResult
=
HttpClient
.
executeHttpRequestGet
(
articleurl
,
headerMap
);
String
articleResult
=
HttpClient
.
executeHttpRequestGet
(
articleurl
,
proxy
,
headerMap
);
Map
<
String
,
Object
>
dataMap
=
getArticle
(
articleResult
);
Map
<
String
,
Object
>
dataMap
=
getArticle
(
articleResult
);
ZhiWeiTools
.
sleep
(
1000
);
ZhiWeiTools
.
sleep
(
1000
);
if
(
dataMap
!=
null
)
{
if
(
dataMap
!=
null
)
{
...
...
src/main/java/com/zhiwei/parse/analysis/FenghuangCommentAnalysis.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
...
@@ -18,11 +19,11 @@ import com.zhiwei.zhiweiTools.timeParse.TimeParse;
...
@@ -18,11 +19,11 @@ import com.zhiwei.zhiweiTools.timeParse.TimeParse;
public
class
FenghuangCommentAnalysis
{
public
class
FenghuangCommentAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
FenghuangCommentAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
FenghuangCommentAnalysis
.
class
);
public
Map
<
String
,
Object
>
getFenghuangCommentCount
(
String
url
)
{
public
Map
<
String
,
Object
>
getFenghuangCommentCount
(
String
url
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangCommentHeaderMap
(
null
);
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
try
{
try
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
map
.
put
(
"real_count"
,
json
.
getInteger
(
"real_num"
));
map
.
put
(
"real_count"
,
json
.
getInteger
(
"real_num"
));
map
.
put
(
"comment_num"
,
json
.
getInteger
(
"comment_num"
));
map
.
put
(
"comment_num"
,
json
.
getInteger
(
"comment_num"
));
...
@@ -40,9 +41,9 @@ public class FenghuangCommentAnalysis {
...
@@ -40,9 +41,9 @@ public class FenghuangCommentAnalysis {
* @param url
* @param url
* @return
* @return
*/
*/
public
String
getdocUrl
(
String
url
)
{
public
String
getdocUrl
(
String
url
,
Proxy
proxy
)
{
try
{
try
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
null
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
null
);
result
=
result
.
split
(
"commentsUrl = '"
)[
1
].
split
(
"',"
)[
0
];
result
=
result
.
split
(
"commentsUrl = '"
)[
1
].
split
(
"',"
)[
0
];
System
.
out
.
println
(
result
);
System
.
out
.
println
(
result
);
...
@@ -71,12 +72,12 @@ public class FenghuangCommentAnalysis {
...
@@ -71,12 +72,12 @@ public class FenghuangCommentAnalysis {
* @param url
* @param url
* @return
* @return
*/
*/
public
List
<
Map
<
String
,
Object
>>
getData
(
String
url
)
{
public
List
<
Map
<
String
,
Object
>>
getData
(
String
url
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getFenghuangCommentHeaderMap
(
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
String
result
;
String
result
;
try
{
try
{
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"链接获取信息失败"
,
e
.
getMessage
());
logger
.
error
(
"链接获取信息失败"
,
e
.
getMessage
());
return
null
;
return
null
;
...
...
src/main/java/com/zhiwei/parse/analysis/MeipaiByWordAnalysis.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
...
@@ -25,10 +26,10 @@ public class MeipaiByWordAnalysis {
...
@@ -25,10 +26,10 @@ public class MeipaiByWordAnalysis {
* @param result
* @param result
* @return
* @return
*/
*/
public
Map
<
String
,
Object
>
getMeipaiData
(
Map
<
String
,
String
>
headerMap
,
String
url
)
{
public
Map
<
String
,
Object
>
getMeipaiData
(
Map
<
String
,
String
>
headerMap
,
String
url
,
Proxy
proxy
)
{
try
{
try
{
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
dataMap
=
new
HashMap
<
String
,
Object
>();
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
Document
doc
=
Jsoup
.
parse
(
result
);
Document
doc
=
Jsoup
.
parse
(
result
);
String
video_count
=
doc
.
select
(
"div.detail-location"
).
text
().
split
(
"播放"
)[
0
];
String
video_count
=
doc
.
select
(
"div.detail-location"
).
text
().
split
(
"播放"
)[
0
];
String
time
=
doc
.
select
(
"div.detail-time.pa > strong"
).
text
();
String
time
=
doc
.
select
(
"div.detail-time.pa > strong"
).
text
();
...
...
src/main/java/com/zhiwei/parse/analysis/QQKBByWordAnalysis.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
public
class
QQKBByWordAnalysis
{
public
class
QQKBByWordAnalysis
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
QQKBByWordAnalysis
.
class
);
// public List<Map<String,Object>> get
}
}
src/main/java/com/zhiwei/parse/analysis/QQKBCommentAnalysis.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
...
@@ -50,7 +51,7 @@ public class QQKBCommentAnalysis {
...
@@ -50,7 +51,7 @@ public class QQKBCommentAnalysis {
* @param result
* @param result
* @return
* @return
*/
*/
public
List
<
Map
<
String
,
Object
>>
getCommentData
(
String
result
,
String
cookie
,
String
comment_id
,
String
article_id
)
{
public
List
<
Map
<
String
,
Object
>>
getCommentData
(
String
result
,
String
cookie
,
String
comment_id
,
String
article_id
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
...
@@ -66,7 +67,7 @@ public class QQKBCommentAnalysis {
...
@@ -66,7 +67,7 @@ public class QQKBCommentAnalysis {
String
reply_id
=
data
.
getString
(
"reply_id"
);
String
reply_id
=
data
.
getString
(
"reply_id"
);
if
(
data
.
toString
().
contains
(
"reply_num"
))
{
if
(
data
.
toString
().
contains
(
"reply_num"
))
{
replay_num
=
data
.
getInteger
(
"reply_num"
);
replay_num
=
data
.
getInteger
(
"reply_num"
);
List
<
Map
<
String
,
Object
>>
lists
=
getReplyCommentData
(
cookie
,
reply_id
,
comment_id
,
article_id
);
List
<
Map
<
String
,
Object
>>
lists
=
getReplyCommentData
(
cookie
,
reply_id
,
comment_id
,
article_id
,
proxy
);
if
(
lists
!=
null
&&
lists
.
size
()
>
0
)
{
if
(
lists
!=
null
&&
lists
.
size
()
>
0
)
{
dataList
.
addAll
(
lists
);
dataList
.
addAll
(
lists
);
}
}
...
@@ -107,7 +108,7 @@ public class QQKBCommentAnalysis {
...
@@ -107,7 +108,7 @@ public class QQKBCommentAnalysis {
}
}
}
}
public
List
<
Map
<
String
,
Object
>>
getReplyCommentData
(
String
cookie
,
String
reply_id
,
String
comment_id
,
String
article_id
)
{
public
List
<
Map
<
String
,
Object
>>
getReplyCommentData
(
String
cookie
,
String
reply_id
,
String
comment_id
,
String
article_id
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
cookie
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getQQKBCommentHeaderMap
(
cookie
);
try
{
try
{
...
@@ -115,7 +116,7 @@ public class QQKBCommentAnalysis {
...
@@ -115,7 +116,7 @@ public class QQKBCommentAnalysis {
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQKBCommentReplyParamMap
(
null
,
comment_id
,
article_id
,
reply_id
);
Map
<
String
,
Object
>
paramMap
=
HeadGet
.
getQQKBCommentReplyParamMap
(
null
,
comment_id
,
article_id
,
reply_id
);
while
(
true
)
{
while
(
true
)
{
ZhiWeiTools
.
sleep
(
3000
);
ZhiWeiTools
.
sleep
(
3000
);
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.cnews.qq.com/getQQNewsOrigReplyComment"
,
headerMap
,
paramMap
);
String
result
=
HttpClient
.
executeHttpRequestPost
(
"http://r.cnews.qq.com/getQQNewsOrigReplyComment"
,
proxy
,
headerMap
,
paramMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
if
(
json
.
getJSONObject
(
"comments"
).
getString
(
"reply_list"
)
==
null
)
{
if
(
json
.
getJSONObject
(
"comments"
).
getString
(
"reply_list"
)
==
null
)
{
break
;
break
;
...
...
src/main/java/com/zhiwei/parse/analysis/SouhuAccountAnalysis.java
View file @
e77ce092
...
@@ -25,11 +25,11 @@ public class SouhuAccountAnalysis {
...
@@ -25,11 +25,11 @@ public class SouhuAccountAnalysis {
* @param startTime
* @param startTime
* @return
* @return
*/
*/
public
List
<
Map
<
String
,
Object
>>
analysisData
(
JSONArray
jsonArray
)
{
public
List
<
Map
<
String
,
Object
>>
analysisData
(
JSONArray
jsonArray
,
String
name
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
JSONObject
data
=
jsonArray
.
getJSONObject
(
i
);
JSONObject
data
=
jsonArray
.
getJSONObject
(
i
);
Map
<
String
,
Object
>
map
=
parseHtmlByAccount
(
data
);
Map
<
String
,
Object
>
map
=
parseHtmlByAccount
(
data
,
name
);
if
(
map
!=
null
)
{
if
(
map
!=
null
)
{
dataList
.
add
(
map
);
dataList
.
add
(
map
);
}
}
...
@@ -45,11 +45,12 @@ public class SouhuAccountAnalysis {
...
@@ -45,11 +45,12 @@ public class SouhuAccountAnalysis {
* @param data
* @param data
* @return
* @return
*/
*/
private
static
Map
<
String
,
Object
>
parseHtmlByAccount
(
JSONObject
data
)
{
private
static
Map
<
String
,
Object
>
parseHtmlByAccount
(
JSONObject
data
,
String
name
)
{
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
Map
<
String
,
Object
>
map
=
new
HashMap
<
String
,
Object
>();
try
{
try
{
String
title
=
data
.
getString
(
"title"
);
String
title
=
data
.
getString
(
"title"
);
map
.
put
(
"title"
,
URLDecoder
.
decode
(
title
,
"UTF-8"
));
map
.
put
(
"title"
,
URLDecoder
.
decode
(
title
,
"UTF-8"
));
map
.
put
(
"source"
,
name
);
String
content
=
data
.
getString
(
"brief"
);
String
content
=
data
.
getString
(
"brief"
);
map
.
put
(
"content"
,
URLDecoder
.
decode
(
content
,
"UTF-8"
));
map
.
put
(
"content"
,
URLDecoder
.
decode
(
content
,
"UTF-8"
));
map
.
put
(
"newsPv"
,
data
.
getString
(
"newsPv"
));
map
.
put
(
"newsPv"
,
data
.
getString
(
"newsPv"
));
...
...
src/main/java/com/zhiwei/parse/analysis/SouhuCommentAnalysis.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.Map
;
...
@@ -39,11 +40,11 @@ public class SouhuCommentAnalysis {
...
@@ -39,11 +40,11 @@ public class SouhuCommentAnalysis {
return
newurl
;
return
newurl
;
}
}
public
int
getSouhuCommentCount
(
String
url
)
{
public
int
getSouhuCommentCount
(
String
url
,
Proxy
proxy
)
{
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
Map
<
String
,
String
>
headerMap
=
HeadGet
.
getSouhuCommentHeaderMap
(
null
);
int
i
;
int
i
;
try
{
try
{
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
headerMap
);
String
result
=
HttpClient
.
executeHttpRequestGet
(
url
,
proxy
,
headerMap
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
JSONObject
json
=
JSONObject
.
parseObject
(
result
);
i
=
json
.
getJSONObject
(
"jsonObject"
).
getInteger
(
"participation_sum"
);
i
=
json
.
getJSONObject
(
"jsonObject"
).
getInteger
(
"participation_sum"
);
return
i
;
return
i
;
...
...
src/main/java/com/zhiwei/parse/analysis/WangyiCommentAnalysis.java
View file @
e77ce092
...
@@ -14,6 +14,7 @@ public class WangyiCommentAnalysis {
...
@@ -14,6 +14,7 @@ public class WangyiCommentAnalysis {
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WangyiCommentAnalysis
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WangyiCommentAnalysis
.
class
);
@SuppressWarnings
(
"unchecked"
)
public
List
<
Map
<
String
,
Object
>>
getWangyiCommentData
(
String
result
,
List
<
String
>
idList
)
{
public
List
<
Map
<
String
,
Object
>>
getWangyiCommentData
(
String
result
,
List
<
String
>
idList
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
dataList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
...
...
src/main/java/com/zhiwei/parse/analysis/YidianzixunCommentAnalysis.java
View file @
e77ce092
package
com
.
zhiwei
.
parse
.
analysis
;
package
com
.
zhiwei
.
parse
.
analysis
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
...
@@ -41,12 +42,12 @@ public class YidianzixunCommentAnalysis {
...
@@ -41,12 +42,12 @@ public class YidianzixunCommentAnalysis {
* @return
* @return
*/
*/
public
List
<
Map
<
String
,
Object
>>
getrepliesData
(
Map
<
String
,
Object
>
map
,
String
cookie
,
public
List
<
Map
<
String
,
Object
>>
getrepliesData
(
Map
<
String
,
Object
>
map
,
String
cookie
,
Map
<
String
,
String
>
headerMap
)
{
Map
<
String
,
String
>
headerMap
,
Proxy
proxy
)
{
List
<
Map
<
String
,
Object
>>
replylists
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
replylists
=
new
ArrayList
<
Map
<
String
,
Object
>>();
try
{
try
{
String
replyurl
=
"http://www.yidianzixun.com/home/q/getmorereplies?comment_id="
String
replyurl
=
"http://www.yidianzixun.com/home/q/getmorereplies?comment_id="
+
map
.
get
(
"comment_id"
);
+
map
.
get
(
"comment_id"
);
String
replyresult
=
HttpClient
.
executeHttpRequestGet
(
replyurl
,
headerMap
);
String
replyresult
=
HttpClient
.
executeHttpRequestGet
(
replyurl
,
proxy
,
headerMap
);
JSONObject
replyjson
=
JSONObject
.
parseObject
(
replyresult
);
JSONObject
replyjson
=
JSONObject
.
parseObject
(
replyresult
);
JSONArray
replyjsonArry
=
replyjson
.
getJSONObject
(
"comment"
).
getJSONArray
(
"replies"
);
JSONArray
replyjsonArry
=
replyjson
.
getJSONObject
(
"comment"
).
getJSONArray
(
"replies"
);
for
(
int
j
=
0
;
j
<
replyjsonArry
.
size
();
j
++)
{
for
(
int
j
=
0
;
j
<
replyjsonArry
.
size
();
j
++)
{
...
...
src/test/java/com/zhiwei/crawler/AiqiyiByWordExample.java
View file @
e77ce092
...
@@ -18,7 +18,7 @@ public class AiqiyiByWordExample {
...
@@ -18,7 +18,7 @@ public class AiqiyiByWordExample {
String
[]
words
=
word
.
split
(
","
);
String
[]
words
=
word
.
split
(
","
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
for
(
String
w
:
words
)
{
for
(
String
w
:
words
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
Aiqiyi
.
getAiqiyiByWordData
(
w
);
List
<
Map
<
String
,
Object
>>
dataList
=
Aiqiyi
.
getAiqiyiByWordData
(
w
,
null
);
if
(
dataList
!=
null
&&
dataList
.
size
()
>=
1
)
{
if
(
dataList
!=
null
&&
dataList
.
size
()
>=
1
)
{
bodyList
.
addAll
(
dataList
);
bodyList
.
addAll
(
dataList
);
}
}
...
...
src/test/java/com/zhiwei/crawler/BaijiaAccountExample.java
View file @
e77ce092
...
@@ -16,7 +16,7 @@ public class BaijiaAccountExample {
...
@@ -16,7 +16,7 @@ public class BaijiaAccountExample {
String
app_id
=
"1536766390576806"
;
String
app_id
=
"1536766390576806"
;
String
startTime
=
"2016-01-01 00:00:00"
;
String
startTime
=
"2016-01-01 00:00:00"
;
//2017-11-30 17:48:17
//2017-11-30 17:48:17
List
<
Map
<
String
,
Object
>>
lists
=
Baijia
.
getBaijiaAccountData
(
app_id
,
startTime
);
List
<
Map
<
String
,
Object
>>
lists
=
Baijia
.
getBaijiaAccountData
(
app_id
,
startTime
,
null
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"title"
);
headList
.
add
(
"title"
);
...
@@ -34,7 +34,7 @@ public class BaijiaAccountExample {
...
@@ -34,7 +34,7 @@ public class BaijiaAccountExample {
String
app_id
=
"b_1536766390576806"
;
String
app_id
=
"b_1536766390576806"
;
String
startTime
=
"2016-01-01 00:00:00"
;
String
startTime
=
"2016-01-01 00:00:00"
;
//2017-11-30 17:48:17
//2017-11-30 17:48:17
List
<
Map
<
String
,
Object
>>
lists
=
Baijia
.
getBaijiaAccount2Data
(
app_id
,
startTime
);
List
<
Map
<
String
,
Object
>>
lists
=
Baijia
.
getBaijiaAccount2Data
(
app_id
,
startTime
,
null
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"title"
);
headList
.
add
(
"title"
);
...
...
src/test/java/com/zhiwei/crawler/DayuAccountExample.java
View file @
e77ce092
...
@@ -39,7 +39,7 @@ public class DayuAccountExample {
...
@@ -39,7 +39,7 @@ public class DayuAccountExample {
if
(
mid
.
length
()
<
1
&&
name
.
length
()
<
1
)
{
if
(
mid
.
length
()
<
1
&&
name
.
length
()
<
1
)
{
continue
;
continue
;
}
}
List
<
Map
<
String
,
Object
>>
dataList
=
Dayu
.
getDayuAccountData
(
mid
,
name
,
null
);
List
<
Map
<
String
,
Object
>>
dataList
=
Dayu
.
getDayuAccountData
(
mid
,
name
,
null
,
null
);
poi
.
exportExcel
(
path
,
name
,
headList
,
dataList
);
poi
.
exportExcel
(
path
,
name
,
headList
,
dataList
);
}
}
...
...
src/test/java/com/zhiwei/crawler/DayuByWordExample.java
View file @
e77ce092
...
@@ -14,7 +14,7 @@ public class DayuByWordExample {
...
@@ -14,7 +14,7 @@ public class DayuByWordExample {
public
void
dayuByWordTest
()
{
public
void
dayuByWordTest
()
{
String
word
=
"沃尔玛"
;
String
word
=
"沃尔玛"
;
List
<
Map
<
String
,
Object
>>
dataList
=
Dayu
.
getDayuByWordData
(
word
);
List
<
Map
<
String
,
Object
>>
dataList
=
Dayu
.
getDayuByWordData
(
word
,
null
);
System
.
out
.
println
(
dataList
.
size
());
System
.
out
.
println
(
dataList
.
size
());
...
...
src/test/java/com/zhiwei/crawler/DayuCommentCountExample.java
View file @
e77ce092
...
@@ -10,7 +10,7 @@ public class DayuCommentCountExample {
...
@@ -10,7 +10,7 @@ public class DayuCommentCountExample {
public
void
dayuCommentCountTest
()
{
public
void
dayuCommentCountTest
()
{
String
articleId
=
"6987993456991247474"
;
String
articleId
=
"6987993456991247474"
;
int
i
=
Dayu
.
getDayuCommentCount
(
articleId
);
int
i
=
Dayu
.
getDayuCommentCount
(
articleId
,
null
);
System
.
out
.
println
(
i
);
System
.
out
.
println
(
i
);
}
}
...
...
src/test/java/com/zhiwei/crawler/DayuCommentExample.java
View file @
e77ce092
...
@@ -32,7 +32,7 @@ public class DayuCommentExample {
...
@@ -32,7 +32,7 @@ public class DayuCommentExample {
}
else
{
}
else
{
articleId
=
url
;
articleId
=
url
;
}
}
List
<
Map
<
String
,
Object
>>
dataList
=
Dayu
.
getDayuCommentData
(
articleId
);
List
<
Map
<
String
,
Object
>>
dataList
=
Dayu
.
getDayuCommentData
(
articleId
,
null
);
if
(
dataList
.
size
()
<=
0
)
{
if
(
dataList
.
size
()
<=
0
)
{
urlList
.
add
(
url
);
urlList
.
add
(
url
);
}
}
...
...
src/test/java/com/zhiwei/crawler/FenghuangAccountExample.java
View file @
e77ce092
...
@@ -20,7 +20,7 @@ public class FenghuangAccountExample {
...
@@ -20,7 +20,7 @@ public class FenghuangAccountExample {
for
(
int
i
=
0
;
i
<
ids
.
length
;
i
++)
{
for
(
int
i
=
0
;
i
<
ids
.
length
;
i
++)
{
try
{
try
{
String
startTime
=
"2017-01-01 00:00:00"
;
//可为空
String
startTime
=
"2017-01-01 00:00:00"
;
//可为空
List
<
Map
<
String
,
Object
>>
dataList
=
Fenghuang
.
getFenghuangAccountData
(
ids
[
i
],
startTime
);
List
<
Map
<
String
,
Object
>>
dataList
=
Fenghuang
.
getFenghuangAccountData
(
ids
[
i
],
startTime
,
null
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"title"
);
headList
.
add
(
"title"
);
...
...
src/test/java/com/zhiwei/crawler/FenghuangByWordExample.java
View file @
e77ce092
...
@@ -19,7 +19,7 @@ public class FenghuangByWordExample {
...
@@ -19,7 +19,7 @@ public class FenghuangByWordExample {
List
<
Map
<
String
,
Object
>>
listAll
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
listAll
=
new
ArrayList
<
Map
<
String
,
Object
>>();
for
(
String
word
:
wordList
)
{
for
(
String
word
:
wordList
)
{
try
{
try
{
List
<
Map
<
String
,
Object
>>
dataList
=
Fenghuang
.
getFenghuangByWord
(
word
);
List
<
Map
<
String
,
Object
>>
dataList
=
Fenghuang
.
getFenghuangByWord
(
word
,
null
);
if
(
dataList
!=
null
&&
dataList
.
size
()
>
0
)
{
if
(
dataList
!=
null
&&
dataList
.
size
()
>
0
)
{
listAll
.
addAll
(
dataList
);
listAll
.
addAll
(
dataList
);
}
}
...
...
src/test/java/com/zhiwei/crawler/FenghuangCommentCountExample.java
View file @
e77ce092
...
@@ -14,7 +14,7 @@ public class FenghuangCommentCountExample {
...
@@ -14,7 +14,7 @@ public class FenghuangCommentCountExample {
String
url
=
"http://wemedia.ifeng.com/40906977/wemedia.shtml"
;
String
url
=
"http://wemedia.ifeng.com/40906977/wemedia.shtml"
;
//http://news.ifeng.com/a/20161229/50492484_0.shtml
//http://news.ifeng.com/a/20161229/50492484_0.shtml
//http://wemedia.ifeng.com/4096977/wemedia.shtml
//http://wemedia.ifeng.com/4096977/wemedia.shtml
Map
<
String
,
Object
>
map
=
Fenghuang
.
getFenghuangCommentCount
(
url
);
Map
<
String
,
Object
>
map
=
Fenghuang
.
getFenghuangCommentCount
(
url
,
null
);
System
.
out
.
println
(
map
.
toString
());
System
.
out
.
println
(
map
.
toString
());
}
}
...
...
src/test/java/com/zhiwei/crawler/FenghuangCommentExample.java
View file @
e77ce092
...
@@ -24,7 +24,7 @@ public class FenghuangCommentExample {
...
@@ -24,7 +24,7 @@ public class FenghuangCommentExample {
try
{
try
{
url
=
map1
.
get
(
"url"
)+
""
;
url
=
map1
.
get
(
"url"
)+
""
;
List
<
Map
<
String
,
Object
>>
dataList
=
Fenghuang
.
getFenghuangCommentData
(
url
);
List
<
Map
<
String
,
Object
>>
dataList
=
Fenghuang
.
getFenghuangCommentData
(
url
,
null
);
if
(
dataList
.
size
()
<=
0
)
{
if
(
dataList
.
size
()
<=
0
)
{
urlList
.
add
(
url
);
urlList
.
add
(
url
);
}
}
...
...
src/test/java/com/zhiwei/crawler/MeipaiByWordExample.java
View file @
e77ce092
...
@@ -17,7 +17,7 @@ public class MeipaiByWordExample {
...
@@ -17,7 +17,7 @@ public class MeipaiByWordExample {
String
[]
words
=
word
.
split
(
","
);
String
[]
words
=
word
.
split
(
","
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
for
(
String
w
:
words
)
{
for
(
String
w
:
words
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
Meipai
.
getMeipaiByWordData
(
w
);
List
<
Map
<
String
,
Object
>>
dataList
=
Meipai
.
getMeipaiByWordData
(
w
,
null
);
if
(
dataList
!=
null
)
{
if
(
dataList
!=
null
)
{
bodyList
.
addAll
(
dataList
);
bodyList
.
addAll
(
dataList
);
}
}
...
...
src/test/java/com/zhiwei/crawler/MiaopaiByUrlExample.java
View file @
e77ce092
...
@@ -31,7 +31,7 @@ public class MiaopaiByUrlExample {
...
@@ -31,7 +31,7 @@ public class MiaopaiByUrlExample {
urlList
.
add
(
url
);
urlList
.
add
(
url
);
ZhiWeiTools
.
sleep
(
5000
);
ZhiWeiTools
.
sleep
(
5000
);
System
.
out
.
println
(
url
);
System
.
out
.
println
(
url
);
Map
<
String
,
Object
>
dataMap
=
Miaopai
.
getMiaopaiDataByURL
(
url
);
Map
<
String
,
Object
>
dataMap
=
Miaopai
.
getMiaopaiDataByURL
(
url
,
null
);
if
(
dataMap
!=
null
)
{
if
(
dataMap
!=
null
)
{
bodyList
.
add
(
dataMap
);
bodyList
.
add
(
dataMap
);
}
}
...
...
src/test/java/com/zhiwei/crawler/PearVideoByWordExample.java
View file @
e77ce092
...
@@ -15,7 +15,7 @@ public class PearVideoByWordExample {
...
@@ -15,7 +15,7 @@ public class PearVideoByWordExample {
public
void
pearVideoByWordTest
()
{
public
void
pearVideoByWordTest
()
{
String
word
=
"美食"
;
String
word
=
"美食"
;
List
<
Map
<
String
,
Object
>>
bodyList
=
PearVideo
.
getPearVideoData
(
word
);
List
<
Map
<
String
,
Object
>>
bodyList
=
PearVideo
.
getPearVideoData
(
word
,
null
);
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"time"
);
headList
.
add
(
"time"
);
headList
.
add
(
"title"
);
headList
.
add
(
"title"
);
...
...
src/test/java/com/zhiwei/crawler/QQAccountExample.java
View file @
e77ce092
...
@@ -24,7 +24,7 @@ public class QQAccountExample {
...
@@ -24,7 +24,7 @@ public class QQAccountExample {
String
child
=
map
.
get
(
"帐号链接"
)+
""
;
String
child
=
map
.
get
(
"帐号链接"
)+
""
;
// System.out.println(child.split("chlid=")[1]);
// System.out.println(child.split("chlid=")[1]);
System
.
out
.
println
((
String
)
map
.
get
(
"child"
));
System
.
out
.
println
((
String
)
map
.
get
(
"child"
));
List
<
Map
<
String
,
Object
>>
lists
=
QQKB
.
getQQAccountData
((
String
)
map
.
get
(
"child"
),
cookie
);
List
<
Map
<
String
,
Object
>>
lists
=
QQKB
.
getQQAccountData
((
String
)
map
.
get
(
"child"
),
cookie
,
null
);
if
(
lists
!=
null
)
{
if
(
lists
!=
null
)
{
for
(
Map
<
String
,
Object
>
map1
:
lists
)
{
for
(
Map
<
String
,
Object
>
map1
:
lists
)
{
map1
.
put
(
"name"
,
map
.
get
(
"呢称"
));
map1
.
put
(
"name"
,
map
.
get
(
"呢称"
));
...
...
src/test/java/com/zhiwei/crawler/QQKBCommentCountExample.java
View file @
e77ce092
...
@@ -12,7 +12,7 @@ public class QQKBCommentCountExample {
...
@@ -12,7 +12,7 @@ public class QQKBCommentCountExample {
String
cookie
=
"phone_id=;%20phone_token=;%20luin=o0497332654;%20lskey=0003000049dd058f533cbebb240223ede63b864224f7eebe0f4aeca6a623572bb290a5800741d191a5768bb0;%20uin=o0497332654;%20skey=MIZmc2Oel3;%20sigA2=4282ABA809551D3534C72F999EE8F2A75219ED9452DEF04E4CBCE6B680C2C893C3E1BA617F5E0F387E558888B2ABEDFE87A4A25B16F9066C1154B2BC7A1133CA7B356AB9D3BA26ED;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwgGT4n96Oq-jHALnMUe8UzpoJghQDouvfSSWdh-JOdgAm3jRJUPbux6fcIPghoNxo24xdED8ennAANksJuHiwdw;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0"
;
String
cookie
=
"phone_id=;%20phone_token=;%20luin=o0497332654;%20lskey=0003000049dd058f533cbebb240223ede63b864224f7eebe0f4aeca6a623572bb290a5800741d191a5768bb0;%20uin=o0497332654;%20skey=MIZmc2Oel3;%20sigA2=4282ABA809551D3534C72F999EE8F2A75219ED9452DEF04E4CBCE6B680C2C893C3E1BA617F5E0F387E558888B2ABEDFE87A4A25B16F9066C1154B2BC7A1133CA7B356AB9D3BA26ED;%20openid=oijc7uLZNVp85r-MpFBYxwxzdEkg;%20appid=wxe90c9765ad00e2cd;%20access_token=5_IWbsKfygpJ0lnbUnnFnfwgGT4n96Oq-jHALnMUe8UzpoJghQDouvfSSWdh-JOdgAm3jRJUPbux6fcIPghoNxo24xdED8ennAANksJuHiwdw;%20refresh_token=5_IWbsKfygpJ0lnbUnnFnfwgDT4pA9HEbY-wuqnqIHWf9AzdmIueZFuzHYfnZPuSNEc0ZjDuHXrtSrRBMMD-7kgj06iF0NdAOi-KRj6-mrmlA;%20unionid=onCs1uNNpjMXeYIHAhacGypamEBk;%20logintype=0"
;
String
url
=
""
;
String
url
=
""
;
int
i
=
QQKB
.
getCommentCount
(
cookie
,
url
);
int
i
=
QQKB
.
getCommentCount
(
cookie
,
url
,
null
);
System
.
out
.
println
(
i
);
System
.
out
.
println
(
i
);
}
}
...
...
src/test/java/com/zhiwei/crawler/QQKBCommentExample.java
View file @
e77ce092
...
@@ -16,7 +16,7 @@ public class QQKBCommentExample {
...
@@ -16,7 +16,7 @@ public class QQKBCommentExample {
public
void
qqkbCommentTest
()
{
public
void
qqkbCommentTest
()
{
String
url
=
"https://kuaibao.qq.com/s/20180116C0EA8G00"
;
String
url
=
"https://kuaibao.qq.com/s/20180116C0EA8G00"
;
List
<
Map
<
String
,
Object
>>
dataList
=
QQKB
.
getQQKBCommentData
(
url
);
List
<
Map
<
String
,
Object
>>
dataList
=
QQKB
.
getQQKBCommentData
(
url
,
null
);
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"reply_id"
);
//id
headList
.
add
(
"reply_id"
);
//id
headList
.
add
(
"like"
);
//点赞数
headList
.
add
(
"like"
);
//点赞数
...
...
src/test/java/com/zhiwei/crawler/SoKuByWordExample.java
View file @
e77ce092
...
@@ -20,7 +20,7 @@ public class SoKuByWordExample {
...
@@ -20,7 +20,7 @@ public class SoKuByWordExample {
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
for
(
String
w
:
words
)
{
for
(
String
w
:
words
)
{
for
(
String
t
:
types
)
{
for
(
String
t
:
types
)
{
List
<
Map
<
String
,
Object
>>
list
=
Soku
.
getSoKuByWordData
(
w
,
t
);
List
<
Map
<
String
,
Object
>>
list
=
Soku
.
getSoKuByWordData
(
w
,
t
,
null
);
if
(
list
!=
null
&&
list
.
size
()
>
0
)
{
if
(
list
!=
null
&&
list
.
size
()
>
0
)
{
bodyList
.
addAll
(
list
);
bodyList
.
addAll
(
list
);
}
}
...
...
src/test/java/com/zhiwei/crawler/SouhuAccountExample.java
View file @
e77ce092
...
@@ -16,7 +16,7 @@ public class SouhuAccountExample {
...
@@ -16,7 +16,7 @@ public class SouhuAccountExample {
@Test
@Test
public
void
souhuAccountTest
()
{
public
void
souhuAccountTest
()
{
List
<
Map
<
String
,
Object
>>
lists
=
Souhu
.
getSouHuAccountData
(
"MjI5MzAyOTMyMEBzaW5hLnNvaHUuY29t"
,
"2016-01-01 00:00:00"
,
false
);
List
<
Map
<
String
,
Object
>>
lists
=
Souhu
.
getSouHuAccountData
(
"MjI5MzAyOTMyMEBzaW5hLnNvaHUuY29t"
,
"2016-01-01 00:00:00"
,
false
,
null
);
System
.
out
.
println
(
lists
.
size
());
System
.
out
.
println
(
lists
.
size
());
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"title"
);
headList
.
add
(
"title"
);
...
@@ -26,6 +26,7 @@ public class SouhuAccountExample {
...
@@ -26,6 +26,7 @@ public class SouhuAccountExample {
headList
.
add
(
"comment"
);
headList
.
add
(
"comment"
);
headList
.
add
(
"tags"
);
headList
.
add
(
"tags"
);
headList
.
add
(
"newsid"
);
headList
.
add
(
"newsid"
);
headList
.
add
(
"source"
);
headList
.
add
(
"newsPv"
);
headList
.
add
(
"newsPv"
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
poi
.
exportExcel
(
"D:\\crawlerdata\\搜狐号历史文章-太保乱谈.xlsx"
,
"太保乱谈"
,
headList
,
lists
);
poi
.
exportExcel
(
"D:\\crawlerdata\\搜狐号历史文章-太保乱谈.xlsx"
,
"太保乱谈"
,
headList
,
lists
);
...
...
src/test/java/com/zhiwei/crawler/SouhuCommentCountExample.java
View file @
e77ce092
...
@@ -11,7 +11,7 @@ public class SouhuCommentCountExample {
...
@@ -11,7 +11,7 @@ public class SouhuCommentCountExample {
public
void
souhuCommentCountTest
()
{
public
void
souhuCommentCountTest
()
{
String
url
=
"https://www.sohu.com/a/210588884_267106?_f=index_news_7"
;
String
url
=
"https://www.sohu.com/a/210588884_267106?_f=index_news_7"
;
int
i
=
Souhu
.
getSouhuCommentCount
(
url
);
int
i
=
Souhu
.
getSouhuCommentCount
(
url
,
null
);
System
.
out
.
println
(
i
);
System
.
out
.
println
(
i
);
}
}
...
...
src/test/java/com/zhiwei/crawler/SouhuCommentExample.java
View file @
e77ce092
...
@@ -25,7 +25,7 @@ public class SouhuCommentExample {
...
@@ -25,7 +25,7 @@ public class SouhuCommentExample {
try
{
try
{
url
=
map1
.
get
(
"url"
)+
""
;
url
=
map1
.
get
(
"url"
)+
""
;
List
<
Map
<
String
,
Object
>>
dataList
=
Souhu
.
getSouhuCommentData
(
url
);
List
<
Map
<
String
,
Object
>>
dataList
=
Souhu
.
getSouhuCommentData
(
url
,
null
);
if
(
dataList
.
size
()
<=
0
)
{
if
(
dataList
.
size
()
<=
0
)
{
urlList
.
add
(
url
);
urlList
.
add
(
url
);
}
}
...
...
src/test/java/com/zhiwei/crawler/TXNewsByWordExample.java
View file @
e77ce092
...
@@ -11,7 +11,7 @@ public class TXNewsByWordExample {
...
@@ -11,7 +11,7 @@ public class TXNewsByWordExample {
public
static
void
main
(
String
[]
args
)
{
public
static
void
main
(
String
[]
args
)
{
String
word
=
"唐嫣"
;
String
word
=
"唐嫣"
;
List
<
Map
<
String
,
Object
>>
dataList
=
TXNews
.
getData
(
word
);
List
<
Map
<
String
,
Object
>>
dataList
=
TXNews
.
getData
(
word
,
null
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"title"
);
headList
.
add
(
"title"
);
...
...
src/test/java/com/zhiwei/crawler/WangyiCommentCountExample.java
View file @
e77ce092
...
@@ -10,7 +10,7 @@ public class WangyiCommentCountExample {
...
@@ -10,7 +10,7 @@ public class WangyiCommentCountExample {
public
void
wangyiCommentCountTest
()
{
public
void
wangyiCommentCountTest
()
{
String
id
=
"D77CENT50001875P"
;
String
id
=
"D77CENT50001875P"
;
int
i
=
Wangyi
.
getWangyiCommentCount
(
id
);
int
i
=
Wangyi
.
getWangyiCommentCount
(
id
,
null
);
System
.
out
.
println
(
i
);
System
.
out
.
println
(
i
);
}
}
...
...
src/test/java/com/zhiwei/crawler/WangyiCommentExample.java
View file @
e77ce092
...
@@ -22,7 +22,7 @@ public class WangyiCommentExample {
...
@@ -22,7 +22,7 @@ public class WangyiCommentExample {
for
(
String
url
:
urlList
)
{
for
(
String
url
:
urlList
)
{
String
id
=
url
.
split
(
"a/"
)[
1
].
split
(
".ht"
)[
0
];
String
id
=
url
.
split
(
"a/"
)[
1
].
split
(
".ht"
)[
0
];
List
<
Map
<
String
,
Object
>>
lists
=
Wangyi
.
getWangyiCommentData
(
id
);
List
<
Map
<
String
,
Object
>>
lists
=
Wangyi
.
getWangyiCommentData
(
id
,
null
);
System
.
out
.
println
(
lists
.
size
());
System
.
out
.
println
(
lists
.
size
());
if
(
lists
!=
null
)
{
if
(
lists
!=
null
)
{
bodyList
.
addAll
(
lists
);
bodyList
.
addAll
(
lists
);
...
...
src/test/java/com/zhiwei/crawler/XiaomiShequByWordExample.java
View file @
e77ce092
...
@@ -15,7 +15,7 @@ public class XiaomiShequByWordExample {
...
@@ -15,7 +15,7 @@ public class XiaomiShequByWordExample {
String
[]
words
=
word
.
split
(
","
);
String
[]
words
=
word
.
split
(
","
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
for
(
String
w
:
words
)
{
for
(
String
w
:
words
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
Xiaomi
.
getXiaomiByWordData
(
w
);
List
<
Map
<
String
,
Object
>>
dataList
=
Xiaomi
.
getXiaomiByWordData
(
w
,
null
);
if
(
dataList
!=
null
&&
dataList
.
size
()
>
0
)
{
if
(
dataList
!=
null
&&
dataList
.
size
()
>
0
)
{
bodyList
.
addAll
(
dataList
);
bodyList
.
addAll
(
dataList
);
}
}
...
...
src/test/java/com/zhiwei/crawler/XiguaAccountExample.java
View file @
e77ce092
...
@@ -23,7 +23,7 @@ public class XiguaAccountExample {
...
@@ -23,7 +23,7 @@ public class XiguaAccountExample {
for
(
Map
<
String
,
Object
>
map1
:
lists
)
{
for
(
Map
<
String
,
Object
>
map1
:
lists
)
{
String
url
=
map1
.
get
(
"主页"
)+
""
;
String
url
=
map1
.
get
(
"主页"
)+
""
;
if
(
url
!=
null
&&
url
.
length
()
>
5
)
{
if
(
url
!=
null
&&
url
.
length
()
>
5
)
{
List
<
Map
<
String
,
Object
>>
lists1
=
XiGua
.
getXiguaAccountData
(
url
,
startTime
);
List
<
Map
<
String
,
Object
>>
lists1
=
XiGua
.
getXiguaAccountData
(
url
,
startTime
,
null
);
if
(
lists1
!=
null
&&
lists
.
size
()
>
0
)
{
if
(
lists1
!=
null
&&
lists
.
size
()
>
0
)
{
bodyList
.
addAll
(
lists1
);
bodyList
.
addAll
(
lists1
);
}
}
...
...
src/test/java/com/zhiwei/crawler/XiguaByWordExample.java
View file @
e77ce092
...
@@ -19,7 +19,7 @@ public class XiguaByWordExample {
...
@@ -19,7 +19,7 @@ public class XiguaByWordExample {
String
[]
words
=
word
.
split
(
","
);
String
[]
words
=
word
.
split
(
","
);
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
bodyList
=
new
ArrayList
<
Map
<
String
,
Object
>>();
for
(
String
w
:
words
)
{
for
(
String
w
:
words
)
{
List
<
Map
<
String
,
Object
>>
list
=
XiGua
.
getXiguaVideoByWordData
(
w
);
List
<
Map
<
String
,
Object
>>
list
=
XiGua
.
getXiguaVideoByWordData
(
w
,
null
);
if
(
list
!=
null
&&
list
.
size
()
>
0
)
{
if
(
list
!=
null
&&
list
.
size
()
>
0
)
{
bodyList
.
addAll
(
list
);
bodyList
.
addAll
(
list
);
}
}
...
...
src/test/java/com/zhiwei/crawler/YidainzixunByWordExample.java
View file @
e77ce092
...
@@ -18,7 +18,7 @@ public class YidainzixunByWordExample {
...
@@ -18,7 +18,7 @@ public class YidainzixunByWordExample {
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata/关键词.txt"
);
List
<
String
>
wordList
=
WordReadFile
.
getWords
(
"D://crawlerdata/关键词.txt"
);
List
<
Map
<
String
,
Object
>>
listAll
=
new
ArrayList
<
Map
<
String
,
Object
>>();
List
<
Map
<
String
,
Object
>>
listAll
=
new
ArrayList
<
Map
<
String
,
Object
>>();
for
(
String
word
:
wordList
)
{
for
(
String
word
:
wordList
)
{
List
<
Map
<
String
,
Object
>>
dataList
=
Yidianzixun
.
getYidianzixunDataByWord
(
word
);
List
<
Map
<
String
,
Object
>>
dataList
=
Yidianzixun
.
getYidianzixunDataByWord
(
word
,
null
);
System
.
out
.
println
(
dataList
.
size
());
System
.
out
.
println
(
dataList
.
size
());
listAll
.
addAll
(
dataList
);
listAll
.
addAll
(
dataList
);
System
.
out
.
println
(
listAll
.
size
());
System
.
out
.
println
(
listAll
.
size
());
...
...
src/test/java/com/zhiwei/crawler/YidianzixunAccountExample.java
View file @
e77ce092
...
@@ -16,7 +16,7 @@ public class YidianzixunAccountExample {
...
@@ -16,7 +16,7 @@ public class YidianzixunAccountExample {
public
void
yidianzixunAccountTest
()
{
public
void
yidianzixunAccountTest
()
{
String
channelid
=
"m143901"
;
String
channelid
=
"m143901"
;
String
startTime
=
"2017-01-01 00:00:00"
;
String
startTime
=
"2017-01-01 00:00:00"
;
List
<
Map
<
String
,
Object
>>
dataList
=
Yidianzixun
.
getYidianzixunAccountData
(
channelid
,
startTime
);
List
<
Map
<
String
,
Object
>>
dataList
=
Yidianzixun
.
getYidianzixunAccountData
(
channelid
,
startTime
,
null
);
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
PoiExcelUtil
poi
=
PoiExcelUtil
.
getInstance
();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
List
<
String
>
headList
=
new
ArrayList
<
String
>();
headList
.
add
(
"title"
);
headList
.
add
(
"title"
);
...
...
src/test/java/com/zhiwei/crawler/YidianzixunCommentExample.java
View file @
e77ce092
...
@@ -12,7 +12,7 @@ public class YidianzixunCommentExample {
...
@@ -12,7 +12,7 @@ public class YidianzixunCommentExample {
@Test
@Test
public
void
yidianzixunCommentTest
()
{
public
void
yidianzixunCommentTest
()
{
String
url
=
"http://www.yidianzixun.com/article/0ILHigvv"
;
String
url
=
"http://www.yidianzixun.com/article/0ILHigvv"
;
List
<
Map
<
String
,
Object
>>
lists
=
Yidianzixun
.
getYidianzixunCommentData
(
url
);
List
<
Map
<
String
,
Object
>>
lists
=
Yidianzixun
.
getYidianzixunCommentData
(
url
,
null
);
System
.
out
.
println
(
lists
.
size
());
System
.
out
.
println
(
lists
.
size
());
for
(
Map
<
String
,
Object
>
map
:
lists
)
{
for
(
Map
<
String
,
Object
>
map
:
lists
)
{
System
.
out
.
println
(
map
.
toString
());
System
.
out
.
println
(
map
.
toString
());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment