Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
f60e48bb
Commit
f60e48bb
authored
Jul 19, 2021
by
chenweitao
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'working' into 'master'
新增淘宝热搜采集功能 See merge request
!103
parents
57cecc63
666de369
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
396 additions
and
17 deletions
+396
-17
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchType.java
+1
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/TaoBaoHotSearchCrawler.java
+108
-0
src/main/java/com/zhiwei/searchhotcrawler/test/TaoBaoHotSearchCrawlerTest.java
+33
-12
src/main/java/com/zhiwei/searchhotcrawler/test/TaoBaoHotSearchRun.java
+40
-0
src/main/java/com/zhiwei/searchhotcrawler/test/TaoBaoRunTest.java
+25
-0
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
+13
-1
src/main/java/com/zhiwei/searchhotcrawler/util/TaoBaoUtils.java
+5
-4
src/main/resources/taobao.js
+171
-0
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchType.java
View file @
f60e48bb
...
@@ -27,4 +27,5 @@ public enum HotSearchType {
...
@@ -27,4 +27,5 @@ public enum HotSearchType {
虎嗅热文推荐
,
虎嗅热文推荐
,
快手热榜
,
快手热榜
,
淘宝热搜
,
淘宝热搜
,
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/TaoBaoHotSearchCrawler.java
0 → 100644
View file @
f60e48bb
package
com
.
zhiwei
.
searchhotcrawler
.
crawler
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.proxy.ProxyHolder
;
import
com.zhiwei.crawler.core.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.util.TaoBaoUtils
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
okhttp3.Response
;
import
java.time.Duration
;
import
java.util.*
;
/**
* @author ll
* @ClassName:TaoBaoHotSearchCrawler
* @Description:
* @date 2021年6月18日 下午16:33:31
*/
@Log4j2
public
class
TaoBaoHotSearchCrawler
{
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
throwException
(
false
).
retryTimes
(
3
).
connectTimeout
(
Duration
.
ofSeconds
(
60
)).
build
();
public
static
List
<
HotSearchList
>
taoBaoHotSearch
(
Date
date
)
{
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
String
htmlBody
=
null
;
Boolean
ht
=
false
;
long
time
=
new
Date
().
getTime
();
for
(
int
t
=
0
;
t
<
3
&&
ht
==
false
;
t
++)
{
String
signss
=
"undefined&"
+
time
+
"&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}"
;
String
sig
=
TaoBaoUtils
.
parsJSFunction
(
signss
);
String
urls
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign="
+
sig
+
"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Request
request1
=
RequestUtils
.
wrapGet
(
urls
);
String
token
=
null
;
try
(
Response
response
=
httpBoot
.
syncCall
(
request1
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
List
<
String
>
values
=
response
.
networkResponse
().
headers
().
values
(
"Set-Cookie"
);
String
tk
=
values
.
get
(
1
);
String
[]
splitTk
=
tk
.
split
(
";"
);
String
_m_h5_tk
=
splitTk
[
0
];
token
=
_m_h5_tk
.
substring
(
9
,
41
);
String
enc
=
values
.
get
(
2
);
String
[]
splitEnc
=
enc
.
split
(
";"
);
String
_m_h5_tk_enc
=
splitEnc
[
0
];
headerMap
.
put
(
"cookie"
,
_m_h5_tk
+
";"
+
_m_h5_tk_enc
);
}
catch
(
Exception
e
)
{
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
e
);
}
String
signs
=
token
+
"&"
+
time
+
"&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}"
;
String
sign
=
TaoBaoUtils
.
parsJSFunction
(
signs
);
String
url
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign="
+
sign
+
"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
ht
=
!
htmlBody
.
contains
(
"非法请求"
);
}
catch
(
Exception
e
)
{
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
e
);
}
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"data"
))
{
return
ansysData
(
htmlBody
,
date
);
}
else
{
log
.
info
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
);
}
}
return
Collections
.
emptyList
();
}
private
static
List
<
HotSearchList
>
ansysData
(
String
htmlBody
,
Date
date
)
{
List
<
HotSearchList
>
list
=
new
ArrayList
<>();
try
{
String
sub
=
htmlBody
.
substring
(
htmlBody
.
indexOf
(
"searchdoor"
),
htmlBody
.
indexOf
(
"searchdoorFrom"
));
String
substring
=
sub
.
substring
(
sub
.
indexOf
(
"showReminder"
)
+
27
,
sub
.
indexOf
(
"multi_bangdan_flag"
)
-
2
).
trim
();
JSONArray
objects
=
JSONObject
.
parseArray
(
substring
);
JSONArray
jsonArray
=
objects
.
getJSONObject
(
0
).
getJSONObject
(
"result"
).
getJSONArray
(
"text"
);
for
(
int
i
=
0
;
i
<
jsonArray
.
size
();
i
++)
{
try
{
JSONObject
jsonObject
=
jsonArray
.
getJSONObject
(
i
);
//获取标题
String
name
=
jsonObject
.
getString
(
"showtext"
);
//获取排名
String
showmark
=
jsonObject
.
getString
(
"showmark"
);
Integer
rank
=
Integer
.
valueOf
(
showmark
);
String
searchtext
=
jsonObject
.
getString
(
"searchtext"
);
//获取链接
String
url
=
"https://s.m.taobao.com/h5?q="
+
searchtext
;
//获取标签
String
tagText
=
jsonObject
.
getString
(
"tagText"
);
if
(
""
.
equals
(
tagText
))
{
tagText
=
null
;
}
//String tagText = jsonObject.getString("tagText");
Long
count
=
null
;
HotSearchList
hotSearchList
=
new
HotSearchList
(
url
,
name
,
count
,
true
,
rank
,
HotSearchType
.
淘宝热搜
.
name
(),
tagText
,
date
);
list
.
add
(
hotSearchList
);
}
catch
(
Exception
e
)
{
log
.
error
(
"解析淘宝热搜时出现解析错误"
,
e
);
}
}
}
catch
(
Exception
e
)
{
log
.
error
(
"解析淘宝热搜时出现解析错误,数据不是json结构"
,
e
);
}
return
list
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/test/TaoBaoHotSearchCrawlerTest.java
View file @
f60e48bb
...
@@ -27,20 +27,38 @@ public class TaoBaoHotSearchCrawlerTest {
...
@@ -27,20 +27,38 @@ public class TaoBaoHotSearchCrawlerTest {
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
throwException
(
false
).
retryTimes
(
3
).
connectTimeout
(
Duration
.
ofSeconds
(
60
)).
build
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
throwException
(
false
).
retryTimes
(
3
).
connectTimeout
(
Duration
.
ofSeconds
(
60
)).
build
();
public
static
List
<
HotSearchList
>
taoBaoHotSearch
(
Date
date
)
{
public
static
List
<
HotSearchList
>
taoBaoHotSearch
(
Date
date
)
{
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
String
htmlBody
=
null
;
Boolean
ht
=
false
;
long
time
=
new
Date
().
getTime
();
long
time
=
new
Date
().
getTime
();
String
signs
=
"undefined&"
+
time
+
"&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}"
;
for
(
int
t
=
0
;
t
<
3
&&
ht
==
false
;
t
++)
{
//String signs="undefined&1624862377708&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
String
signss
=
"undefined&"
+
time
+
"&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}"
;
String
sig
=
TaoBaoUtils
.
parsJSFunction
(
signss
);
String
urls
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign="
+
sig
+
"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Request
request1
=
RequestUtils
.
wrapGet
(
urls
);
String
token
=
null
;
try
(
Response
response
=
httpBoot
.
syncCall
(
request1
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
List
<
String
>
values
=
response
.
networkResponse
().
headers
().
values
(
"Set-Cookie"
);
String
tk
=
values
.
get
(
1
);
String
[]
splitTk
=
tk
.
split
(
";"
);
String
_m_h5_tk
=
splitTk
[
0
];
token
=
_m_h5_tk
.
substring
(
9
,
41
);
String
enc
=
values
.
get
(
2
);
String
[]
splitEnc
=
enc
.
split
(
";"
);
String
_m_h5_tk_enc
=
splitEnc
[
0
];
headerMap
.
put
(
"cookie"
,
_m_h5_tk
+
";"
+
_m_h5_tk_enc
);
}
catch
(
Exception
e
)
{
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
e
);
}
String
signs
=
token
+
"&"
+
time
+
"&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}"
;
String
sign
=
TaoBaoUtils
.
parsJSFunction
(
signs
);
String
sign
=
TaoBaoUtils
.
parsJSFunction
(
signs
);
String
url
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="
+
time
+
"&sign="
+
sign
+
"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
//String url = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="+time+"&sign="+sign+"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
String
url
=
"https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=1624929605260&sign=ada01d783dc9772d2f84124d293bac26&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"
;
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
headerMap
.
put
(
"cookie"
,
"_m_h5_tk=975fb07b671f12a689d4ec36cf2e9047_1624937028814; _m_h5_tk_enc=ffb83d60b283eee5992d5e32429c2597;"
);
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
Request
request
=
RequestUtils
.
wrapGet
(
url
,
headerMap
);
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
htmlBody
=
response
.
body
().
string
();
htmlBody
=
response
.
body
().
string
();
System
.
out
.
println
(
htmlBody
);
ht
=
!
htmlBody
.
contains
(
"非法请求"
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
e
);
log
.
error
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
,
e
);
}
}
...
@@ -49,6 +67,7 @@ public class TaoBaoHotSearchCrawlerTest {
...
@@ -49,6 +67,7 @@ public class TaoBaoHotSearchCrawlerTest {
}
else
{
}
else
{
log
.
info
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
);
log
.
info
(
"解析淘宝热搜时出现解析错误,页面结构有问题"
);
}
}
}
return
Collections
.
emptyList
();
return
Collections
.
emptyList
();
}
}
...
@@ -68,18 +87,20 @@ public class TaoBaoHotSearchCrawlerTest {
...
@@ -68,18 +87,20 @@ public class TaoBaoHotSearchCrawlerTest {
String
searchtext
=
jsonObject
.
getString
(
"searchtext"
);
String
searchtext
=
jsonObject
.
getString
(
"searchtext"
);
String
url
=
"https://s.m.taobao.com/h5?q="
+
searchtext
;
String
url
=
"https://s.m.taobao.com/h5?q="
+
searchtext
;
String
tagText
=
jsonObject
.
getString
(
"tagText"
);
String
tagText
=
jsonObject
.
getString
(
"tagText"
);
Long
count
=
0L
;
if
(
""
.
equals
(
tagText
))
{
HotSearchList
hotSearchList
=
new
HotSearchList
(
url
,
name
,
count
,
true
,
rank
,
HotSearchType
.
淘宝热搜
.
name
(),
tagText
,
date
);
tagText
=
null
;
}
Long
count
=
null
;
HotSearchList
hotSearchList
=
new
HotSearchList
(
url
,
name
,
count
,
true
,
rank
,
HotSearchType
.
淘宝热搜
.
name
(),
tagText
,
date
);
list
.
add
(
hotSearchList
);
list
.
add
(
hotSearchList
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析淘宝热搜时出现解析错误"
,
e
);
log
.
error
(
"解析淘宝热搜时出现解析错误"
,
e
);
}
}
}
}
System
.
out
.
println
(
jsonArray
.
size
());
System
.
out
.
println
(
jsonArray
.
size
());
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"解析淘宝热搜时出现解析错误,数据不是json结构"
,
e
);
log
.
error
(
"解析淘宝热搜时出现解析错误,数据不是json结构"
,
e
);
}
}
return
list
;
return
list
;
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/test/TaoBaoHotSearchRun.java
0 → 100644
View file @
f60e48bb
package
com
.
zhiwei
.
searchhotcrawler
.
test
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.crawler.TaoBaoHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
lombok.extern.log4j.Log4j2
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
@Log4j2
public
class
TaoBaoHotSearchRun
extends
Thread
{
@Override
public
void
run
()
{
boolean
f
=
true
;
while
(
f
)
{
try
{
getHotList
();
TimeUnit
.
MINUTES
.
sleep
(
1
);
}
catch
(
Exception
e
)
{
e
.
fillInStackTrace
();
ZhiWeiTools
.
sleep
(
60
*
60
*
1000
);
}
ZhiWeiTools
.
sleep
(
50
);
}
}
private
void
getHotList
()
{
log
.
info
(
"淘宝热榜采集开始........"
);
List
<
HotSearchList
>
taoBaoList
=
TaoBaoHotSearchCrawler
.
taoBaoHotSearch
(
new
Date
());
log
.
info
(
"{}, 此轮淘宝热榜采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
taoBaoList
!=
null
?
taoBaoList
.
size
()
:
0
));
TipsUtils
.
addHotList
(
"快手热榜"
,
taoBaoList
);
log
.
info
(
"淘宝热榜采集结束........"
);
}
}
\ No newline at end of file
src/main/java/com/zhiwei/searchhotcrawler/test/TaoBaoRunTest.java
0 → 100644
View file @
f60e48bb
package
com
.
zhiwei
.
searchhotcrawler
.
test
;
import
com.zhiwei.crawler.core.proxy.ProxyFactory
;
import
com.zhiwei.proxy.config.SimpleConfig
;
import
com.zhiwei.searchhotcrawler.config.ProxyConfig
;
import
com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun
;
import
java.text.ParseException
;
/**
 * Manual entry point for trying out the hot-search runners.
 *
 * <p>It only initialises the proxy factory; every runner start below is commented out,
 * so running this class as-is performs no collection.
 */
public class TaoBaoRunTest {
    /**
     * Builds the proxy configuration from {@code ProxyConfig} and initialises
     * {@code ProxyFactory} with it.
     *
     * @param args unused
     * @throws ParseException declared by the signature; nothing in the visible body throws it
     */
    public static void main(String[] args) throws ParseException {
        SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
                .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
        ProxyFactory.init(simpleConfig);
        // Start collecting the Weibo hot search
        // new WeiboHotSearchRun().start();
        // Start collecting the Kuaishou hot list
        // new KuaiShouHotSearchRun().start();
        // Taobao hot search (the original comment said Baidu); note this calls run(), which
        // would execute on the calling thread, rather than start()
        // new TaoBaoHotSearchRun().run();
    }
}
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
View file @
f60e48bb
...
@@ -520,5 +520,17 @@ public class GatherTimer {
...
@@ -520,5 +520,17 @@ public class GatherTimer {
TipsUtils
.
addHotList
(
HotSearchType
.
快手热榜
.
name
(),
kuaiShouList
);
TipsUtils
.
addHotList
(
HotSearchType
.
快手热榜
.
name
(),
kuaiShouList
);
logger
.
info
(
"快手热榜采集结束..."
);
logger
.
info
(
"快手热榜采集结束..."
);
}
}
/**
 * Scheduled collection of the Taobao hot-search list.
 *
 * <p>Triggered by the cron expression below (second 0 of every minute) and executed
 * asynchronously on the {@code myScheduler} executor. The crawled list is logged by size
 * and then passed to {@code TipsUtils.addHotList} under the Taobao hot-search type.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerTaoBao() {
    logger.info("淘宝热搜开始采集...");

    Date collectedAt = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> entries = TaoBaoHotSearchCrawler.taoBaoHotSearch(collectedAt);

    // Report 0 when the crawler hands back no list at all.
    int collected;
    if (entries == null) {
        collected = 0;
    } else {
        collected = entries.size();
    }
    logger.info("{}, 淘宝热搜此轮采集到的数据量为:{}", new Date(), collected);

    TipsUtils.addHotList(HotSearchType.淘宝热搜.name(), entries);
    logger.info("淘宝热搜采集结束...");
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/util/TaoBaoUtils.java
View file @
f60e48bb
...
@@ -8,12 +8,13 @@ import javax.script.Invocable;
...
@@ -8,12 +8,13 @@ import javax.script.Invocable;
import
javax.script.ScriptEngine
;
import
javax.script.ScriptEngine
;
import
javax.script.ScriptEngineManager
;
import
javax.script.ScriptEngineManager
;
import
java.io.FileReader
;
import
java.io.FileReader
;
@Log4j2
@Log4j2
public
class
TaoBaoUtils
{
public
class
TaoBaoUtils
{
public
static
String
parsJSFunction
(
String
sign
)
{
public
static
String
parsJSFunction
(
String
sign
)
{
//脚本的执行结果
String
scriptResult
=
""
;
//脚本的执行结果
String
scriptResult
=
""
;
ScriptEngine
engine
=
new
ScriptEngineManager
().
getEngineByName
(
"JavaScript"
);
//1.得到脚本引擎
ScriptEngine
engine
=
new
ScriptEngineManager
().
getEngineByName
(
"JavaScript"
);
//1.得到脚本引擎
//ScriptEngine engine = new ScriptEngineManager().getEngineByName("nashorn");//1.得到脚本引擎
//ScriptEngine engine = new ScriptEngineManager().getEngineByName("nashorn");//1.得到脚本引擎
try
{
try
{
...
@@ -29,8 +30,8 @@ public class TaoBaoUtils {
...
@@ -29,8 +30,8 @@ public class TaoBaoUtils {
//4.使用 invocable.invokeFunction掉用js脚本里的方法,第一個参数为方法名,后面的参数为被调用的js方法的入参
//4.使用 invocable.invokeFunction掉用js脚本里的方法,第一個参数为方法名,后面的参数为被调用的js方法的入参
scriptResult
=
(
String
)
invocable
.
invokeFunction
(
"h"
,
sign
);
scriptResult
=
(
String
)
invocable
.
invokeFunction
(
"h"
,
sign
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"Error executing script: "
,
e
.
getMessage
());
log
.
error
(
"Error executing script: "
,
e
.
getMessage
());
}
}
return
scriptResult
;
return
scriptResult
;
}
}
...
...
src/main/resources/taobao.js
0 → 100644
View file @
f60e48bb
/**
 * Computes the signature digest of a string.
 *
 * The initial state words, per-round shift amounts and additive constants match the
 * MD5 algorithm (RFC 1321), so this is a minified MD5 implementation: the input is
 * UTF-8 encoded, padded into 32-bit words, processed in 16-word blocks over four
 * rounds, and the four state words are emitted as a lowercase hex string.
 *
 * @param {string} a text to hash
 * @returns {string} 32-character lowercase hex digest
 */
function h(a) {
    // Rotate the 32-bit value `a` left by `b` bits.
    function b(a, b) {
        return a << b | a >>> 32 - b
    }
    // Add two values as unsigned 32-bit integers, handling the top two bits separately
    // so the sum wraps instead of losing precision.
    function c(a, b) {
        var c, d, e, f, g;
        return e = 2147483648 & a,
        f = 2147483648 & b,
        c = 1073741824 & a,
        d = 1073741824 & b,
        g = (1073741823 & a) + (1073741823 & b),
        c & d ? 2147483648 ^ g ^ e ^ f : c | d ? 1073741824 & g ? 3221225472 ^ g ^ e ^ f : 1073741824 ^ g ^ e ^ f : g ^ e ^ f
    }
    // Round-1 mixing function.
    function d(a, b, c) {
        return a & b | ~a & c
    }
    // Round-2 mixing function.
    function e(a, b, c) {
        return a & c | b & ~c
    }
    // Round-3 mixing function.
    function f(a, b, c) {
        return a ^ b ^ c
    }
    // Round-4 mixing function.
    function g(a, b, c) {
        return b ^ (a | ~c)
    }
    // Round-1 step: mix with d(), add message word `h` and constant `j`, rotate by `i`,
    // add `e`. (This inner `h` shadows the outer function inside its body.)
    function h(a, e, f, g, h, i, j) {
        return a = c(a, c(c(d(e, f, g), h), j)),
        c(b(a, i), e)
    }
    // Round-2 step (uses e()).
    function i(a, d, f, g, h, i, j) {
        return a = c(a, c(c(e(d, f, g), h), j)),
        c(b(a, i), d)
    }
    // Round-3 step (uses f()).
    function j(a, d, e, g, h, i, j) {
        return a = c(a, c(c(f(d, e, g), h), j)),
        c(b(a, i), d)
    }
    // Round-4 step (uses g()).
    function k(a, d, e, f, h, i, j) {
        return a = c(a, c(c(g(d, e, f), h), j)),
        c(b(a, i), d)
    }
    // Pack the string's char codes into little-endian 32-bit words, append the 0x80
    // padding byte, and store the bit length in the last two words of the final block.
    function l(a) {
        for (var b, c = a.length, d = c + 8, e = (d - d % 64) / 64, f = 16 * (e + 1), g = new Array(f - 1), h = 0, i = 0; c > i;)
            b = (i - i % 4) / 4,
            h = i % 4 * 8,
            g[b] = g[b] | a.charCodeAt(i) << h,
            i++;
        return b = (i - i % 4) / 4,
        h = i % 4 * 8,
        g[b] = g[b] | 128 << h,
        g[f - 2] = c << 3,
        g[f - 1] = c >>> 29,
        g
    }
    // Render a 32-bit word as 8 hex digits, least-significant byte first.
    function m(a) {
        var b, c, d = "", e = "";
        for (c = 0; 3 >= c; c++)
            b = a >>> 8 * c & 255,
            e = "0" + b.toString(16),
            d += e.substr(e.length - 2, 2);
        return d
    }
    // Normalise CRLF to LF, then encode the string as UTF-8, one byte per character
    // (handles 1-, 2- and 3-byte sequences).
    function n(a) {
        a = a.replace(/\r\n/g, "\n");
        for (var b = "", c = 0; c < a.length; c++) {
            var d = a.charCodeAt(c);
            128 > d ? b += String.fromCharCode(d) : d > 127 && 2048 > d ? (b += String.fromCharCode(d >> 6 | 192),
            b += String.fromCharCode(63 & d | 128)) : (b += String.fromCharCode(d >> 12 | 224),
            b += String.fromCharCode(d >> 6 & 63 | 128),
            b += String.fromCharCode(63 & d | 128))
        }
        return b
    }
    // o: block offset; p..s: state saved at the start of a block; t..w: running state;
    // x: message words; y..B, C..F, G..J, K..N: rotation amounts for rounds 1 to 4.
    var o, p, q, r, s, t, u, v, w, x = [], y = 7, z = 12, A = 17, B = 22, C = 5, D = 9, E = 14, F = 20, G = 4, H = 11, I = 16, J = 23, K = 6, L = 10, M = 15, N = 21;
    // Encode and pad the input, set the initial state, then process 16 words per iteration.
    for (a = n(a),
    x = l(a),
    t = 1732584193,
    u = 4023233417,
    v = 2562383102,
    w = 271733878,
    o = 0; o < x.length; o += 16)
        p = t,
        q = u,
        r = v,
        s = w,
        // Round 1
        t = h(t, u, v, w, x[o + 0], y, 3614090360),
        w = h(w, t, u, v, x[o + 1], z, 3905402710),
        v = h(v, w, t, u, x[o + 2], A, 606105819),
        u = h(u, v, w, t, x[o + 3], B, 3250441966),
        t = h(t, u, v, w, x[o + 4], y, 4118548399),
        w = h(w, t, u, v, x[o + 5], z, 1200080426),
        v = h(v, w, t, u, x[o + 6], A, 2821735955),
        u = h(u, v, w, t, x[o + 7], B, 4249261313),
        t = h(t, u, v, w, x[o + 8], y, 1770035416),
        w = h(w, t, u, v, x[o + 9], z, 2336552879),
        v = h(v, w, t, u, x[o + 10], A, 4294925233),
        u = h(u, v, w, t, x[o + 11], B, 2304563134),
        t = h(t, u, v, w, x[o + 12], y, 1804603682),
        w = h(w, t, u, v, x[o + 13], z, 4254626195),
        v = h(v, w, t, u, x[o + 14], A, 2792965006),
        u = h(u, v, w, t, x[o + 15], B, 1236535329),
        // Round 2
        t = i(t, u, v, w, x[o + 1], C, 4129170786),
        w = i(w, t, u, v, x[o + 6], D, 3225465664),
        v = i(v, w, t, u, x[o + 11], E, 643717713),
        u = i(u, v, w, t, x[o + 0], F, 3921069994),
        t = i(t, u, v, w, x[o + 5], C, 3593408605),
        w = i(w, t, u, v, x[o + 10], D, 38016083),
        v = i(v, w, t, u, x[o + 15], E, 3634488961),
        u = i(u, v, w, t, x[o + 4], F, 3889429448),
        t = i(t, u, v, w, x[o + 9], C, 568446438),
        w = i(w, t, u, v, x[o + 14], D, 3275163606),
        v = i(v, w, t, u, x[o + 3], E, 4107603335),
        u = i(u, v, w, t, x[o + 8], F, 1163531501),
        t = i(t, u, v, w, x[o + 13], C, 2850285829),
        w = i(w, t, u, v, x[o + 2], D, 4243563512),
        v = i(v, w, t, u, x[o + 7], E, 1735328473),
        u = i(u, v, w, t, x[o + 12], F, 2368359562),
        // Round 3
        t = j(t, u, v, w, x[o + 5], G, 4294588738),
        w = j(w, t, u, v, x[o + 8], H, 2272392833),
        v = j(v, w, t, u, x[o + 11], I, 1839030562),
        u = j(u, v, w, t, x[o + 14], J, 4259657740),
        t = j(t, u, v, w, x[o + 1], G, 2763975236),
        w = j(w, t, u, v, x[o + 4], H, 1272893353),
        v = j(v, w, t, u, x[o + 7], I, 4139469664),
        u = j(u, v, w, t, x[o + 10], J, 3200236656),
        t = j(t, u, v, w, x[o + 13], G, 681279174),
        w = j(w, t, u, v, x[o + 0], H, 3936430074),
        v = j(v, w, t, u, x[o + 3], I, 3572445317),
        u = j(u, v, w, t, x[o + 6], J, 76029189),
        t = j(t, u, v, w, x[o + 9], G, 3654602809),
        w = j(w, t, u, v, x[o + 12], H, 3873151461),
        v = j(v, w, t, u, x[o + 15], I, 530742520),
        u = j(u, v, w, t, x[o + 2], J, 3299628645),
        // Round 4
        t = k(t, u, v, w, x[o + 0], K, 4096336452),
        w = k(w, t, u, v, x[o + 7], L, 1126891415),
        v = k(v, w, t, u, x[o + 14], M, 2878612391),
        u = k(u, v, w, t, x[o + 5], N, 4237533241),
        t = k(t, u, v, w, x[o + 12], K, 1700485571),
        w = k(w, t, u, v, x[o + 3], L, 2399980690),
        v = k(v, w, t, u, x[o + 10], M, 4293915773),
        u = k(u, v, w, t, x[o + 1], N, 2240044497),
        t = k(t, u, v, w, x[o + 8], K, 1873313359),
        w = k(w, t, u, v, x[o + 15], L, 4264355552),
        v = k(v, w, t, u, x[o + 6], M, 2734768916),
        u = k(u, v, w, t, x[o + 13], N, 1309151649),
        t = k(t, u, v, w, x[o + 4], K, 4149444226),
        w = k(w, t, u, v, x[o + 11], L, 3174756917),
        v = k(v, w, t, u, x[o + 2], M, 718787259),
        u = k(u, v, w, t, x[o + 9], N, 3951481745),
        // Add this block's result to the state saved at its start.
        t = c(t, p),
        u = c(u, q),
        v = c(v, r),
        w = c(w, s);
    // Concatenate the four state words as hex.
    var O = m(t) + m(u) + m(v) + m(w);
    return O.toLowerCase()
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment