Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
139ff5af
Commit
139ff5af
authored
Sep 23, 2021
by
chenweitao
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'working' into 'master'
更新讨论量及关于链接 See merge request
!134
parents
15c0ee7f
982502f7
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
12 deletions
+19
-12
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+19
-12
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
139ff5af
...
@@ -125,14 +125,13 @@ public class WeiboHotSearchCrawler {
...
@@ -125,14 +125,13 @@ public class WeiboHotSearchCrawler {
// }
// }
//
/**
/**
//
* @return void 返回类型
* @return void 返回类型
//
* @Title: weiboHotSearchByPhoneTest
* @Title: weiboHotSearchByPhoneTest
//
* @author hero
* @author hero
//
* @Description: TODO(手机端Iphone 微博热搜采集)
* @Description: TODO(手机端Iphone 微博热搜采集)
//
*/
*/
// public static List<HotSearchList> weiboHotSearchByPhone(Date date) {
// public static List<HotSearchList> weiboHotSearchByPhone(Date date) {
// //String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
// String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
// String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
// Map<String, String> headerMap = new HashMap<>();
// Map<String, String> headerMap = new HashMap<>();
// headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
// headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
...
@@ -229,7 +228,6 @@ public class WeiboHotSearchCrawler {
...
@@ -229,7 +228,6 @@ public class WeiboHotSearchCrawler {
*/
*/
public
static
List
<
HotSearchList
>
weiboHotSearchByPhone
(
Date
date
)
{
public
static
List
<
HotSearchList
>
weiboHotSearchByPhone
(
Date
date
)
{
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"
;
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"
;
//String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
);
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
);
String
htmlBody
=
null
;
String
htmlBody
=
null
;
...
@@ -289,7 +287,7 @@ public class WeiboHotSearchCrawler {
...
@@ -289,7 +287,7 @@ public class WeiboHotSearchCrawler {
if
(
Objects
.
nonNull
(
iconUrl
)){
hotSearch
.
setIconUrl
(
iconUrl
);}
if
(
Objects
.
nonNull
(
iconUrl
)){
hotSearch
.
setIconUrl
(
iconUrl
);}
result
.
add
(
hotSearch
);
result
.
add
(
hotSearch
);
rank
++;
rank
++;
//
redisDao.addDataToSet(RedisConfig.WEIBO_HOTSEARCHIDS, name + "_微博热搜");
redisDao
.
addDataToSet
(
RedisConfig
.
WEIBO_HOTSEARCHIDS
,
name
+
"_微博热搜"
);
}
}
}
else
{
}
else
{
log
.
info
(
"card 数据结构为:{}"
,
card
);
log
.
info
(
"card 数据结构为:{}"
,
card
);
...
@@ -363,8 +361,17 @@ public class WeiboHotSearchCrawler {
...
@@ -363,8 +361,17 @@ public class WeiboHotSearchCrawler {
*/
*/
public
static
Document
weiboUpdate
(
Document
document
)
{
public
static
Document
weiboUpdate
(
Document
document
)
{
log
.
info
(
"更新微博热搜{}导语阅读量和讨论量"
,
document
.
getString
(
"name"
));
log
.
info
(
"更新微博热搜{}导语阅读量和讨论量"
,
document
.
getString
(
"name"
));
String
url
=
"https://m.weibo.cn/api/container/getIndex?"
+
document
.
getString
(
"url"
).
substring
(
// String url = "https://m.weibo.cn/api/container/getIndex?" + document.getString("url").substring(
document
.
getString
(
"url"
).
indexOf
(
"?"
)
+
1
,
document
.
getString
(
"url"
).
indexOf
(
"&"
));
// document.getString("url").indexOf("?") + 1, document.getString("url").indexOf("&"));
String
topic
=
document
.
getString
(
"name"
);
String
gb
=
"=1&q=#"
+
topic
+
"#"
;
String
encode
=
null
;
try
{
encode
=
URLEncoder
.
encode
(
gb
,
"utf-8"
);
}
catch
(
UnsupportedEncodingException
e
)
{
log
.
error
(
"更新导语时字符解析成URl模式异常"
,
e
);
}
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid=100103type"
+
encode
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
for
(
int
count
=
0
;
count
<=
5
;
count
++)
{
...
@@ -458,7 +465,7 @@ public class WeiboHotSearchCrawler {
...
@@ -458,7 +465,7 @@ public class WeiboHotSearchCrawler {
}
catch
(
UnsupportedEncodingException
e
)
{
}
catch
(
UnsupportedEncodingException
e
)
{
log
.
error
(
"字符解析成URl模式异常"
,
e
);
log
.
error
(
"字符解析成URl模式异常"
,
e
);
}
}
String
url
=
"https://s.weibo.com/weibo?q="
+
encode
;
String
url
=
"https://s.weibo.com/weibo?q="
+
encode
+
"&Refer=top"
;
String
htmlBody
=
null
;
String
htmlBody
=
null
;
Request
request
=
RequestUtils
.
wrapGet
(
url
);
Request
request
=
RequestUtils
.
wrapGet
(
url
);
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
try
(
Response
response
=
httpBoot
.
syncCall
(
request
,
ProxyHolder
.
NAT_HEAVY_PROXY
))
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment