Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
26e162d1
Commit
26e162d1
authored
Sep 25, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
微博热搜修改为m端,增加实时上升榜
parent
03050fa3
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
21 additions
and
17 deletions
+21
-17
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+19
-15
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
+2
-2
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
26e162d1
...
@@ -2,10 +2,12 @@ package com.zhiwei.searchhotcrawler.crawler;
...
@@ -2,10 +2,12 @@ package com.zhiwei.searchhotcrawler.crawler;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
org.apache.commons.lang3.StringUtils
;
import
org.jsoup.Jsoup
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.nodes.Element
;
...
@@ -106,55 +108,57 @@ public class WeiboHotSearchCrawler {
...
@@ -106,55 +108,57 @@ public class WeiboHotSearchCrawler {
* @return void 返回类型
* @return void 返回类型
*/
*/
public
static
List
<
HotSearchList
>
weiboHotSearchByPhone
(){
public
static
List
<
HotSearchList
>
weiboHotSearchByPhone
(){
String
url
=
""
;
String
url
=
"https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"
;
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
headerMap
.
put
(
"Host"
,
"mapi.weibo.com"
);
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
);
headerMap
.
put
(
"User-Agent"
,
"Weibo/8789 (iPhone; iOS 10.3.3; Scale/2.00)"
);
List
<
HotSearchList
>
result
=
new
ArrayList
<
HotSearchList
>();
String
htmlBody
;
String
htmlBody
;
try
{
try
{
List
<
HotSearchList
>
result
=
new
ArrayList
<
HotSearchList
>();
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
();
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
();
if
(
htmlBody
!=
null
){
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"cards"
)
){
try
{
try
{
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
)
.
getJSONObject
(
"data"
)
;
JSONArray
cards
=
json
.
getJSONArray
(
"cards"
);
JSONArray
cards
=
json
.
getJSONArray
(
"cards"
);
int
rank
=
1
;
for
(
int
i
=
0
;
i
<
cards
.
size
();
i
++){
for
(
int
i
=
0
;
i
<
cards
.
size
();
i
++){
try
{
try
{
JSONObject
card
=
cards
.
getJSONObject
(
i
);
JSONObject
card
=
cards
.
getJSONObject
(
i
);
JSONArray
card
_g
roup
=
card
.
getJSONArray
(
"card_group"
);
JSONArray
card
G
roup
=
card
.
getJSONArray
(
"card_group"
);
String
title
=
card
.
getString
(
"title"
);
String
title
=
card
.
getString
(
"title"
);
boolean
hot
=
true
;
boolean
hot
=
true
;
if
(
title
.
contains
(
"实时上升热点"
)){
if
(
title
.
contains
(
"实时上升热点"
)){
hot
=
false
;
hot
=
false
;
rank
=
50
;
}
}
for
(
int
j
=
0
;
j
<
card
_g
roup
.
size
();
j
++){
for
(
int
j
=
0
;
j
<
card
G
roup
.
size
();
j
++){
JSONObject
cardInfo
=
card
_g
roup
.
getJSONObject
(
j
);
JSONObject
cardInfo
=
card
G
roup
.
getJSONObject
(
j
);
String
name
=
cardInfo
.
getString
(
"desc"
);
String
name
=
cardInfo
.
getString
(
"desc"
);
int
hotCount
=
cardInfo
.
getIntValue
(
"desc_extr"
);
int
hotCount
=
cardInfo
.
getIntValue
(
"desc_extr"
);
int
rankCount
=
cardInfo
.
getIntValue
(
"desc_extr"
);
String
id
=
"http://s.weibo.com/weibo/"
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
)
+
"&Refer=top"
;
String
id
=
"http://s.weibo.com/weibo/"
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
)
+
"&Refer=top"
;
HotSearchList
hotSearch
=
new
HotSearchList
(
id
,
name
,
hotCount
,
hot
,
rank
,
HotSearchType
.
微博热搜
.
name
());
HotSearchList
hotSearch
=
new
HotSearchList
(
id
,
name
,
hotCount
,
hot
,
rankCount
,
HotSearchType
.
微博热搜
.
name
());
logger
.
info
(
"采集到的数据:::{}"
,
hotSearch
);
logger
.
info
(
"采集到的数据:::{}"
,
hotSearch
);
result
.
add
(
hotSearch
);
result
.
add
(
hotSearch
);
rank
++;
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析微博时时热搜时出现解析错误"
,
e
.
fillInStackTrace
());
logger
.
error
(
"解析微博时时热搜时出现解析错误"
,
e
.
fillInStackTrace
());
continue
;
continue
;
}
}
}
}
return
result
;
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
error
(
"解析微博时时热搜时出现解析错误,数据不是json结构"
,
e
.
fillInStackTrace
());
logger
.
error
(
"解析微博时时热搜时出现解析错误,数据不是json结构"
,
e
.
fillInStackTrace
());
return
null
;
return
Collections
.
emptyList
()
;
}
}
}
else
{
}
else
{
logger
.
info
(
"解析微博时时热搜时出现解析错误,页面结构有问题"
);
logger
.
info
(
"解析微博时时热搜时出现解析错误,页面结构有问题"
);
}
}
}
catch
(
IOException
e1
)
{
}
catch
(
IOException
e1
)
{
logger
.
error
(
"解析微博时时热搜时出现连接失败"
,
e1
.
fillInStackTrace
());
logger
.
error
(
"解析微博时时热搜时出现连接失败"
,
e1
.
fillInStackTrace
());
return
Collections
.
emptyList
();
}
}
return
result
;
return
Collections
.
emptyList
()
;
}
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/timer/WeiboHotSearchRun.java
View file @
26e162d1
...
@@ -11,7 +11,6 @@ import org.slf4j.LoggerFactory;
...
@@ -11,7 +11,6 @@ import org.slf4j.LoggerFactory;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
import
com.mongodb.DBObject
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
@@ -38,7 +37,8 @@ public class WeiboHotSearchRun extends Thread{
...
@@ -38,7 +37,8 @@ public class WeiboHotSearchRun extends Thread{
private
void
getHotList
()
{
private
void
getHotList
()
{
logger
.
info
(
"微博话题采集开始........"
);
logger
.
info
(
"微博话题采集开始........"
);
List
<
HotSearchList
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearch
();
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
List
<
HotSearchList
>
list
=
WeiboHotSearchCrawler
.
weiboHotSearchByPhone
();
logger
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
logger
.
info
(
"{}, 微博此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
List
<
DBObject
>
data
=
new
ArrayList
<>();
List
<
DBObject
>
data
=
new
ArrayList
<>();
for
(
HotSearchList
weiboHotSearch
:
list
){
for
(
HotSearchList
weiboHotSearch
:
list
){
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment