Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
c56488f1
Commit
c56488f1
authored
Oct 22, 2021
by
chenweitao
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'working' into 'master'
脉脉新增content字段,存真实文本内容 See merge request
!142
parents
2dabf311
3c0900f2
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
17 additions
and
0 deletions
+17
-0
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchList.java
+5
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/MaiMaiHotSearchCrawler.java
+6
-0
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
+6
-0
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchList.java
View file @
c56488f1
...
@@ -118,6 +118,11 @@ public class HotSearchList implements Serializable{
...
@@ -118,6 +118,11 @@ public class HotSearchList implements Serializable{
*/
*/
private
String
downtext
;
private
String
downtext
;
/**
* 内容
*/
private
String
content
;
public
HotSearchList
(){}
public
HotSearchList
(){}
public
HotSearchList
(
String
url
,
String
name
,
Long
count
,
Boolean
hot
,
Integer
rank
,
String
type
,
String
icon
,
Date
date
){
public
HotSearchList
(
String
url
,
String
name
,
Long
count
,
Boolean
hot
,
Integer
rank
,
String
type
,
String
icon
,
Date
date
){
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/MaiMaiHotSearchCrawler.java
View file @
c56488f1
...
@@ -48,6 +48,8 @@ public class MaiMaiHotSearchCrawler {
...
@@ -48,6 +48,8 @@ public class MaiMaiHotSearchCrawler {
JSONObject
jsonObject
=
jsonArray
.
getJSONObject
(
i
).
getJSONObject
(
"style35"
);
JSONObject
jsonObject
=
jsonArray
.
getJSONObject
(
i
).
getJSONObject
(
"style35"
);
if
(
jsonObject
!=
null
)
{
if
(
jsonObject
!=
null
)
{
String
name
=
jsonObject
.
getString
(
"text"
);
String
name
=
jsonObject
.
getString
(
"text"
);
//真实文本长度
String
content
=
name
;
// 1024 - 26(时间戳+type)
// 1024 - 26(时间戳+type)
// name.getBytes(StandardCharsets.UTF_8).length<998)
// name.getBytes(StandardCharsets.UTF_8).length<998)
if
(
name
.
length
()>
nameLengthMax
){
if
(
name
.
length
()>
nameLengthMax
){
...
@@ -61,6 +63,10 @@ public class MaiMaiHotSearchCrawler {
...
@@ -61,6 +63,10 @@ public class MaiMaiHotSearchCrawler {
String
hotValue
=
jsonArray
.
getJSONObject
(
i
).
getJSONObject
(
"common"
).
getString
(
"hot_info"
);
String
hotValue
=
jsonArray
.
getJSONObject
(
i
).
getJSONObject
(
"common"
).
getString
(
"hot_info"
);
Long
count
=
hotValue
.
length
()
>
0
?
TipsUtils
.
getHotCount
(
hotValue
)
:
0
;
Long
count
=
hotValue
.
length
()
>
0
?
TipsUtils
.
getHotCount
(
hotValue
)
:
0
;
HotSearchList
hotSearchList
=
new
HotSearchList
(
maimaiUrl
,
name
,
count
,
null
,
rank
,
HotSearchType
.
脉脉热榜
.
name
(),
icon
,
date
);
HotSearchList
hotSearchList
=
new
HotSearchList
(
maimaiUrl
,
name
,
count
,
null
,
rank
,
HotSearchType
.
脉脉热榜
.
name
(),
icon
,
date
);
//判断一下真实文本长度,如果大于332字符长度,则存储
if
(
content
.
length
()>
nameLengthMax
){
hotSearchList
.
setContent
(
content
);
}
list
.
add
(
hotSearchList
);
list
.
add
(
hotSearchList
);
}
}
}
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
View file @
c56488f1
...
@@ -52,6 +52,9 @@ public class HotSearchCacheDAO {
...
@@ -52,6 +52,9 @@ public class HotSearchCacheDAO {
document
.
put
(
"topic_lead"
,
hotSearch
.
getTopicLead
());
document
.
put
(
"topic_lead"
,
hotSearch
.
getTopicLead
());
document
.
put
(
"comment_count"
,
hotSearch
.
getCommentCount
());
document
.
put
(
"comment_count"
,
hotSearch
.
getCommentCount
());
}
}
if
(
"脉脉热榜"
.
equals
(
hotSearch
.
getType
()))
{
document
.
put
(
"content"
,
hotSearch
.
getContent
());
}
if
(
"微博热搜"
.
equals
(
hotSearch
.
getType
()))
{
if
(
"微博热搜"
.
equals
(
hotSearch
.
getType
()))
{
document
.
put
(
"iconUrl"
,
hotSearch
.
getIconUrl
());
document
.
put
(
"iconUrl"
,
hotSearch
.
getIconUrl
());
document
.
put
(
"heatLabel"
,
hotSearch
.
getHeatLabel
());
document
.
put
(
"heatLabel"
,
hotSearch
.
getHeatLabel
());
...
@@ -241,6 +244,9 @@ public class HotSearchCacheDAO {
...
@@ -241,6 +244,9 @@ public class HotSearchCacheDAO {
// if(readCount != null){
// if(readCount != null){
// nowDoc.put("readCount",readCount);
// nowDoc.put("readCount",readCount);
// }
// }
if
(
"脉脉热榜"
.
equals
(
type
))
{
nowDoc
.
put
(
"content"
,
document
.
getString
(
"content"
));
}
if
(
"虎嗅热文推荐"
.
equals
(
type
))
{
if
(
"虎嗅热文推荐"
.
equals
(
type
))
{
nowDoc
.
put
(
"comment_count"
,
document
.
getLong
(
"comment_count"
));
nowDoc
.
put
(
"comment_count"
,
document
.
getLong
(
"comment_count"
));
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment