Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
6941070e
Commit
6941070e
authored
Jul 20, 2020
by
马黎滨
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
今日头条采集修改
parent
88b59f64
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
24 additions
and
10 deletions
+24
-10
src/main/java/com/zhiwei/searchhotcrawler/crawler/ToutiaoHotSearchCrawler.java
+24
-10
No files found.
src/main/java/com/zhiwei/searchhotcrawler/crawler/ToutiaoHotSearchCrawler.java
View file @
6941070e
...
...
@@ -3,12 +3,15 @@ package com.zhiwei.searchhotcrawler.crawler;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchList
;
import
com.zhiwei.searchhotcrawler.bean.HotSearchType
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
lombok.extern.log4j.Log4j2
;
import
org.apache.commons.lang3.StringUtils
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
java.io.IOException
;
import
java.util.*
;
...
...
@@ -32,9 +35,20 @@ public class ToutiaoHotSearchCrawler {
* @return void 返回类型
*/
public
static
List
<
HotSearchList
>
toutiaoHotSearchByPhone
(){
String
origin
=
"hot_board"
;
String
jsUrl
=
"https://s3.pstatp.com/toutiao/feoffline/hot_list/resource/hot_list/js/index.45f50250.chunk.js"
;
try
{
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
jsUrl
)).
body
().
string
();
if
(
htmlBody
.
contains
(
"origin"
)){
String
s
=
htmlBody
.
substring
(
htmlBody
.
indexOf
(
"origin:"
)+
"origin:"
.
length
());
origin
=
s
.
substring
(
1
,
s
.
indexOf
(
"}"
)-
1
);
}
}
catch
(
IOException
e
)
{
log
.
error
(
"获取今日头条实时热搜头部信息标识失败"
,
e
);
}
for
(
int
count
=
0
;
count
<=
5
;
count
++){
String
url
=
"https://i
b.snssdk.com/api/suggest_words/?business_id=10017"
;
String
url
=
"https://i
.snssdk.com/hot-event/hot-board/?origin="
+
origin
;
Map
<
String
,
String
>
headerMap
=
new
HashMap
<>();
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1"
);
headerMap
.
put
(
"referer"
,
"https://ib.snssdk.com/rogue/aladdin_landingpage/template/aladdin_landingpage/hot_words.html?isBrowser=true&traffic_source="
);
...
...
@@ -42,17 +56,17 @@ public class ToutiaoHotSearchCrawler {
try
{
List
<
HotSearchList
>
result
=
new
ArrayList
<
HotSearchList
>();
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
();
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"
words
"
)){
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"
data
"
)){
try
{
JSONArray
words
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONArray
(
"data"
)
.
getJSONObject
(
0
).
getJSONArray
(
"words"
)
;
JSONArray
words
=
JSONObject
.
parseObject
(
htmlBody
).
getJSONArray
(
"data"
);
int
rank
=
1
;
for
(
int
i
=
0
;
i
<
words
.
size
();
i
++){
try
{
JSONObject
word
=
words
.
getJSONObject
(
i
);
String
name
=
word
.
getString
(
"
word
"
);
String
name
=
word
.
getString
(
"
Title
"
);
String
link
=
"https://ib.snssdk.com/search/?keyword="
+
URLCodeUtil
.
getURLEncode
(
name
,
"utf-8"
)
+
"&pd=synthesis&source=trending_list&traffic_source="
;
Integer
hotCount
=
word
.
get
JSONObject
(
"params"
).
getInteger
(
"fake_click_cnt
"
);
Integer
wordsType
=
word
.
getInteger
(
"words_type
"
);
Integer
hotCount
=
word
.
get
Integer
(
"HotValue
"
);
String
wordsType
=
word
.
getString
(
"Label
"
);
String
icon
=
getIcon
(
wordsType
);
HotSearchList
hotSearch
=
new
HotSearchList
(
link
,
name
,
hotCount
,
true
,
rank
,
HotSearchType
.
今日头条热搜
.
name
(),
icon
);
...
...
@@ -83,17 +97,17 @@ public class ToutiaoHotSearchCrawler {
* @param wordsType
* @return
*/
private
static
String
getIcon
(
Integer
wordsType
){
private
static
String
getIcon
(
String
wordsType
){
String
icon
=
"无"
;
if
(
Objects
.
nonNull
(
wordsType
)){
switch
(
wordsType
){
case
1
:
case
"new"
:
icon
=
"新"
;
break
;
case
2
:
case
"hot"
:
icon
=
"热"
;
break
;
case
3
:
case
"explode"
:
icon
=
"爆"
;
break
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment