Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
7d05be2a
Commit
7d05be2a
authored
Nov 15, 2021
by
leiliangliang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
更新微博搜索框采集程序类名
parent
007cfcb4
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
44 additions
and
46 deletions
+44
-46
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoSearchBoxHotWords.java
+6
-7
src/main/java/com/zhiwei/searchhotcrawler/config/DBConfig.java
+2
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiBoSearchBoxHotWordsCrawler.java
+16
-17
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoSearchBoxHotWordsDao.java
+6
-6
src/main/java/com/zhiwei/searchhotcrawler/timer/ToutiaoHotSearchRun.java
+13
-13
src/main/resources/db.properties
+1
-1
No files found.
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoSearch.java
→
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoSearch
BoxHotWords
.java
View file @
7d05be2a
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
/**
* @ClassName: WeiBo
User
* @Description: 微博
用户
* @ClassName: WeiBo
SearchBoxHotWords
* @Description: 微博
搜索框关键词实体类
* @author ll
* @date 2021年
5月27日 下午3:26:1
1
* @date 2021年
11月12日 上午11:35:3
1
*/
import
lombok.Data
;
import
lombok.ToString
;
import
java.io.Serializable
;
import
java.util.Date
;
@Data
@ToString
public
class
WeiBoSearch
{
public
class
WeiBoSearch
BoxHotWords
{
/**
...
...
@@ -40,10 +39,10 @@ public class WeiBoSearch {
private
Date
time
;
public
WeiBoSearch
()
{
public
WeiBoSearch
BoxHotWords
()
{
}
public
WeiBoSearch
(
String
name
,
String
ext
,
String
word
,
String
type
,
Date
time
)
{
public
WeiBoSearch
BoxHotWords
(
String
name
,
String
ext
,
String
word
,
String
type
,
Date
time
)
{
this
.
id
=
name
+
"_大家正在搜"
;
this
.
name
=
name
;
...
...
src/main/java/com/zhiwei/searchhotcrawler/config/DBConfig.java
View file @
7d05be2a
...
...
@@ -21,7 +21,7 @@ public class DBConfig {
collWechatUserName
=
conf
.
getProperty
(
"collWechatUserName"
);
weiBoMassageCollName
=
conf
.
getProperty
(
"weiBoMassageCollName"
);
weiBoUserCollName
=
conf
.
getProperty
(
"weiBoUserCollName"
);
weiBoSearch
CollName
=
conf
.
getProperty
(
"weiBoSearch
CollName"
);
weiBoSearch
BoxHotWordsCollName
=
conf
.
getProperty
(
"weiBoSearchBoxHotWords
CollName"
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
...
...
@@ -38,5 +38,5 @@ public class DBConfig {
public
static
String
collWechatUserName
;
public
static
String
weiBoMassageCollName
;
public
static
String
weiBoUserCollName
;
public
static
String
weiBoSearchCollName
;
public
static
String
weiBoSearch
BoxHotWords
CollName
;
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/
weiBoSearch
Crawler.java
→
src/main/java/com/zhiwei/searchhotcrawler/crawler/
WeiBoSearchBoxHotWords
Crawler.java
View file @
7d05be2a
...
...
@@ -5,8 +5,8 @@ import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.proxy.ProxyHolder
;
import
com.zhiwei.crawler.core.utils.RequestUtils
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoSearch
;
import
com.zhiwei.searchhotcrawler.dao.WeiBoSearchDao
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoSearch
BoxHotWords
;
import
com.zhiwei.searchhotcrawler.dao.WeiBoSearch
BoxHotWords
Dao
;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
okhttp3.Response
;
...
...
@@ -19,17 +19,17 @@ import java.util.Objects;
/**
* @author: ll
* @ClassName:
weiBoSearchCrawlerTest
* @Description: 移动端微博搜索框
数据
采集
* @ClassName:
WeiBoSearchBoxHotWordsCrawler
* @Description: 移动端微博搜索框
热词
采集
* @date: 2021年11月12日 上午11:35:31
* @Title:
weiBoSearch
Crawler
* @Title:
WeiBoSearchBoxHotWords
Crawler
*/
@Log4j2
public
class
weiBoSearch
Crawler
{
public
class
WeiBoSearchBoxHotWords
Crawler
{
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
throwException
(
false
).
retryTimes
(
3
).
connectTimeout
(
Duration
.
ofSeconds
(
60
)).
build
();
static
WeiBoSearch
Dao
weiBoSearchDao
=
new
WeiBoSearch
Dao
();
static
WeiBoSearch
BoxHotWordsDao
weiBoSearchDao
=
new
WeiBoSearchBoxHotWords
Dao
();
public
static
void
weiBoSearch
(
Date
date
){
public
static
void
weiBoSearch
BoxHotWords
(
Date
date
){
String
url
=
"https://api.weibo.cn/2/guest/cardlist?networktype=wifi&image_type=heif&launchid=10000365--x&uicode=10000512&ul_hid=dfa73128-2705-4483-bda9-063cd789e44e&ul_sid=cef2538c-9b16-486e-b49f-db9c387b8384&moduleID=708&checktoken=ea8044f2cc7f0a44a9ad159526fd7186&wb_version=5293&refresh_type=0&c=android&s=0b69e4f6&ft=0&ua=Xiaomi-Redmi%208__weibo__11.11.1__android__android9&wm=20005_0002&aid=01AxK-lU0KWwz9mhMfL6gTb37upA4rmFhPxq5T1hrsYVnDdks.&did=0f607264fc6318a92b9e13c65db7cd3cbce74dcd&fid=231278_plaza&uid=2004639399897&v_f=2&v_p=89&from=10BB195010&gsid=_2AkMW0UMLf8NhqwFRmPwTz2LhZYR_ww_EieKgjbLQJRM3HRl-wT_nqksFtRV6PfAyN6rPTMzBcJo_-h6X0zli7DSuUqw-&imsi=&lang=zh_CN&page=1&skin=default&count=20&oldwm=20005_0002&sflag=1&containerid=231289type%3D1&ignore_inturrpted_error=true&no_location_permission=1&android_id=0febc80e083662a7&client_key=c2f5393732c75e52b85b1da27a8e20ae&need_new_pop=1&ul_ctime=1636683060289&need_head_cards=0&cum=53EC532B"
;
String
htmlBody
=
null
;
...
...
@@ -39,7 +39,7 @@ public class weiBoSearchCrawler {
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
log
.
error
(
"解析微博搜索时出现解析错误,页面结构有问题"
,
e
);
log
.
error
(
"解析微博搜索
框热词
时出现解析错误,页面结构有问题"
,
e
);
}
if
(
htmlBody
!=
null
&&
htmlBody
.
contains
(
"hotwords"
))
{
int
num
=
ansysData
(
htmlBody
,
date
);
...
...
@@ -47,8 +47,7 @@ public class weiBoSearchCrawler {
break
;
}
}
else
{
log
.
info
(
"解析微博"
+
"搜索时出现解析错误,页面结构有问题"
);
log
.
info
(
"解析微博搜索框热词时出现解析错误,页面结构有问题"
);
continue
;
}
}
...
...
@@ -58,9 +57,9 @@ public class weiBoSearchCrawler {
private
static
int
ansysData
(
String
htmlBody
,
Date
date
)
{
//使用静态WeiBoSearchDao,防止频繁连数据库
if
(
Objects
.
isNull
(
weiBoSearchDao
))
{
weiBoSearchDao
=
new
WeiBoSearchDao
();
weiBoSearchDao
=
new
WeiBoSearch
BoxHotWords
Dao
();
}
List
<
WeiBoSearch
>
list
=
new
ArrayList
<>();
List
<
WeiBoSearch
BoxHotWords
>
list
=
new
ArrayList
<>();
try
{
//解析htmlBody
JSONObject
object
=
JSONObject
.
parseObject
(
htmlBody
);
...
...
@@ -75,16 +74,16 @@ public class weiBoSearchCrawler {
String
word
=
card
.
getString
(
"word"
);
//获取标题
String
name
=
card
.
getString
(
"note"
);
WeiBoSearch
weiBoSearch
=
new
WeiBoSearch
(
name
,
ext
,
word
,
type
,
date
);
WeiBoSearch
BoxHotWords
weiBoSearch
=
new
WeiBoSearchBoxHotWords
(
name
,
ext
,
word
,
type
,
date
);
list
.
add
(
weiBoSearch
);
}
}
catch
(
Exception
e
)
{
log
.
error
(
"解析微博搜索时出现解析错误,数据不是json结构"
,
e
);
log
.
error
(
"解析微博搜索
框热词
时出现解析错误,数据不是json结构"
,
e
);
}
log
.
info
(
"{}, 此轮微博搜索采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
log
.
info
(
"{}, 此轮微博搜索
框热词
采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
//数据传给dao
weiBoSearchDao
.
addWeiBo
User
(
list
);
weiBoSearchDao
.
addWeiBo
SearchBoxHotWords
(
list
);
return
list
.
size
();
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoSearchDao.java
→
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoSearch
BoxHotWords
Dao.java
View file @
7d05be2a
...
...
@@ -4,7 +4,7 @@ package com.zhiwei.searchhotcrawler.dao;
import
com.mongodb.MongoWriteException
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoDatabase
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoSearch
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoSearch
BoxHotWords
;
import
com.zhiwei.searchhotcrawler.config.DBConfig
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
lombok.extern.log4j.Log4j2
;
...
...
@@ -16,13 +16,13 @@ import java.util.List;
import
static
java
.
util
.
Objects
.
nonNull
;
@Log4j2
public
class
WeiBoSearchDao
{
public
class
WeiBoSearch
BoxHotWords
Dao
{
public
static
MongoDatabase
mongoDatabase
=
MongoDBTemplate
.
getDB
(
DBConfig
.
dbName
);
public
static
MongoCollection
mongoCollection
;
public
WeiBoSearchDao
()
{
String
collName
=
DBConfig
.
weiBoSearchCollName
;
public
WeiBoSearch
BoxHotWords
Dao
()
{
String
collName
=
DBConfig
.
weiBoSearch
BoxHotWords
CollName
;
mongoCollection
=
mongoDatabase
.
getCollection
(
collName
);
//给数据表创建索引
MongoDBTemplate
.
createIndex
(
DBConfig
.
dbName
,
collName
);
...
...
@@ -32,9 +32,9 @@ public class WeiBoSearchDao {
* 添加数据入库
* @param weiBoSearch
*/
public
void
addWeiBo
User
(
List
<
WeiBoSearch
>
weiBoSearch
){
public
void
addWeiBo
SearchBoxHotWords
(
List
<
WeiBoSearchBoxHotWords
>
weiBoSearch
){
for
(
WeiBoSearch
search
:
weiBoSearch
)
{
for
(
WeiBoSearch
BoxHotWords
search
:
weiBoSearch
)
{
try
{
//获取时间
Date
time
=
search
.
getTime
();
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/ToutiaoHotSearchRun.java
View file @
7d05be2a
...
...
@@ -35,19 +35,19 @@ public class ToutiaoHotSearchRun extends Thread{
private
void
getHotList
()
{
//
log.info("今日头条热搜采集开始........");
//
HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
//
HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
// List<HotSearchList> list = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(
);
//
log.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
//
if(list == null || list.size() == 0){
//
TipsUtils.sendTips("今日头条热搜",new Date());
//
}else {
//
List<Document> data = hotSearchCacheDAO.addData(list);
//
hotSearchDAO.addHotSearchList(data);
//
TipsUtils.recoveryTips("今日头条热搜",new Date());
//
}
//
log.info("今日头条热搜采集结束........");
log
.
info
(
"今日头条热搜采集开始........"
);
HotSearchListDAO
hotSearchDAO
=
new
HotSearchListDAO
();
HotSearchCacheDAO
hotSearchCacheDAO
=
new
HotSearchCacheDAO
();
List
<
HotSearchList
>
list
=
ToutiaoHotSearchCrawler
.
toutiaoHotSearchByPhone
(
new
Date
()
);
log
.
info
(
"{}, 今日头条此轮采集到的数据量为:{}"
,
new
Date
(),
Integer
.
valueOf
(
list
!=
null
?
list
.
size
()
:
0
));
if
(
list
==
null
||
list
.
size
()
==
0
){
TipsUtils
.
sendTips
(
"今日头条热搜"
,
new
Date
());
}
else
{
List
<
Document
>
data
=
hotSearchCacheDAO
.
addData
(
list
);
hotSearchDAO
.
addHotSearchList
(
data
);
TipsUtils
.
recoveryTips
(
"今日头条热搜"
,
new
Date
());
}
log
.
info
(
"今日头条热搜采集结束........"
);
}
}
src/main/resources/db.properties
View file @
7d05be2a
...
...
@@ -20,5 +20,5 @@ topicCollName=topic_list
collWechatUserName
=
wechat_user
weiBoMassageCollName
=
weibo_massage
weiBoUserCollName
=
weibo_user
weiBoSearch
CollName
=
weibo_search
weiBoSearch
BoxHotWordsCollName
=
weiBoSearchBoxHotWord
#
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment