Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
c495fcc6
Commit
c495fcc6
authored
Jun 04, 2021
by
leiliangliang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
微博话题解析新增采集微博信息和微博用户
parent
f01e39b6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
367 additions
and
0 deletions
+367
-0
dependency-reduced-pom.xml
+26
-0
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoMassage.java
+131
-0
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
+65
-0
src/main/java/com/zhiwei/searchhotcrawler/config/DBConfig.java
+5
-0
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
+0
-0
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoMassageDao.java
+81
-0
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoUserDao.java
+59
-0
src/test/java/weiboTest/WeiboHotSearchTest.java
+0
-0
No files found.
dependency-reduced-pom.xml
View file @
c495fcc6
...
@@ -71,6 +71,32 @@
...
@@ -71,6 +71,32 @@
</plugin>
</plugin>
</plugins>
</plugins>
</build>
</build>
<dependencies>
<dependency>
<groupId>
com.zhiwei.crawler
</groupId>
<artifactId>
crawler-core
</artifactId>
<version>
0.6.7.2-RELEASE
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
junit
</groupId>
<artifactId>
junit
</artifactId>
<version>
4.13
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
org.projectlombok
</groupId>
<artifactId>
lombok
</artifactId>
<version>
1.18.20
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
org.springframework
</groupId>
<artifactId>
spring-test
</artifactId>
<version>
5.3.6
</version>
<scope>
test
</scope>
</dependency>
</dependencies>
<properties>
<properties>
<project.reporting.outputEncoding>
UTF-8
</project.reporting.outputEncoding>
<project.reporting.outputEncoding>
UTF-8
</project.reporting.outputEncoding>
<project.build.sourceEncoding>
UTF-8
</project.build.sourceEncoding>
<project.build.sourceEncoding>
UTF-8
</project.build.sourceEncoding>
...
...
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoMassage.java
0 → 100644
View file @
c495fcc6
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
/**
 * @ClassName: WeiBoMassage
 * @Description: Main information of a Weibo post
 * @author ll
 * @date 2021-05-27 14:26:11
 */
import
lombok.Data
;
import
lombok.ToString
;
import
java.io.Serializable
;
import
java.util.Date
;
import
java.util.List
;
/**
 * Data holder for one Weibo post.
 *
 * <p>Getters, setters, {@code equals}/{@code hashCode} and {@code toString} are generated by
 * Lombok. The declaration order of the fields is kept stable because the generated
 * {@code toString} follows it.
 */
@Data
@ToString
public class WeiBoMassage implements Serializable {

    private static final long serialVersionUID = 5640606453392799871L;

    /** Primary key. */
    private String id;

    /** User id. */
    private String userId;

    /** Text content. */
    private String text;

    /** User name. */
    private String userName;

    /** The {@code mid} value (NOTE(review): presumably Weibo's own message id - confirm). */
    private String mid;

    /** Creation time. */
    private Date creatTime;

    /** Edit time. */
    private Date editTime;

    /** Card type (undocumented in the original source). */
    private Integer cardType;

    /** Display type. */
    private Integer showType;

    /** Number of reposts. */
    private Long repostCount;

    /** Number of comments. */
    private Long commentCount;

    /** Number of likes. */
    private Long attitudeCount;

    /** Play count. */
    private Long playCount;

    /** Picture URLs. */
    private List<String> pictureUrlList;

    /** Source. */
    private String source;

    /** Type. */
    private String type;

    /** Topic. */
    private String topic;

    // Whether this post is a repost.
    private Integer forward;

    // Repost: mid of the source post.
    private String root_mid;

    // Repost: user information of the source post.
    // Repost: source id.
    private String root_id;

    // Repost: source name.
    private String root_name;

    // Repost: text of the source post.
    private String root_text;

    // Repost: source of the source post.
    private String root_source;

    /** Creates an instance with every field left at its default value. */
    public WeiBoMassage() {
    }

    /**
     * Creates a post record and derives its primary key.
     *
     * <p>The key is {@code mid + "_" + HotSearchType.微博热搜.name() + "_" + topic}. Fields that
     * have no matching parameter ({@code playCount}, {@code pictureUrlList}, {@code forward}
     * and the {@code root_*} fields) stay {@code null} until set through their setters.
     *
     * @param userId        user id
     * @param text          text content
     * @param userName      user name
     * @param mid           the {@code mid} value, also used as the first part of the key
     * @param creatTime     creation time
     * @param editTime      edit time
     * @param cardType      card type
     * @param showType      display type
     * @param repostCount   number of reposts
     * @param commentCount  number of comments
     * @param attitudeCount number of likes
     * @param source        source
     * @param type          type
     * @param topic         topic, also used as the last part of the key
     */
    public WeiBoMassage(String userId, String text, String userName, String mid,
                        Date creatTime, Date editTime, Integer cardType, Integer showType,
                        Long repostCount, Long commentCount, Long attitudeCount,
                        String source, String type, String topic) {
        // Key first: it is built purely from the incoming arguments.
        this.id = mid + "_" + HotSearchType.微博热搜.name() + "_" + topic;

        // Identity of the post and its author.
        this.mid = mid;
        this.userId = userId;
        this.userName = userName;

        // Content and classification.
        this.text = text;
        this.topic = topic;
        this.type = type;
        this.source = source;
        this.cardType = cardType;
        this.showType = showType;

        // Timestamps.
        this.creatTime = creatTime;
        this.editTime = editTime;

        // Interaction counters.
        this.repostCount = repostCount;
        this.commentCount = commentCount;
        this.attitudeCount = attitudeCount;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/bean/WeiBoUser.java
0 → 100644
View file @
c495fcc6
package
com
.
zhiwei
.
searchhotcrawler
.
bean
;
/**
 * @ClassName: WeiBoUser
 * @Description: Weibo user
 * @author ll
 * @date 2021-05-27 15:26:11
 */
import
lombok.Data
;
import
lombok.ToString
;
import
java.io.Serializable
;
import
java.util.Date
;
/**
 * Data holder for one Weibo user.
 *
 * <p>Getters, setters, {@code equals}/{@code hashCode} and {@code toString} are generated by
 * Lombok. The declaration order of the fields is kept stable because the generated
 * {@code toString} follows it.
 */
@Data
@ToString
public class WeiBoUser implements Serializable {

    private static final long serialVersionUID = -2856936638431788899L;

    /** Primary key. */
    private String id;

    /** User id. */
    private String userId;

    /** Verification information. */
    private String attestationMassage;

    /** User name. */
    private String userName;

    /** Topic. */
    private String topic;

    /** Time. */
    private Date time;

    /** Number of followers. */
    private Long followerCount;

    /** Creates an instance with every field left at its default value. */
    public WeiBoUser() {
    }

    /**
     * Creates a user record and derives its primary key.
     *
     * <p>The key is {@code userId + "_" + HotSearchType.微博热搜.name() + "_" + topic}.
     *
     * @param userId             user id, also used as the first part of the key
     * @param attestationMassage verification information
     * @param userName           user name
     * @param topic              topic, also used as the last part of the key
     * @param time               time
     * @param followerCount      number of followers
     */
    public WeiBoUser(String userId, String attestationMassage, String userName,
                     String topic, Date time, Long followerCount) {
        // Key first: it is built purely from the incoming arguments.
        this.id = userId + "_" + HotSearchType.微博热搜.name() + "_" + topic;

        // Who the user is.
        this.userId = userId;
        this.userName = userName;
        this.attestationMassage = attestationMassage;
        this.followerCount = followerCount;

        // Where and when the user was seen.
        this.topic = topic;
        this.time = time;
    }
}
src/main/java/com/zhiwei/searchhotcrawler/config/DBConfig.java
View file @
c495fcc6
...
@@ -19,6 +19,9 @@ public class DBConfig {
...
@@ -19,6 +19,9 @@ public class DBConfig {
searchCacheCollName
=
conf
.
getProperty
(
"searchCacheCollName"
);
searchCacheCollName
=
conf
.
getProperty
(
"searchCacheCollName"
);
topicCollName
=
conf
.
getProperty
(
"topicCollName"
);
topicCollName
=
conf
.
getProperty
(
"topicCollName"
);
collWechatUserName
=
conf
.
getProperty
(
"collWechatUserName"
);
collWechatUserName
=
conf
.
getProperty
(
"collWechatUserName"
);
weiBoMassageCollName
=
conf
.
getProperty
(
"weiBoMassageCollName"
);
weiBoUserCollName
=
conf
.
getProperty
(
"weiBoUserCollName"
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
}
}
...
@@ -32,4 +35,6 @@ public class DBConfig {
...
@@ -32,4 +35,6 @@ public class DBConfig {
public
static
String
searchCacheCollName
;
public
static
String
searchCacheCollName
;
public
static
String
topicCollName
;
public
static
String
topicCollName
;
public
static
String
collWechatUserName
;
public
static
String
collWechatUserName
;
public
static
String
weiBoMassageCollName
;
public
static
String
weiBoUserCollName
;
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboHotSearchCrawler.java
View file @
c495fcc6
This diff is collapsed.
Click to expand it.
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoMassageDao.java
0 → 100644
View file @
c495fcc6
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoDatabase
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoMassage
;
import
com.zhiwei.searchhotcrawler.config.DBConfig
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Objects
;
/**
*微博信息入库
*/
@Log4j2
public
class
WeiBoMassageDao
{
public
static
MongoDatabase
mongoDatabase
=
MongoDBTemplate
.
getDB
(
DBConfig
.
dbName
);
public
static
MongoCollection
mongoCollection
;
public
WeiBoMassageDao
()
{
String
collName
=
DBConfig
.
weiBoMassageCollName
;
mongoCollection
=
mongoDatabase
.
getCollection
(
collName
);
//给数据表创建索引
MongoDBTemplate
.
createIndex
(
DBConfig
.
dbName
,
collName
);
}
/**
* 添加数据入库
* @param weiBoMassage
*/
public
void
addWeiBoMassage
(
WeiBoMassage
weiBoMassage
){
log
.
info
(
"weiBoMassage对象开始转document对象"
);
Document
document
=
new
Document
();
document
.
put
(
"_id"
,
weiBoMassage
.
getId
());
document
.
put
(
"userId"
,
weiBoMassage
.
getUserId
());
document
.
put
(
"text"
,
weiBoMassage
.
getText
());
document
.
put
(
"userName"
,
weiBoMassage
.
getUserName
());
document
.
put
(
"mid"
,
weiBoMassage
.
getMid
());
document
.
put
(
"creatTime"
,
weiBoMassage
.
getCreatTime
());
if
(
Objects
.
nonNull
(
weiBoMassage
.
getEditTime
())){
document
.
put
(
"editTime"
,
weiBoMassage
.
getEditTime
());
}
document
.
put
(
"cardType"
,
weiBoMassage
.
getCardType
());
document
.
put
(
"showType"
,
weiBoMassage
.
getShowType
());
document
.
put
(
"repostCount"
,
weiBoMassage
.
getRepostCount
());
document
.
put
(
"commentCount"
,
weiBoMassage
.
getCommentCount
());
document
.
put
(
"attitudeCount"
,
weiBoMassage
.
getAttitudeCount
());
if
(
Objects
.
nonNull
(
weiBoMassage
.
getPlayCount
())){
document
.
put
(
"playCount"
,
weiBoMassage
.
getPlayCount
());
}
if
(
weiBoMassage
.
getPictureUrlList
().
size
()!=
0
){
document
.
put
(
"pictureUrlList"
,
weiBoMassage
.
getPictureUrlList
());
}
document
.
put
(
"source"
,
weiBoMassage
.
getSource
());
document
.
put
(
"type"
,
weiBoMassage
.
getType
());
document
.
put
(
"topic"
,
weiBoMassage
.
getTopic
());
document
.
put
(
"forward"
,
weiBoMassage
.
getForward
());
if
(
0
!=
weiBoMassage
.
getForward
()){
document
.
put
(
"root_mid"
,
weiBoMassage
.
getRoot_mid
());
document
.
put
(
"root_id"
,
weiBoMassage
.
getRoot_id
());
document
.
put
(
"root_name"
,
weiBoMassage
.
getRoot_name
());
document
.
put
(
"root_text"
,
weiBoMassage
.
getRoot_text
());
document
.
put
(
"root_source"
,
weiBoMassage
.
getRoot_source
());
}
log
.
info
(
"weiBoMassage对象转document对象完成"
);
try
{
mongoCollection
.
insertOne
(
document
);
log
.
info
(
"数据插入成功"
);
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
}
src/main/java/com/zhiwei/searchhotcrawler/dao/WeiBoUserDao.java
0 → 100644
View file @
c495fcc6
package
com
.
zhiwei
.
searchhotcrawler
.
dao
;
import
com.mongodb.client.MongoCollection
;
import
com.mongodb.client.MongoDatabase
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoMassage
;
import
com.zhiwei.searchhotcrawler.bean.WeiBoUser
;
import
com.zhiwei.searchhotcrawler.config.DBConfig
;
import
com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Objects
;
@Log4j2
public
class
WeiBoUserDao
{
public
static
MongoDatabase
mongoDatabase
=
MongoDBTemplate
.
getDB
(
DBConfig
.
dbName
);
public
static
MongoCollection
mongoCollection
;
public
WeiBoUserDao
()
{
String
collName
=
DBConfig
.
weiBoUserCollName
;
mongoCollection
=
mongoDatabase
.
getCollection
(
collName
);
//给数据表创建索引
MongoDBTemplate
.
createIndex
(
DBConfig
.
dbName
,
collName
);
}
/**
* 添加数据入库
* @param weiBoUser
*/
public
void
addWeiBoUser
(
WeiBoUser
weiBoUser
){
log
.
info
(
"WeiBoUser对象开始转document对象"
);
Document
document
=
new
Document
();
document
.
put
(
"_id"
,
weiBoUser
.
getId
());
document
.
put
(
"userId"
,
weiBoUser
.
getUserId
());
if
(
Objects
.
nonNull
(
weiBoUser
.
getAttestationMassage
())){
document
.
put
(
"attestationMassage"
,
weiBoUser
.
getAttestationMassage
());
}
document
.
put
(
"userName"
,
weiBoUser
.
getUserName
());
document
.
put
(
"topic"
,
weiBoUser
.
getTopic
());
document
.
put
(
"time"
,
weiBoUser
.
getTime
());
document
.
put
(
"followerCount"
,
weiBoUser
.
getFollowerCount
());
log
.
info
(
"WeiBoUser对象转document对象完成"
);
try
{
mongoCollection
.
insertOne
(
document
);
log
.
info
(
"数据插入成功"
);
}
catch
(
Exception
e
)
{
log
.
error
(
"存储数据时出错,错误为:{}"
,
e
);
}
}
}
src/test/java/weiboTest/WeiboHotSearchTest.java
View file @
c495fcc6
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment