Commit c495fcc6 by leiliangliang

微博话题解析新增采集微博信息和微博用户

parent f01e39b6
...@@ -71,6 +71,32 @@ ...@@ -71,6 +71,32 @@
</plugin> </plugin>
</plugins> </plugins>
</build> </build>
<dependencies>
<!-- Dependencies added together with the Weibo topic message/user collection change; every entry below is declared with test scope. -->
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.6.7.2-RELEASE</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13</version>
<scope>test</scope>
</dependency>
<!-- NOTE(review): lombok is test-scoped here, yet @Data / @Log4j2 are used by the bean and dao classes added in this change. Confirm those classes compile against another lombok declaration, or that this scope is intended. -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.20</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
<version>5.3.6</version>
<scope>test</scope>
</dependency>
</dependencies>
<properties> <properties>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
......
package com.zhiwei.searchhotcrawler.bean;
/**
* @ClassName: WeiBoMassage
* @Description: 微博主要信息
* @author ll
* @date 2021年5月27日 下午2:26:11
*/
import lombok.Data;
import lombok.ToString;
import java.io.Serializable;
import java.util.Date;
import java.util.List;
/**
 * Main information of a single Weibo post collected for a topic.
 *
 * <p>Accessors, {@code equals}/{@code hashCode} and {@code toString} are generated by Lombok.
 * The argument-taking constructor does not populate {@code playCount}, {@code pictureUrlList},
 * {@code forward} or any of the {@code root_*} fields; they stay {@code null} until set.
 */
@Data
@ToString
public class WeiBoMassage implements Serializable {

    private static final long serialVersionUID = 5640606453392799871L;

    /** Primary key, built by the constructor as {@code mid_<hot-search type name>_topic}. */
    private String id;

    /** Id of the user who published the post. */
    private String userId;

    /** Text content of the post. */
    private String text;

    /** Display name of the publishing user. */
    private String userName;

    /** Weibo "mid" value of the post (undocumented in the original; presumably the message id). */
    private String mid;

    /** Creation time of the post. */
    private Date creatTime;

    /** Time of the last edit, if any. */
    private Date editTime;

    /** Card type (undocumented in the original; meaning of the values is not visible here). */
    private Integer cardType;

    /** Display type. */
    private Integer showType;

    /** Number of reposts. */
    private Long repostCount;

    /** Number of comments. */
    private Long commentCount;

    /** Number of likes. */
    private Long attitudeCount;

    /** Number of plays. */
    private Long playCount;

    /** URLs of the pictures attached to the post. */
    private List<String> pictureUrlList;

    /** Source of the post. */
    private String source;

    /** Type of the post. */
    private String type;

    /** Topic the post was collected under. */
    private String topic;

    /** Whether the post is a repost. */
    private Integer forward;

    /** Repost only: mid of the original post. */
    private String root_mid;

    /** Repost only: id of the original post's user. */
    private String root_id;

    /** Repost only: name of the original post's user. */
    private String root_name;

    /** Repost only: text of the original post. */
    private String root_text;

    /** Repost only: source of the original post. */
    private String root_source;

    /** Creates an empty instance; every field starts out {@code null}. */
    public WeiBoMassage() {
    }

    /**
     * Creates a post record and derives its primary key from {@code mid} and {@code topic}.
     *
     * @param userId        id of the publishing user
     * @param text          text content
     * @param userName      name of the publishing user
     * @param mid           Weibo mid of the post
     * @param creatTime     creation time
     * @param editTime      last edit time
     * @param cardType      card type
     * @param showType      display type
     * @param repostCount   number of reposts
     * @param commentCount  number of comments
     * @param attitudeCount number of likes
     * @param source        source of the post
     * @param type          type of the post
     * @param topic         topic the post belongs to
     */
    public WeiBoMassage(String userId, String text, String userName, String mid,
                        Date creatTime, Date editTime, Integer cardType, Integer showType,
                        Long repostCount, Long commentCount, Long attitudeCount,
                        String source, String type, String topic) {
        // Who posted what.
        this.userId = userId;
        this.userName = userName;
        this.mid = mid;
        this.text = text;
        this.topic = topic;

        // When, and how it is rendered.
        this.creatTime = creatTime;
        this.editTime = editTime;
        this.cardType = cardType;
        this.showType = showType;

        // Interaction counters.
        this.repostCount = repostCount;
        this.commentCount = commentCount;
        this.attitudeCount = attitudeCount;

        this.source = source;
        this.type = type;

        // String.join renders null elements as "null", exactly like string concatenation.
        this.id = String.join("_", mid, HotSearchType.微博热搜.name(), topic);
    }
}
package com.zhiwei.searchhotcrawler.bean;
/**
* @ClassName: WeiBoUser
* @Description: 微博用户
* @author ll
* @date 2021年5月27日 下午3:26:11
*/
import lombok.Data;
import lombok.ToString;
import java.io.Serializable;
import java.util.Date;
/**
 * A Weibo user collected for a topic.
 *
 * <p>Accessors, {@code equals}/{@code hashCode} and {@code toString} are generated by Lombok.
 */
@Data
@ToString
public class WeiBoUser implements Serializable {

    private static final long serialVersionUID = -2856936638431788899L;

    /** Primary key, built by the constructor as {@code userId_<hot-search type name>_topic}. */
    private String id;

    /** Id of the user. */
    private String userId;

    /** Verification (attestation) information of the user. */
    private String attestationMassage;

    /** Display name of the user. */
    private String userName;

    /** Topic the user was collected under. */
    private String topic;

    /** Time associated with this record. */
    private Date time;

    /** Number of followers. */
    private Long followerCount;

    /** Creates an empty instance; every field starts out {@code null}. */
    public WeiBoUser() {
    }

    /**
     * Creates a user record and derives its primary key from {@code userId} and {@code topic}.
     *
     * @param userId             id of the user
     * @param attestationMassage verification information
     * @param userName           name of the user
     * @param topic              topic the user belongs to
     * @param time               time associated with the record
     * @param followerCount      number of followers
     */
    public WeiBoUser(String userId, String attestationMassage, String userName,
                     String topic, Date time, Long followerCount) {
        this.userId = userId;
        this.userName = userName;
        this.attestationMassage = attestationMassage;
        this.followerCount = followerCount;
        this.topic = topic;
        this.time = time;

        // String.join renders null elements as "null", exactly like string concatenation.
        this.id = String.join("_", userId, HotSearchType.微博热搜.name(), topic);
    }
}
...@@ -19,6 +19,9 @@ public class DBConfig { ...@@ -19,6 +19,9 @@ public class DBConfig {
searchCacheCollName = conf.getProperty("searchCacheCollName"); searchCacheCollName = conf.getProperty("searchCacheCollName");
topicCollName = conf.getProperty("topicCollName"); topicCollName = conf.getProperty("topicCollName");
collWechatUserName = conf.getProperty("collWechatUserName"); collWechatUserName = conf.getProperty("collWechatUserName");
weiBoMassageCollName = conf.getProperty("weiBoMassageCollName");
weiBoUserCollName = conf.getProperty("weiBoUserCollName");
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
...@@ -32,4 +35,6 @@ public class DBConfig { ...@@ -32,4 +35,6 @@ public class DBConfig {
public static String searchCacheCollName; public static String searchCacheCollName;
public static String topicCollName; public static String topicCollName;
public static String collWechatUserName; public static String collWechatUserName;
public static String weiBoMassageCollName;
public static String weiBoUserCollName;
} }
package com.zhiwei.searchhotcrawler.dao;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.zhiwei.searchhotcrawler.bean.WeiBoMassage;
import com.zhiwei.searchhotcrawler.config.DBConfig;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import lombok.extern.log4j.Log4j2;
import org.bson.Document;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
/**
*微博信息入库
*/
@Log4j2
public class WeiBoMassageDao {
public static MongoDatabase mongoDatabase = MongoDBTemplate.getDB(DBConfig.dbName);
public static MongoCollection mongoCollection;
public WeiBoMassageDao() {
String collName = DBConfig.weiBoMassageCollName;
mongoCollection = mongoDatabase.getCollection(collName);
//给数据表创建索引
MongoDBTemplate.createIndex(DBConfig.dbName, collName);
}
/**
* 添加数据入库
* @param weiBoMassage
*/
public void addWeiBoMassage(WeiBoMassage weiBoMassage){
log.info("weiBoMassage对象开始转document对象");
Document document = new Document();
document.put("_id",weiBoMassage.getId());
document.put("userId",weiBoMassage.getUserId());
document.put("text",weiBoMassage.getText());
document.put("userName",weiBoMassage.getUserName());
document.put("mid",weiBoMassage.getMid());
document.put("creatTime",weiBoMassage.getCreatTime());
if (Objects.nonNull(weiBoMassage.getEditTime())){
document.put("editTime",weiBoMassage.getEditTime());
}
document.put("cardType",weiBoMassage.getCardType());
document.put("showType",weiBoMassage.getShowType());
document.put("repostCount",weiBoMassage.getRepostCount());
document.put("commentCount",weiBoMassage.getCommentCount());
document.put("attitudeCount",weiBoMassage.getAttitudeCount());
if (Objects.nonNull(weiBoMassage.getPlayCount())){
document.put("playCount",weiBoMassage.getPlayCount());
}
if (weiBoMassage.getPictureUrlList().size()!=0){
document.put("pictureUrlList",weiBoMassage.getPictureUrlList());
}
document.put("source",weiBoMassage.getSource());
document.put("type",weiBoMassage.getType());
document.put("topic",weiBoMassage.getTopic());
document.put("forward",weiBoMassage.getForward());
if (0!=weiBoMassage.getForward()){
document.put("root_mid",weiBoMassage.getRoot_mid());
document.put("root_id",weiBoMassage.getRoot_id());
document.put("root_name",weiBoMassage.getRoot_name());
document.put("root_text",weiBoMassage.getRoot_text());
document.put("root_source",weiBoMassage.getRoot_source());
}
log.info("weiBoMassage对象转document对象完成");
try {
mongoCollection.insertOne(document);
log.info("数据插入成功");
} catch (Exception e) {
log.error("存储数据时出错,错误为:{}",e);
}
}
}
package com.zhiwei.searchhotcrawler.dao;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.zhiwei.searchhotcrawler.bean.WeiBoMassage;
import com.zhiwei.searchhotcrawler.bean.WeiBoUser;
import com.zhiwei.searchhotcrawler.config.DBConfig;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import lombok.extern.log4j.Log4j2;
import org.bson.Document;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
@Log4j2
public class WeiBoUserDao {
public static MongoDatabase mongoDatabase = MongoDBTemplate.getDB(DBConfig.dbName);
public static MongoCollection mongoCollection;
public WeiBoUserDao() {
String collName = DBConfig.weiBoUserCollName;
mongoCollection = mongoDatabase.getCollection(collName);
//给数据表创建索引
MongoDBTemplate.createIndex(DBConfig.dbName, collName);
}
/**
* 添加数据入库
* @param weiBoUser
*/
public void addWeiBoUser(WeiBoUser weiBoUser){
log.info("WeiBoUser对象开始转document对象");
Document document = new Document();
document.put("_id",weiBoUser.getId());
document.put("userId",weiBoUser.getUserId());
if (Objects.nonNull(weiBoUser.getAttestationMassage())){
document.put("attestationMassage",weiBoUser.getAttestationMassage());
}
document.put("userName",weiBoUser.getUserName());
document.put("topic",weiBoUser.getTopic());
document.put("time",weiBoUser.getTime());
document.put("followerCount",weiBoUser.getFollowerCount());
log.info("WeiBoUser对象转document对象完成");
try {
mongoCollection.insertOne(document);
log.info("数据插入成功");
} catch (Exception e) {
log.error("存储数据时出错,错误为:{}",e);
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment