Commit 600d1086 by chenweitao

Revert "Revert "新增微博话题采集话题贡献者,关于功能""

This reverts commit 37d43810
parent 9e1b3d9f
......@@ -73,4 +73,10 @@ public class WeiBoUser implements Serializable {
this.profileImageUrl = profileImageUrl;
}
/**
 * Creates a user record with only its identity, topic and timestamp populated.
 * This constructor assigns exactly four fields and leaves every other field untouched.
 *
 * @param userId   value stored in the {@code userId} field
 * @param userName value stored in the {@code userName} field
 * @param topic    value stored in the {@code topic} field
 * @param time     value stored in the {@code time} field
 */
public WeiBoUser(String userId, String userName, String topic, Date time) {
    this.time = time;
    this.topic = topic;
    this.userName = userName;
    this.userId = userId;
}
}
......@@ -328,15 +328,16 @@ public class WeiboHotSearchCrawler {
}
/**
* 微博热搜数据更新话题贡献者排行,阅读量,讨论量,关于
* 微博热搜数据更新话题贡献者排行,关于
*
* @param document
* @return
*/
public static Document weiboUpdatePC(Document document) {
document.getString("name");
String name = document.getString("name");
String gb = "#" + name + "#";
String encode =null;
String topic = document.getString("name");
String gb = "#" + topic + "#";
String encode = null;
try {
encode = URLEncoder.encode(gb, "utf-8");
} catch (UnsupportedEncodingException e) {
......@@ -356,35 +357,37 @@ public class WeiboHotSearchCrawler {
org.jsoup.nodes.Document documen = Jsoup.parse(htmlBody);
//获取贡献者信息
try {
Elements li = documen.select("ul.card-user-list-a").select("li");
if (Objects.isNull(weiBoUserDao)) {
weiBoUserDao = new WeiBoUserDao();
}
Elements cardUser = documen.select("div.card-user");
for (Element element : cardUser) {
if (!element.select("div.card-head").text().isEmpty()) {
Elements li = element.select("ul.card-user-list-a").select("li");
if (Objects.nonNull(li)) {
Date date = new Date();
for (Element element : li) {
WeiBoUser weiBoUser = new WeiBoUser();
//获取用户名
String userName = element.select("a.name").text();
//获取用户id
String attr = element.select("span.avator").select("a").first().attr("href");
String userId = attr.substring(14);
//循环获取话题贡献者相关信息
for (Element eleme : li) {
String type = "话题贡献者";
String id = userId + "_" + type + "_" + name;
weiBoUser.setType(type);
weiBoUser.setId(id);
weiBoUser.setUserName(userName);
weiBoUser.setUserId(userId);
weiBoUser.setTopic(name);
weiBoUser.setTime(date);
weiBoUserDao.addWeiBoUser(weiBoUser);
writeUser(eleme, type, topic);
}
}
} else {
Elements li = element.select("ul.card-user-list-a").select("li");
if (Objects.nonNull(li)) {
//循环获取话题贡献者相关信息
for (Element eleme : li) {
String type = "当事人";
writeUser(eleme, type, topic);
}
}
}
}
} catch (Exception e) {
log.error("话题贡献者排行采集异常",e);
log.error("话题贡献者排行采集异常", e);
}
Elements dt = documen.select("div.card-about").select("dt");
if (Objects.nonNull(dt)) {
//获取微博关于的相关信息
Elements dd = documen.select("div.card-about").select("dd");
Document dtDocument = new Document();
Document ddDocument = new Document();
......@@ -407,12 +410,36 @@ public class WeiboHotSearchCrawler {
}
return docm;
} catch (Exception e) {
log.error("解析微博话题时出现解析错误",e);
log.error("解析微博话题时出现解析错误", e);
}
}
return document;
}
/**
 * Builds a {@code WeiBoUser} record from one list item of a user card and persists it
 * through {@code weiBoUserDao}.
 *
 * <p>The record id is {@code userId + "_" + type + "_" + topic}, and the record is
 * stamped with the current time.
 *
 * @param eleme list item expected to contain an {@code a.name} element (user name) and an
 *              anchor inside {@code span.avator} whose {@code href} carries the user id
 * @param type  role label stored on the record and embedded in its id
 * @param topic topic name stored on the record and embedded in its id
 * @throws IllegalStateException if the item has no anchor inside {@code span.avator}, or the
 *                               anchor's {@code href} is too short to contain a user id
 */
private static void writeUser(Element eleme, String type, String topic) {
    // Create the DAO on first use if it has not been set yet.
    if (Objects.isNull(weiBoUserDao)) {
        weiBoUserDao = new WeiBoUserDao();
    }

    // User name is the text of the name link.
    String userName = eleme.select("a.name").text();

    // first() yields null when nothing matched; fail with a descriptive message
    // instead of a bare NullPointerException.
    Element avatarLink = eleme.select("span.avator").select("a").first();
    if (avatarLink == null) {
        throw new IllegalStateException("User list item has no avatar link: cannot extract user id");
    }

    // The user id is everything after the first 14 characters of the href
    // (presumably the "//weibo.com/u/" prefix -- TODO confirm against the live markup).
    final int userIdOffset = 14;
    String href = avatarLink.attr("href");
    if (href.length() < userIdOffset) {
        throw new IllegalStateException("Avatar link href is too short to contain a user id: " + href);
    }
    String userId = href.substring(userIdOffset);

    String id = userId + "_" + type + "_" + topic;

    // Argument order must be (userId, userName, topic, time); passing the name first
    // would store the user name in the id field and vice versa.
    WeiBoUser weiBoUser = new WeiBoUser(userId, userName, topic, new Date());
    weiBoUser.setType(type);
    weiBoUser.setId(id);
    weiBoUserDao.addWeiBoUser(weiBoUser);
}
/**
* 解析微博信息
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment