Commit 7e156432 by leiliangliang

新增微博话题采集话题贡献者,关于功能

parent f986b5c8
...@@ -73,4 +73,10 @@ public class WeiBoUser implements Serializable { ...@@ -73,4 +73,10 @@ public class WeiBoUser implements Serializable {
this.profileImageUrl = profileImageUrl; this.profileImageUrl = profileImageUrl;
} }
/**
 * Creates a user record with its identity, topic and timestamp set.
 * Only these four fields are assigned here.
 *
 * @param userId   value stored in {@code userId}
 * @param userName value stored in {@code userName}
 * @param topic    value stored in {@code topic}
 * @param time     value stored in {@code time}
 */
public WeiBoUser(String userId, String userName, String topic, Date time) {
    this.time = time;
    this.topic = topic;
    this.userName = userName;
    this.userId = userId;
}
} }
...@@ -328,17 +328,18 @@ public class WeiboHotSearchCrawler { ...@@ -328,17 +328,18 @@ public class WeiboHotSearchCrawler {
} }
/** /**
* 微博热搜数据更新话题贡献者排行,阅读量,讨论量,关于 * 微博热搜数据更新话题贡献者排行,关于
*
* @param document * @param document
* @return * @return
*/ */
public static Document weiboUpdatePC(Document document) { public static Document weiboUpdatePC(Document document) {
document.getString("name"); document.getString("name");
String name = document.getString("name"); String topic = document.getString("name");
String gb = "#" + name + "#"; String gb = "#" + topic + "#";
String encode =null; String encode = null;
try { try {
encode = URLEncoder.encode(gb, "utf-8"); encode = URLEncoder.encode(gb, "utf-8");
} catch (UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
log.error("字符解析成URl模式异常", e); log.error("字符解析成URl模式异常", e);
} }
...@@ -356,35 +357,37 @@ public class WeiboHotSearchCrawler { ...@@ -356,35 +357,37 @@ public class WeiboHotSearchCrawler {
org.jsoup.nodes.Document documen = Jsoup.parse(htmlBody); org.jsoup.nodes.Document documen = Jsoup.parse(htmlBody);
//获取贡献者信息 //获取贡献者信息
try { try {
Elements li = documen.select("ul.card-user-list-a").select("li");
if (Objects.isNull(weiBoUserDao)) { Elements cardUser = documen.select("div.card-user");
weiBoUserDao = new WeiBoUserDao(); for (Element element : cardUser) {
} if (!element.select("div.card-head").text().isEmpty()) {
if (Objects.nonNull(li)) { Elements li = element.select("ul.card-user-list-a").select("li");
Date date = new Date(); if (Objects.nonNull(li)) {
for (Element element : li) { //循环获取话题贡献者相关信息
WeiBoUser weiBoUser = new WeiBoUser(); for (Element eleme : li) {
//获取用户名 String type = "话题贡献者";
String userName = element.select("a.name").text(); writeUser(eleme, type, topic);
//获取用户id }
String attr = element.select("span.avator").select("a").first().attr("href"); }
String userId = attr.substring(14); } else {
String type = "话题贡献者"; Elements li = element.select("ul.card-user-list-a").select("li");
String id = userId + "_" + type + "_" + name; if (Objects.nonNull(li)) {
weiBoUser.setType(type);
weiBoUser.setId(id); //循环获取话题贡献者相关信息
weiBoUser.setUserName(userName); for (Element eleme : li) {
weiBoUser.setUserId(userId); String type = "当事人";
weiBoUser.setTopic(name); writeUser(eleme, type, topic);
weiBoUser.setTime(date); }
weiBoUserDao.addWeiBoUser(weiBoUser); }
} }
} }
} catch (Exception e) { } catch (Exception e) {
log.error("话题贡献者排行采集异常",e); log.error("话题贡献者排行采集异常", e);
} }
Elements dt = documen.select("div.card-about").select("dt"); Elements dt = documen.select("div.card-about").select("dt");
if (Objects.nonNull(dt)) { if (Objects.nonNull(dt)) {
//获取微博关于的相关信息
Elements dd = documen.select("div.card-about").select("dd"); Elements dd = documen.select("div.card-about").select("dd");
Document dtDocument = new Document(); Document dtDocument = new Document();
Document ddDocument = new Document(); Document ddDocument = new Document();
...@@ -407,12 +410,36 @@ public class WeiboHotSearchCrawler { ...@@ -407,12 +410,36 @@ public class WeiboHotSearchCrawler {
} }
return docm; return docm;
} catch (Exception e) { } catch (Exception e) {
log.error("解析微博话题时出现解析错误",e); log.error("解析微博话题时出现解析错误", e);
} }
} }
return document; return document;
} }
/**
 * Builds a {@code WeiBoUser} from one list-item element and passes it to
 * {@code weiBoUserDao.addWeiBoUser}.
 *
 * <p>The user name is the text of {@code a.name}. The user id is taken from the
 * {@code href} of the first link inside {@code span.avator}, with its first 14
 * characters dropped. The record id is {@code userId + "_" + type + "_" + topic}.
 *
 * @param eleme list-item element describing one user
 * @param type  label stored on the record and embedded in the record id
 * @param topic topic name stored on the record and embedded in the record id
 * @throws IllegalStateException if the avatar link is missing or its href is
 *                               too short to contain a user id
 */
private static void writeUser(Element eleme, String type, String topic) {
    // Number of leading href characters dropped to obtain the user id.
    // NOTE(review): presumably the length of a fixed prefix such as "//weibo.com/u/" - confirm.
    final int userIdOffset = 14;

    // Create the DAO on first use.
    if (Objects.isNull(weiBoUserDao)) {
        weiBoUserDao = new WeiBoUserDao();
    }

    // User name.
    String userName = eleme.select("a.name").text();

    // User id: first() returns null when nothing matched, so fail with a clear message.
    Element avatarLink = eleme.select("span.avator").select("a").first();
    if (avatarLink == null) {
        throw new IllegalStateException("no avatar link found for user '" + userName + "'");
    }
    String href = avatarLink.attr("href");
    if (href.length() < userIdOffset) {
        throw new IllegalStateException("avatar href too short to hold a user id: '" + href + "'");
    }
    String userId = href.substring(userIdOffset);

    // The WeiBoUser constructor takes (userId, userName, topic, time); passing the
    // name first would store the name as the id and the id as the name.
    WeiBoUser weiBoUser = new WeiBoUser(userId, userName, topic, new Date());
    weiBoUser.setType(type);
    weiBoUser.setId(userId + "_" + type + "_" + topic);
    weiBoUserDao.addWeiBoUser(weiBoUser);
}
/** /**
* 解析微博信息 * 解析微博信息
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment