Commit 116eb891 by leiliangliang

更新微博话题解析

parent 7bbc8832
...@@ -18,6 +18,7 @@ import okhttp3.Request; ...@@ -18,6 +18,7 @@ import okhttp3.Request;
import okhttp3.Response; import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.bson.Document; import org.bson.Document;
import org.checkerframework.checker.units.qual.C;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
...@@ -47,6 +48,9 @@ public class WeiboHotSearchCrawler { ...@@ -47,6 +48,9 @@ public class WeiboHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static RedisDao redisDao = new RedisDao(); private static RedisDao redisDao = new RedisDao();
static WeiBoUserDao weiBoUserDao = new WeiBoUserDao();
static WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao();
/** /**
* @Title: weiboHotSearchTest * @Title: weiboHotSearchTest
* @author hero * @author hero
...@@ -274,9 +278,13 @@ public class WeiboHotSearchCrawler { ...@@ -274,9 +278,13 @@ public class WeiboHotSearchCrawler {
} }
try { try {
//调用weiBoMassageDao添加数据
WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao();
//解析cards,获取热门微博、人物 //解析cards,获取热门微博、人物
if (Objects.isNull(weiBoMassageDao)){
weiBoMassageDao = new WeiBoMassageDao();
}
if (Objects.isNull(weiBoUserDao)){
weiBoUserDao = new WeiBoUserDao();
}
for (JSONObject jsonObject : cardsJsons) { for (JSONObject jsonObject : cardsJsons) {
if (nonNull(jsonObject) && !jsonObject.isEmpty()) { if (nonNull(jsonObject) && !jsonObject.isEmpty()) {
if (jsonObject.containsKey("mblog")) { if (jsonObject.containsKey("mblog")) {
...@@ -292,14 +300,18 @@ public class WeiboHotSearchCrawler { ...@@ -292,14 +300,18 @@ public class WeiboHotSearchCrawler {
if (Objects.nonNull(weiBoMassage)) { if (Objects.nonNull(weiBoMassage)) {
weiBoMassageDao.addWeiBoMassage(weiBoMassage); weiBoMassageDao.addWeiBoMassage(weiBoMassage);
} }
analysisWeiBoUsers(cardGroup, document.getString("name")); List<WeiBoUser> weiBoUserList = analysisWeiBoUsers(cardGroup, document.getString("name"));
if (!weiBoUserList.isEmpty()){
for (int i = 0; i < weiBoUserList.size(); i++) {
weiBoUserDao.addWeiBoUser(weiBoUserList.get(i));
}
}
} }
} }
} }
} catch (Exception e) { } catch (Exception e) {
log.error("解析cards失败,未获得热门微博,人物信息",e); log.error("解析cards失败,未获得热门微博,人物信息",e);
} }
return document; return document;
} }
} }
...@@ -333,10 +345,9 @@ public class WeiboHotSearchCrawler { ...@@ -333,10 +345,9 @@ public class WeiboHotSearchCrawler {
* @param topic * @param topic
* @return * @return
*/ */
public static void analysisWeiBoUsers(JSONArray cardGroup, String topic) { public static List<WeiBoUser> analysisWeiBoUsers(JSONArray cardGroup, String topic) {
List<WeiBoUser> weiBoUserList = new ArrayList();
//解析weibo人物信息 //解析weibo人物信息
//创建weiBoUserDao
WeiBoUserDao weiBoUserDao = new WeiBoUserDao();
Date date = new Date(); Date date = new Date();
for (int i = 0; i < cardGroup.size(); i++) { for (int i = 0; i < cardGroup.size(); i++) {
if (3==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) { if (3==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
...@@ -361,15 +372,10 @@ public class WeiboHotSearchCrawler { ...@@ -361,15 +372,10 @@ public class WeiboHotSearchCrawler {
} }
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount); WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount);
//判断weiBoUser是否为空添加数据 weiBoUserList.add(weiBoUser);
if (weiBoUser!= null) {
//调用weiBoUserDao中的方法添加数据
weiBoUserDao.addWeiBoUser(weiBoUser);
} else {
log.info("未采集到用户信息");
}
} }
} }
return weiBoUserList;
} else if (10==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) { } else if (10==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
if (cardGroup.getJSONObject(i).containsKey("user")){ if (cardGroup.getJSONObject(i).containsKey("user")){
JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user"); JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user");
...@@ -379,7 +385,6 @@ public class WeiboHotSearchCrawler { ...@@ -379,7 +385,6 @@ public class WeiboHotSearchCrawler {
String userName = user.getString("screen_name"); String userName = user.getString("screen_name");
//获取认证信息 //获取认证信息
String attestationMassage = user.getString("verified_reason"); String attestationMassage = user.getString("verified_reason");
//获取粉丝数 //获取粉丝数
String followers_count = user.getString("followers_count"); String followers_count = user.getString("followers_count");
Long followerCount =null; Long followerCount =null;
...@@ -391,16 +396,12 @@ public class WeiboHotSearchCrawler { ...@@ -391,16 +396,12 @@ public class WeiboHotSearchCrawler {
} }
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount); WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount);
//判断weiBoUser是否为空添加数据 weiBoUserList.add(weiBoUser);
if (weiBoUser!= null) {
//调用weiBoUserDao中的方法添加数据
weiBoUserDao.addWeiBoUser(weiBoUser);
} else {
log.info("未采集到用户信息");
}
} }
return weiBoUserList;
} }
} }
return Collections.emptyList();
} }
......
...@@ -33,8 +33,7 @@ public class WeiBoMassageDao { ...@@ -33,8 +33,7 @@ public class WeiBoMassageDao {
* @param weiBoMassage * @param weiBoMassage
*/ */
public void addWeiBoMassage(WeiBoMassage weiBoMassage){ public void addWeiBoMassage(WeiBoMassage weiBoMassage){
try {
log.info("weiBoMassage对象开始转document对象");
Document document = new Document(); Document document = new Document();
document.put("_id",weiBoMassage.getId()); document.put("_id",weiBoMassage.getId());
document.put("userId",weiBoMassage.getUserId()); document.put("userId",weiBoMassage.getUserId());
...@@ -67,13 +66,13 @@ public class WeiBoMassageDao { ...@@ -67,13 +66,13 @@ public class WeiBoMassageDao {
document.put("root_text",weiBoMassage.getRoot_text()); document.put("root_text",weiBoMassage.getRoot_text());
document.put("root_source",weiBoMassage.getRoot_source()); document.put("root_source",weiBoMassage.getRoot_source());
} }
try {
log.info("weiBoMassage对象转document对象完成"); mongoCollection.insertOne(document);
try { } catch (Exception e) {
mongoCollection.insertOne(document); log.error("存储数据时出错,错误为:{}",e);
log.info("数据插入成功"); }
} catch (Exception e) { } catch (Exception e) {
log.error("存储数据时出错,错误为:{}",e); log.error("WeiBoMassage对象转Document对象异常",e);
} }
} }
......
...@@ -23,7 +23,6 @@ public class WeiBoUserDao { ...@@ -23,7 +23,6 @@ public class WeiBoUserDao {
public WeiBoUserDao() { public WeiBoUserDao() {
String collName = DBConfig.weiBoUserCollName; String collName = DBConfig.weiBoUserCollName;
mongoCollection = mongoDatabase.getCollection(collName); mongoCollection = mongoDatabase.getCollection(collName);
//给数据表创建索引 //给数据表创建索引
MongoDBTemplate.createIndex(DBConfig.dbName, collName); MongoDBTemplate.createIndex(DBConfig.dbName, collName);
} }
...@@ -33,9 +32,7 @@ public class WeiBoUserDao { ...@@ -33,9 +32,7 @@ public class WeiBoUserDao {
* @param weiBoUser * @param weiBoUser
*/ */
public void addWeiBoUser(WeiBoUser weiBoUser){ public void addWeiBoUser(WeiBoUser weiBoUser){
try {
log.info("WeiBoUser对象开始转document对象");
Document document = new Document(); Document document = new Document();
document.put("_id",weiBoUser.getId()); document.put("_id",weiBoUser.getId());
document.put("userId",weiBoUser.getUserId()); document.put("userId",weiBoUser.getUserId());
...@@ -46,13 +43,13 @@ public class WeiBoUserDao { ...@@ -46,13 +43,13 @@ public class WeiBoUserDao {
document.put("topic",weiBoUser.getTopic()); document.put("topic",weiBoUser.getTopic());
document.put("time",weiBoUser.getTime()); document.put("time",weiBoUser.getTime());
document.put("followerCount",weiBoUser.getFollowerCount()); document.put("followerCount",weiBoUser.getFollowerCount());
log.info("WeiBoUser对象转document对象完成"); try {
mongoCollection.insertOne(document);
try { } catch (Exception e) {
mongoCollection.insertOne(document); log.error("存储数据时出错,错误为:{}",e);
log.info("数据插入成功"); }
} catch (Exception e) { } catch (Exception e) {
log.error("存储数据时出错,错误为:{}",e); log.error("WeiBoUser对象转Document对象异常",e);
} }
} }
......
...@@ -46,7 +46,10 @@ import static java.util.Objects.nonNull; ...@@ -46,7 +46,10 @@ import static java.util.Objects.nonNull;
{"classpath:applicationContext.xml"}) {"classpath:applicationContext.xml"})
public class WeiboHotSearchTest { public class WeiboHotSearchTest {
//调用weiBoMassageDao添加数据
static WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao();
//调用weiBoUserDao添加数据
static WeiBoUserDao weiBoUserDao = new WeiBoUserDao();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
@Test @Test
...@@ -154,29 +157,41 @@ public class WeiboHotSearchTest { ...@@ -154,29 +157,41 @@ public class WeiboHotSearchTest {
} }
} }
//调用weiBoMassageDao添加数据 try {
WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao();
//解析cards,获取热门微博、人物 if (Objects.isNull(weiBoMassageDao)){
for (JSONObject jsonObject : cardsJsons) { weiBoMassageDao = new WeiBoMassageDao();
if (nonNull(jsonObject) && !jsonObject.isEmpty()) { }
if (jsonObject.containsKey("mblog")) { if (Objects.isNull(weiBoUserDao)){
if (jsonObject.getJSONObject("mblog").containsKey("title")) { weiBoUserDao = new WeiBoUserDao();
WeiBoMassage weiBoMassage = analysisWeiboMBlog(jsonObject, document.getString("name")); }
//解析cards,获取热门微博、人物
for (JSONObject jsonObject : cardsJsons) {
if (nonNull(jsonObject) && !jsonObject.isEmpty()) {
if (jsonObject.containsKey("mblog")) {
if (jsonObject.getJSONObject("mblog").containsKey("title")) {
WeiBoMassage weiBoMassage = analysisWeiboMBlog(jsonObject, document.getString("name"));
if (Objects.nonNull(weiBoMassage)) {
weiBoMassageDao.addWeiBoMassage(weiBoMassage);
}
}
} else if (jsonObject.containsKey("card_group")) {
JSONArray cardGroup = jsonObject.getJSONArray("card_group");
WeiBoMassage weiBoMassage = analysisWeiboMassage(cardGroup, document.getString("name"));
if (Objects.nonNull(weiBoMassage)) { if (Objects.nonNull(weiBoMassage)) {
weiBoMassageDao.addWeiBoMassage(weiBoMassage); weiBoMassageDao.addWeiBoMassage(weiBoMassage);
} }
List<WeiBoUser> weiBoUserList = analysisWeiBoUsers(cardGroup, document.getString("name"));
if (!weiBoUserList.isEmpty()){
for (int i = 0; i < weiBoUserList.size(); i++) {
weiBoUserDao.addWeiBoUser(weiBoUserList.get(i));
}
}
} }
} else if (jsonObject.containsKey("card_group")) {
JSONArray cardGroup = jsonObject.getJSONArray("card_group");
WeiBoMassage weiBoMassage = analysisWeiboMassage(cardGroup, document.getString("name"));
if (Objects.nonNull(weiBoMassage)) {
weiBoMassageDao.addWeiBoMassage(weiBoMassage);
}
analysisWeiBoUsers(cardGroup, document.getString("name"));
} }
} else {
log.info("获取数据失败");
} }
} catch (Exception e) {
log.error("解析cards失败,未获取热门微博、人物信息",e);
} }
break; break;
...@@ -294,10 +309,9 @@ public class WeiboHotSearchTest { ...@@ -294,10 +309,9 @@ public class WeiboHotSearchTest {
* @param topic * @param topic
* @return * @return
*/ */
public static void analysisWeiBoUsers(JSONArray cardGroup, String topic) { public static List<WeiBoUser> analysisWeiBoUsers(JSONArray cardGroup, String topic) {
List<WeiBoUser> weiBoUserList = new ArrayList<>();
//解析weibo人物信息 //解析weibo人物信息
//创建weiBoUserDao
WeiBoUserDao weiBoUserDao = new WeiBoUserDao();
Date date = new Date(); Date date = new Date();
for (int i = 0; i < cardGroup.size(); i++) { for (int i = 0; i < cardGroup.size(); i++) {
if (3==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) { if (3==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
...@@ -320,15 +334,10 @@ public class WeiboHotSearchTest { ...@@ -320,15 +334,10 @@ public class WeiboHotSearchTest {
followerCount = Long.valueOf(split[0])*10000; followerCount = Long.valueOf(split[0])*10000;
} }
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount); WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount);
//判断weiBoUser是否为空添加数据 weiBoUserList.add(weiBoUser);
if (weiBoUser!= null) {
//调用weiBoUserDao中的方法添加数据
weiBoUserDao.addWeiBoUser(weiBoUser);
} else {
log.info("未采集到用户信息");
}
} }
} }
return weiBoUserList;
} else if (10==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) { } else if (10==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
if (cardGroup.getJSONObject(i).containsKey("user")){ if (cardGroup.getJSONObject(i).containsKey("user")){
JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user"); JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user");
...@@ -348,18 +357,14 @@ public class WeiboHotSearchTest { ...@@ -348,18 +357,14 @@ public class WeiboHotSearchTest {
followerCount = Long.valueOf(followers_count); followerCount = Long.valueOf(followers_count);
} }
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount); WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount);
//判断weiBoUser是否为空添加数据 weiBoUserList.add( weiBoUser);
if (weiBoUser!= null) {
//调用weiBoUserDao中的方法添加数据
weiBoUserDao.addWeiBoUser(weiBoUser);
} else {
log.info("未采集到用户信息");
}
} }
return weiBoUserList;
} }
} }
return Collections.emptyList();
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment