Commit d59803e9 by leiliangliang

增加微博信息及用户信息异常捕获

parent 402290c1
......@@ -623,68 +623,72 @@ public class WeiboHotSearchCrawler {
public static List<WeiBoUser> analysisWeiBoUsers(JSONArray cardGroup, String topic) {
List<WeiBoUser> weiBoUserList = new ArrayList();
//解析weibo人物信息
Date date = new Date();
for (int i = 0; i < cardGroup.size(); i++) {
Integer cardType = Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"));
if (24 == cardType||3 == cardType) {
if (cardGroup.getJSONObject(i).containsKey("users")) {
JSONArray users = cardGroup.getJSONObject(i).getJSONArray("users");
for (int i1 = 0; i1 < users.size(); i1++) {
try {
Date date = new Date();
for (int i = 0; i < cardGroup.size(); i++) {
Integer cardType = Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"));
if (24 == cardType||3 == cardType) {
if (cardGroup.getJSONObject(i).containsKey("users")) {
JSONArray users = cardGroup.getJSONObject(i).getJSONArray("users");
for (int i1 = 0; i1 < users.size(); i1++) {
//获取用户id
String userId = users.getJSONObject(i1).getString("id");
//获取用户名
String userName = users.getJSONObject(i1).getString("screen_name");
//获取认证信息
String attestationMassage = users.getJSONObject(i1).getString("verified_reason");
//获取粉丝数量
String followers_count = users.getJSONObject(i1).getString("followers_count");
Long followerCount = null;
if (!followers_count.contains("万")) {
followerCount = Long.valueOf(followers_count);
} else {
String[] split = followers_count.split("万");
double foll = Double.parseDouble(split[0]);
followerCount =new Double(foll*10000).longValue();
// followerCount = Long.valueOf(split[0]) * 10000;
}
//用户头像地址
String profileImageUrl = users.getJSONObject(i1).getString("profile_image_url");
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic, date, followerCount, profileImageUrl);
weiBoUserList.add(weiBoUser);
}
}
return weiBoUserList;
} else if (10 == Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
if (cardGroup.getJSONObject(i).containsKey("user")) {
JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user");
//获取用户id
String userId = users.getJSONObject(i1).getString("id");
String userId = user.getString("id");
//获取用户名
String userName = users.getJSONObject(i1).getString("screen_name");
String userName = user.getString("screen_name");
//获取认证信息
String attestationMassage = users.getJSONObject(i1).getString("verified_reason");
//获取粉丝数量
String followers_count = users.getJSONObject(i1).getString("followers_count");
String attestationMassage = user.getString("verified_reason");
//获取粉丝数
String followers_count = user.getString("followers_count");
Long followerCount = null;
if (!followers_count.contains("万")) {
followerCount = Long.valueOf(followers_count);
} else {
if (followers_count.contains("万")) {
String[] split = followers_count.split("万");
double foll = Double.parseDouble(split[0]);
followerCount =new Double(foll*10000).longValue();
// followerCount = Long.valueOf(split[0]) * 10000;
Double aDouble = Double.valueOf(split[0]) * 10000;
followerCount = new Double(aDouble).longValue();
} else if (followers_count.contains("亿")) {
String[] split = followers_count.split("亿");
Double aDouble = Double.valueOf(split[0]) * 100000000;
followerCount = new Double(aDouble).longValue();
} else {
followerCount = Long.valueOf(followers_count);
}
//用户头像地址
String profileImageUrl = users.getJSONObject(i1).getString("profile_image_url");
String profileImageUrl = user.getString("profile_image_url");
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic, date, followerCount, profileImageUrl);
weiBoUserList.add(weiBoUser);
}
return weiBoUserList;
}
return weiBoUserList;
} else if (10 == Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
if (cardGroup.getJSONObject(i).containsKey("user")) {
JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user");
//获取用户id
String userId = user.getString("id");
//获取用户名
String userName = user.getString("screen_name");
//获取认证信息
String attestationMassage = user.getString("verified_reason");
//获取粉丝数
String followers_count = user.getString("followers_count");
Long followerCount = null;
if (followers_count.contains("万")) {
String[] split = followers_count.split("万");
Double aDouble = Double.valueOf(split[0]) * 10000;
followerCount = new Double(aDouble).longValue();
} else if (followers_count.contains("亿")) {
String[] split = followers_count.split("亿");
Double aDouble = Double.valueOf(split[0]) * 100000000;
followerCount = new Double(aDouble).longValue();
} else {
followerCount = Long.valueOf(followers_count);
}
//用户头像地址
String profileImageUrl = user.getString("profile_image_url");
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic, date, followerCount, profileImageUrl);
weiBoUserList.add(weiBoUser);
}
return weiBoUserList;
}
} catch (Exception e) {
log.error("解析人物信息失败", e);
}
return Collections.emptyList();
}
......@@ -698,137 +702,142 @@ public class WeiboHotSearchCrawler {
* @return
*/
public static WeiBoMassage analysisWeiboMBlog(JSONObject jsonObject, String topic) {
JSONObject mblog = jsonObject.getJSONObject("mblog");
String type = mblog.getJSONObject("title").getString("text");
String card_type = jsonObject.getString("card_type");
Integer cardType = Integer.valueOf(card_type);
String show_type = jsonObject.getString("show_type");
Integer showType = Integer.valueOf(show_type);
//点赞数
String attitudes_count = mblog.getString("attitudes_count");
Long attitudeCount = null;
if (attitudes_count.contains("万")) {
String[] split = attitudes_count.split("万");
attitudeCount = Long.valueOf(split[0]) * 10000;
} else {
attitudeCount = Long.valueOf(attitudes_count);
}
WeiBoMassage weiBoMassage = null;
try {
JSONObject mblog = jsonObject.getJSONObject("mblog");
String type = mblog.getJSONObject("title").getString("text");
String card_type = jsonObject.getString("card_type");
Integer cardType = Integer.valueOf(card_type);
String show_type = jsonObject.getString("show_type");
Integer showType = Integer.valueOf(show_type);
//点赞数
String attitudes_count = mblog.getString("attitudes_count");
Long attitudeCount = null;
if (attitudes_count.contains("万")) {
String[] split = attitudes_count.split("万");
attitudeCount = Long.valueOf(split[0]) * 10000;
} else {
attitudeCount = Long.valueOf(attitudes_count);
}
//评论数
String comments_count = mblog.getString("comments_count");
Long commentCount = null;
if (comments_count.contains("万")) {
String[] split = comments_count.split("万");
commentCount = Long.valueOf(split[0]) * 10000;
} else {
commentCount = Long.valueOf(comments_count);
}
//评论数
String comments_count = mblog.getString("comments_count");
Long commentCount = null;
if (comments_count.contains("万")) {
String[] split = comments_count.split("万");
commentCount = Long.valueOf(split[0]) * 10000;
} else {
commentCount = Long.valueOf(comments_count);
}
//转发数
String reposts_count = mblog.getString("reposts_count");
Long repostCount = null;
if (reposts_count.contains("万")) {
String[] split = reposts_count.split("万");
repostCount = Long.valueOf(split[0]) * 10000;
} else {
repostCount = Long.valueOf(reposts_count);
}
Date createTime = null;
Date editTime = null;
//转发数
String reposts_count = mblog.getString("reposts_count");
Long repostCount = null;
if (reposts_count.contains("万")) {
String[] split = reposts_count.split("万");
repostCount = Long.valueOf(split[0]) * 10000;
} else {
repostCount = Long.valueOf(reposts_count);
}
Date createTime = null;
Date editTime = null;
try {
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy", java.util.Locale.US);
//创建时间
String created_at = mblog.getString("created_at");
createTime = simpleDateFormat.parse(created_at);
//编辑时间
if (mblog.containsKey("edit_at")) {
String edit_at = mblog.getString("edit_at");
editTime = simpleDateFormat.parse(edit_at);
try {
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy", Locale.US);
//创建时间
String created_at = mblog.getString("created_at");
createTime = simpleDateFormat.parse(created_at);
//编辑时间
if (mblog.containsKey("edit_at")) {
String edit_at = mblog.getString("edit_at");
editTime = simpleDateFormat.parse(edit_at);
}
} catch (ParseException e) {
log.error("创建时间和编辑时间解析异常", e);
}
} catch (ParseException e) {
log.error("创建时间和编辑时间解析异常", e);
}
String mid = mblog.getString("mid");
//用户id
String userId = mblog.getJSONObject("user").getString("id");
//用户名
String userName = mblog.getJSONObject("user").getString("screen_name");
//来源
String source = mblog.getString("source");
//用户头像地址
String profileImageUrl = mblog.getJSONObject("user").getString("profile_image_url");
//内容
String content = null;
if (mblog.getString("text").contains("<")) {
String text = mblog.getString("text");
org.jsoup.nodes.Document parse = Jsoup.parse(text);
content = parse.text();
} else {
content = mblog.getString("text");
}
String mid = mblog.getString("mid");
//用户id
String userId = mblog.getJSONObject("user").getString("id");
//用户名
String userName = mblog.getJSONObject("user").getString("screen_name");
//来源
String source = mblog.getString("source");
//用户头像地址
String profileImageUrl = mblog.getJSONObject("user").getString("profile_image_url");
//内容
String content = null;
if (mblog.getString("text").contains("<")) {
String text = mblog.getString("text");
org.jsoup.nodes.Document parse = Jsoup.parse(text);
content = parse.text();
WeiBoMassage weiBoMassage = new WeiBoMassage(userId, content, userName, mid, createTime, editTime, cardType, showType,
repostCount, commentCount, attitudeCount, source, type, topic, profileImageUrl);
//默认不转发为0
weiBoMassage.setForward(0);
JSONObject weiboJson = null;
//微博实体 是否转发
if (mblog.containsKey("retweeted_status")) {
weiboJson = mblog.getJSONObject("retweeted_status");
//处理转发特有的
//weiBoMassage.set
//源mid
String rootMid = weiboJson.getString("mid");
//源来源
String rootSource = weiboJson.getString("source");
//源text
String text = weiboJson.getString("text");
//解析
org.jsoup.nodes.Document parse = Jsoup.parse(text);
String rootText = parse.text();
//源用户id
String rootId = weiboJson.getJSONObject("user").getString("id");
//源用户名
String rootName = weiboJson.getJSONObject("user").getString("screen_name");
//数据保存到对象中
weiBoMassage.setRoot_mid(rootMid);
weiBoMassage.setRoot_id(rootId);
weiBoMassage.setRoot_source(rootSource);
weiBoMassage.setRoot_text(rootText);
weiBoMassage.setRoot_name(rootName);
//转发为1
weiBoMassage.setForward(1);
} else {
weiboJson = mblog;
}
List<String> pictureUrlList = new ArrayList();
Long playCount = null;
//获取播放量和图片链接
if (weiboJson.getJSONArray("pic_ids").size() > 0) {
JSONArray jsonArray = weiboJson.getJSONArray("pics");
for (int i = 0; i < jsonArray.size(); i++) {
String picUrl = jsonArray.getJSONObject(i).getString("url");
pictureUrlList.add(picUrl);
} else {
content = mblog.getString("text");
}
} else if (weiboJson.containsKey("page_info")) {
if (weiboJson.getJSONObject("page_info").containsKey("play_count")) {
String play = weiboJson.getJSONObject("page_info").getString("play_count");
if (play.contains("万")) {
String[] split = play.split("万");
playCount = Long.valueOf(split[0]) * 10000;
} else if (play.contains("次")) {
String[] split = play.split("次");
playCount = Long.valueOf(split[0]);
weiBoMassage = new WeiBoMassage(userId, content, userName, mid, createTime, editTime, cardType, showType,
repostCount, commentCount, attitudeCount, source, type, topic, profileImageUrl);
//默认不转发为0
weiBoMassage.setForward(0);
JSONObject weiboJson = null;
//微博实体 是否转发
if (mblog.containsKey("retweeted_status")) {
weiboJson = mblog.getJSONObject("retweeted_status");
//处理转发特有的
//weiBoMassage.set
//源mid
String rootMid = weiboJson.getString("mid");
//源来源
String rootSource = weiboJson.getString("source");
//源text
String text = weiboJson.getString("text");
//解析
org.jsoup.nodes.Document parse = Jsoup.parse(text);
String rootText = parse.text();
//源用户id
String rootId = weiboJson.getJSONObject("user").getString("id");
//源用户名
String rootName = weiboJson.getJSONObject("user").getString("screen_name");
//数据保存到对象中
weiBoMassage.setRoot_mid(rootMid);
weiBoMassage.setRoot_id(rootId);
weiBoMassage.setRoot_source(rootSource);
weiBoMassage.setRoot_text(rootText);
weiBoMassage.setRoot_name(rootName);
//转发为1
weiBoMassage.setForward(1);
} else {
weiboJson = mblog;
}
List<String> pictureUrlList = new ArrayList();
Long playCount = null;
//获取播放量和图片链接
if (weiboJson.getJSONArray("pic_ids").size() > 0) {
JSONArray jsonArray = weiboJson.getJSONArray("pics");
for (int i = 0; i < jsonArray.size(); i++) {
String picUrl = jsonArray.getJSONObject(i).getString("url");
pictureUrlList.add(picUrl);
}
} else if (weiboJson.containsKey("page_info")) {
if (weiboJson.getJSONObject("page_info").containsKey("play_count")) {
String play = weiboJson.getJSONObject("page_info").getString("play_count");
if (play.contains("万")) {
String[] split = play.split("万");
playCount = Long.valueOf(split[0]) * 10000;
} else if (play.contains("次")) {
String[] split = play.split("次");
playCount = Long.valueOf(split[0]);
}
}
}
weiBoMassage.setPlayCount(playCount);
weiBoMassage.setPictureUrlList(pictureUrlList);
} catch (Exception e) {
log.error("解析微博信息失败", e);
}
weiBoMassage.setPlayCount(playCount);
weiBoMassage.setPictureUrlList(pictureUrlList);
return weiBoMassage;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment