Commit 36e2a228 by chenweitao

Merge branch 'working' into 'master'

增加微博信息及用户信息异常捕获

See merge request !166
parents feb24133 d59803e9
...@@ -623,68 +623,72 @@ public class WeiboHotSearchCrawler { ...@@ -623,68 +623,72 @@ public class WeiboHotSearchCrawler {
public static List<WeiBoUser> analysisWeiBoUsers(JSONArray cardGroup, String topic) { public static List<WeiBoUser> analysisWeiBoUsers(JSONArray cardGroup, String topic) {
List<WeiBoUser> weiBoUserList = new ArrayList(); List<WeiBoUser> weiBoUserList = new ArrayList();
//解析weibo人物信息 //解析weibo人物信息
Date date = new Date(); try {
for (int i = 0; i < cardGroup.size(); i++) { Date date = new Date();
Integer cardType = Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type")); for (int i = 0; i < cardGroup.size(); i++) {
if (24 == cardType||3 == cardType) { Integer cardType = Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"));
if (cardGroup.getJSONObject(i).containsKey("users")) { if (24 == cardType||3 == cardType) {
JSONArray users = cardGroup.getJSONObject(i).getJSONArray("users"); if (cardGroup.getJSONObject(i).containsKey("users")) {
for (int i1 = 0; i1 < users.size(); i1++) { JSONArray users = cardGroup.getJSONObject(i).getJSONArray("users");
for (int i1 = 0; i1 < users.size(); i1++) {
//获取用户id
String userId = users.getJSONObject(i1).getString("id");
//获取用户名
String userName = users.getJSONObject(i1).getString("screen_name");
//获取认证信息
String attestationMassage = users.getJSONObject(i1).getString("verified_reason");
//获取粉丝数量
String followers_count = users.getJSONObject(i1).getString("followers_count");
Long followerCount = null;
if (!followers_count.contains("万")) {
followerCount = Long.valueOf(followers_count);
} else {
String[] split = followers_count.split("万");
double foll = Double.parseDouble(split[0]);
followerCount =new Double(foll*10000).longValue();
// followerCount = Long.valueOf(split[0]) * 10000;
}
//用户头像地址
String profileImageUrl = users.getJSONObject(i1).getString("profile_image_url");
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic, date, followerCount, profileImageUrl);
weiBoUserList.add(weiBoUser);
}
}
return weiBoUserList;
} else if (10 == Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
if (cardGroup.getJSONObject(i).containsKey("user")) {
JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user");
//获取用户id //获取用户id
String userId = users.getJSONObject(i1).getString("id"); String userId = user.getString("id");
//获取用户名 //获取用户名
String userName = users.getJSONObject(i1).getString("screen_name"); String userName = user.getString("screen_name");
//获取认证信息 //获取认证信息
String attestationMassage = users.getJSONObject(i1).getString("verified_reason"); String attestationMassage = user.getString("verified_reason");
//获取粉丝数
//获取粉丝数量 String followers_count = user.getString("followers_count");
String followers_count = users.getJSONObject(i1).getString("followers_count");
Long followerCount = null; Long followerCount = null;
if (!followers_count.contains("万")) { if (followers_count.contains("万")) {
followerCount = Long.valueOf(followers_count);
} else {
String[] split = followers_count.split("万"); String[] split = followers_count.split("万");
double foll = Double.parseDouble(split[0]); Double aDouble = Double.valueOf(split[0]) * 10000;
followerCount =new Double(foll*10000).longValue(); followerCount = new Double(aDouble).longValue();
// followerCount = Long.valueOf(split[0]) * 10000; } else if (followers_count.contains("亿")) {
String[] split = followers_count.split("亿");
Double aDouble = Double.valueOf(split[0]) * 100000000;
followerCount = new Double(aDouble).longValue();
} else {
followerCount = Long.valueOf(followers_count);
} }
//用户头像地址 //用户头像地址
String profileImageUrl = users.getJSONObject(i1).getString("profile_image_url"); String profileImageUrl = user.getString("profile_image_url");
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic, date, followerCount, profileImageUrl); WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic, date, followerCount, profileImageUrl);
weiBoUserList.add(weiBoUser); weiBoUserList.add(weiBoUser);
} }
return weiBoUserList;
} }
return weiBoUserList;
} else if (10 == Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
if (cardGroup.getJSONObject(i).containsKey("user")) {
JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user");
//获取用户id
String userId = user.getString("id");
//获取用户名
String userName = user.getString("screen_name");
//获取认证信息
String attestationMassage = user.getString("verified_reason");
//获取粉丝数
String followers_count = user.getString("followers_count");
Long followerCount = null;
if (followers_count.contains("万")) {
String[] split = followers_count.split("万");
Double aDouble = Double.valueOf(split[0]) * 10000;
followerCount = new Double(aDouble).longValue();
} else if (followers_count.contains("亿")) {
String[] split = followers_count.split("亿");
Double aDouble = Double.valueOf(split[0]) * 100000000;
followerCount = new Double(aDouble).longValue();
} else {
followerCount = Long.valueOf(followers_count);
}
//用户头像地址
String profileImageUrl = user.getString("profile_image_url");
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic, date, followerCount, profileImageUrl);
weiBoUserList.add(weiBoUser);
}
return weiBoUserList;
} }
} catch (Exception e) {
log.error("解析人物信息失败", e);
} }
return Collections.emptyList(); return Collections.emptyList();
} }
...@@ -698,137 +702,142 @@ public class WeiboHotSearchCrawler { ...@@ -698,137 +702,142 @@ public class WeiboHotSearchCrawler {
* @return * @return
*/ */
public static WeiBoMassage analysisWeiboMBlog(JSONObject jsonObject, String topic) { public static WeiBoMassage analysisWeiboMBlog(JSONObject jsonObject, String topic) {
JSONObject mblog = jsonObject.getJSONObject("mblog"); WeiBoMassage weiBoMassage = null;
String type = mblog.getJSONObject("title").getString("text"); try {
String card_type = jsonObject.getString("card_type"); JSONObject mblog = jsonObject.getJSONObject("mblog");
Integer cardType = Integer.valueOf(card_type); String type = mblog.getJSONObject("title").getString("text");
String show_type = jsonObject.getString("show_type"); String card_type = jsonObject.getString("card_type");
Integer showType = Integer.valueOf(show_type); Integer cardType = Integer.valueOf(card_type);
//点赞数 String show_type = jsonObject.getString("show_type");
String attitudes_count = mblog.getString("attitudes_count"); Integer showType = Integer.valueOf(show_type);
Long attitudeCount = null; //点赞数
if (attitudes_count.contains("万")) { String attitudes_count = mblog.getString("attitudes_count");
String[] split = attitudes_count.split("万"); Long attitudeCount = null;
attitudeCount = Long.valueOf(split[0]) * 10000; if (attitudes_count.contains("万")) {
} else { String[] split = attitudes_count.split("万");
attitudeCount = Long.valueOf(attitudes_count); attitudeCount = Long.valueOf(split[0]) * 10000;
} } else {
attitudeCount = Long.valueOf(attitudes_count);
}
//评论数 //评论数
String comments_count = mblog.getString("comments_count"); String comments_count = mblog.getString("comments_count");
Long commentCount = null; Long commentCount = null;
if (comments_count.contains("万")) { if (comments_count.contains("万")) {
String[] split = comments_count.split("万"); String[] split = comments_count.split("万");
commentCount = Long.valueOf(split[0]) * 10000; commentCount = Long.valueOf(split[0]) * 10000;
} else { } else {
commentCount = Long.valueOf(comments_count); commentCount = Long.valueOf(comments_count);
} }
//转发数 //转发数
String reposts_count = mblog.getString("reposts_count"); String reposts_count = mblog.getString("reposts_count");
Long repostCount = null; Long repostCount = null;
if (reposts_count.contains("万")) { if (reposts_count.contains("万")) {
String[] split = reposts_count.split("万"); String[] split = reposts_count.split("万");
repostCount = Long.valueOf(split[0]) * 10000; repostCount = Long.valueOf(split[0]) * 10000;
} else { } else {
repostCount = Long.valueOf(reposts_count); repostCount = Long.valueOf(reposts_count);
} }
Date createTime = null; Date createTime = null;
Date editTime = null; Date editTime = null;
try { try {
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy", java.util.Locale.US); SimpleDateFormat simpleDateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy", Locale.US);
//创建时间 //创建时间
String created_at = mblog.getString("created_at"); String created_at = mblog.getString("created_at");
createTime = simpleDateFormat.parse(created_at); createTime = simpleDateFormat.parse(created_at);
//编辑时间 //编辑时间
if (mblog.containsKey("edit_at")) { if (mblog.containsKey("edit_at")) {
String edit_at = mblog.getString("edit_at"); String edit_at = mblog.getString("edit_at");
editTime = simpleDateFormat.parse(edit_at); editTime = simpleDateFormat.parse(edit_at);
}
} catch (ParseException e) {
log.error("创建时间和编辑时间解析异常", e);
} }
} catch (ParseException e) {
log.error("创建时间和编辑时间解析异常", e);
}
String mid = mblog.getString("mid"); String mid = mblog.getString("mid");
//用户id //用户id
String userId = mblog.getJSONObject("user").getString("id"); String userId = mblog.getJSONObject("user").getString("id");
//用户名 //用户名
String userName = mblog.getJSONObject("user").getString("screen_name"); String userName = mblog.getJSONObject("user").getString("screen_name");
//来源 //来源
String source = mblog.getString("source"); String source = mblog.getString("source");
//用户头像地址 //用户头像地址
String profileImageUrl = mblog.getJSONObject("user").getString("profile_image_url"); String profileImageUrl = mblog.getJSONObject("user").getString("profile_image_url");
//内容 //内容
String content = null; String content = null;
if (mblog.getString("text").contains("<")) { if (mblog.getString("text").contains("<")) {
String text = mblog.getString("text"); String text = mblog.getString("text");
org.jsoup.nodes.Document parse = Jsoup.parse(text); org.jsoup.nodes.Document parse = Jsoup.parse(text);
content = parse.text(); content = parse.text();
} else {
content = mblog.getString("text");
}
WeiBoMassage weiBoMassage = new WeiBoMassage(userId, content, userName, mid, createTime, editTime, cardType, showType, } else {
repostCount, commentCount, attitudeCount, source, type, topic, profileImageUrl); content = mblog.getString("text");
//默认不转发为0
weiBoMassage.setForward(0);
JSONObject weiboJson = null;
//微博实体 是否转发
if (mblog.containsKey("retweeted_status")) {
weiboJson = mblog.getJSONObject("retweeted_status");
//处理转发特有的
//weiBoMassage.set
//源mid
String rootMid = weiboJson.getString("mid");
//源来源
String rootSource = weiboJson.getString("source");
//源text
String text = weiboJson.getString("text");
//解析
org.jsoup.nodes.Document parse = Jsoup.parse(text);
String rootText = parse.text();
//源用户id
String rootId = weiboJson.getJSONObject("user").getString("id");
//源用户名
String rootName = weiboJson.getJSONObject("user").getString("screen_name");
//数据保存到对象中
weiBoMassage.setRoot_mid(rootMid);
weiBoMassage.setRoot_id(rootId);
weiBoMassage.setRoot_source(rootSource);
weiBoMassage.setRoot_text(rootText);
weiBoMassage.setRoot_name(rootName);
//转发为1
weiBoMassage.setForward(1);
} else {
weiboJson = mblog;
}
List<String> pictureUrlList = new ArrayList();
Long playCount = null;
//获取播放量和图片链接
if (weiboJson.getJSONArray("pic_ids").size() > 0) {
JSONArray jsonArray = weiboJson.getJSONArray("pics");
for (int i = 0; i < jsonArray.size(); i++) {
String picUrl = jsonArray.getJSONObject(i).getString("url");
pictureUrlList.add(picUrl);
} }
} else if (weiboJson.containsKey("page_info")) {
if (weiboJson.getJSONObject("page_info").containsKey("play_count")) { weiBoMassage = new WeiBoMassage(userId, content, userName, mid, createTime, editTime, cardType, showType,
String play = weiboJson.getJSONObject("page_info").getString("play_count"); repostCount, commentCount, attitudeCount, source, type, topic, profileImageUrl);
if (play.contains("万")) { //默认不转发为0
String[] split = play.split("万"); weiBoMassage.setForward(0);
playCount = Long.valueOf(split[0]) * 10000;
} else if (play.contains("次")) { JSONObject weiboJson = null;
String[] split = play.split("次"); //微博实体 是否转发
playCount = Long.valueOf(split[0]); if (mblog.containsKey("retweeted_status")) {
weiboJson = mblog.getJSONObject("retweeted_status");
//处理转发特有的
//weiBoMassage.set
//源mid
String rootMid = weiboJson.getString("mid");
//源来源
String rootSource = weiboJson.getString("source");
//源text
String text = weiboJson.getString("text");
//解析
org.jsoup.nodes.Document parse = Jsoup.parse(text);
String rootText = parse.text();
//源用户id
String rootId = weiboJson.getJSONObject("user").getString("id");
//源用户名
String rootName = weiboJson.getJSONObject("user").getString("screen_name");
//数据保存到对象中
weiBoMassage.setRoot_mid(rootMid);
weiBoMassage.setRoot_id(rootId);
weiBoMassage.setRoot_source(rootSource);
weiBoMassage.setRoot_text(rootText);
weiBoMassage.setRoot_name(rootName);
//转发为1
weiBoMassage.setForward(1);
} else {
weiboJson = mblog;
}
List<String> pictureUrlList = new ArrayList();
Long playCount = null;
//获取播放量和图片链接
if (weiboJson.getJSONArray("pic_ids").size() > 0) {
JSONArray jsonArray = weiboJson.getJSONArray("pics");
for (int i = 0; i < jsonArray.size(); i++) {
String picUrl = jsonArray.getJSONObject(i).getString("url");
pictureUrlList.add(picUrl);
}
} else if (weiboJson.containsKey("page_info")) {
if (weiboJson.getJSONObject("page_info").containsKey("play_count")) {
String play = weiboJson.getJSONObject("page_info").getString("play_count");
if (play.contains("万")) {
String[] split = play.split("万");
playCount = Long.valueOf(split[0]) * 10000;
} else if (play.contains("次")) {
String[] split = play.split("次");
playCount = Long.valueOf(split[0]);
}
} }
} }
weiBoMassage.setPlayCount(playCount);
weiBoMassage.setPictureUrlList(pictureUrlList);
} catch (Exception e) {
log.error("解析微博信息失败", e);
} }
weiBoMassage.setPlayCount(playCount);
weiBoMassage.setPictureUrlList(pictureUrlList);
return weiBoMassage; return weiBoMassage;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment