Commit 14470085 by leiliangliang

更改移动端话题采集程序

parent 206c358e
...@@ -131,90 +131,90 @@ public class WeiboTopicCrawler { ...@@ -131,90 +131,90 @@ public class WeiboTopicCrawler {
/** /**
* 微博平话题榜采集 * 微博平话题榜采集
*/ */
// public static List<HotSearchList> startCrawlerByPhone(Date date){ public static List<HotSearchList> startCrawlerByPhone(Date date){
// List<HotSearchList> topicList = new ArrayList<>(); List<HotSearchList> topicList = new ArrayList<>();
// for(int page=1; page<=3; page++){ for(int page=1; page<=3; page++){
// String pageUrl = "https://api.weibo.cn/2/page?st_bottom_bar_new_style_enable=1&c=android&s=34dc160d&from=10A9295010&gsid=_2A25NH7inDeRxGeNH4lUX9ifIzTWIHXVvjUtvrDV6PUJbkdANLRjfkWpNSk7RXJ9vYwBfAr66TNj0zcFmOBPKZDuI&containerid=231648_-_4&page=" + page; String pageUrl = "https://api.weibo.cn/2/page?st_bottom_bar_new_style_enable=1&c=android&s=34dc160d&from=10A9295010&gsid=_2A25NH7inDeRxGeNH4lUX9ifIzTWIHXVvjUtvrDV6PUJbkdANLRjfkWpNSk7RXJ9vYwBfAr66TNj0zcFmOBPKZDuI&containerid=231648_-_4&page=" + page;
// Request request = RequestUtils.wrapGet(pageUrl); Request request = RequestUtils.wrapGet(pageUrl);
// String htmlBody = null; String htmlBody = null;
// //重试三次 //重试三次
// for(int retryTimes = 1; retryTimes<=5; retryTimes++) { for(int retryTimes = 1; retryTimes<=5; retryTimes++) {
// Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY); Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
// if (response.hasCause()){ if (response.hasCause()){
// Throwable cause = response.cause(); Throwable cause = response.cause();
// log.error("下载榜单列表页面时出现错误,错误为:{}", cause); log.error("下载榜单列表页面时出现错误,错误为:{}", cause);
// continue; continue;
// }else { }else {
// htmlBody = response.bodyString(); htmlBody = response.bodyString();
// } }
// if (StringUtils.isNotBlank(htmlBody)) { if (StringUtils.isNotBlank(htmlBody)) {
// topicList.addAll(parseTopicHtml(htmlBody,date)); topicList.addAll(parseTopicHtml(htmlBody,date));
// break; break;
// } else { } else {
// log.info("下载榜单列表页面时数据格式错误,页面为:{}", htmlBody); log.info("下载榜单列表页面时数据格式错误,页面为:{}", htmlBody);
// } }
// } }
// } }
// return topicList; return topicList;
// } }
//
//
// private static List<HotSearchList> parseTopicHtml(String htmlBody,Date date) { private static List<HotSearchList> parseTopicHtml(String htmlBody,Date date) {
// try { try {
// JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("cards"); JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("cards");
// if(Objects.nonNull(jsonArray) && !jsonArray.isEmpty()) { if(Objects.nonNull(jsonArray) && !jsonArray.isEmpty()) {
// for (int j=0; j< jsonArray.size(); j++){ for (int j=0; j< jsonArray.size(); j++){
// JSONObject card = jsonArray.getJSONObject(j); JSONObject card = jsonArray.getJSONObject(j);
// if(card.containsKey("card_group")){ if(card.containsKey("card_group")){
// JSONArray cards = card.getJSONArray("card_group"); JSONArray cards = card.getJSONArray("card_group");
// List<HotSearchList> topicList = new ArrayList<>(); List<HotSearchList> topicList = new ArrayList<>();
// Integer rank = null; Integer rank = null;
// String topicName = null; String topicName = null;
// String url = null; String url = null;
// String description = null; String description = null;
// Long commentNum = null; Long commentNum = null;
// Long readNum = null; Long readNum = null;
// String desc2 = null; String desc2 = null;
// for(int i=0; i<cards.size(); i++) { for(int i=0; i<cards.size(); i++) {
// JSONObject cardGroup = cards.getJSONObject(i); JSONObject cardGroup = cards.getJSONObject(i);
// rank = cardGroup.getInteger("top_mark_text"); rank = cardGroup.getInteger("top_mark_text");
// topicName = cardGroup.getString("title_sub"); topicName = cardGroup.getString("title_sub");
// url = "https://s.weibo.com/weibo?q="+ URLCodeUtil.getURLEncode(topicName, "utf-8"); url = "https://s.weibo.com/weibo?q="+ URLCodeUtil.getURLEncode(topicName, "utf-8");
// description = null; description = null;
// if(cardGroup.containsKey("card_expand")){ if(cardGroup.containsKey("card_expand")){
// description = cardGroup.getJSONObject("card_expand").getString("content"); description = cardGroup.getJSONObject("card_expand").getString("content");
// } }
// desc2 = cardGroup.getString("desc"); desc2 = cardGroup.getString("desc");
// String commentNumStr = desc2.replaceAll("讨论.*", "").trim(); String commentNumStr = desc2.replaceAll("讨论.*", "").trim();
// String readNumStr = desc2.replaceAll(".*讨论|阅读", "").trim(); String readNumStr = desc2.replaceAll(".*讨论|阅读", "").trim();
// try { try {
// commentNum = TipsUtils.getHotCount(commentNumStr); commentNum = TipsUtils.getHotCount(commentNumStr);
// readNum = TipsUtils.getHotCount(readNumStr); readNum = TipsUtils.getHotCount(readNumStr);
// }catch (Exception e){ }catch (Exception e){
// e.printStackTrace(); e.printStackTrace();
// } }
// HotSearchList topic = new HotSearchList(url, topicName, readNum, rank, HotSearchType.微博话题.name(), commentNum, description,date); HotSearchList topic = new HotSearchList(url, topicName, readNum, rank, HotSearchType.微博话题.name(), commentNum, description,date);
// if(cardGroup.containsKey("title_flag_pic")){ if(cardGroup.containsKey("title_flag_pic")){
// String titlePic = cardGroup.getString("title_flag_pic"); String titlePic = cardGroup.getString("title_flag_pic");
// if(titlePic.contains("new")){ if(titlePic.contains("new")){
// topic.setIcon("新"); topic.setIcon("新");
// }else if(titlePic.contains("hot")){ }else if(titlePic.contains("hot")){
// topic.setIcon("热"); topic.setIcon("热");
// } }
// } }
// topicList.add(topic); topicList.add(topic);
// } }
// return topicList; return topicList;
// } }
// } }
// }else{ }else{
//// log.info("html:{}",htmlBody); // log.info("html:{}",htmlBody);
// } }
// } catch (Exception e) { } catch (Exception e) {
// log.error("解析榜单列表页面时出现错误,错误为:{}", e); log.error("解析榜单列表页面时出现错误,错误为:{}", e);
// } }
// return Collections.emptyList(); return Collections.emptyList();
// } }
/** /**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment