Commit 8ec17aa9 by leiliangliang

更新微博pc端采集程序

parent 5ab03924
...@@ -52,109 +52,105 @@ public class WeiboHotSearchCrawler { ...@@ -52,109 +52,105 @@ public class WeiboHotSearchCrawler {
// Shared DAO instance; presumably used for Weibo user records — TODO confirm against its usages.
static WeiBoUserDao weiBoUserDao = new WeiBoUserDao();
// Shared DAO instance; presumably used for Weibo post/message records — TODO confirm against its usages.
static WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao();
// /** /**
// * @return void 返回类型 * @return void 返回类型
// * @Title: weiboHotSearchTest * @Title: weiboHotSearchTest
// * @author hero * @author hero
// * @Description: TODO(PC端微博热搜采集) * @Description: TODO(PC端微博热搜采集)
// */ */
// public static List<HotSearchList> weiboHotSearch() { public static List<HotSearchList> weiboHotSearch() {
// String url = "https://s.weibo.com/top/summary?cate=realtimehot"; String url = "https://s.weibo.com/top/summary?cate=realtimehot";
// Map<String, String> headerMap = new HashMap<>(); Map<String, String> headerMap = new HashMap<>();
// headerMap.put("Cookie", "SUB=_2AkMWEQNHf8NxqwFRmPwdzmrnaYl_zgzEieKgTfKcJRMxHRl-yT9jqmkjtRB6PZEtqE0muNq5OZJPytvesIwD-Kh1dwIz; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFKfwIoPvvYaew277IR3CUN"); headerMap.put("Cookie", "SUB=_2AkMWEQNHf8NxqwFRmPwdzmrnaYl_zgzEieKgTfKcJRMxHRl-yT9jqmkjtRB6PZEtqE0muNq5OZJPytvesIwD-Kh1dwIz; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFKfwIoPvvYaew277IR3CUN");
// List<HotSearchList> list = new ArrayList<HotSearchList>(); List<HotSearchList> list = new ArrayList<HotSearchList>();
// for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
// String htmlBody = null; String htmlBody = null;
// Request request = RequestUtils.wrapGet(url, headerMap); Request request = RequestUtils.wrapGet(url, headerMap);
// try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) { try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
// htmlBody = response.body().string(); htmlBody = response.body().string();
// } catch (Exception e) { } catch (Exception e) {
// if (i == 2) { if (i == 2) {
// return list; return list;
// } else { } else {
// continue; continue;
// } }
// } }
// if (htmlBody != null && htmlBody.contains("pl_top_realtimehot")) { if (htmlBody != null && htmlBody.contains("pl_top_realtimehot")) {
// try { try {
//// String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0]; Date date = new Date();
//// script = script.replace("(", "").replace(")", ""); org.jsoup.nodes.Document document = Jsoup.parse(htmlBody);
//// JSONObject json = JSONObject.parseObject(script); Elements elements = document.select("div#pl_top_realtimehot").select("tbody").select("tr");
//// String html = for (Element element : elements) {
// Date date = new Date(); try {
// org.jsoup.nodes.Document document = Jsoup.parse(htmlBody); //获取链接
// Elements elements = document.select("div#pl_top_realtimehot").select("tbody").select("tr"); String id = "http://s.weibo.com" + element.select("td.td-02").select("a").attr("href");
// for (Element element : elements) { //获取标题
// try { String name = element.select("td.td-02").select("a").text();
// String id = "http://s.weibo.com" + element.select("td.td-02").select("a").attr("href"); //获取热度值
// String name = element.select("td.td-02").select("a").text(); String num = element.select("td.td-02").select("span").text();
// //String num = !element.select("td.td-02").select("span").text().equals("") ? element.select("td.td-02").select("span").text() : "0"; //获取排名
// String num = element.select("td.td-02").select("span").text(); String rank = element.select("td.td-01").text();
// //String rank = !element.select("td[class=\"td-01 ranktop\"]").text().equals("") ? element.select("td[class=\"td-01 ranktop\"]").text() : "-1"; Integer rankCount = null;
// //获取排名 //默认推荐位排名为0 置顶为-1
// String rank = element.select("td.td-01").text(); if ("•".equals(rank)) {
// Integer rankCount = null; rankCount = 0;
// //默认推荐位排名为0 置顶为-1 id = "http://s.weibo.com" + element.select("td.td-02").select("a").attr("href_to");
// if ("•".equals(rank)) { } else if (StringUtils.isEmpty(rank)) {
// rankCount = 0; rankCount = -1;
// id = "http://s.weibo.com" + element.select("td.td-02").select("a").attr("href_to"); } else {
// } else if (StringUtils.isEmpty(rank)) { rankCount = Integer.valueOf(rank);
// rankCount = -1; }
// } else { //获取icon
// rankCount = Integer.valueOf(rank); String text = element.select("td.td-03").text();
// } String icon = null;
// //获取icon if (StringUtils.isNotEmpty(text) && nonNull(text)) {
// String text = element.select("td.td-03").text(); if ("商".equals(text)) {
// String icon = null; icon = "jian";
// if (StringUtils.isNotEmpty(text) && nonNull(text)) { } else if ("新".equals(text)) {
// if ("商".equals(text)) { icon = "new";
// icon = "jian"; } else if ("热".equals(text)) {
// } else if ("新".equals(text)) { icon = "hot";
// icon = "new"; } else if ("沸".equals(text)) {
// } else if ("热".equals(text)) { icon = "fei";
// icon = "hot"; } else if ("爆".equals(text)) {
// } else if ("沸".equals(text)) { icon = "boom";
// icon = "fei"; }
// } else if ("爆".equals(text)) { }
// icon = "boom"; //获取热度标签
// } String heatLabel = null;
// } //获取热度值 置顶 推荐位 默认值为0
// //获取热度标签 Long hotCount =0L;
// String heatLabel = null; if (StringUtils.isNotEmpty(num) && Objects.nonNull(num)) {
// //获取热度值 置顶 推荐位 默认值为0 String[] split = num.split(" ");
// Long hotCount =0L; if (split.length > 1) {
// if (StringUtils.isNotEmpty(num) && Objects.nonNull(num)) { heatLabel = split[0].trim();
// String[] split = num.split(" "); hotCount = Long.valueOf(split[1].trim());
// if (split.length > 1) { }else {
// heatLabel = split[0].trim(); hotCount = Long.valueOf(num);
// hotCount = Long.valueOf(split[1].trim()); }
// }else { }
// hotCount = Long.valueOf(num); HotSearchList hotSearch = new HotSearchList(id, name, hotCount, true, rankCount, HotSearchType.微博热搜.name(), icon, date);
// } hotSearch.setHeatLabel(heatLabel);
// } list.add(hotSearch);
// // Long hotCount = Long.valueOf(num); } catch (Exception e) {
// HotSearchList hotSearch = new HotSearchList(id, name, hotCount, true, rankCount, HotSearchType.微博热搜.name(), icon, date); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
// hotSearch.setHeatLabel(heatLabel); log.error("解析微博时时热搜时出现解析错误", e);
// list.add(hotSearch); continue;
// } catch (Exception e) { }
// SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); }
// log.error("解析微博时时热搜时出现解析错误", e); } catch (Exception e) {
// continue; log.error("解析微博时时热搜时出现解析错误,数据不是json结构", e.fillInStackTrace());
// } SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
// } return null;
// } catch (Exception e) { }
// log.error("解析微博时时热搜时出现解析错误,数据不是json结构", e.fillInStackTrace()); } else {
// SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com");
// return null; log.info("解析微博时时热搜时出现解析错误,页面结构有问题");
// } }
// } else { break;
// SendMailWeibo.sendMail("微博热搜采集出现问题", "859548429@qq.com"); }
// log.info("解析微博时时热搜时出现解析错误,页面结构有问题"); return list;
// } }
// break;
// }
// return list;
// }
/** /**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment