Commit 139ff5af by chenweitao

Merge branch 'working' into 'master'

更新讨论量及关于链接

See merge request !134
parents 15c0ee7f 982502f7
......@@ -125,14 +125,13 @@ public class WeiboHotSearchCrawler {
// }
// /**
// * @return void 返回类型
// * @Title: weiboHotSearchByPhoneTest
// * @author hero
// * @Description: TODO(手机端Iphone 微博热搜采集)
// */
/**
* @return void 返回类型
* @Title: weiboHotSearchByPhoneTest
* @author hero
* @Description: TODO(手机端Iphone 微博热搜采集)
*/
// public static List<HotSearchList> weiboHotSearchByPhone(Date date) {
// //String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
// String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
// Map<String, String> headerMap = new HashMap<>();
// headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
......@@ -229,7 +228,6 @@ public class WeiboHotSearchCrawler {
*/
public static List<HotSearchList> weiboHotSearchByPhone(Date date) {
String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
//String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
Map<String, String> headerMap = new HashMap<>();
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
String htmlBody = null;
......@@ -289,7 +287,7 @@ public class WeiboHotSearchCrawler {
if (Objects.nonNull(iconUrl)){hotSearch.setIconUrl(iconUrl);}
result.add(hotSearch);
rank++;
//redisDao.addDataToSet(RedisConfig.WEIBO_HOTSEARCHIDS, name + "_微博热搜");
redisDao.addDataToSet(RedisConfig.WEIBO_HOTSEARCHIDS, name + "_微博热搜");
}
} else {
log.info("card 数据结构为:{}", card);
......@@ -363,8 +361,17 @@ public class WeiboHotSearchCrawler {
*/
public static Document weiboUpdate(Document document) {
log.info("更新微博热搜{}导语阅读量和讨论量", document.getString("name"));
String url = "https://m.weibo.cn/api/container/getIndex?" + document.getString("url").substring(
document.getString("url").indexOf("?") + 1, document.getString("url").indexOf("&"));
// String url = "https://m.weibo.cn/api/container/getIndex?" + document.getString("url").substring(
// document.getString("url").indexOf("?") + 1, document.getString("url").indexOf("&"));
String topic = document.getString("name");
String gb = "=1&q=#" + topic + "#";
String encode = null;
try {
encode = URLEncoder.encode(gb, "utf-8");
} catch (UnsupportedEncodingException e) {
log.error("更新导语时字符解析成URl模式异常", e);
}
String url = "https://m.weibo.cn/api/container/getIndex?containerid=100103type"+encode;
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 5; count++) {
......@@ -458,7 +465,7 @@ public class WeiboHotSearchCrawler {
} catch (UnsupportedEncodingException e) {
log.error("字符解析成URl模式异常", e);
}
String url = "https://s.weibo.com/weibo?q=" + encode;
String url = "https://s.weibo.com/weibo?q="+encode+"&Refer=top";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment