Commit 34004178 by 马黎滨

Merge branch 'mlbWork' into 'master'

Mlb work

See merge request !3
parents 355bdd18 65b7efe4
......@@ -100,6 +100,10 @@ public class BaiDuHotSearchCrawler {
count = Integer.valueOf(hot);
}
if (Objects.nonNull(rank)) {
if(count == 0){
log.info(hot);
log.info(element);
}
HotSearchList hotSearch = new HotSearchList(everurl, kw, count, rank, HotSearchType.百度热搜.name());
list.add(hotSearch);
}
......
......@@ -129,14 +129,14 @@ public class WeiboTopicCrawler {
*/
public static List<HotSearchList> startCrawlerByPhone(){
List<HotSearchList> topicList = new ArrayList<>();
for(int page=1; page<=7; page++){
for(int page=1; page<=6; page++){
String pageUrl = "https://api.weibo.cn/2/page?gsid=_2A25zJX_EDeRxGedH71YS8CzKzzmIHXVuc_QMrDV6PUJbkdANLXPbkWpNUK3OyitGCJsX8exvua-vfubUqCiaA4lb&from=10A1193010&c=iphone&s=2827eebe&count=20&containerid=106003type%253D25%2526t%253D3%2526disable_hot%253D1%2526filter_type%253Dtopicscene&page=" + page;
// Retry each page up to 5 times (retryTimes runs 1..5); stop at the first usable response
for(int retryTimes = 1; retryTimes<=5; retryTimes++) {
try {
// log.info("pageUrl::{}", pageUrl);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl), ProxyHolder.NAT_HEAVY_PROXY).body().string();
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("top_mark_text")) {
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
topicList.addAll(parseTopicHtml(htmlBody));
break;
}else {
......@@ -170,8 +170,11 @@ public class WeiboTopicCrawler {
rank = cardGroup.getInteger("top_mark_text");
topicName = cardGroup.getString("title_sub");
url = "https://s.weibo.com/weibo?q="+ URLCodeUtil.getURLEncode(topicName, "utf-8");
description = cardGroup.getString("desc1");
desc2 = cardGroup.getString("desc2");
description = null;
if(cardGroup.containsKey("card_expand")){
description = cardGroup.getJSONObject("card_expand").getString("content");
}
desc2 = cardGroup.getString("desc");
String commentNumStr = desc2.replaceAll("讨论.*", "").trim();
String readNumStr = desc2.replaceAll(".*讨论|阅读", "").trim();
try {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment