Commit 26e162d1 by zhiwei

微博热搜修改为m端,增加实时上升榜

parent 03050fa3
......@@ -2,10 +2,12 @@ package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
......@@ -106,55 +108,57 @@ public class WeiboHotSearchCrawler {
* @return the list of hot-search entries that were collected
*/
public static List<HotSearchList> weiboHotSearchByPhone(){
String url = "";
Map<String,String> headerMap = new HashMap<String,String>();
headerMap.put("Host", "mapi.weibo.com");
headerMap.put("User-Agent", "Weibo/8789 (iPhone; iOS 10.3.3; Scale/2.00)");
String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
Map<String,String> headerMap = new HashMap<>();
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
List<HotSearchList> result = new ArrayList<HotSearchList>();
String htmlBody;
try {
List<HotSearchList> result = new ArrayList<HotSearchList>();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
if(htmlBody!=null){
if(StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")){
try {
JSONObject json = JSONObject.parseObject(htmlBody);
JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data");
JSONArray cards = json.getJSONArray("cards");
int rank = 1;
for(int i=0;i<cards.size();i++){
try {
JSONObject card = cards.getJSONObject(i);
JSONArray card_group = card.getJSONArray("card_group");
JSONArray cardGroup = card.getJSONArray("card_group");
String title = card.getString("title");
boolean hot = true;
if(title.contains("实时上升热点")){
hot = false;
rank = 50;
}
for(int j=0; j<card_group.size(); j++){
JSONObject cardInfo = card_group.getJSONObject(j);
for(int j=0; j<cardGroup.size(); j++){
JSONObject cardInfo = cardGroup.getJSONObject(j);
String name = cardInfo.getString("desc");
int hotCount = cardInfo.getIntValue("desc_extr");
int rankCount = cardInfo.getIntValue("desc_extr");
String id = "http://s.weibo.com/weibo/"+URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rankCount, HotSearchType.微博热搜.name());
HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name());
logger.info("采集到的数据:::{}", hotSearch);
result.add(hotSearch);
rank++;
}
} catch (Exception e) {
logger.error("解析微博时时热搜时出现解析错误",e.fillInStackTrace());
continue;
}
}
return result;
} catch (Exception e) {
logger.error("解析微博时时热搜时出现解析错误,数据不是json结构",e.fillInStackTrace());
return null;
return Collections.emptyList();
}
}else{
logger.info("解析微博时时热搜时出现解析错误,页面结构有问题");
}
} catch (IOException e1) {
logger.error("解析微博时时热搜时出现连接失败",e1.fillInStackTrace());
return Collections.emptyList();
}
return result;
return Collections.emptyList();
}
}
......@@ -11,7 +11,6 @@ import org.slf4j.LoggerFactory;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import com.zhiwei.tools.tools.ZhiWeiTools;
......@@ -38,7 +37,8 @@ public class WeiboHotSearchRun extends Thread{
private void getHotList() {
logger.info("微博话题采集开始........");
List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearch();
List<HotSearchList> list = WeiboHotSearchCrawler.weiboHotSearchByPhone();
logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
List<DBObject> data = new ArrayList<>();
for(HotSearchList weiboHotSearch : list){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment