Commit e2f0cb6f by 马黎滨

新浪热榜采集更新

parent fe7db006
......@@ -8,6 +8,7 @@ import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
......@@ -15,6 +16,7 @@ import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
......@@ -43,37 +45,44 @@ public class XinLangHotSearchCrawler {
}
if(htmlBody!=null) {
Document document = Jsoup.parse(htmlBody);
String html = document.getElementsByTag("script").last().html();
jsonObject = JSONObject.parseObject(html.substring(html.indexOf("{"), html.length() - 1));
// log.info(jsonObject);
JSONArray jsonArray = jsonObject.getJSONObject("data").getJSONObject("data").getJSONArray("result");
if (jsonArray != null) {
for (int i = 0; i < jsonArray.size(); i++) {
String name = jsonArray.getJSONObject(i).getString("text");
Integer rank = i + 1;
String hotValue = jsonArray.getJSONObject(i).getString("hotValue");
Integer count = TipsUtils.getHotCount(hotValue);
String showTags = jsonArray.getJSONObject(i).getString("showTags");
String routeUri = jsonArray.getJSONObject(i).getString("routeUri");
String xinLangUrl = null;
if(routeUri.contains("groupId")){
xinLangUrl = "https://super.sina.cn/shequn/forum/detail_" +
routeUri.substring(routeUri.indexOf("groupId=")+8) + ".html";
}
String icon = null;
if (showTags.contains("新")) {
icon = "新";
} else if (showTags.contains("热")) {
icon = "热";
} else if (showTags.contains("沸")) {
icon = "沸";
Elements elements = document.getElementsByTag("script");
for (Element element : elements) {
String html = element.html();
log.info(html);
if (html.contains("SM =")) {
jsonObject = JSONObject.parseObject(html.substring(html.indexOf("{"), html.length() - 1));
JSONArray jsonArray = jsonObject.getJSONObject("data").getJSONObject("data").getJSONArray("result");
if (jsonArray != null) {
for (int i = 0; i < jsonArray.size(); i++) {
String name = jsonArray.getJSONObject(i).getString("text");
Integer rank = i + 1;
String hotValue = jsonArray.getJSONObject(i).getString("hotValue");
Integer count = TipsUtils.getHotCount(hotValue);
String showTags = jsonArray.getJSONObject(i).getString("showTags");
String routeUri = jsonArray.getJSONObject(i).getString("routeUri");
String xinLangUrl = null;
if (routeUri.contains("groupId")) {
xinLangUrl = "https://super.sina.cn/shequn/forum/detail_" +
routeUri.substring(routeUri.indexOf("groupId=") + 8) + ".html";
}else{
xinLangUrl = "https://m.so.com/s?q="+ URLCodeUtil.getURLEncode(name, "utf-8")+"&src=dfttrc&srcg=sina_shoulang_act";
}
String icon = null;
if (showTags.contains("新")) {
icon = "新";
} else if (showTags.contains("热")) {
icon = "热";
} else if (showTags.contains("沸")) {
icon = "沸";
}
HotSearchList hotSearchList = new HotSearchList(xinLangUrl, name, count, true, rank, HotSearchType.新浪热榜.name(), icon, date);
hotSearchLists.add(hotSearchList);
}
log.info("{}, 此轮新浪热榜采集到的数据量为:{}", new Date(), Integer.valueOf(hotSearchLists != null ? hotSearchLists.size() : 0));
log.info("新浪热榜采集结束");
return hotSearchLists;
}
HotSearchList hotSearchList = new HotSearchList(xinLangUrl, name, count, true, rank, HotSearchType.新浪热榜.name(), icon, date);
hotSearchLists.add(hotSearchList);
}
log.info("{}, 此轮新浪热榜采集到的数据量为:{}", new Date(), Integer.valueOf(hotSearchLists != null ? hotSearchLists.size() : 0));
log.info("新浪热榜采集结束");
return hotSearchLists;
}
}
ZhiWeiTools.sleep(3000L);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment