Commit e2f0cb6f by 马黎滨

新浪热榜采集更新

parent fe7db006
...@@ -8,6 +8,7 @@ import com.zhiwei.crawler.utils.RequestUtils; ...@@ -8,6 +8,7 @@ import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request; import okhttp3.Request;
...@@ -15,6 +16,7 @@ import okhttp3.Response; ...@@ -15,6 +16,7 @@ import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import java.io.IOException; import java.io.IOException;
...@@ -43,37 +45,44 @@ public class XinLangHotSearchCrawler { ...@@ -43,37 +45,44 @@ public class XinLangHotSearchCrawler {
} }
if(htmlBody!=null) { if(htmlBody!=null) {
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
String html = document.getElementsByTag("script").last().html(); Elements elements = document.getElementsByTag("script");
jsonObject = JSONObject.parseObject(html.substring(html.indexOf("{"), html.length() - 1)); for (Element element : elements) {
// log.info(jsonObject); String html = element.html();
JSONArray jsonArray = jsonObject.getJSONObject("data").getJSONObject("data").getJSONArray("result"); log.info(html);
if (jsonArray != null) { if (html.contains("SM =")) {
for (int i = 0; i < jsonArray.size(); i++) { jsonObject = JSONObject.parseObject(html.substring(html.indexOf("{"), html.length() - 1));
String name = jsonArray.getJSONObject(i).getString("text"); JSONArray jsonArray = jsonObject.getJSONObject("data").getJSONObject("data").getJSONArray("result");
Integer rank = i + 1; if (jsonArray != null) {
String hotValue = jsonArray.getJSONObject(i).getString("hotValue"); for (int i = 0; i < jsonArray.size(); i++) {
Integer count = TipsUtils.getHotCount(hotValue); String name = jsonArray.getJSONObject(i).getString("text");
String showTags = jsonArray.getJSONObject(i).getString("showTags"); Integer rank = i + 1;
String routeUri = jsonArray.getJSONObject(i).getString("routeUri"); String hotValue = jsonArray.getJSONObject(i).getString("hotValue");
String xinLangUrl = null; Integer count = TipsUtils.getHotCount(hotValue);
if(routeUri.contains("groupId")){ String showTags = jsonArray.getJSONObject(i).getString("showTags");
xinLangUrl = "https://super.sina.cn/shequn/forum/detail_" + String routeUri = jsonArray.getJSONObject(i).getString("routeUri");
routeUri.substring(routeUri.indexOf("groupId=")+8) + ".html"; String xinLangUrl = null;
} if (routeUri.contains("groupId")) {
String icon = null; xinLangUrl = "https://super.sina.cn/shequn/forum/detail_" +
if (showTags.contains("新")) { routeUri.substring(routeUri.indexOf("groupId=") + 8) + ".html";
icon = "新"; }else{
} else if (showTags.contains("热")) { xinLangUrl = "https://m.so.com/s?q="+ URLCodeUtil.getURLEncode(name, "utf-8")+"&src=dfttrc&srcg=sina_shoulang_act";
icon = "热"; }
} else if (showTags.contains("沸")) { String icon = null;
icon = "沸"; if (showTags.contains("新")) {
icon = "新";
} else if (showTags.contains("热")) {
icon = "热";
} else if (showTags.contains("沸")) {
icon = "沸";
}
HotSearchList hotSearchList = new HotSearchList(xinLangUrl, name, count, true, rank, HotSearchType.新浪热榜.name(), icon, date);
hotSearchLists.add(hotSearchList);
}
log.info("{}, 此轮新浪热榜采集到的数据量为:{}", new Date(), Integer.valueOf(hotSearchLists != null ? hotSearchLists.size() : 0));
log.info("新浪热榜采集结束");
return hotSearchLists;
} }
HotSearchList hotSearchList = new HotSearchList(xinLangUrl, name, count, true, rank, HotSearchType.新浪热榜.name(), icon, date);
hotSearchLists.add(hotSearchList);
} }
log.info("{}, 此轮新浪热榜采集到的数据量为:{}", new Date(), Integer.valueOf(hotSearchLists != null ? hotSearchLists.size() : 0));
log.info("新浪热榜采集结束");
return hotSearchLists;
} }
} }
ZhiWeiTools.sleep(3000L); ZhiWeiTools.sleep(3000L);
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment