Commit e2f0cb6f by 马黎滨

新浪热榜采集更新

parent fe7db006
......@@ -8,6 +8,7 @@ import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
......@@ -15,6 +16,7 @@ import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
......@@ -43,9 +45,12 @@ public class XinLangHotSearchCrawler {
}
if(htmlBody!=null) {
Document document = Jsoup.parse(htmlBody);
String html = document.getElementsByTag("script").last().html();
Elements elements = document.getElementsByTag("script");
for (Element element : elements) {
String html = element.html();
log.info(html);
if (html.contains("SM =")) {
jsonObject = JSONObject.parseObject(html.substring(html.indexOf("{"), html.length() - 1));
// log.info(jsonObject);
JSONArray jsonArray = jsonObject.getJSONObject("data").getJSONObject("data").getJSONArray("result");
if (jsonArray != null) {
for (int i = 0; i < jsonArray.size(); i++) {
......@@ -56,9 +61,11 @@ public class XinLangHotSearchCrawler {
String showTags = jsonArray.getJSONObject(i).getString("showTags");
String routeUri = jsonArray.getJSONObject(i).getString("routeUri");
String xinLangUrl = null;
if(routeUri.contains("groupId")){
if (routeUri.contains("groupId")) {
xinLangUrl = "https://super.sina.cn/shequn/forum/detail_" +
routeUri.substring(routeUri.indexOf("groupId=")+8) + ".html";
routeUri.substring(routeUri.indexOf("groupId=") + 8) + ".html";
}else{
xinLangUrl = "https://m.so.com/s?q="+ URLCodeUtil.getURLEncode(name, "utf-8")+"&src=dfttrc&srcg=sina_shoulang_act";
}
String icon = null;
if (showTags.contains("新")) {
......@@ -76,6 +83,8 @@ public class XinLangHotSearchCrawler {
return hotSearchLists;
}
}
}
}
ZhiWeiTools.sleep(3000L);
}
return hotSearchLists;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment