增加36氪人气榜和虎嗅热文推荐

f01e39b6 · leiliangliang · b20cc34b · f01e39b6 · f01e39b6 · f01e39b6
Commit f01e39b6 authored Jun 02, 2021 by leiliangliang
8 changed files
--- a/pom.xml
+++ b/pom.xml
@@ -5,7 +5,7 @@
 	<version>0.0.6-SNAPSHOT</version>
 	<name>各平台热搜榜单采集程序</name>
 	<description>各平台热搜榜单采集程序
-目前包含：1.微博时时热搜采集程序、2.知乎热搜采集程序</description>
+		目前包含：1.微博时时热搜采集程序、2.知乎热搜采集程序</description>
 	<properties>
 		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -51,16 +51,16 @@
 			<artifactId>lombok</artifactId>
 			<version>1.18.8</version>
 		</dependency>
-<!--	  <dependency>-->
+		<!--	  <dependency>-->
-<!--		  <groupId>org.quartz-scheduler</groupId>-->
+		<!--		  <groupId>org.quartz-scheduler</groupId>-->
-<!--		  <artifactId>quartz</artifactId>-->
+		<!--		  <artifactId>quartz</artifactId>-->
-<!--		  <version>${quartz.version}</version>-->
+		<!--		  <version>${quartz.version}</version>-->
-<!--	  </dependency>-->
+		<!--	  </dependency>-->
-<!--	  <dependency>-->
+		<!--	  <dependency>-->
-<!--		  <groupId>org.quartz-scheduler</groupId>-->
+		<!--		  <groupId>org.quartz-scheduler</groupId>-->
-<!--		  <artifactId>quartz-jobs</artifactId>-->
+		<!--		  <artifactId>quartz-jobs</artifactId>-->
-<!--		  <version>${quartz.version}</version>-->
+		<!--		  <version>${quartz.version}</version>-->
-<!--	  </dependency>-->
+		<!--	  </dependency>-->
 		<!-- Spring文件配置 -->
 		<dependency>
 			<groupId>org.springframework</groupId>
@@ -119,6 +119,11 @@
 			<version>4.12</version>
 			<scope>test</scope>
 		</dependency>
+		<dependency>
+			<groupId>org.apache.httpcomponents</groupId>
+			<artifactId>httpclient</artifactId>
+			<version>4.5.6</version>
+		</dependency>
 	</dependencies>
@@ -147,10 +152,10 @@
 							</filters>
 							<transformers>
-<!--								<transformer-->
+								<!--								<transformer-->
-<!--									implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">-->
+								<!--									implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">-->
-<!--									<mainClass>com.zhiwei.searchhotcrawler.run.HotSearchRun</mainClass>-->
+								<!--									<mainClass>com.zhiwei.searchhotcrawler.run.HotSearchRun</mainClass>-->
-<!--								</transformer>-->
+								<!--								</transformer>-->
 								<!-- 不覆盖同名文件，而是追加合并同名文件 -->
 								<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
 									<resource>META-INF/spring.handlers</resource>

--- a/src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchType.java
+++ b/src/main/java/com/zhiwei/searchhotcrawler/bean/HotSearchType.java
@@ -22,5 +22,7 @@ public enum HotSearchType {
 	 腾讯较真榜,
 	 脉脉热榜,
 	 B站排行榜,
-	 B站热搜
+	 B站热搜,
+	人气榜36氪,
+	虎嗅热文推荐,
 }
--- a/src/main/java/com/zhiwei/searchhotcrawler/crawler/HotSearch36KrCrawler.java
+++ b/src/main/java/com/zhiwei/searchhotcrawler/crawler/HotSearch36KrCrawler.java
+package com.zhiwei.searchhotcrawler.crawler;
+import com.zhiwei.crawler.core.HttpBoot;
+import com.zhiwei.crawler.core.proxy.ProxyHolder;
+import com.zhiwei.crawler.core.utils.RequestUtils;
+import com.zhiwei.searchhotcrawler.bean.HotSearchList;
+import com.zhiwei.searchhotcrawler.bean.HotSearchType;
+import lombok.extern.log4j.Log4j2;
+import okhttp3.Request;
+import okhttp3.Response;
+import org.apache.commons.lang3.StringUtils;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.time.Duration;
+import java.util.*;
+/**
+ * Collects the 36Kr popularity ranking (人气榜) from the PC hot-list page.
+ *
+ * @author ll
+ * @date 2021-05-21
+ */
+@Log4j2
+public class HotSearch36KrCrawler {
+    /** Ranking page that is fetched on every crawl. */
+    private static final String HOT_LIST_URL = "https://www.36kr.com/hot-list/catalog";
+    /** Prefix put in front of each article href (hrefs are presumably site-relative; verify against the page). */
+    private static final String WEB_SITE = "https://www.36kr.com";
+    /** Label that separates the heat value from the rest of an entry's span text. */
+    private static final String HOT_LABEL = "热度";
+    /** Shared HTTP client: no exception on failure, 3 retries, 60 s connect timeout. */
+    private static final HttpBoot HTTP_BOOT = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
+    /**
+     * Downloads the 36Kr hot-list page and parses it into ranking entries.
+     *
+     * @param date collection timestamp stored on every returned entry
+     * @return the parsed entries, or an empty list when the page could not be
+     *         fetched or does not contain the "article-list" marker
+     */
+    public static List<HotSearchList> hotSearch36Kr(Date date) {
+        String htmlBody = null;
+        Request request = RequestUtils.wrapGet(HOT_LIST_URL);
+        try (Response response = HTTP_BOOT.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
+            htmlBody = response.body().string();
+        } catch (Exception e) {
+            // This is a fetch failure, not a parse failure, so it is reported as such.
+            log.error("请求36Kr人气榜页面时出现错误", e);
+        }
+        if (htmlBody == null || !htmlBody.contains("article-list")) {
+            log.info("解析36Kr人气榜时出现解析错误,页面结构有问题");
+            return Collections.emptyList();
+        }
+        return ansysData(htmlBody, date);
+    }
+    /**
+     * Parses the hot-list HTML into ranking entries.
+     * <p>
+     * Rank is the 1-based position of each div.article-wrapper inside the first
+     * div.article-list. Entries whose heat value is 0 or cannot be parsed are
+     * skipped, but still consume a rank.
+     *
+     * @param htmlBody raw HTML of the hot-list page
+     * @param date     collection timestamp stored on every entry
+     * @return the entries that could be parsed; never null
+     */
+    private static List<HotSearchList> ansysData(String htmlBody, Date date) {
+        List<HotSearchList> list = new ArrayList<>();
+        try {
+            Document document = Jsoup.parse(htmlBody);
+            // first() returns null when nothing matches; check instead of relying on an NPE.
+            Element articleList = document.select("div.article-list").first();
+            if (articleList == null) {
+                log.warn("解析36Kr人气榜时出现解析错误,页面结构有问题");
+                return list;
+            }
+            int rank = 0;
+            for (Element element : articleList.select("div.article-wrapper")) {
+                rank++;
+                try {
+                    Elements titleLink = element.select("p.title-wrapper").select("a.article-item-title");
+                    String keyWord = titleLink.text();
+                    String url = WEB_SITE + titleLink.attr("href");
+                    // text() on an empty selection yields "", which parseHot treats as "no heat".
+                    String hot = element.select("span").text();
+                    Long count = parseHot(hot);
+                    if (count == 0) {
+                        // Log only the offending entry; dumping the whole page per entry floods the log.
+                        log.warn("36Kr人气榜条目热度解析结果为0,已跳过, rank:{}, hot:{}, element:{}", rank, hot, element);
+                        continue;
+                    }
+                    list.add(new HotSearchList(url, keyWord, count, rank, HotSearchType.人气榜36氪.name(), date));
+                } catch (Exception e) {
+                    // One malformed entry must not discard the rest of the ranking.
+                    log.error("解析36Kr人气榜时出现解析错误", e);
+                }
+            }
+        } catch (Exception e) {
+            log.error("解析36Kr人气榜时出现解析错误,页面结构有问题", e);
+        }
+        return list;
+    }
+    /**
+     * Extracts the rounded heat value that follows the "热度" label.
+     *
+     * @param hot combined span text of one entry; may be null or blank
+     * @return the heat rounded to the nearest long, or 0 when the text is blank
+     *         or carries nothing after the label
+     * @throws NumberFormatException when the text after the label is not a number
+     */
+    private static long parseHot(String hot) {
+        if (StringUtils.isBlank(hot)) {
+            return 0L;
+        }
+        String[] parts = hot.split(HOT_LABEL);
+        if (parts.length < 2) {
+            return 0L;
+        }
+        return Math.round(Double.parseDouble(parts[1].trim()));
+    }
+}
--- a/src/main/java/com/zhiwei/searchhotcrawler/crawler/HuXiuHotSearchCrawler.java
+++ b/src/main/java/com/zhiwei/searchhotcrawler/crawler/HuXiuHotSearchCrawler.java
+package com.zhiwei.searchhotcrawler.crawler;
+import com.zhiwei.crawler.core.HttpBoot;
+import com.zhiwei.crawler.core.proxy.ProxyHolder;
+import com.zhiwei.crawler.core.utils.RequestUtils;
+import com.zhiwei.searchhotcrawler.bean.HotSearchList;
+import com.zhiwei.searchhotcrawler.bean.HotSearchType;
+import lombok.extern.log4j.Log4j2;
+import okhttp3.Request;
+import okhttp3.Response;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.time.Duration;
+import java.util.*;
+/**
+ * Collects the Huxiu hot-article recommendations (虎嗅热文推荐) from the PC home page.
+ *
+ * @author ll
+ * @date 2021-05-24
+ */
+@Log4j2
+public class HuXiuHotSearchCrawler {
+    /** Home page that is fetched on every crawl. */
+    private static final String HOME_URL = "https://www.huxiu.com/";
+    /** Prefix put in front of each article href (hrefs are presumably site-relative; verify against the page). */
+    private static final String WEB_SITE = "https://www.huxiu.com";
+    /** Shared HTTP client: no exception on failure, 3 retries, 60 s connect timeout. */
+    private static final HttpBoot HTTP_BOOT = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
+    /**
+     * Downloads the Huxiu home page and parses its hot-article list.
+     *
+     * @param date collection timestamp stored on every returned entry
+     * @return the parsed entries, or an empty list when the page could not be
+     *         fetched or does not contain the "hot__list" marker
+     */
+    public static List<HotSearchList>  HuXiuHotArticleRecommended(Date date){
+        String htmlBody = null;
+        Request request = RequestUtils.wrapGet(HOME_URL);
+        try (Response response = HTTP_BOOT.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
+            htmlBody = response.body().string();
+        } catch (Exception e) {
+            // This is a fetch failure, not a parse failure, so it is reported as such.
+            log.error("请求虎嗅首页时出现错误", e);
+        }
+        if (htmlBody == null || !htmlBody.contains("hot__list")) {
+            log.info("解析虎嗅热文推荐时出现解析错误,页面结构有问题");
+            return Collections.emptyList();
+        }
+        return ansysData(htmlBody, date);
+    }
+    /**
+     * Parses the home-page HTML into hot-article entries.
+     * <p>
+     * Rank is the 1-based position of each div.focus-item inside div.hot__list.
+     * The comment count is read from the entry's first i element. Entries
+     * without that element, or with a non-numeric value, are skipped but still
+     * consume a rank.
+     *
+     * @param htmlBody raw HTML of the home page
+     * @param date     collection timestamp stored on every entry
+     * @return the entries that could be parsed; never null
+     */
+    private static List<HotSearchList> ansysData(String htmlBody, Date date) {
+        List<HotSearchList> list = new ArrayList<>();
+        try {
+            Document document = Jsoup.parse(htmlBody);
+            Elements elements = document.select("div.hot__list").select("div.focus-item");
+            Integer rank = 0;
+            for (Element element : elements) {
+                rank++;
+                try {
+                    String keyWord = element.select("p").text();
+                    String url = WEB_SITE + element.select("a.focus-item__left").attr("href");
+                    // first() returns null when the entry has no i element; skip only this
+                    // entry instead of letting an NPE abort the whole loop.
+                    Element commentElement = element.select("i").first();
+                    if (commentElement == null) {
+                        log.warn("虎嗅热文推荐条目缺少讨论量,已跳过, rank:{}", rank);
+                        continue;
+                    }
+                    Long commentCount = Long.valueOf(commentElement.text());
+                    // No heat value or lead text is read from the page for this list.
+                    String topicLead = null;
+                    long count = 0L;
+                    list.add(new HotSearchList(url, keyWord, count, rank, HotSearchType.虎嗅热文推荐.name(), commentCount, topicLead, date));
+                } catch (Exception e) {
+                    // Catch broadly per entry so one malformed item cannot drop the rest.
+                    log.error("解析虎嗅热文推荐时出现解析错误", e);
+                }
+            }
+        } catch (Exception e) {
+            log.error("解析虎嗅热文推荐时出现解析错误,页面结构有问题", e);
+        }
+        return list;
+    }
+}
--- a/src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
+++ b/src/main/java/com/zhiwei/searchhotcrawler/dao/HotSearchCacheDAO.java
 package com.zhiwei.searchhotcrawler.dao;
-import com.mongodb.BasicDBObject;
-import com.mongodb.client.FindIterable;
 import com.mongodb.client.MongoCollection;
-import com.mongodb.client.MongoCursor;
 import com.zhiwei.searchhotcrawler.bean.HotSearchList;
 import com.zhiwei.searchhotcrawler.config.DBConfig;
 import com.zhiwei.searchhotcrawler.crawler.WeiboHotSearchCrawler;
@@ -52,6 +50,10 @@ public class HotSearchCacheDAO  {
 //           if("今日头条热搜".equals(hotSearch.getType())){
 //               document.put("comment_count", hotSearch.getCommentCount());
 //           }
+           if("虎嗅热文推荐".equals(hotSearch.getType())){
+               document.put("comment_count", hotSearch.getCommentCount());
+           }
           if("腾讯较真榜".equals(hotSearch.getType())){
               document.put("topic_result",hotSearch.getTopicResult());
           }
@@ -125,7 +127,7 @@ public class HotSearchCacheDAO  {
                   //计算上升速度
                   double riseSpeed = nowDoc.containsKey("riseSpeed")?nowDoc.getDouble("riseSpeed"):0.00;
                   if(nonNull(lastCount) && nowDoc.containsKey("firstCount")) {
-                       long firstCount = Long.parseLong(nowDoc.get("firstCount").toString());
+                       long firstCount = nowDoc.getLong("firstCount");
                       riseSpeed = ((double)(lastCount - firstCount)/(double)firstCount)*1000/((double)duration);
                   }
 //                   endTime = getEndTime(type, new Date());
@@ -181,6 +183,10 @@ public class HotSearchCacheDAO  {
 //                   if(readCount != null){
 //                       nowDoc.put("readCount",readCount);
 //                   }
+                   if("虎嗅热文推荐".equals(type)){
+                       nowDoc.put("comment_count",document.getLong("comment_count"));
+                   }
                   if(topicResult != null){
                       nowDoc.put("topicResult",topicResult);
                   }
@@ -207,7 +213,7 @@ public class HotSearchCacheDAO  {
               }
           }
       }catch (Exception e){
-           log.error("数据存储时出错:", e);
+           log.error("数据存储时出错:{}", e);
       }
   }

--- a/src/main/java/com/zhiwei/searchhotcrawler/test/HotSearch36KrCrawlerTest.java
+++ b/src/main/java/com/zhiwei/searchhotcrawler/test/HotSearch36KrCrawlerTest.java
+package com.zhiwei.searchhotcrawler.test;
+import com.zhiwei.crawler.core.HttpBoot;
+import com.zhiwei.crawler.core.proxy.ProxyHolder;
+import com.zhiwei.crawler.core.utils.RequestUtils;
+import com.zhiwei.searchhotcrawler.bean.HotSearchList;
+import com.zhiwei.searchhotcrawler.bean.HotSearchType;
+import lombok.extern.log4j.Log4j2;
+import okhttp3.Request;
+import okhttp3.Response;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.http.HttpEntity;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.time.Duration;
+import java.util.*;
+/**
+ * Manual-test entry point for the 36Kr popularity-ranking crawl.
+ * <p>
+ * This class used to hold a line-for-line copy of the crawler. It now
+ * delegates to the crawler so the two cannot drift apart.
+ *
+ * @author ll
+ * @date 2021-05-21
+ */
+@Log4j2
+public class HotSearch36KrCrawlerTest {
+    /**
+     * Runs the 36Kr popularity-ranking crawl.
+     *
+     * @param date collection timestamp stored on every returned entry
+     * @return whatever the crawler returns for this run
+     */
+    public static List<HotSearchList> hotSearch36Kr(Date date) {
+        // Fully qualified so no extra import is needed in this file.
+        return com.zhiwei.searchhotcrawler.crawler.HotSearch36KrCrawler.hotSearch36Kr(date);
+    }
+}
--- a/src/main/java/com/zhiwei/searchhotcrawler/test/HuXiuHotSearchCrawlerTest.java
+++ b/src/main/java/com/zhiwei/searchhotcrawler/test/HuXiuHotSearchCrawlerTest.java
+package com.zhiwei.searchhotcrawler.test;
+import com.zhiwei.crawler.core.HttpBoot;
+import com.zhiwei.crawler.core.proxy.ProxyHolder;
+import com.zhiwei.crawler.core.utils.RequestUtils;
+import com.zhiwei.searchhotcrawler.bean.HotSearchList;
+import com.zhiwei.searchhotcrawler.bean.HotSearchType;
+import lombok.extern.log4j.Log4j2;
+import okhttp3.Request;
+import okhttp3.Response;
+import org.apache.http.HttpEntity;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.time.Duration;
+import java.util.*;
+/**
+ * Manual-test entry point for the Huxiu hot-article crawl.
+ * <p>
+ * This class used to hold a line-for-line copy of the crawler. It now
+ * delegates to the crawler so the two cannot drift apart.
+ *
+ * @author ll
+ * @date 2021-05-24
+ */
+@Log4j2
+public class HuXiuHotSearchCrawlerTest {
+    /**
+     * Runs the Huxiu hot-article crawl.
+     *
+     * @param date collection timestamp stored on every returned entry
+     * @return whatever the crawler returns for this run
+     */
+    public static List<HotSearchList>  HuXiuHotArticleRecommended(Date date){
+        // Fully qualified so no extra import is needed in this file.
+        return com.zhiwei.searchhotcrawler.crawler.HuXiuHotSearchCrawler.HuXiuHotArticleRecommended(date);
+    }
+}
--- a/src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
+++ b/src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
@@ -8,6 +8,8 @@ import com.zhiwei.searchhotcrawler.crawler.*;
 import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
 import com.zhiwei.searchhotcrawler.dao.RedisDao;
 import com.zhiwei.searchhotcrawler.dao.WeiboSuperTopicDAO;
+import com.zhiwei.searchhotcrawler.crawler.HotSearch36KrCrawler;
+import com.zhiwei.searchhotcrawler.crawler.HuXiuHotSearchCrawler;
 import com.zhiwei.searchhotcrawler.timer.TouTiaoExecutor;
 import com.zhiwei.searchhotcrawler.util.DateUtils;
 import com.zhiwei.searchhotcrawler.util.TipsUtils;
@@ -38,6 +40,30 @@ public class GatherTimer {
    /** 知乎时事子分类 */
    private String DEPTH = "depth";
+    /**
+     * Scheduled crawl of the Huxiu hot-article recommendations, followed by
+     * the 36Kr popularity ranking. The cron expression fires at second 0 of
+     * every minute. Both crawls share one timestamp, and each result list is
+     * handed to TipsUtils.addHotList under its own HotSearchType name.
+     */
+    @Async(value = "myScheduler")
+    @Scheduled(cron = "0 * * * * ?")
+    public void crawlerHuXiu() {
+        // Step 1: Huxiu hot-article recommendations.
+        logger.info("虎嗅热文推荐开始采集...");
+        final Date date = DateUtils.getMillSecondTime(new Date());
+        final List<HotSearchList> huXiuList = HuXiuHotSearchCrawler.HuXiuHotArticleRecommended(date);
+        final int huXiuSize = (huXiuList == null) ? 0 : huXiuList.size();
+        logger.info("{}, 虎嗅热文推荐此轮采集到的数据量为:{}", new Date(), huXiuSize);
+        TipsUtils.addHotList(HotSearchType.虎嗅热文推荐.name(), huXiuList);
+        logger.info("虎嗅热文推荐采集结束...");
+        // Step 2: 36Kr popularity ranking, stamped with the same collection time.
+        logger.info("36氪人气榜开始采集...");
+        final List<HotSearchList> list36Kr = HotSearch36KrCrawler.hotSearch36Kr(date);
+        final int size36Kr = (list36Kr == null) ? 0 : list36Kr.size();
+        logger.info("{}, 36氪人气榜此轮采集到的数据量为:{}", new Date(), size36Kr);
+        TipsUtils.addHotList(HotSearchType.人气榜36氪.name(), list36Kr);
+        logger.info("36氪人气榜采集结束...");
+    }
    /**
     * 微博热搜的采集
     */