Commit b9b6305c by chenweitao

Merge branch 'working' into 'master'

Working

See merge request !79
parents 6f3b50b7 1c04790d
...@@ -113,6 +113,12 @@ ...@@ -113,6 +113,12 @@
<artifactId>jedis</artifactId> <artifactId>jedis</artifactId>
<version>2.8.1</version> <version>2.8.1</version>
</dependency> </dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
</dependencies> </dependencies>
......
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="FacetManager">
<facet type="Spring" name="Spring">
<configuration />
</facet>
</component>
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Maven: org.mongodb:mongo-java-driver:3.12.2" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei:sendmail:0.0.1-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: javax.mail:mail:1.4.7" level="project" />
<orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.tools:zhiwei-tools:0.1.6-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.2.58" level="project" />
<orderEntry type="library" name="Maven: de.ruedigermoeller:fst:2.57" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-core:2.8.8" level="project" />
<orderEntry type="library" name="Maven: org.javassist:javassist:3.21.0-GA" level="project" />
<orderEntry type="library" name="Maven: org.objenesis:objenesis:2.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.8.1" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.crawler:crawler-core:0.6.7.4-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okhttp3:okhttp:3.14.9" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okio:okio:1.17.2" level="project" />
<orderEntry type="library" name="Maven: org.jsoup:jsoup:1.13.1" level="project" />
<orderEntry type="library" name="Maven: cn.wanghaomiao:JsoupXpath:2.3.2" level="project" />
<orderEntry type="library" name="Maven: org.antlr:antlr4-runtime:4.7" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.20" level="project" />
<orderEntry type="library" name="Maven: org.brotli:dec:0.1.2" level="project" />
<orderEntry type="library" name="Maven: com.ibm.icu:icu4j:67.1" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:29.0-jre" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:failureaccess:1.0.1" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava" level="project" />
<orderEntry type="library" name="Maven: com.google.code.findbugs:jsr305:3.0.2" level="project" />
<orderEntry type="library" name="Maven: org.checkerframework:checker-qual:2.11.1" level="project" />
<orderEntry type="library" name="Maven: com.google.errorprone:error_prone_annotations:2.3.4" level="project" />
<orderEntry type="library" name="Maven: com.google.j2objc:j2objc-annotations:1.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-core:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-1.2-api:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.8.0-beta4" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.8.0-beta4" level="project" />
<orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.async:task-boot:0.0.3-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.crawler:proxy-client:1.0.5-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: org.apache.dubbo:dubbo:2.7.4.1" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-all:4.1.25.Final" level="project" />
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.8.5" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-recipes:2.12.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-framework:2.12.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-client:2.12.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.8" level="project" />
<orderEntry type="library" name="Maven: jline:jline:0.9.94" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty:3.7.0.Final" level="project" />
<orderEntry type="library" name="Maven: com.kohlschutter.boilerpipe:boilerpipe-extractor:0.0.1-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: org.projectlombok:lombok:1.18.8" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-aop:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: aopalliance:aopalliance:1.0" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-beans:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-core:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-test:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-context:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-expression:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-context-support:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-web:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-tx:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: redis.clients:jedis:2.8.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-pool2:2.4.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.12" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
</component>
</module>
\ No newline at end of file
...@@ -33,12 +33,12 @@ public class HotSearchCache { ...@@ -33,12 +33,12 @@ public class HotSearchCache {
/** /**
* 最高热搜值 * 最高热搜值
*/ */
private Integer highestCount; private Long highestCount;
/** /**
* 最新热搜热度值 * 最新热搜热度值
*/ */
private Integer lastCount; private Long lastCount;
/** /**
* 状态(true 为热搜; false为实时上升) * 状态(true 为热搜; false为实时上升)
...@@ -83,12 +83,12 @@ public class HotSearchCache { ...@@ -83,12 +83,12 @@ public class HotSearchCache {
/** /**
* 阅读量 * 阅读量
*/ */
private Integer readCount; private Long readCount;
/** /**
* 讨论量 * 讨论量
*/ */
private Integer discussCount; private Long discussCount;
/** /**
* 话题真假(腾讯较真榜使用) * 话题真假(腾讯较真榜使用)
...@@ -98,7 +98,7 @@ public class HotSearchCache { ...@@ -98,7 +98,7 @@ public class HotSearchCache {
/** /**
* 首次上榜热度 * 首次上榜热度
*/ */
private Integer firstCount; private Long firstCount;
/** 详情页图片(微博平台) */ /** 详情页图片(微博平台) */
private String pictureUrl; private String pictureUrl;
...@@ -113,7 +113,7 @@ public class HotSearchCache { ...@@ -113,7 +113,7 @@ public class HotSearchCache {
*/ */
private String downtext; private String downtext;
public HotSearchCache(String url, String name, String topicLead, Integer highestCount, Integer lastCount, Boolean hot, public HotSearchCache(String url, String name, String topicLead, Long highestCount, Long lastCount, Boolean hot,
Date startTime, Date endTime, Integer highestRank, Integer lastRank, String type, Integer duration){ Date startTime, Date endTime, Integer highestRank, Integer lastRank, String type, Integer duration){
this.id = name + "_" + type; this.id = name + "_" + type;
this.url = url; this.url = url;
...@@ -135,21 +135,21 @@ public class HotSearchCache { ...@@ -135,21 +135,21 @@ public class HotSearchCache {
public void setRecommend(Boolean recommend) { this.recommend = recommend; } public void setRecommend(Boolean recommend) { this.recommend = recommend; }
public Integer getReadCount() { return readCount; } public Long getReadCount() { return readCount; }
public void setReadCount(Integer readCount) { this.readCount = readCount; } public void setReadCount(Long readCount) { this.readCount = readCount; }
public Integer getDiscussCount() { return discussCount; } public Long getDiscussCount() { return discussCount; }
public void setDiscussCount(Integer discussCount) { this.discussCount = discussCount; } public void setDiscussCount(Long discussCount) { this.discussCount = discussCount; }
public String getTopicLead() { return topicLead; } public String getTopicLead() { return topicLead; }
public void setTopicLead(String topicLead) { this.topicLead = topicLead; } public void setTopicLead(String topicLead) { this.topicLead = topicLead; }
public Integer getFirstCount() { return firstCount; } public Long getFirstCount() { return firstCount; }
public void setFirstCount(Integer firstCount) { this.firstCount = firstCount; } public void setFirstCount(Long firstCount) { this.firstCount = firstCount; }
public String getPictureUrl() { return pictureUrl; } public String getPictureUrl() { return pictureUrl; }
......
package com.zhiwei.searchhotcrawler.bean; package com.zhiwei.searchhotcrawler.bean;
/** /**
* @ClassName: WeiboHotSearch * @ClassName: WeiboHotSearch
* @Description: TODO(微博实时热搜) * @Description: TODO(微博实时热搜)
* @author hero * @author hero
* @date 2017年9月26日 下午5:41:11 * @date 2017年9月26日 下午5:41:11
*/ */
...@@ -42,7 +42,7 @@ public class HotSearchList implements Serializable{ ...@@ -42,7 +42,7 @@ public class HotSearchList implements Serializable{
/** /**
* 热搜量 * 热搜量
*/ */
private Integer count; private Long count;
/** /**
* 状态(true 为热搜; false为实时上升) * 状态(true 为热搜; false为实时上升)
...@@ -77,7 +77,7 @@ public class HotSearchList implements Serializable{ ...@@ -77,7 +77,7 @@ public class HotSearchList implements Serializable{
/** /**
* 话题讨论量或阅读量 * 话题讨论量或阅读量
*/ */
private Integer commentCount; private Long commentCount;
/** /**
* 话题真假结果(腾讯较真榜使用) * 话题真假结果(腾讯较真榜使用)
...@@ -87,12 +87,12 @@ public class HotSearchList implements Serializable{ ...@@ -87,12 +87,12 @@ public class HotSearchList implements Serializable{
/** /**
* 观看数(目前仅B站排行榜使用) * 观看数(目前仅B站排行榜使用)
*/ */
private Integer view; private Long view;
/** /**
* 弹幕数(目前仅B站排行榜使用) * 弹幕数(目前仅B站排行榜使用)
*/ */
private Integer barrage; private Long barrage;
/** /**
* 图片地址 * 图片地址
...@@ -100,9 +100,9 @@ public class HotSearchList implements Serializable{ ...@@ -100,9 +100,9 @@ public class HotSearchList implements Serializable{
private String pictureUrl; private String pictureUrl;
public HotSearchList(){} public HotSearchList(){}
public HotSearchList(String url, String name, Integer count,Boolean hot,Integer rank,String type,String icon,Date date){ public HotSearchList(String url, String name, Long count,Boolean hot,Integer rank,String type,String icon,Date date){
this.id = name + "_" + new Date().getTime() + "_" + type; this.id = name + "_" + System.currentTimeMillis() + "_" + type;
this.url = url; this.url = url;
this.name = name; this.name = name;
this.count = count; this.count = count;
...@@ -113,10 +113,10 @@ public class HotSearchList implements Serializable{ ...@@ -113,10 +113,10 @@ public class HotSearchList implements Serializable{
this.type = type; this.type = type;
this.icon = icon; this.icon = icon;
} }
public HotSearchList(String url, String name, Integer count,Integer rank,String type,Date date){ public HotSearchList(String url, String name, Long count,Integer rank,String type,Date date){
this.id = name + "_" + new Date().getTime()+ "_" + type; this.id = name + "_" + System.currentTimeMillis()+ "_" + type;
this.url = url; this.url = url;
this.name = name; this.name = name;
this.count = count; this.count = count;
...@@ -128,8 +128,8 @@ public class HotSearchList implements Serializable{ ...@@ -128,8 +128,8 @@ public class HotSearchList implements Serializable{
} }
public HotSearchList(String url, String name, Integer count,Integer rank,String type, Integer commentCount, String topicLead,Date date){ public HotSearchList(String url, String name, Long count,Integer rank,String type, Long commentCount, String topicLead,Date date){
this.id = name + "_" + new Date().getTime()+ "_" + type; this.id = name + "_" + System.currentTimeMillis()+ "_" + type;
this.url = url; this.url = url;
this.name = name; this.name = name;
this.count = count; this.count = count;
...@@ -142,8 +142,8 @@ public class HotSearchList implements Serializable{ ...@@ -142,8 +142,8 @@ public class HotSearchList implements Serializable{
this.topicLead = topicLead; this.topicLead = topicLead;
} }
public HotSearchList(String url, String name, Integer count, Boolean hot,Integer rank, String type, Date date, String icon, String topicResult){ public HotSearchList(String url, String name, Long count, Boolean hot,Integer rank, String type, Date date, String icon, String topicResult){
this.id = name + "_" + new Date().getTime() + "_" + type; this.id = name + "_" + System.currentTimeMillis() + "_" + type;
this.url = url; this.url = url;
this.name = name; this.name = name;
this.hot = hot; this.hot = hot;
...@@ -156,8 +156,8 @@ public class HotSearchList implements Serializable{ ...@@ -156,8 +156,8 @@ public class HotSearchList implements Serializable{
this.topicResult = topicResult; this.topicResult = topicResult;
} }
public HotSearchList(String url, String name, String topicLead, Integer count, Boolean hot, Date time, Integer rank, String type, Integer view, Integer barrage, String pictureUrl) { public HotSearchList(String url, String name, String topicLead, Long count, Boolean hot, Date time, Integer rank, String type, Long view, Long barrage, String pictureUrl) {
this.id = name + "_" + new Date().getTime()+ "_" + type; this.id = name + "_" + System.currentTimeMillis()+ "_" + type;
this.url = url; this.url = url;
this.name = name; this.name = name;
this.topicLead = topicLead; this.topicLead = topicLead;
......
...@@ -3,6 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -3,6 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import java.time.Duration; import java.time.Duration;
import java.util.*; import java.util.*;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request; import okhttp3.Request;
import okhttp3.Response; import okhttp3.Response;
...@@ -14,8 +16,6 @@ import org.slf4j.Logger; ...@@ -14,8 +16,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
...@@ -29,7 +29,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType; ...@@ -29,7 +29,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
public class BaiDuHotSearchCrawler { public class BaiDuHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
/** /**
* @Title: BaiDuHotSearchTest * @Title: BaiDuHotSearchTest
* @author hero * @author hero
...@@ -40,7 +40,7 @@ public class BaiDuHotSearchCrawler { ...@@ -40,7 +40,7 @@ public class BaiDuHotSearchCrawler {
String url = "http://top.baidu.com/buzz?b=1&fr=topindex"; String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
String htmlBody = null; String htmlBody = null;
Request request = RequestUtils.wrapGet(url); Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) { try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string(); htmlBody = response.body().string();
} catch (Exception e) { } catch (Exception e) {
log.error("解析百度风云榜时出现解析错误,页面结构有问题", e); log.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
...@@ -52,8 +52,8 @@ public class BaiDuHotSearchCrawler { ...@@ -52,8 +52,8 @@ public class BaiDuHotSearchCrawler {
} }
return Collections.emptyList(); return Collections.emptyList();
} }
/** /**
* 解析数据 * 解析数据
* @param htmlBody * @param htmlBody
...@@ -96,7 +96,7 @@ public class BaiDuHotSearchCrawler { ...@@ -96,7 +96,7 @@ public class BaiDuHotSearchCrawler {
else if (!element.select("td.last").select("span.icon-fair").isEmpty()) { else if (!element.select("td.last").select("span.icon-fair").isEmpty()) {
hot = element.select("td.last").select("span.icon-fair").text(); hot = element.select("td.last").select("span.icon-fair").text();
} }
int count = 0; long count = 0;
// 判断hot是否为空 // 判断hot是否为空
if (StringUtils.isNotBlank(hot)) { if (StringUtils.isNotBlank(hot)) {
count = Integer.valueOf(hot); count = Integer.valueOf(hot);
...@@ -122,4 +122,4 @@ public class BaiDuHotSearchCrawler { ...@@ -122,4 +122,4 @@ public class BaiDuHotSearchCrawler {
return list; return list;
} }
} }
\ No newline at end of file
...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
...@@ -52,16 +52,16 @@ public class BililiCrawler { ...@@ -52,16 +52,16 @@ public class BililiCrawler {
int rank = i+1; int rank = i+1;
String name = data.getString("title"); String name = data.getString("title");
String topicLead = data.getString("desc"); String topicLead = data.getString("desc");
int count = data.getIntValue("score"); long count = data.getLongValue("score");
String bvid = data.getString("bvid"); String bvid = data.getString("bvid");
String pic = data.getString("pic"); String pic = data.getString("pic");
String bUrl = "https://www.bilibili.com/video/"+bvid; String bUrl = "https://www.bilibili.com/video/"+bvid;
Integer view = null; Long view = null;
Integer barrage = null; Long barrage = null;
if(data.containsKey("stat")) { if(data.containsKey("stat")) {
JSONObject stat = data.getJSONObject("stat"); JSONObject stat = data.getJSONObject("stat");
view = stat.getIntValue("view"); view = stat.getLongValue("view");
barrage = stat.getIntValue("danmaku"); barrage = stat.getLongValue("danmaku");
} }
HotSearchList hotSearchList = new HotSearchList(bUrl,name,topicLead,count,null,date,rank,HotSearchType.B站排行榜.name(),view,barrage,pic); HotSearchList hotSearchList = new HotSearchList(bUrl,name,topicLead,count,null,date,rank,HotSearchType.B站排行榜.name(),view,barrage,pic);
hotSearchLists.add(hotSearchList); hotSearchLists.add(hotSearchList);
......
...@@ -15,8 +15,8 @@ import org.slf4j.LoggerFactory; ...@@ -15,8 +15,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
...@@ -30,16 +30,16 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType; ...@@ -30,16 +30,16 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
*/ */
@Log4j2 @Log4j2
public class DouyinHotSearchCrawler { public class DouyinHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).build();
public static List<HotSearchList> list = new ArrayList<>(); public static List<HotSearchList> list = new ArrayList<>();
/** /**
* @Title: getMobileDouyinHotList * @Title: getMobileDouyinHotList
* @author hero * @author hero
* @Description: 移动端抖音热搜榜 * @Description: 移动端抖音热搜榜
* @param @return 设定文件 * @param @return 设定文件
* @return List<HotSearchList> 返回类型 * @return List<HotSearchList> 返回类型
*/ */
public static List<HotSearchList> getMobileDouyinHotList(Date date){ public static List<HotSearchList> getMobileDouyinHotList(Date date){
...@@ -69,8 +69,8 @@ public class DouyinHotSearchCrawler { ...@@ -69,8 +69,8 @@ public class DouyinHotSearchCrawler {
word = wl.getString("word"); word = wl.getString("word");
//获取热度值 //获取热度值
hotValueStr = wl.getString("hot_value"); hotValueStr = wl.getString("hot_value");
Integer hotValue = null; Long hotValue = null;
hotValue = Integer.valueOf(hotValueStr); hotValue = Long.valueOf(hotValueStr);
// logger.info("热度为:::{}", hot_value); // logger.info("热度为:::{}", hot_value);
HotSearchList douyin = new HotSearchList(null, word, hotValue, position, HotSearchType.抖音热搜.name(),date); HotSearchList douyin = new HotSearchList(null, word, hotValue, position, HotSearchType.抖音热搜.name(),date);
list.add(douyin); list.add(douyin);
......
...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
...@@ -47,8 +47,8 @@ public class FengHuangSearchCrawler { ...@@ -47,8 +47,8 @@ public class FengHuangSearchCrawler {
String topicLead = jsonArray.getJSONObject(i).getString("title"); String topicLead = jsonArray.getJSONObject(i).getString("title");
String fenghuangUrl = jsonArray.getJSONObject(i).getJSONObject("link").getString("weburl"); String fenghuangUrl = jsonArray.getJSONObject(i).getJSONObject("link").getString("weburl");
String hotValue = jsonArray.getJSONObject(i).getJSONObject("hotLabel").getString("hotGrade"); String hotValue = jsonArray.getJSONObject(i).getJSONObject("hotLabel").getString("hotGrade");
Integer count = hotValue.length()>0 ? TipsUtils.getHotCount(hotValue) : 0; Long count = hotValue.length()>0 ? TipsUtils.getHotCount(hotValue) : 0;
Integer commentCount = jsonArray.getJSONObject(i).getIntValue("commentsall"); Long commentCount = jsonArray.getJSONObject(i).getLongValue("commentsall");
HotSearchList hotSearchList = new HotSearchList(fenghuangUrl,name,count, HotSearchList hotSearchList = new HotSearchList(fenghuangUrl,name,count,
rank,HotSearchType.凤凰新闻热榜.name(),commentCount,topicLead,date); rank,HotSearchType.凤凰新闻热榜.name(),commentCount,topicLead,date);
list.add(hotSearchList); list.add(hotSearchList);
......
...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
...@@ -52,7 +52,7 @@ public class MaiMaiHotSearchCrawler { ...@@ -52,7 +52,7 @@ public class MaiMaiHotSearchCrawler {
icon = jsonObject.getJSONObject("hot_type_card").getString("text"); icon = jsonObject.getJSONObject("hot_type_card").getString("text");
} }
String hotValue = jsonArray.getJSONObject(i).getJSONObject("common").getString("hot_info"); String hotValue = jsonArray.getJSONObject(i).getJSONObject("common").getString("hot_info");
Integer count = hotValue.length() > 0 ? TipsUtils.getHotCount(hotValue) : 0; Long count = hotValue.length() > 0 ? TipsUtils.getHotCount(hotValue) : 0;
HotSearchList hotSearchList = new HotSearchList(maimaiUrl, name, count, null, rank, HotSearchType.脉脉热榜.name(), icon, date); HotSearchList hotSearchList = new HotSearchList(maimaiUrl, name, count, null, rank, HotSearchType.脉脉热榜.name(), icon, date);
list.add(hotSearchList); list.add(hotSearchList);
} }
......
...@@ -17,8 +17,8 @@ import org.slf4j.Logger; ...@@ -17,8 +17,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
...@@ -120,7 +120,7 @@ public class SougoHotSearchCrawler { ...@@ -120,7 +120,7 @@ public class SougoHotSearchCrawler {
JSONObject object = jsonArray.getJSONObject(j); JSONObject object = jsonArray.getJSONObject(j);
int rank = j+1; int rank = j+1;
String name = object.getString("name"); String name = object.getString("name");
Integer count = object.getIntValue("num"); Long count = object.getLongValue("num");
String sougouUrl = "https://m.sogou.com/web/searchList.jsp?s_from=pcsearch&keyword=" + URLCodeUtil.getURLEncode(name, "utf-8"); String sougouUrl = "https://m.sogou.com/web/searchList.jsp?s_from=pcsearch&keyword=" + URLCodeUtil.getURLEncode(name, "utf-8");
String icon = object.getIntValue("tag") == 1 ? "热" : null; String icon = object.getIntValue("tag") == 1 ? "热" : null;
HotSearchList hotSearchList = new HotSearchList(sougouUrl,name,count,false,rank,HotSearchType.搜狗微信客户端热搜.name(),icon,date); HotSearchList hotSearchList = new HotSearchList(sougouUrl,name,count,false,rank,HotSearchType.搜狗微信客户端热搜.name(),icon,date);
......
...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
...@@ -45,7 +45,7 @@ public class SouhuTopicCrawler { ...@@ -45,7 +45,7 @@ public class SouhuTopicCrawler {
Integer rank = i+1; Integer rank = i+1;
String name = dataJson.getJSONObject(i).getJSONObject("eventNewsInfo").getString("title"); String name = dataJson.getJSONObject(i).getJSONObject("eventNewsInfo").getString("title");
String hotValue = dataJson.getJSONObject(i).getString("value"); String hotValue = dataJson.getJSONObject(i).getString("value");
Integer count = 0; Long count;
if(hotValue.contains("观点")) { if(hotValue.contains("观点")) {
count = TipsUtils.getHotCount(hotValue.substring(0, hotValue.indexOf("观点"))); count = TipsUtils.getHotCount(hotValue.substring(0, hotValue.indexOf("观点")));
}else{ }else{
......
...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
...@@ -50,17 +50,17 @@ public class TengXunCrawler { ...@@ -50,17 +50,17 @@ public class TengXunCrawler {
String urlID = dataJson.getJSONObject(i).getString("id"); String urlID = dataJson.getJSONObject(i).getString("id");
String tengxunUrl = null; String tengxunUrl = null;
//String tengxunUrl = "https://view.inews.qq.com/topic/" + dataJson.getJSONObject(i).getString("id"); //String tengxunUrl = "https://view.inews.qq.com/topic/" + dataJson.getJSONObject(i).getString("id");
Integer count = 0; Long count = null;
String icon = null; String icon = null;
if (dataJson.getJSONObject(i).containsKey("topic")) { if (dataJson.getJSONObject(i).containsKey("topic")) {
tengxunUrl = "https://view.inews.qq.com/topic/" + urlID; tengxunUrl = "https://view.inews.qq.com/topic/" + urlID;
count = dataJson.getJSONObject(i).getJSONObject("topic").getIntValue("ranking_score"); count = dataJson.getJSONObject(i).getJSONObject("topic").getLongValue("ranking_score");
if (dataJson.getJSONObject(i).getJSONObject("topic").containsKey("rec_icon")) { if (dataJson.getJSONObject(i).getJSONObject("topic").containsKey("rec_icon")) {
icon = dataJson.getJSONObject(i).getJSONObject("topic").getString("rec_icon"); icon = dataJson.getJSONObject(i).getJSONObject("topic").getString("rec_icon");
} }
} else if (dataJson.getJSONObject(i).containsKey("hotEvent")) { } else if (dataJson.getJSONObject(i).containsKey("hotEvent")) {
tengxunUrl = "https://view.inews.qq.com/hotEvent/" + urlID; tengxunUrl = "https://view.inews.qq.com/hotEvent/" + urlID;
count = dataJson.getJSONObject(i).getJSONObject("hotEvent").getIntValue("hotScore"); count = dataJson.getJSONObject(i).getJSONObject("hotEvent").getLongValue("hotScore");
if (dataJson.getJSONObject(i).getJSONObject("hotEvent").containsKey("rec_icon")) { if (dataJson.getJSONObject(i).getJSONObject("hotEvent").containsKey("rec_icon")) {
icon = dataJson.getJSONObject(i).getJSONObject("hotEvent").getString("rec_icon"); icon = dataJson.getJSONObject(i).getJSONObject("hotEvent").getString("rec_icon");
} }
...@@ -107,7 +107,7 @@ public class TengXunCrawler { ...@@ -107,7 +107,7 @@ public class TengXunCrawler {
JSONObject jsonObject = jsonArray.getJSONObject(i); JSONObject jsonObject = jsonArray.getJSONObject(i);
Integer rank = jsonObject.getIntValue("index"); Integer rank = jsonObject.getIntValue("index");
String name = jsonObject.getString("title"); String name = jsonObject.getString("title");
Integer count = jsonObject.getIntValue("score"); Long count = jsonObject.getLongValue("score");
String tengxunUrl = jsonObject.getString("link"); String tengxunUrl = jsonObject.getString("link");
String topicResult = jsonObject.getString("result"); String topicResult = jsonObject.getString("result");
HotSearchList hotSearchList = new HotSearchList(tengxunUrl,name,count,false,rank,HotSearchType.腾讯较真榜.name(),date,null,topicResult); HotSearchList hotSearchList = new HotSearchList(tengxunUrl,name,count,false,rank,HotSearchType.腾讯较真榜.name(),date,null,topicResult);
......
...@@ -4,8 +4,8 @@ import com.alibaba.fastjson.JSON; ...@@ -4,8 +4,8 @@ import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
...@@ -77,7 +77,7 @@ public class ToutiaoHotSearchCrawler { ...@@ -77,7 +77,7 @@ public class ToutiaoHotSearchCrawler {
String name = word.getString("Title"); String name = word.getString("Title");
// String link = "https://ib.snssdk.com/search/?keyword=" + URLCodeUtil.getURLEncode(name, "utf-8") + "&pd=synthesis&source=trending_list&traffic_source="; // String link = "https://ib.snssdk.com/search/?keyword=" + URLCodeUtil.getURLEncode(name, "utf-8") + "&pd=synthesis&source=trending_list&traffic_source=";
String link = word.getString("Url"); String link = word.getString("Url");
Integer hotCount = word.getInteger("HotValue"); Long hotCount = word.getLongValue("HotValue");
String wordsType = word.getString("Label"); String wordsType = word.getString("Label");
String icon = getIcon(wordsType); String icon = getIcon(wordsType);
...@@ -170,7 +170,7 @@ public class ToutiaoHotSearchCrawler { ...@@ -170,7 +170,7 @@ public class ToutiaoHotSearchCrawler {
if (Objects.nonNull(elements) && !elements.isEmpty()) { if (Objects.nonNull(elements) && !elements.isEmpty()) {
Element element = elements.first(); Element element = elements.first();
String readCount = element.text().replaceAll("阅读", ""); String readCount = element.text().replaceAll("阅读", "");
Integer count = TipsUtils.getHotCount(readCount); Long count = TipsUtils.getHotCount(readCount);
log.info("{},阅读量:{}", hotSearchList.getName(), count); log.info("{},阅读量:{}", hotSearchList.getName(), count);
hotSearchList.setCommentCount(count); hotSearchList.setCommentCount(count);
hotSearchListDAO.updateTouTiaoReadCount(hotSearchList); hotSearchListDAO.updateTouTiaoReadCount(hotSearchList);
......
...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
...@@ -51,7 +51,7 @@ public class WangYiHotSearchCrawler { ...@@ -51,7 +51,7 @@ public class WangYiHotSearchCrawler {
for (int i = 0; i < jsonObject.size(); i++) { for (int i = 0; i < jsonObject.size(); i++) {
int rank = i + 1; int rank = i + 1;
String name = jsonObject.getJSONObject(i).getString("title"); String name = jsonObject.getJSONObject(i).getString("title");
int count = jsonObject.getJSONObject(i).getIntValue("hotValue"); long count = jsonObject.getJSONObject(i).getLongValue("hotValue");
String contentId = jsonObject.getJSONObject(i).getString("contentId"); String contentId = jsonObject.getJSONObject(i).getString("contentId");
String wangyiUrl = "https://c.m.163.com/news/a/" + contentId + ".html"; String wangyiUrl = "https://c.m.163.com/news/a/" + contentId + ".html";
HotSearchList hotSearchList = new HotSearchList(wangyiUrl, name, count, rank, HotSearchType.网易热榜.name(),date); HotSearchList hotSearchList = new HotSearchList(wangyiUrl, name, count, rank, HotSearchType.网易热榜.name(),date);
...@@ -90,7 +90,7 @@ public class WangYiHotSearchCrawler { ...@@ -90,7 +90,7 @@ public class WangYiHotSearchCrawler {
for (int i = 0; i < jsonObject.size(); i++) { for (int i = 0; i < jsonObject.size(); i++) {
int rank = i + 1; int rank = i + 1;
String name = jsonObject.getJSONObject(i).getString("doc_title"); String name = jsonObject.getJSONObject(i).getString("doc_title");
int count = jsonObject.getJSONObject(i).getIntValue("hotScore")*10000; long count = jsonObject.getJSONObject(i).getIntValue("hotScore")*10000;
String contentId = jsonObject.getJSONObject(i).getString("docId"); String contentId = jsonObject.getJSONObject(i).getString("docId");
String wangyiUrl = "https://c.m.163.com/news/a/" + contentId + ".html"; String wangyiUrl = "https://c.m.163.com/news/a/" + contentId + ".html";
HotSearchList hotSearchList = new HotSearchList(wangyiUrl, name, count, rank, HotSearchType.网易跟帖热议.name(),date); HotSearchList hotSearchList = new HotSearchList(wangyiUrl, name, count, rank, HotSearchType.网易跟帖热议.name(),date);
......
...@@ -23,8 +23,8 @@ import org.slf4j.LoggerFactory; ...@@ -23,8 +23,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.mail.SendMailWeibo; import com.zhiwei.searchhotcrawler.mail.SendMailWeibo;
...@@ -32,9 +32,9 @@ import com.zhiwei.tools.tools.URLCodeUtil; ...@@ -32,9 +32,9 @@ import com.zhiwei.tools.tools.URLCodeUtil;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
/** /**
* @ClassName: WeiboHotSearch * @ClassName: WeiboHotSearch
* @Description: 微博实时热搜采集 * @Description: 微博实时热搜采集
* @author hero * @author hero
* @date 2017年9月15日 上午10:54:31 * @date 2017年9月15日 上午10:54:31
*/ */
@Log4j2 @Log4j2
...@@ -44,9 +44,9 @@ public class WeiboHotSearchCrawler { ...@@ -44,9 +44,9 @@ public class WeiboHotSearchCrawler {
private static RedisDao redisDao = new RedisDao(); private static RedisDao redisDao = new RedisDao();
/** /**
* @Title: weiboHotSearchTest * @Title: weiboHotSearchTest
* @author hero * @author hero
* @Description: TODO(PC端微博热搜采集) * @Description: TODO(PC端微博热搜采集)
* @return void 返回类型 * @return void 返回类型
*/ */
// public static List<HotSearchList> weiboHotSearch(){ // public static List<HotSearchList> weiboHotSearch(){
...@@ -103,14 +103,14 @@ public class WeiboHotSearchCrawler { ...@@ -103,14 +103,14 @@ public class WeiboHotSearchCrawler {
// } // }
// return list; // return list;
// } // }
/** /**
* @Title: weiboHotSearchByPhoneTest * @Title: weiboHotSearchByPhoneTest
* @author hero * @author hero
* @Description: TODO(手机端Iphone 微博热搜采集) * @Description: TODO(手机端Iphone 微博热搜采集)
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<HotSearchList> weiboHotSearchByPhone(Date date){ public static List<HotSearchList> weiboHotSearchByPhone(Date date){
...@@ -149,7 +149,7 @@ public class WeiboHotSearchCrawler { ...@@ -149,7 +149,7 @@ public class WeiboHotSearchCrawler {
for (int j = 0; j < cardGroup.size(); j++) { for (int j = 0; j < cardGroup.size(); j++) {
JSONObject cardInfo = cardGroup.getJSONObject(j); JSONObject cardInfo = cardGroup.getJSONObject(j);
String name = cardInfo.getString("desc"); String name = cardInfo.getString("desc");
int hotCount = cardInfo.getIntValue("desc_extr"); long hotCount = cardInfo.getLongValue("desc_extr");
String icon = cardInfo.getString("icon"); String icon = cardInfo.getString("icon");
if (StringUtils.isNotBlank(icon)) { if (StringUtils.isNotBlank(icon)) {
icon = icon.split("_")[1].split(".png")[0]; icon = icon.split("_")[1].split(".png")[0];
...@@ -205,7 +205,7 @@ public class WeiboHotSearchCrawler { ...@@ -205,7 +205,7 @@ public class WeiboHotSearchCrawler {
for(int i=0; i<jsonArray.size(); i++){ for(int i=0; i<jsonArray.size(); i++){
JSONObject cardInfo = jsonArray.getJSONObject(i); JSONObject cardInfo = jsonArray.getJSONObject(i);
String name = cardInfo.getString("desc"); String name = cardInfo.getString("desc");
int hotCount = cardInfo.getIntValue("desc_extr"); long hotCount = cardInfo.getIntValue("desc_extr");
String weiboUrl = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top"; String weiboUrl = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
HotSearchList hotSearchList = new HotSearchList(weiboUrl,name,hotCount,null,HotSearchType.微博预热榜.name(),date); HotSearchList hotSearchList = new HotSearchList(weiboUrl,name,hotCount,null,HotSearchType.微博预热榜.name(),date);
result.add(hotSearchList); result.add(hotSearchList);
......
...@@ -19,31 +19,31 @@ import org.slf4j.LoggerFactory; ...@@ -19,31 +19,31 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
/** /**
* *
* @ClassName: WeiboSuperTopicCrawler * @ClassName: WeiboSuperTopicCrawler
* @Description: 微博超话榜单采集(明星) * @Description: 微博超话榜单采集(明星)
* @author Bewilder ZW * @author Bewilder ZW
* @date 2019年9月27日 下午3:01:34 * @date 2019年9月27日 下午3:01:34
*/ */
@Log4j2 @Log4j2
public class WeiboSuperTopicCrawler { public class WeiboSuperTopicCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static Map<String,String> headMap = new HashMap<>(); private static Map<String,String> headMap = new HashMap<>();
static { static {
headMap.put("X-Requested-With", "XMLHttpRequest"); headMap.put("X-Requested-With", "XMLHttpRequest");
headMap.put("Referer", "https://huati.weibo.cn/discovery/super?extparam=ctg1_2%7Cscorll_1&luicode=10000011&lfid=100803_-_super&sourceType=weixin"); headMap.put("Referer", "https://huati.weibo.cn/discovery/super?extparam=ctg1_2%7Cscorll_1&luicode=10000011&lfid=100803_-_super&sourceType=weixin");
headMap.put("Host", "huati.weibo.cn"); headMap.put("Host", "huati.weibo.cn");
} }
/** /**
* *
* 开始采集明星话题 * 开始采集明星话题
* @return void * @return void
*/ */
...@@ -52,7 +52,7 @@ public class WeiboSuperTopicCrawler { ...@@ -52,7 +52,7 @@ public class WeiboSuperTopicCrawler {
urlMap.put("明星", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=star&from=&wm="); urlMap.put("明星", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=star&from=&wm=");
urlMap.put("明星潜力", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=potential&from=&wm="); urlMap.put("明星潜力", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=potential&from=&wm=");
urlMap.put("明星上升", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=up&from=&wm="); urlMap.put("明星上升", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=up&from=&wm=");
List<WeiboSuperTopic> topicList = new ArrayList<>(); List<WeiboSuperTopic> topicList = new ArrayList<>();
for(Entry<String,String> entry : urlMap.entrySet()) { for(Entry<String,String> entry : urlMap.entrySet()) {
...@@ -81,10 +81,10 @@ public class WeiboSuperTopicCrawler { ...@@ -81,10 +81,10 @@ public class WeiboSuperTopicCrawler {
} }
return topicList; return topicList;
} }
/** /**
* *
* 解析话题榜单 * 解析话题榜单
* @param htmlBody * @param htmlBody
* @param type * @param type
...@@ -95,7 +95,7 @@ public class WeiboSuperTopicCrawler { ...@@ -95,7 +95,7 @@ public class WeiboSuperTopicCrawler {
JSONArray list = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONArray("list"); JSONArray list = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONArray("list");
if(Objects.nonNull(list) && !list.isEmpty()) { if(Objects.nonNull(list) && !list.isEmpty()) {
page = (page-1)*20; page = (page-1)*20;
List<WeiboSuperTopic> topicList = new ArrayList<>(); List<WeiboSuperTopic> topicList = new ArrayList<>();
Integer toprank = null; Integer toprank = null;
String topicName = null; String topicName = null;
...@@ -125,11 +125,11 @@ public class WeiboSuperTopicCrawler { ...@@ -125,11 +125,11 @@ public class WeiboSuperTopicCrawler {
} }
return Collections.emptyList(); return Collections.emptyList();
} }
/** /**
* *
* 根据单一话题id获取话题阅读数及发帖数 * 根据单一话题id获取话题阅读数及发帖数
* @param id * @param id
* @param topic * @param topic
...@@ -159,8 +159,8 @@ public class WeiboSuperTopicCrawler { ...@@ -159,8 +159,8 @@ public class WeiboSuperTopicCrawler {
} }
return topic; return topic;
} }
} }
...@@ -4,8 +4,8 @@ import com.alibaba.fastjson.JSON; ...@@ -4,8 +4,8 @@ import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic; import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
...@@ -169,8 +169,8 @@ public class WeiboTopicCrawler { ...@@ -169,8 +169,8 @@ public class WeiboTopicCrawler {
String topicName = null; String topicName = null;
String url = null; String url = null;
String description = null; String description = null;
Integer commentNum = null; Long commentNum = null;
Integer readNum = null; Long readNum = null;
String desc2 = null; String desc2 = null;
for(int i=0; i<cards.size(); i++) { for(int i=0; i<cards.size(); i++) {
JSONObject cardGroup = cards.getJSONObject(i); JSONObject cardGroup = cards.getJSONObject(i);
......
...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
...@@ -57,7 +57,7 @@ public class XinLangHotSearchCrawler { ...@@ -57,7 +57,7 @@ public class XinLangHotSearchCrawler {
String name = jsonArray.getJSONObject(i).getString("text"); String name = jsonArray.getJSONObject(i).getString("text");
Integer rank = i + 1; Integer rank = i + 1;
String hotValue = jsonArray.getJSONObject(i).getString("hotValue"); String hotValue = jsonArray.getJSONObject(i).getString("hotValue");
Integer count = TipsUtils.getHotCount(hotValue); Long count = TipsUtils.getHotCount(hotValue);
String showTags; String showTags;
if (jsonArray.getJSONObject(i).containsKey("card")){ if (jsonArray.getJSONObject(i).containsKey("card")){
JSONArray cardArray = jsonArray.getJSONObject(i).getJSONArray("card"); JSONArray cardArray = jsonArray.getJSONObject(i).getJSONArray("card");
...@@ -124,7 +124,7 @@ public class XinLangHotSearchCrawler { ...@@ -124,7 +124,7 @@ public class XinLangHotSearchCrawler {
Integer rank = i + 1; Integer rank = i + 1;
String name = dataJson.getJSONObject(i).getString("title"); String name = dataJson.getJSONObject(i).getString("title");
String xinlangUrl = dataJson.getJSONObject(i).getString("wapurl"); String xinlangUrl = dataJson.getJSONObject(i).getString("wapurl");
Integer hot = dataJson.getJSONObject(i).getIntValue("hot_value"); Long hot = dataJson.getJSONObject(i).getLongValue("hot_value");
HotSearchList hotSearchList = new HotSearchList(xinlangUrl, name, hot, rank, HotSearchType.新浪热点.name(),date); HotSearchList hotSearchList = new HotSearchList(xinlangUrl, name, hot, rank, HotSearchType.新浪热点.name(),date);
hotSearchLists.add(hotSearchList); hotSearchLists.add(hotSearchList);
} }
......
...@@ -5,8 +5,8 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -5,8 +5,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
...@@ -52,7 +52,7 @@ public class ZhihuChildHotSearchCrawler { ...@@ -52,7 +52,7 @@ public class ZhihuChildHotSearchCrawler {
Integer rank = i + 1; Integer rank = i + 1;
String name = jsonObject.getJSONObject("title_area").getString("text"); String name = jsonObject.getJSONObject("title_area").getString("text");
String hotCountString = jsonObject.getJSONObject("metrics_area").getString("text"); String hotCountString = jsonObject.getJSONObject("metrics_area").getString("text");
Integer count = TipsUtils.getHotCount(hotCountString.substring(0, hotCountString.indexOf("领域热度"))); Long count = TipsUtils.getHotCount(hotCountString.substring(0, hotCountString.indexOf("领域热度")));
String childUrl = jsonObject.getJSONObject("link").getString("url"); String childUrl = jsonObject.getJSONObject("link").getString("url");
HotSearchList hotSearchList = new HotSearchList(childUrl, name, count, rank, HotSearchType.知乎热搜.name() + typeName + "分类",date); HotSearchList hotSearchList = new HotSearchList(childUrl, name, count, rank, HotSearchType.知乎热搜.name() + typeName + "分类",date);
list.add(hotSearchList); list.add(hotSearchList);
......
...@@ -15,26 +15,26 @@ import org.slf4j.LoggerFactory; ...@@ -15,26 +15,26 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
/** /**
* @ClassName: ZhihuHotCrawler * @ClassName: ZhihuHotCrawler
* @Description: 知乎热搜采集程序 * @Description: 知乎热搜采集程序
* @author hero * @author hero
* @date 2017年9月15日 上午10:54:31 * @date 2017年9月15日 上午10:54:31
*/ */
@Log4j2 @Log4j2
public class ZhihuHotSearchCrawler { public class ZhihuHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* @Title: getZhihuHotList * @Title: getZhihuHotList
* @author hero * @author hero
* @Description: 知乎热搜采集程序 * @Description: 知乎热搜采集程序
* @return void 返回类型 * @return void 返回类型
*/ */
...@@ -74,14 +74,14 @@ public class ZhihuHotSearchCrawler { ...@@ -74,14 +74,14 @@ public class ZhihuHotSearchCrawler {
// } // }
// return list; // return list;
// } // }
/** /**
* @Title: getMobileZhihuHotList * @Title: getMobileZhihuHotList
* @author hero * @author hero
* @Description: 移動端知乎熱搜榜 * @Description: 移動端知乎熱搜榜
* @param @return 设定文件 * @param @return 设定文件
* @return List<ZhihuHotSearch> 返回类型 * @return List<ZhihuHotSearch> 返回类型
*/ */
public static List<HotSearchList> getMobileZhihuHotList(Date date){ public static List<HotSearchList> getMobileZhihuHotList(Date date){
...@@ -106,7 +106,7 @@ public class ZhihuHotSearchCrawler { ...@@ -106,7 +106,7 @@ public class ZhihuHotSearchCrawler {
JSONArray dataJson = topSearch.getJSONArray("data"); JSONArray dataJson = topSearch.getJSONArray("data");
String link = null; String link = null;
String displayQuery = null; String displayQuery = null;
Integer hotCount = null; Long hotCount = null;
String hotText = null; String hotText = null;
for (int i = 0; i < dataJson.size(); i++) { for (int i = 0; i < dataJson.size(); i++) {
JSONObject data = dataJson.getJSONObject(i).getJSONObject("target"); JSONObject data = dataJson.getJSONObject(i).getJSONObject("target");
...@@ -118,12 +118,12 @@ public class ZhihuHotSearchCrawler { ...@@ -118,12 +118,12 @@ public class ZhihuHotSearchCrawler {
try { try {
if (hotText.contains("万")) { if (hotText.contains("万")) {
hotText = hotText.replaceAll("万.*", "").trim(); hotText = hotText.replaceAll("万.*", "").trim();
hotCount = (int) (Double.parseDouble(hotText) * 10000); hotCount = (long) (Double.parseDouble(hotText) * 10000);
} else if (hotText.contains("亿")) { } else if (hotText.contains("亿")) {
hotText = hotText.replaceAll("亿.*", "").trim(); hotText = hotText.replaceAll("亿.*", "").trim();
hotCount = (int) (Double.parseDouble(hotText) * 100000000); hotCount = (long) (Double.parseDouble(hotText) * 100000000);
} else { } else {
hotCount = Integer.getInteger(hotText); hotCount = Long.getLong(hotText);
} }
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
......
...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler; ...@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType; import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
......
package com.zhiwei.searchhotcrawler.run; package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig; import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.cache.CacheListener; import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.config.ProxyConfig; import com.zhiwei.searchhotcrawler.config.ProxyConfig;
...@@ -15,14 +15,14 @@ import java.util.concurrent.TimeUnit; ...@@ -15,14 +15,14 @@ import java.util.concurrent.TimeUnit;
public class HotSearchRun { public class HotSearchRun {
public static void main(String[] args) { public static void main(String[] args) {
ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml"); ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry) SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build(); .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig); ProxyFactory.init(simpleConfig);
new UpdateWechatUserRun().start(); new UpdateWechatUserRun().start();
ZhiWeiTools.sleep(10000); ZhiWeiTools.sleep(10000);
// new CacheListener().startListen(); // new CacheListener().startListen();
......
package com.zhiwei.searchhotcrawler.test;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.client.MongoDatabase;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.config.DBConfig;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBLocalTemplate;
import com.zhiwei.searchhotcrawler.util.HttpClientUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Log4j2
public class Job51Test {
public static void main(String[] args) {
// ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
// MongoDatabase mongoDBLocal = MongoDBLocalTemplate.getDB(DBConfig.dbName);
List<HotSearchList> list = new ArrayList<>();
String url = "https://search.51job.com/list/080300,000000,0000,00,9,99,java,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=";
Map<String,Object> header = new HashMap<>();
header.put("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9");
header.put("Accept-Encoding","gzip, deflate, br");
header.put("Accept-Language","zh-CN,zh;q=0.9");
header.put("Cache-Control","max-age=0");
header.put("Connection","keep-alive");
header.put("Cookie","guid=1925f996c7ae446cdf1f579f113bff6e; _ujz=MTg3NDg4MTM4MA%3D%3D; ps=needv%3D0; slife=lowbrowser%3Dnot%26%7C%26lastlogindate%3D20210318%26%7C%26securetime%3DBztcaVQzWTsEZlJrWmJdPwQ2Ajw%253D; track=registertype%3D1; 51job=cuid%3D187488138%26%7C%26cusername%3Dphone_15757871020_202103189219%26%7C%26cpassword%3D%26%7C%26cname%3D%25B3%25C2%25EC%25BF%25CC%25CE%26%7C%26cemail%3D15757871020%2540163.com%26%7C%26cemailstatus%3D0%26%7C%26cnickname%3D%26%7C%26ccry%3D.0b4qUteozwmg%26%7C%26cconfirmkey%3D%25241%2524UXfAYBHG%2524Hni.5zaFu5kr7BN.eVcOU%252F%26%7C%26cautologin%3D1%26%7C%26cenglish%3D0%26%7C%26sex%3D0%26%7C%26cnamekey%3D%25241%2524CN04lL8j%2524kCHAFcf4TNh%252F2odmIqujW1%26%7C%26to%3D8019a57bb26817913b5f3c2080ba5792605354bf%26%7C%26; nsearch=jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D; search=jobarea%7E%60080300%7C%21ord_field%7E%600%7C%21recentSearch0%7E%60080300%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FAjava%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch1%7E%60080300%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21");
header.put("Host","search.51job.com");
header.put("Referer","https://search.51job.com/list/080300,000000,0000,00,9,99,%2B,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=");
header.put("sec-ch-ua","\"Google Chrome\";v=\"89\", \"Chromium\";v=\"89\", \";Not A Brand\";v=\"99\"");
header.put("Sec-Fetch-Dest","document");
header.put("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36");
// header.put("","");
JSONObject jsonObject = null;
String htmlBody = null;
Request request = RequestUtils.wrapGet(url,header);
for (int t = 0; t < 1 && jsonObject == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("知乎热搜页面连接异常", e);
}
if (htmlBody != null) {
Document document = Jsoup.parse(htmlBody);
log.info("document:{}",document);
log.info("======================");
String html = document.getElementsByClass("j_joblist").first().html();
log.info("html:{}",html);
jsonObject = JSONObject.parseObject(html);
if (jsonObject != null) {
// JSONArray dataJson = jsonObject.getJSONObject("initialState").getJSONObject("topsearch").getJSONArray("data");
// for (int i = 0; i < dataJson.size(); i++) {
// Integer rank = i + 1;
// JSONObject data = dataJson.getJSONObject(i);
// String name = data.getString("queryDisplay");
// String realQuery = data.getString("realQuery");
// String zhihuUrl = "https://www.zhihu.com/search?q=" + realQuery + "&utm_content=search_hot&type=content";
//
// }
}
} else {
log.error("临时爬取出问题");
}
}
}
}
package com.zhiwei.searchhotcrawler.util; package com.zhiwei.searchhotcrawler.util;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import okhttp3.MediaType; import okhttp3.MediaType;
import okhttp3.Request; import okhttp3.Request;
import okhttp3.RequestBody; import okhttp3.RequestBody;
......
...@@ -68,16 +68,16 @@ public class TipsUtils { ...@@ -68,16 +68,16 @@ public class TipsUtils {
* @param hotCountString * @param hotCountString
* @return * @return
*/ */
public static Integer getHotCount(String hotCountString){ public static Long getHotCount(String hotCountString){
Integer count; Long count;
if(hotCountString.contains("万")){ if(hotCountString.contains("万")){
hotCountString = hotCountString.replaceAll("万.*", "").trim(); hotCountString = hotCountString.replaceAll("万.*", "").trim();
count = (int)(Double.parseDouble(hotCountString)*10000); count = (long)(Double.parseDouble(hotCountString)*10000);
}else if(hotCountString.contains("亿")){ }else if(hotCountString.contains("亿")){
hotCountString = hotCountString.replaceAll("亿.*", "").trim(); hotCountString = hotCountString.replaceAll("亿.*", "").trim();
count = (int)(Double.parseDouble(hotCountString)*100000000); count = (long)(Double.parseDouble(hotCountString)*100000000);
}else{ }else{
count = (int)(Double.parseDouble(hotCountString)); count = (long)(Double.parseDouble(hotCountString));
} }
return count; return count;
} }
......
...@@ -14,7 +14,7 @@ import org.slf4j.LoggerFactory; ...@@ -14,7 +14,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import okhttp3.MediaType; import okhttp3.MediaType;
...@@ -139,9 +139,9 @@ public class WechatCodeUtil { ...@@ -139,9 +139,9 @@ public class WechatCodeUtil {
} }
return null; return null;
} }
public static List<String> getUserListByGroupId(Integer groupId) { public static List<String> getUserListByGroupId(Integer groupId) {
try { try {
String token = getToken(); String token = getToken();
...@@ -180,7 +180,7 @@ public class WechatCodeUtil { ...@@ -180,7 +180,7 @@ public class WechatCodeUtil {
} }
/*** /***
* *
* @Title: getGroupIp * @Title: getGroupIp
* @author hero * @author hero
* @Description: 根据分组名称获取分组id * @Description: 根据分组名称获取分组id
...@@ -218,7 +218,7 @@ public class WechatCodeUtil { ...@@ -218,7 +218,7 @@ public class WechatCodeUtil {
} }
return groupId; return groupId;
} }
/** /**
* 查询公众号下的所有分组 * 查询公众号下的所有分组
* @return * @return
......
#redis.host=127.0.0.1 #redis.host=115.236.59.91
#redis.port=6379 #redis.port=7382
#redis.password= #redis.password=
#redis #redis
#redis.host = 192.168.0.39
#redis.port = 7382
#redis.database = 3
#redis
redis.host = 192.168.0.39 redis.host = 192.168.0.39
redis.port = 6379 redis.port = 6379
redis.database = 1 redis.database = 1
#maxIdle #maxIdle
redis.maxIdle=20 redis.maxIdle=20
#minIdle #minIdle
...@@ -14,4 +19,4 @@ redis.maxTotal=20 ...@@ -14,4 +19,4 @@ redis.maxTotal=20
#timeout #timeout
redis.timeout=5000 redis.timeout=5000
redis.testOnBorrow=false redis.testOnBorrow=false
redis.testOnReturn=false redis.testOnReturn=false
\ No newline at end of file
/**
* ***************************************************
* Copyright (C), NingBo ZhiWeiReach info. Co., Ltd. *
*****************************************************
* 类的详细说明
*
* @author 东临碣石
* @Date 2016年1月16日
* @version 1.00
*/
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
/**
 * Parent class for Spring-based tests. It carries the runner and context
 * configuration ({@code classpath:applicationContext.xml}) so that subclasses
 * inherit them instead of repeating the annotations.
 *
 * @date 2016-01-16 11:40:14
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = {"classpath:applicationContext.xml"})
public abstract class ObjectTest extends AbstractJUnit4SpringContextTests {
}
package weiboTest;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.config.RedisConfig;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import java.io.IOException;
import java.util.*;
/**
* @author cwt
* @date 2021/5/26 10:35
*/
@Log4j2
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations =
{ "classpath:applicationContext.xml" })
public class WeiboHotSearchTest{
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
@Test
public void test(){
Document document = Jsoup.parse("a href=\"https://m.weibo.cn/search?containerid=231522type%3D1%26t%3D10%26q%3D%23%E9%82%93%E4%BC%A6%E8%AE%B2%E6%88%8F%E4%B8%93%E4%B8%9A%23&extparam=%23%E9%82%93%E4%BC%A6%E8%AE%B2%E6%88%8F%E4%B8%93%E4%B8%9A%23&luicode=10000011&lfid=100103type%3D1%26t%3D10%26q%3D%23%E9%82%93%E4%BC%A6%E8%AE%B2%E6%88%8F%E4%B8%93%E4%B8%9A%23\" data-hide=\"\"><span class=\"surl-text\">#邓伦讲戏专业#</span></a><a href=\"https://m.weibo.cn/search?containerid=231522type%3D1%26t%3D10%26q%3D%23%E6%9E%81%E9%99%90%E6%8C%91%E6%88%98%23&luicode=10000011&lfid=100103type%3D1%26t%3D10%26q%3D%23%E9%82%93%E4%BC%A6%E8%AE%B2%E6%88%8F%E4%B8%93%E4%B8%9A%23\" data-hide=\"\"><span class=\"surl-text\">#极限挑战#</span></a> <a href='/n/邓伦'>@邓伦</a> 和<a href='/n/景甜'>@景甜</a> 改编《甄嬛传》剧本,伦伦认真讲戏的样子让人瞬间穿越到拍摄现场。看来戏瘾上身的邓伦还过了一把导演的瘾,这专业的模样要不要考虑跨界当当导演呀~<span class=\"url-icon\"><img alt=[哈哈] src=\"https://h5.sinaimg.cn/m/emoticon/icon/default/d_haha-0ec05e6dad.png\" style=\"width:1em; height:1em;\" /></span><a data-url=\"http://t.cn/A6VJPN9w\" href=\"https://video.weibo.com/show?fid=1034:4640837901156490\" data-hide=\"\"><span class='url-icon'><img style='width: 1rem;height: 1rem' src='https://h5.sinaimg.cn/upload/2015/09/25/3/timeline_card_small_video_default.png'></span><span class=\"surl-text\">东方卫视极限挑战的微博视频</span></a>");
System.out.println(document.text());
}
@Test
public void testHotWeibo(){
Date date = new Date();
List<HotSearchList> hotSearchLists = weiboHotSearchByPhone(date);
for (HotSearchList hotSearchList : hotSearchLists) {
}
}
/**
* 微博热搜数据更新导语,阅读量,讨论量
* @param document
* @return
*/
public static org.bson.Document weiboUpdate(org.bson.Document document) {
log.info("更新微博热搜{}导语阅读量和讨论量",document.getString("name"));
String url = "https://m.weibo.cn/api/container/getIndex?"+ document.getString("url").substring(
document.getString("url").indexOf("?")+1,document.getString("url").indexOf("&"));
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for(int count =0; count<=5; count++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博热搜详情页面时出现连接失败", e);
}
if (htmlBody != null && htmlBody.contains("data")) {
JSONObject dataJson = JSONObject.parseObject(htmlBody).getJSONObject("data");
JSONObject cardlistInfoJson = dataJson.getJSONObject("cardlistInfo");
List<JSONObject> cardsJsons = (List<JSONObject>) dataJson.get("cards");
//解析cardlistInfo,讨论、导语、阅读
if(cardlistInfoJson.containsKey("desc")){
String topicLead = cardlistInfoJson.getString("desc");
if(!"".equals(topicLead)) {
document.put("topicLead", topicLead);
}
}
if(cardlistInfoJson.containsKey("cardlist_head_cards")){
JSONObject readJson = cardlistInfoJson.getJSONArray("cardlist_head_cards").getJSONObject(0);
if (readJson.containsKey("head_data")) {
String midText = readJson.getJSONObject("head_data").getString("midtext");
String read = midText.replaceAll("阅读", "").replaceAll("讨论.*", "").trim();
String discussCount = midText.replaceAll(".*讨论", "").replaceAll("详情.*", "").trim();
String pictureUrl = readJson.getJSONObject("head_data").getString("portrait_url");
document.put("readCount", TipsUtils.getHotCount(read));
document.put("discussCount", TipsUtils.getHotCount(discussCount));
document.put("pictureUrl",pictureUrl);
if (readJson.getJSONObject("head_data").containsKey("downtext")){
String downtext = readJson.getJSONObject("head_data").getString("downtext");
if(!"".equals(downtext)) {
document.put("downtext",downtext.replaceAll("主持人:",""));
}
}
}
}
//解析cards,获取热门微博、人物
for (JSONObject jsonObject : cardsJsons) {
}
return document;
}
}
return null;
}
public JSONObject analysisWeiboSon(JSONObject readJson){
return null;
}
/**
* @Title: weiboHotSearchByPhoneTest
* @author hero
* @Description: TODO(手机端Iphone 微博热搜采集)
* @return void 返回类型
*/
public static List<HotSearchList> weiboHotSearchByPhone(Date date){
String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
Map<String,String> headerMap = new HashMap<>();
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
for(int count =0; count<=5; count++){
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("解析微博时热搜时出现连接失败",e);
}
List<HotSearchList> result = new ArrayList<HotSearchList>();
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
try {
JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data");
JSONArray cards = json.getJSONArray("cards");
int rank = 0;
// for (int i = 0; i < cards.size(); i++) {
try {
JSONObject card = cards.getJSONObject(0);
JSONArray cardGroup = card.getJSONArray("card_group");
JSONObject topCard =cardGroup.getJSONObject(0);
if(!topCard.containsKey("pic")){
rank = 1;
}
if (Objects.nonNull(cardGroup) && !cardGroup.isEmpty()) {
// String title = card.getString("title");
boolean hot = true;
// if (Objects.nonNull(title) && title.contains("实时上升热点")) {
// hot = false;
// rank = 51;
// }
for (int j = 0; j < cardGroup.size(); j++) {
JSONObject cardInfo = cardGroup.getJSONObject(j);
String name = cardInfo.getString("desc");
long hotCount = cardInfo.getIntValue("desc_extr");
String icon = cardInfo.getString("icon");
if (StringUtils.isNotBlank(icon)) {
icon = icon.split("_")[1].split(".png")[0];
}
// String id = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
String id = cardInfo.getString("scheme");
HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name(), icon, date);
result.add(hotSearch);
rank++;
// redisDao.addDataToSet(RedisConfig.WEIBO_HOTSEARCHIDS,name+"_微博热搜");
}
} else {
log.info("card 数据结构为:{}", card);
}
} catch (Exception e) {
log.error("解析微博时热搜时出现解析错误", e);
continue;
}
// }
return result;
} catch (Exception e) {
log.error("解析微博时热搜时出现解析错误,数据不是json结构", e);
}
} else {
log.info("解析微博时热搜时出现解析错误,页面结构有问题");
}
}
return Collections.emptyList();
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment