Commit b9b6305c by chenweitao

Merge branch 'working' into 'master'

Working

See merge request !79
parents 6f3b50b7 1c04790d
......@@ -113,6 +113,12 @@
<artifactId>jedis</artifactId>
<version>2.8.1</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
</dependencies>
......
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="FacetManager">
<facet type="Spring" name="Spring">
<configuration />
</facet>
</component>
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Maven: org.mongodb:mongo-java-driver:3.12.2" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei:sendmail:0.0.1-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: javax.mail:mail:1.4.7" level="project" />
<orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.tools:zhiwei-tools:0.1.6-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.2.58" level="project" />
<orderEntry type="library" name="Maven: de.ruedigermoeller:fst:2.57" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-core:2.8.8" level="project" />
<orderEntry type="library" name="Maven: org.javassist:javassist:3.21.0-GA" level="project" />
<orderEntry type="library" name="Maven: org.objenesis:objenesis:2.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.8.1" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.crawler:crawler-core:0.6.7.4-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okhttp3:okhttp:3.14.9" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okio:okio:1.17.2" level="project" />
<orderEntry type="library" name="Maven: org.jsoup:jsoup:1.13.1" level="project" />
<orderEntry type="library" name="Maven: cn.wanghaomiao:JsoupXpath:2.3.2" level="project" />
<orderEntry type="library" name="Maven: org.antlr:antlr4-runtime:4.7" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.20" level="project" />
<orderEntry type="library" name="Maven: org.brotli:dec:0.1.2" level="project" />
<orderEntry type="library" name="Maven: com.ibm.icu:icu4j:67.1" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:29.0-jre" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:failureaccess:1.0.1" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava" level="project" />
<orderEntry type="library" name="Maven: com.google.code.findbugs:jsr305:3.0.2" level="project" />
<orderEntry type="library" name="Maven: org.checkerframework:checker-qual:2.11.1" level="project" />
<orderEntry type="library" name="Maven: com.google.errorprone:error_prone_annotations:2.3.4" level="project" />
<orderEntry type="library" name="Maven: com.google.j2objc:j2objc-annotations:1.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-core:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-1.2-api:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.8.0-beta4" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.8.0-beta4" level="project" />
<orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.async:task-boot:0.0.3-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: com.zhiwei.crawler:proxy-client:1.0.5-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: org.apache.dubbo:dubbo:2.7.4.1" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-all:4.1.25.Final" level="project" />
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.8.5" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-recipes:2.12.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-framework:2.12.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.curator:curator-client:2.12.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.8" level="project" />
<orderEntry type="library" name="Maven: jline:jline:0.9.94" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty:3.7.0.Final" level="project" />
<orderEntry type="library" name="Maven: com.kohlschutter.boilerpipe:boilerpipe-extractor:0.0.1-SNAPSHOT" level="project" />
<orderEntry type="library" name="Maven: org.projectlombok:lombok:1.18.8" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-aop:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: aopalliance:aopalliance:1.0" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-beans:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-core:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-test:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-context:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-expression:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-context-support:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-web:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-tx:4.2.2.RELEASE" level="project" />
<orderEntry type="library" name="Maven: redis.clients:jedis:2.8.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-pool2:2.4.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.12" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
</component>
</module>
\ No newline at end of file
......@@ -33,12 +33,12 @@ public class HotSearchCache {
/**
* 最高热搜值
*/
private Integer highestCount;
private Long highestCount;
/**
* 最新热搜热度值
*/
private Integer lastCount;
private Long lastCount;
/**
* 状态(true 为热搜; false为实时上升)
......@@ -83,12 +83,12 @@ public class HotSearchCache {
/**
* 阅读量
*/
private Integer readCount;
private Long readCount;
/**
* 讨论量
*/
private Integer discussCount;
private Long discussCount;
/**
* 话题真假(腾讯较真榜使用)
......@@ -98,7 +98,7 @@ public class HotSearchCache {
/**
* 首次上榜热度
*/
private Integer firstCount;
private Long firstCount;
/** 详情页图片(微博平台) */
private String pictureUrl;
......@@ -113,7 +113,7 @@ public class HotSearchCache {
*/
private String downtext;
public HotSearchCache(String url, String name, String topicLead, Integer highestCount, Integer lastCount, Boolean hot,
public HotSearchCache(String url, String name, String topicLead, Long highestCount, Long lastCount, Boolean hot,
Date startTime, Date endTime, Integer highestRank, Integer lastRank, String type, Integer duration){
this.id = name + "_" + type;
this.url = url;
......@@ -135,21 +135,21 @@ public class HotSearchCache {
public void setRecommend(Boolean recommend) { this.recommend = recommend; }
public Integer getReadCount() { return readCount; }
public Long getReadCount() { return readCount; }
public void setReadCount(Integer readCount) { this.readCount = readCount; }
public void setReadCount(Long readCount) { this.readCount = readCount; }
public Integer getDiscussCount() { return discussCount; }
public Long getDiscussCount() { return discussCount; }
public void setDiscussCount(Integer discussCount) { this.discussCount = discussCount; }
public void setDiscussCount(Long discussCount) { this.discussCount = discussCount; }
public String getTopicLead() { return topicLead; }
public void setTopicLead(String topicLead) { this.topicLead = topicLead; }
public Integer getFirstCount() { return firstCount; }
public Long getFirstCount() { return firstCount; }
public void setFirstCount(Integer firstCount) { this.firstCount = firstCount; }
public void setFirstCount(Long firstCount) { this.firstCount = firstCount; }
public String getPictureUrl() { return pictureUrl; }
......
package com.zhiwei.searchhotcrawler.bean;
/**
* @ClassName: WeiboHotSearch
* @Description: TODO(微博时时热搜)
* @author hero
* @ClassName: WeiboHotSearch
* @Description: TODO(微博时时热搜)
* @author hero
* @date 2017年9月26日 下午5:41:11
*/
......@@ -42,7 +42,7 @@ public class HotSearchList implements Serializable{
/**
* 热搜量
*/
private Integer count;
private Long count;
/**
* 状态(true 为热搜; false为实时上升)
......@@ -77,7 +77,7 @@ public class HotSearchList implements Serializable{
/**
* 话题讨论量或阅读量
*/
private Integer commentCount;
private Long commentCount;
/**
* 话题真假结果(腾讯较真榜使用)
......@@ -87,12 +87,12 @@ public class HotSearchList implements Serializable{
/**
* 观看数(目前仅B站排行榜使用)
*/
private Integer view;
private Long view;
/**
* 弹幕数(目前仅B站排行榜使用)
*/
private Integer barrage;
private Long barrage;
/**
* 图片地址
......@@ -100,9 +100,9 @@ public class HotSearchList implements Serializable{
private String pictureUrl;
public HotSearchList(){}
public HotSearchList(String url, String name, Integer count,Boolean hot,Integer rank,String type,String icon,Date date){
this.id = name + "_" + new Date().getTime() + "_" + type;
public HotSearchList(String url, String name, Long count,Boolean hot,Integer rank,String type,String icon,Date date){
this.id = name + "_" + System.currentTimeMillis() + "_" + type;
this.url = url;
this.name = name;
this.count = count;
......@@ -113,10 +113,10 @@ public class HotSearchList implements Serializable{
this.type = type;
this.icon = icon;
}
public HotSearchList(String url, String name, Integer count,Integer rank,String type,Date date){
this.id = name + "_" + new Date().getTime()+ "_" + type;
public HotSearchList(String url, String name, Long count,Integer rank,String type,Date date){
this.id = name + "_" + System.currentTimeMillis()+ "_" + type;
this.url = url;
this.name = name;
this.count = count;
......@@ -128,8 +128,8 @@ public class HotSearchList implements Serializable{
}
public HotSearchList(String url, String name, Integer count,Integer rank,String type, Integer commentCount, String topicLead,Date date){
this.id = name + "_" + new Date().getTime()+ "_" + type;
public HotSearchList(String url, String name, Long count,Integer rank,String type, Long commentCount, String topicLead,Date date){
this.id = name + "_" + System.currentTimeMillis()+ "_" + type;
this.url = url;
this.name = name;
this.count = count;
......@@ -142,8 +142,8 @@ public class HotSearchList implements Serializable{
this.topicLead = topicLead;
}
public HotSearchList(String url, String name, Integer count, Boolean hot,Integer rank, String type, Date date, String icon, String topicResult){
this.id = name + "_" + new Date().getTime() + "_" + type;
public HotSearchList(String url, String name, Long count, Boolean hot,Integer rank, String type, Date date, String icon, String topicResult){
this.id = name + "_" + System.currentTimeMillis() + "_" + type;
this.url = url;
this.name = name;
this.hot = hot;
......@@ -156,8 +156,8 @@ public class HotSearchList implements Serializable{
this.topicResult = topicResult;
}
public HotSearchList(String url, String name, String topicLead, Integer count, Boolean hot, Date time, Integer rank, String type, Integer view, Integer barrage, String pictureUrl) {
this.id = name + "_" + new Date().getTime()+ "_" + type;
public HotSearchList(String url, String name, String topicLead, Long count, Boolean hot, Date time, Integer rank, String type, Long view, Long barrage, String pictureUrl) {
this.id = name + "_" + System.currentTimeMillis()+ "_" + type;
this.url = url;
this.name = name;
this.topicLead = topicLead;
......
......@@ -3,6 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import java.time.Duration;
import java.util.*;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
......@@ -14,8 +16,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
......@@ -29,7 +29,7 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
public class BaiDuHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
/**
* @Title: BaiDuHotSearchTest
* @author hero
......@@ -40,7 +40,7 @@ public class BaiDuHotSearchCrawler {
String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
try(Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (Exception e) {
log.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
......@@ -52,8 +52,8 @@ public class BaiDuHotSearchCrawler {
}
return Collections.emptyList();
}
/**
* 解析数据
* @param htmlBody
......@@ -96,7 +96,7 @@ public class BaiDuHotSearchCrawler {
else if (!element.select("td.last").select("span.icon-fair").isEmpty()) {
hot = element.select("td.last").select("span.icon-fair").text();
}
int count = 0;
long count = 0;
// 判断hot是否为空
if (StringUtils.isNotBlank(hot)) {
// count is a long: parse the full long range instead of Integer.valueOf,
// which throws NumberFormatException for values above Integer.MAX_VALUE.
count = Long.parseLong(hot);
......@@ -122,4 +122,4 @@ public class BaiDuHotSearchCrawler {
return list;
}
}
\ No newline at end of file
}
......@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
......@@ -52,16 +52,16 @@ public class BililiCrawler {
int rank = i+1;
String name = data.getString("title");
String topicLead = data.getString("desc");
int count = data.getIntValue("score");
long count = data.getLongValue("score");
String bvid = data.getString("bvid");
String pic = data.getString("pic");
String bUrl = "https://www.bilibili.com/video/"+bvid;
Integer view = null;
Integer barrage = null;
Long view = null;
Long barrage = null;
if(data.containsKey("stat")) {
JSONObject stat = data.getJSONObject("stat");
view = stat.getIntValue("view");
barrage = stat.getIntValue("danmaku");
view = stat.getLongValue("view");
barrage = stat.getLongValue("danmaku");
}
HotSearchList hotSearchList = new HotSearchList(bUrl,name,topicLead,count,null,date,rank,HotSearchType.B站排行榜.name(),view,barrage,pic);
hotSearchLists.add(hotSearchList);
......
......@@ -15,8 +15,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
......@@ -30,16 +30,16 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchType;
*/
@Log4j2
public class DouyinHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).build();
public static List<HotSearchList> list = new ArrayList<>();
/**
* @Title: getMobileDouyinHotList
* @author hero
* @Description: 移动端抖音热搜榜
* @param @return 设定文件
* @Title: getMobileDouyinHotList
* @author hero
* @Description: 移动端抖音热搜榜
* @param @return 设定文件
* @return List<ZhihuHotSearch> 返回类型
*/
public static List<HotSearchList> getMobileDouyinHotList(Date date){
......@@ -69,8 +69,8 @@ public class DouyinHotSearchCrawler {
word = wl.getString("word");
//获取热度值
hotValueStr = wl.getString("hot_value");
Integer hotValue = null;
hotValue = Integer.valueOf(hotValueStr);
Long hotValue = null;
hotValue = Long.valueOf(hotValueStr);
// logger.info("热度为:::{}", hot_value);
HotSearchList douyin = new HotSearchList(null, word, hotValue, position, HotSearchType.抖音热搜.name(),date);
list.add(douyin);
......
......@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
......@@ -47,8 +47,8 @@ public class FengHuangSearchCrawler {
String topicLead = jsonArray.getJSONObject(i).getString("title");
String fenghuangUrl = jsonArray.getJSONObject(i).getJSONObject("link").getString("weburl");
String hotValue = jsonArray.getJSONObject(i).getJSONObject("hotLabel").getString("hotGrade");
Integer count = hotValue.length()>0 ? TipsUtils.getHotCount(hotValue) : 0;
Integer commentCount = jsonArray.getJSONObject(i).getIntValue("commentsall");
Long count = hotValue.length()>0 ? TipsUtils.getHotCount(hotValue) : 0;
Long commentCount = jsonArray.getJSONObject(i).getLongValue("commentsall");
HotSearchList hotSearchList = new HotSearchList(fenghuangUrl,name,count,
rank,HotSearchType.凤凰新闻热榜.name(),commentCount,topicLead,date);
list.add(hotSearchList);
......
......@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
......@@ -52,7 +52,7 @@ public class MaiMaiHotSearchCrawler {
icon = jsonObject.getJSONObject("hot_type_card").getString("text");
}
String hotValue = jsonArray.getJSONObject(i).getJSONObject("common").getString("hot_info");
Integer count = hotValue.length() > 0 ? TipsUtils.getHotCount(hotValue) : 0;
Long count = hotValue.length() > 0 ? TipsUtils.getHotCount(hotValue) : 0;
HotSearchList hotSearchList = new HotSearchList(maimaiUrl, name, count, null, rank, HotSearchType.脉脉热榜.name(), icon, date);
list.add(hotSearchList);
}
......
......@@ -17,8 +17,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool;
......@@ -120,7 +120,7 @@ public class SougoHotSearchCrawler {
JSONObject object = jsonArray.getJSONObject(j);
int rank = j+1;
String name = object.getString("name");
Integer count = object.getIntValue("num");
Long count = object.getLongValue("num");
String sougouUrl = "https://m.sogou.com/web/searchList.jsp?s_from=pcsearch&keyword=" + URLCodeUtil.getURLEncode(name, "utf-8");
String icon = object.getIntValue("tag") == 1 ? "热" : null;
HotSearchList hotSearchList = new HotSearchList(sougouUrl,name,count,false,rank,HotSearchType.搜狗微信客户端热搜.name(),icon,date);
......
......@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
......@@ -45,7 +45,7 @@ public class SouhuTopicCrawler {
Integer rank = i+1;
String name = dataJson.getJSONObject(i).getJSONObject("eventNewsInfo").getString("title");
String hotValue = dataJson.getJSONObject(i).getString("value");
Integer count = 0;
Long count;
if(hotValue.contains("观点")) {
count = TipsUtils.getHotCount(hotValue.substring(0, hotValue.indexOf("观点")));
}else{
......
......@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.tools.ZhiWeiTools;
......@@ -50,17 +50,17 @@ public class TengXunCrawler {
String urlID = dataJson.getJSONObject(i).getString("id");
String tengxunUrl = null;
//String tengxunUrl = "https://view.inews.qq.com/topic/" + dataJson.getJSONObject(i).getString("id");
Integer count = 0;
Long count = null;
String icon = null;
if (dataJson.getJSONObject(i).containsKey("topic")) {
tengxunUrl = "https://view.inews.qq.com/topic/" + urlID;
count = dataJson.getJSONObject(i).getJSONObject("topic").getIntValue("ranking_score");
count = dataJson.getJSONObject(i).getJSONObject("topic").getLongValue("ranking_score");
if (dataJson.getJSONObject(i).getJSONObject("topic").containsKey("rec_icon")) {
icon = dataJson.getJSONObject(i).getJSONObject("topic").getString("rec_icon");
}
} else if (dataJson.getJSONObject(i).containsKey("hotEvent")) {
tengxunUrl = "https://view.inews.qq.com/hotEvent/" + urlID;
count = dataJson.getJSONObject(i).getJSONObject("hotEvent").getIntValue("hotScore");
count = dataJson.getJSONObject(i).getJSONObject("hotEvent").getLongValue("hotScore");
if (dataJson.getJSONObject(i).getJSONObject("hotEvent").containsKey("rec_icon")) {
icon = dataJson.getJSONObject(i).getJSONObject("hotEvent").getString("rec_icon");
}
......@@ -107,7 +107,7 @@ public class TengXunCrawler {
JSONObject jsonObject = jsonArray.getJSONObject(i);
Integer rank = jsonObject.getIntValue("index");
String name = jsonObject.getString("title");
Integer count = jsonObject.getIntValue("score");
Long count = jsonObject.getLongValue("score");
String tengxunUrl = jsonObject.getString("link");
String topicResult = jsonObject.getString("result");
HotSearchList hotSearchList = new HotSearchList(tengxunUrl,name,count,false,rank,HotSearchType.腾讯较真榜.name(),date,null,topicResult);
......
......@@ -4,8 +4,8 @@ import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
......@@ -77,7 +77,7 @@ public class ToutiaoHotSearchCrawler {
String name = word.getString("Title");
// String link = "https://ib.snssdk.com/search/?keyword=" + URLCodeUtil.getURLEncode(name, "utf-8") + "&pd=synthesis&source=trending_list&traffic_source=";
String link = word.getString("Url");
Integer hotCount = word.getInteger("HotValue");
Long hotCount = word.getLongValue("HotValue");
String wordsType = word.getString("Label");
String icon = getIcon(wordsType);
......@@ -170,7 +170,7 @@ public class ToutiaoHotSearchCrawler {
if (Objects.nonNull(elements) && !elements.isEmpty()) {
Element element = elements.first();
String readCount = element.text().replaceAll("阅读", "");
Integer count = TipsUtils.getHotCount(readCount);
Long count = TipsUtils.getHotCount(readCount);
log.info("{},阅读量:{}", hotSearchList.getName(), count);
hotSearchList.setCommentCount(count);
hotSearchListDAO.updateTouTiaoReadCount(hotSearchList);
......
......@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
......@@ -51,7 +51,7 @@ public class WangYiHotSearchCrawler {
for (int i = 0; i < jsonObject.size(); i++) {
int rank = i + 1;
String name = jsonObject.getJSONObject(i).getString("title");
int count = jsonObject.getJSONObject(i).getIntValue("hotValue");
long count = jsonObject.getJSONObject(i).getLongValue("hotValue");
String contentId = jsonObject.getJSONObject(i).getString("contentId");
String wangyiUrl = "https://c.m.163.com/news/a/" + contentId + ".html";
HotSearchList hotSearchList = new HotSearchList(wangyiUrl, name, count, rank, HotSearchType.网易热榜.name(),date);
......@@ -90,7 +90,7 @@ public class WangYiHotSearchCrawler {
for (int i = 0; i < jsonObject.size(); i++) {
int rank = i + 1;
String name = jsonObject.getJSONObject(i).getString("doc_title");
int count = jsonObject.getJSONObject(i).getIntValue("hotScore")*10000;
// Multiply in long arithmetic: an int * int product would overflow (wrap)
// before being widened to long for any hotScore above 214748.
long count = jsonObject.getJSONObject(i).getLongValue("hotScore") * 10000L;
String contentId = jsonObject.getJSONObject(i).getString("docId");
String wangyiUrl = "https://c.m.163.com/news/a/" + contentId + ".html";
HotSearchList hotSearchList = new HotSearchList(wangyiUrl, name, count, rank, HotSearchType.网易跟帖热议.name(),date);
......
......@@ -23,8 +23,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.mail.SendMailWeibo;
......@@ -32,9 +32,9 @@ import com.zhiwei.tools.tools.URLCodeUtil;
import org.springframework.beans.factory.annotation.Autowired;
/**
* @ClassName: WeiboHotSearch
* @ClassName: WeiboHotSearch
* @Description: 微博实时热搜采集
* @author hero
* @author hero
* @date 2017年9月15日 上午10:54:31
*/
@Log4j2
......@@ -44,9 +44,9 @@ public class WeiboHotSearchCrawler {
private static RedisDao redisDao = new RedisDao();
/**
* @Title: weiboHotSearchTest
* @author hero
* @Description: TODO(PC端微博热搜采集)
* @Title: weiboHotSearchTest
* @author hero
* @Description: TODO(PC端微博热搜采集)
* @return void 返回类型
*/
// public static List<HotSearchList> weiboHotSearch(){
......@@ -103,14 +103,14 @@ public class WeiboHotSearchCrawler {
// }
// return list;
// }
/**
* @Title: weiboHotSearchByPhoneTest
* @author hero
* @Description: TODO(手机端Iphone 微博热搜采集)
* @Title: weiboHotSearchByPhoneTest
* @author hero
* @Description: TODO(手机端Iphone 微博热搜采集)
* @return void 返回类型
*/
public static List<HotSearchList> weiboHotSearchByPhone(Date date){
......@@ -149,7 +149,7 @@ public class WeiboHotSearchCrawler {
for (int j = 0; j < cardGroup.size(); j++) {
JSONObject cardInfo = cardGroup.getJSONObject(j);
String name = cardInfo.getString("desc");
int hotCount = cardInfo.getIntValue("desc_extr");
long hotCount = cardInfo.getLongValue("desc_extr");
String icon = cardInfo.getString("icon");
if (StringUtils.isNotBlank(icon)) {
icon = icon.split("_")[1].split(".png")[0];
......@@ -205,7 +205,7 @@ public class WeiboHotSearchCrawler {
for(int i=0; i<jsonArray.size(); i++){
JSONObject cardInfo = jsonArray.getJSONObject(i);
String name = cardInfo.getString("desc");
int hotCount = cardInfo.getIntValue("desc_extr");
// Read the heat value as a long so that values beyond the int range are kept intact.
long hotCount = cardInfo.getLongValue("desc_extr");
String weiboUrl = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
HotSearchList hotSearchList = new HotSearchList(weiboUrl,name,hotCount,null,HotSearchType.微博预热榜.name(),date);
result.add(hotSearchList);
......
......@@ -19,31 +19,31 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
/**
*
*
* @ClassName: WeiboSuperTopicCrawler
* @Description: 微博超话榜单采集(明星)
* @author Bewilder ZW
* @author Bewilder ZW
* @date 2019年9月27日 下午3:01:34
*/
@Log4j2
public class WeiboSuperTopicCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static Map<String,String> headMap = new HashMap<>();
static {
headMap.put("X-Requested-With", "XMLHttpRequest");
headMap.put("Referer", "https://huati.weibo.cn/discovery/super?extparam=ctg1_2%7Cscorll_1&luicode=10000011&lfid=100803_-_super&sourceType=weixin");
headMap.put("Host", "huati.weibo.cn");
}
/**
*
*
* 开始采集明星话题
* @return void
*/
......@@ -52,7 +52,7 @@ public class WeiboSuperTopicCrawler {
urlMap.put("明星", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=star&from=&wm=");
urlMap.put("明星潜力", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=potential&from=&wm=");
urlMap.put("明星上升", "https://huati.weibo.cn/aj/discovery/rank?cate_id=2&topic_to_page=&block_time=0&star_type=up&from=&wm=");
List<WeiboSuperTopic> topicList = new ArrayList<>();
for(Entry<String,String> entry : urlMap.entrySet()) {
......@@ -81,10 +81,10 @@ public class WeiboSuperTopicCrawler {
}
return topicList;
}
/**
*
*
* 解析话题榜单
* @param htmlBody
* @param type
......@@ -95,7 +95,7 @@ public class WeiboSuperTopicCrawler {
JSONArray list = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONArray("list");
if(Objects.nonNull(list) && !list.isEmpty()) {
page = (page-1)*20;
List<WeiboSuperTopic> topicList = new ArrayList<>();
Integer toprank = null;
String topicName = null;
......@@ -125,11 +125,11 @@ public class WeiboSuperTopicCrawler {
}
return Collections.emptyList();
}
/**
*
*
* 根据单一话题id获取话题阅读数及发帖数
* @param id
* @param topic
......@@ -159,8 +159,8 @@ public class WeiboSuperTopicCrawler {
}
return topic;
}
}
......@@ -4,8 +4,8 @@ import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
......@@ -169,8 +169,8 @@ public class WeiboTopicCrawler {
String topicName = null;
String url = null;
String description = null;
Integer commentNum = null;
Integer readNum = null;
Long commentNum = null;
Long readNum = null;
String desc2 = null;
for(int i=0; i<cards.size(); i++) {
JSONObject cardGroup = cards.getJSONObject(i);
......
......@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
......@@ -57,7 +57,7 @@ public class XinLangHotSearchCrawler {
String name = jsonArray.getJSONObject(i).getString("text");
Integer rank = i + 1;
String hotValue = jsonArray.getJSONObject(i).getString("hotValue");
Integer count = TipsUtils.getHotCount(hotValue);
Long count = TipsUtils.getHotCount(hotValue);
String showTags;
if (jsonArray.getJSONObject(i).containsKey("card")){
JSONArray cardArray = jsonArray.getJSONObject(i).getJSONArray("card");
......@@ -124,7 +124,7 @@ public class XinLangHotSearchCrawler {
Integer rank = i + 1;
String name = dataJson.getJSONObject(i).getString("title");
String xinlangUrl = dataJson.getJSONObject(i).getString("wapurl");
Integer hot = dataJson.getJSONObject(i).getIntValue("hot_value");
Long hot = dataJson.getJSONObject(i).getLongValue("hot_value");
HotSearchList hotSearchList = new HotSearchList(xinlangUrl, name, hot, rank, HotSearchType.新浪热点.name(),date);
hotSearchLists.add(hotSearchList);
}
......
......@@ -5,8 +5,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
......@@ -52,7 +52,7 @@ public class ZhihuChildHotSearchCrawler {
Integer rank = i + 1;
String name = jsonObject.getJSONObject("title_area").getString("text");
String hotCountString = jsonObject.getJSONObject("metrics_area").getString("text");
Integer count = TipsUtils.getHotCount(hotCountString.substring(0, hotCountString.indexOf("领域热度")));
Long count = TipsUtils.getHotCount(hotCountString.substring(0, hotCountString.indexOf("领域热度")));
String childUrl = jsonObject.getJSONObject("link").getString("url");
HotSearchList hotSearchList = new HotSearchList(childUrl, name, count, rank, HotSearchType.知乎热搜.name() + typeName + "分类",date);
list.add(hotSearchList);
......
......@@ -15,26 +15,26 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.URLCodeUtil;
/**
* @ClassName: ZhihuHotCrawler
* @ClassName: ZhihuHotCrawler
* @Description: 知乎热搜采集程序
* @author hero
* @author hero
* @date 2017年9月15日 上午10:54:31
*/
@Log4j2
public class ZhihuHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
* @Title: getZhihuHotList
* @author hero
* @Title: getZhihuHotList
* @author hero
* @Description: 知乎热搜采集程序
* @return void 返回类型
*/
......@@ -74,14 +74,14 @@ public class ZhihuHotSearchCrawler {
// }
// return list;
// }
/**
* @Title: getMobileZhihuHotList
* @author hero
* @Description: 移動端知乎熱搜榜
* @param @return 设定文件
* @Title: getMobileZhihuHotList
* @author hero
* @Description: 移動端知乎熱搜榜
* @param @return 设定文件
* @return List<ZhihuHotSearch> 返回类型
*/
public static List<HotSearchList> getMobileZhihuHotList(Date date){
......@@ -106,7 +106,7 @@ public class ZhihuHotSearchCrawler {
JSONArray dataJson = topSearch.getJSONArray("data");
String link = null;
String displayQuery = null;
Integer hotCount = null;
Long hotCount = null;
String hotText = null;
for (int i = 0; i < dataJson.size(); i++) {
JSONObject data = dataJson.getJSONObject(i).getJSONObject("target");
......@@ -118,12 +118,12 @@ public class ZhihuHotSearchCrawler {
try {
// Convert the display text (e.g. "12.3万", "1.2亿", "4567") into an absolute count.
// Reset first so that a parse failure (handled by the enclosing catch) leaves
// hotCount empty instead of carrying over a value assigned earlier.
hotCount = null;
if (hotText.contains("万")) {
    // "万" = ten thousand. Exact decimal arithmetic avoids the binary floating-point
    // truncation of (long) (double * 10000), e.g. 1.13 * 10000 -> 11299.
    hotText = hotText.replaceAll("万.*", "").trim();
    hotCount = new java.math.BigDecimal(hotText)
            .multiply(java.math.BigDecimal.valueOf(10_000L))
            .longValue();
} else if (hotText.contains("亿")) {
    // "亿" = one hundred million.
    hotText = hotText.replaceAll("亿.*", "").trim();
    hotCount = new java.math.BigDecimal(hotText)
            .multiply(java.math.BigDecimal.valueOf(100_000_000L))
            .longValue();
} else {
    // Long.getLong(String) reads a JVM system property named by its argument and does
    // not parse the text, so it returned null for every plain number. Parse the digits
    // directly; any fractional part is dropped.
    hotCount = new java.math.BigDecimal(hotText.trim()).longValue();
}
} catch (Exception e) {
e.printStackTrace();
......
......@@ -3,8 +3,8 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.tools.httpclient.HeaderTool;
......
package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
......@@ -15,14 +15,14 @@ import java.util.concurrent.TimeUnit;
public class HotSearchRun {
public static void main(String[] args) {
ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
new UpdateWechatUserRun().start();
ZhiWeiTools.sleep(10000);
// new CacheListener().startListen();
......
package com.zhiwei.searchhotcrawler.test;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.client.MongoDatabase;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.config.DBConfig;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBLocalTemplate;
import com.zhiwei.searchhotcrawler.util.HttpClientUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Log4j2
public class Job51Test {
public static void main(String[] args) {
// ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
// MongoDatabase mongoDBLocal = MongoDBLocalTemplate.getDB(DBConfig.dbName);
List<HotSearchList> list = new ArrayList<>();
String url = "https://search.51job.com/list/080300,000000,0000,00,9,99,java,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=";
Map<String,Object> header = new HashMap<>();
header.put("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9");
header.put("Accept-Encoding","gzip, deflate, br");
header.put("Accept-Language","zh-CN,zh;q=0.9");
header.put("Cache-Control","max-age=0");
header.put("Connection","keep-alive");
header.put("Cookie","guid=1925f996c7ae446cdf1f579f113bff6e; _ujz=MTg3NDg4MTM4MA%3D%3D; ps=needv%3D0; slife=lowbrowser%3Dnot%26%7C%26lastlogindate%3D20210318%26%7C%26securetime%3DBztcaVQzWTsEZlJrWmJdPwQ2Ajw%253D; track=registertype%3D1; 51job=cuid%3D187488138%26%7C%26cusername%3Dphone_15757871020_202103189219%26%7C%26cpassword%3D%26%7C%26cname%3D%25B3%25C2%25EC%25BF%25CC%25CE%26%7C%26cemail%3D15757871020%2540163.com%26%7C%26cemailstatus%3D0%26%7C%26cnickname%3D%26%7C%26ccry%3D.0b4qUteozwmg%26%7C%26cconfirmkey%3D%25241%2524UXfAYBHG%2524Hni.5zaFu5kr7BN.eVcOU%252F%26%7C%26cautologin%3D1%26%7C%26cenglish%3D0%26%7C%26sex%3D0%26%7C%26cnamekey%3D%25241%2524CN04lL8j%2524kCHAFcf4TNh%252F2odmIqujW1%26%7C%26to%3D8019a57bb26817913b5f3c2080ba5792605354bf%26%7C%26; nsearch=jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D; search=jobarea%7E%60080300%7C%21ord_field%7E%600%7C%21recentSearch0%7E%60080300%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FAjava%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch1%7E%60080300%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21");
header.put("Host","search.51job.com");
header.put("Referer","https://search.51job.com/list/080300,000000,0000,00,9,99,%2B,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=");
header.put("sec-ch-ua","\"Google Chrome\";v=\"89\", \"Chromium\";v=\"89\", \";Not A Brand\";v=\"99\"");
header.put("Sec-Fetch-Dest","document");
header.put("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36");
// header.put("","");
JSONObject jsonObject = null;
String htmlBody = null;
Request request = RequestUtils.wrapGet(url,header);
for (int t = 0; t < 1 && jsonObject == null; t++) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
} catch (IOException e) {
log.error("知乎热搜页面连接异常", e);
}
if (htmlBody != null) {
Document document = Jsoup.parse(htmlBody);
log.info("document:{}",document);
log.info("======================");
String html = document.getElementsByClass("j_joblist").first().html();
log.info("html:{}",html);
jsonObject = JSONObject.parseObject(html);
if (jsonObject != null) {
// JSONArray dataJson = jsonObject.getJSONObject("initialState").getJSONObject("topsearch").getJSONArray("data");
// for (int i = 0; i < dataJson.size(); i++) {
// Integer rank = i + 1;
// JSONObject data = dataJson.getJSONObject(i);
// String name = data.getString("queryDisplay");
// String realQuery = data.getString("realQuery");
// String zhihuUrl = "https://www.zhihu.com/search?q=" + realQuery + "&utm_content=search_hot&type=content";
//
// }
}
} else {
log.error("临时爬取出问题");
}
}
}
}
package com.zhiwei.searchhotcrawler.util;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.utils.RequestUtils;
import okhttp3.MediaType;
import okhttp3.Request;
import okhttp3.RequestBody;
......
......@@ -68,16 +68,16 @@ public class TipsUtils {
* @param hotCountString
* @return
*/
public static Integer getHotCount(String hotCountString){
Integer count;
public static Long getHotCount(String hotCountString){
Long count;
if(hotCountString.contains("万")){
hotCountString = hotCountString.replaceAll("万.*", "").trim();
count = (int)(Double.parseDouble(hotCountString)*10000);
count = (long)(Double.parseDouble(hotCountString)*10000);
}else if(hotCountString.contains("亿")){
hotCountString = hotCountString.replaceAll("亿.*", "").trim();
count = (int)(Double.parseDouble(hotCountString)*100000000);
count = (long)(Double.parseDouble(hotCountString)*100000000);
}else{
count = (int)(Double.parseDouble(hotCountString));
count = (long)(Double.parseDouble(hotCountString));
}
return count;
}
......
......@@ -14,7 +14,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.httpclient.HeaderTool;
import okhttp3.MediaType;
......@@ -139,9 +139,9 @@ public class WechatCodeUtil {
}
return null;
}
public static List<String> getUserListByGroupId(Integer groupId) {
try {
String token = getToken();
......@@ -180,7 +180,7 @@ public class WechatCodeUtil {
}
/***
*
*
* @Title: getGroupIp
* @author hero
* @Description: 根据分组名称获取分组id
......@@ -218,7 +218,7 @@ public class WechatCodeUtil {
}
return groupId;
}
/**
* 查询公众号下的所有分组
* @return
......
#redis.host=127.0.0.1
#redis.port=6379
#redis.host=115.236.59.91
#redis.port=7382
#redis.password=
#redis
#redis
#redis.host = 192.168.0.39
#redis.port = 7382
#redis.database = 3
#redis
redis.host = 192.168.0.39
redis.port = 6379
redis.database = 1
#maxIdle
redis.maxIdle=20
#minIdle
......@@ -14,4 +19,4 @@ redis.maxTotal=20
#timeout
redis.timeout=5000
redis.testOnBorrow=false
redis.testOnReturn=false
\ No newline at end of file
redis.testOnReturn=false
/**
* ***************************************************
* Copyright (C), NingBo ZhiWeiReach info. Co., Ltd. *
*****************************************************
* 类的详细说明
*
* @author 东临碣石
* @Date 2016年1月16日
* @version 1.00
*/
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
/**
 * Parent class for Spring-based tests; loads the base configuration file
 * {@code classpath:applicationContext.xml}.
 *
 * @date 2016-01-16 11:40:14
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = {"classpath:applicationContext.xml"})
public abstract class ObjectTest extends AbstractJUnit4SpringContextTests {
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment