Commit ea714ae2 by chenweitao

Merge branch 'working' into 'master'

Working

See merge request !210
parents a8cc6e0b 3b65725a
......@@ -15,6 +15,11 @@
<log4j.version>2.15.0</log4j.version>
<commons-lang3.version>3.12.0</commons-lang3.version>
<http-boot.version>0.1.0.8-SNAPSHOT</http-boot.version>
<cynomys-consumer.version>0.0.5-SNAPSHOT</cynomys-consumer.version>
<proxy-client.version>2.0.1-SNAPSHOT</proxy-client.version>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
......@@ -56,7 +61,7 @@
<dependency>
<groupId>com.zhiwei.http</groupId>
<artifactId>http-boot</artifactId>
<version>0.0.8.2-SNAPSHOT</version>
<version>${http-boot.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
......@@ -67,8 +72,15 @@
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>proxy-client</artifactId>
<version>1.1.5-SNAPSHOT</version>
<version>${proxy-client.version}</version>
</dependency>
<dependency>
<groupId>com.zhiwei.network</groupId>
<artifactId>cynomys-consumer</artifactId>
<version>${cynomys-consumer.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.conscrypt/conscrypt-openjdk-uber -->
<dependency>
<groupId>org.conscrypt</groupId>
......
......@@ -12,15 +12,34 @@ public class ProxyConfig {
conf = new Properties();
conf.load(is);
is.close();
registry = conf.getProperty("registry");
group = conf.getProperty("group");
localRegistry = conf.getProperty("local.registry");
localGroup = conf.getProperty("local.group");
localUsername = conf.getProperty("local.username");
localPassword = conf.getProperty("local.password");
hangzhouRegistry = conf.getProperty("hangzhou.registry");
hangzhouGroup = conf.getProperty("hangzhou.group");
hangzhouUsername = conf.getProperty("hangzhou.username");
hangzhouPassword = conf.getProperty("hangzhou.password");
isLocal =Boolean.parseBoolean(conf.getProperty("isLocal"));
} catch (Exception e) {
e.printStackTrace();
}
}
// Proxy settings exposed as static fields. Each value is assigned from the loaded
// Properties object (`conf`); if loading throws, the exception is only printed and
// any field not yet assigned keeps its default (null / false).

// Parsed with Boolean.parseBoolean from the "isLocal" property; false when the
// property is missing or is anything other than "true" (case-insensitive).
public static boolean isLocal;
// Value of the "local.registry" property.
public static String localRegistry;
// Value of the "local.group" property.
public static String localGroup;
// Value of the "local.username" property.
public static String localUsername;
// Value of the "local.password" property.
public static String localPassword;
// Value of the "registry" property.
public static String registry;
// Value of the "group" property.
public static String group;
// Value of the "hangzhou.registry" property.
public static String hangzhouRegistry;
// Value of the "hangzhou.group" property.
public static String hangzhouGroup;
// Value of the "hangzhou.username" property.
public static String hangzhouUsername;
// Value of the "hangzhou.password" property.
public static String hangzhouPassword;
}
......@@ -6,6 +6,7 @@ import java.util.*;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import lombok.extern.log4j.Log4j2;
......@@ -40,7 +41,7 @@ public class BaiDuHotSearchCrawler {
String url = "http://top.baidu.com/buzz?b=1&fr=topindex";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析百度风云榜时出现解析错误,页面结构有问题", cause);
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -53,7 +54,7 @@ public class BiliComprehensiveHotCrawler {
for (int i = 0; i < urlList.size(); i++) {
Request request = RequestUtils.wrapGet(urlList.get(i));
//发送请求每次获取20条数据
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error(fmt.format(date)+":第"+i+1+"次请求解析B站综合热门时出现连接失败", cause);
......
......@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -43,7 +44,7 @@ public class BililiCrawler {
String url = "https://api.bilibili.com/x/web-interface/ranking/v2?rid=0&type=all";
Request request = RequestUtils.wrapGet(url);
for(int t=0; t<3 && dataJson==null; t++){
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("B站排行榜页面连接失败",cause.fillInStackTrace());
......@@ -133,7 +134,7 @@ public class BililiCrawler {
Request request = RequestUtils.wrapGet(url);
try {
System.setProperty("https.protocols", "TLSv1,TLSv1.1,TLSv1.2,SSLv3");
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
String htmlBody = response.bodyString();
if (htmlBody != null && htmlBody.contains("v-wrap")) {
Document document = Jsoup.parse(htmlBody);
......@@ -181,7 +182,7 @@ public class BililiCrawler {
String url = "https://app.biliapi.com/x/v2/search/square?build=616050&limit=10";
Request request = RequestUtils.wrapGet(url);
for(int t=0; t<3 && dataJson==null; t++){
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("B站热搜页面连接失败",cause.fillInStackTrace());
......
......@@ -5,6 +5,7 @@ import java.util.*;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import lombok.extern.log4j.Log4j2;
......@@ -46,7 +47,7 @@ public class DouyinHotSearchCrawler {
String url = "https://api.amemv.com/aweme/v1/hot/search/list/";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.debug("获取抖音热搜榜时出现问题:{}", cause);
......@@ -90,7 +91,7 @@ public class DouyinHotSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int x = 0; x < 3; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.debug("获取抖音热搜榜链接时出现问题:{}", cause);
......@@ -125,7 +126,7 @@ public class DouyinHotSearchCrawler {
String url = "https://api5-normal-c-lq.amemv.com/aweme/v1/hot/search/list/?board_type=2&board_sub_type=2&version_code=140900";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.debug("获取抖音娱乐榜榜时出现问题:{}", cause);
......
......@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -34,7 +35,7 @@ public class FengHuangSearchCrawler {
String url = "https://nine.ifeng.com/hotspotlist?gv=7.9.1&page="+page;
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("凤凰新闻热榜页面连接异常...", cause);
......@@ -74,7 +75,7 @@ public class FengHuangSearchCrawler {
String url = "https://shankapi.ifeng.com/autumn/sogouSearchHotword";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("凤凰新闻热搜页面连接异常...", cause);
......
......@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -46,7 +47,7 @@ public class HotSearch36KrCrawler {
headerMap.put("sec-fetch-dest","empty");
String htmlBody = null;
Request request = RequestUtils.wrapGet(url,headerMap);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析36Kr人气榜时出现解析错误,页面结构有问题", cause);
......
......@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -42,7 +43,7 @@ public class HuXiuHotSearchCrawler {
headerMap.put("sec-ch-ua"," Not A;Brand\";v=\"99\", \"Chromium\";v=\"101\", \"Microsoft Edge\";v=\"101");
String htmlBody = null;
Request request = RequestUtils.wrapGet(url,headerMap);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析虎嗅热文推荐时出现解析错误,页面结构有问题", cause);
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -36,7 +37,7 @@ public class KuaiShouHotSearchCrawler {
String url = "https://video.kuaishou.com/?utm_source=aa&utm_medium=05&utm_campaign=aa_05_pp_yr&plan_id=138090084&unit_id=5205658029&creative_id=43661481717&keyword_id=202928529242&keyword=202928529242&bd_vid=11937382025080724791";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析快手热榜时出现解析错误,页面结构有问题", cause);
......
......@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -35,7 +36,7 @@ public class MaiMaiHotSearchCrawler {
String url = "https://open.taou.com/maimai/feed/v6/hot_posts_list?tab=profession&count=15&version=5.3.34&u=232258287&access_token=1.4c82e8ad6d6b4e03262a48f334dea336";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("脉脉热榜页面连接异常...", cause);
......
......@@ -6,6 +6,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.util.HeaderTool;
......@@ -46,7 +47,7 @@ public class SougoHotSearchCrawler {
Request request = RequestUtils.wrapGet(url);
for (int i = 0; i < 3; i++) {
String htmlBody = null;
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析搜狗微信时出现解析错误,页面结构有问题", cause);
......@@ -90,7 +91,7 @@ public class SougoHotSearchCrawler {
Request request = RequestUtils.wrapGet(url, headMap);
for (int i = 0; i < 3; i++) {
String htmlBody = null;
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析搜狗微信时出现解析错误,页面结构有问题", cause);
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -31,7 +32,7 @@ public class SouhuTopicCrawler {
String url = "https://api.k.sohu.com/api/news/moment/v2/list.go?pageSize=50&v=6.4.4";
Request request = RequestUtils.wrapGet(url);
for(int t=0; t<3 && dataJson==null; t++){
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("搜狐话题页面连接失败",cause.fillInStackTrace());
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -35,7 +36,7 @@ public class TaoBaoHotSearchCrawler {
String urls = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=" + time + "&sign=&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
Request request1 = RequestUtils.wrapGet(urls);
String token = null;
Response response = httpBoot.syncCall(request1, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request1, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", cause);
......@@ -54,7 +55,7 @@ public class TaoBaoHotSearchCrawler {
String sign = MD5Util.getMD5(signs).toLowerCase();
String url = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=" + time + "&sign=" + sign + "&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
Request request = RequestUtils.wrapGet(url, headerMap);
Response response1 = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response1 = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response1.hasCause()){
Throwable cause = response1.cause();
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", cause);
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -35,7 +36,7 @@ public class TengXunCrawler {
Request request = RequestUtils.wrapGet(url);
//采集为空最多重试3次
for (int t = 0; t < 3 && dataJson == null; t++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
response.cause().printStackTrace();
......@@ -99,7 +100,7 @@ public class TengXunCrawler {
Request request = RequestUtils.wrapGet(url);
//采集为空最多重试3次
for (int t = 0; t < 3; t++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
response.cause().printStackTrace();
......
......@@ -6,6 +6,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -49,7 +50,7 @@ public class ToutiaoHotSearchCrawler {
String jsUrl = "https://s3.pstatp.com/toutiao/feoffline/hot_list/resource/hot_list/js/index.45f50250.chunk.js";
Request jsRequest = RequestUtils.wrapGet(jsUrl);
String jsBody = null;
Response response = httpBoot.syncCall(jsRequest, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(jsRequest, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("获取今日头条实时热搜头部信息标识失败", cause);
......@@ -65,7 +66,7 @@ public class ToutiaoHotSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 5; count++) {
Response response1 = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response1 = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response1.hasCause()) {
Throwable cause = response1.cause();
log.error("解析今日头条实时热搜时出现连接失败", cause);
......@@ -165,34 +166,29 @@ public class ToutiaoHotSearchCrawler {
String htmlBody = null;
String url = hotSearchList.getUrl();
Request request = RequestUtils.wrapGet(url);
for (int i = 0; i <= 5; i++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("解析今日头条热搜详情页面出现连接失败", cause);
} else {
htmlBody = response.bodyString();
}
if (StringUtils.isNotBlank(htmlBody)) {
Document document = Jsoup.parse(htmlBody);
Elements elements = document.select(".result-content .cs-view .cs-topone-tail .cs-view .margin-bottom-m .margin-left-m");
if (Objects.nonNull(elements) && !elements.isEmpty()) {
Element element = elements.first();
String readCount = element.text().replaceAll("阅读", "");
Long count = TipsUtils.getHotCount(readCount);
log.info("{},阅读量:{}", hotSearchList.getName(), count);
hotSearchList.setCommentCount(count);
if (StringUtils.isNotBlank(htmlBody)&&htmlBody.contains("data")) {
try {
String substring = htmlBody.substring(htmlBody.indexOf("read_count")+12, htmlBody.indexOf("search_bar_controll"));
String s = substring.split(",")[0];
Long commentCount = Long.valueOf(s);
hotSearchList.setCommentCount(commentCount);
hotSearchListDAO.updateTouTiaoReadCount(hotSearchList);
return hotSearchList;
} catch (Exception e) {
e.printStackTrace();
}
}
ZhiWeiTools.sleep(1000L);
}
}
return hotSearchList;
}
/**
* 热搜类型
*
......@@ -231,7 +227,7 @@ public class ToutiaoHotSearchCrawler {
headerMap.put("User-Agent", "com.ss.android.article.news/8770 (Linux; U; Android 9; zh_CN; Redmi 8; Build/PKQ1.190319.001; Cronet/TTNetVersion:a867b489 2022-03-11 QuicVersion:b314d107 2021-11-24) Accept-Encoding: gzip, deflate, br");
Request request = RequestUtils.wrapGet(url, headerMap);
String htmlBody = null;
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("获取今日头条榜单出错", cause);
......
......@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -41,7 +42,7 @@ public class WangYiHotSearchCrawler {
Request request = RequestUtils.wrapGet(url);
String htmlBody = null;
for(int t=0 ;t<3; t++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("网易新闻实时热榜页面连接异常...",cause);
......@@ -84,7 +85,7 @@ public class WangYiHotSearchCrawler {
Request request = RequestUtils.wrapGet(url);
String htmlBody = null;
for(int t=0 ;t<3; t++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("网易新闻跟贴热议页面连接异常...",cause);
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -42,7 +43,7 @@ public class WeiBoBrandCrawler {
Request request = RequestUtils.wrapGet(url);
//重试两次
for (int x = 0; x < 2; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("解析微博品牌总榜时出现连接失败", cause);
......@@ -76,7 +77,7 @@ public class WeiBoBrandCrawler {
Request request = RequestUtils.wrapGet(url);
//重试两次
for (int x = 0; x < 2; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("解析微博品牌汽车榜时出现连接失败", cause);
......@@ -110,7 +111,7 @@ public class WeiBoBrandCrawler {
Request request = RequestUtils.wrapGet(url);
//重试两次
for (int x = 0; x < 2; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("解析微博品牌手机榜时出现连接失败", cause);
......@@ -144,7 +145,7 @@ public class WeiBoBrandCrawler {
Request request = RequestUtils.wrapGet(url);
//重试两次
for (int x = 0; x < 2; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("解析微博品牌美妆榜时出现连接失败", cause);
......@@ -178,7 +179,7 @@ public class WeiBoBrandCrawler {
Request request = RequestUtils.wrapGet(url);
//重试两次
for (int x = 0; x < 2; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("解析微博品牌奢侈品榜时出现连接失败", cause);
......@@ -212,7 +213,7 @@ public class WeiBoBrandCrawler {
Request request = RequestUtils.wrapGet(url);
//重试两次
for (int x = 0; x < 2; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("解析微博品牌食品饮料榜时出现连接失败", cause);
......@@ -246,7 +247,7 @@ public class WeiBoBrandCrawler {
Request request = RequestUtils.wrapGet(url);
//重试两次
for (int x = 0; x < 2; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("解析微博品牌家电榜时出现连接失败", cause);
......@@ -280,7 +281,7 @@ public class WeiBoBrandCrawler {
Request request = RequestUtils.wrapGet(url);
//重试两次
for (int x = 0; x < 2; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("解析微博品牌服装鞋帽榜时出现连接失败", cause);
......@@ -314,7 +315,7 @@ public class WeiBoBrandCrawler {
Request request = RequestUtils.wrapGet(url);
//重试两次
for (int x = 0; x < 2; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("解析微博品牌服装鞋帽榜时出现连接失败", cause);
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.WeiBoSearchBoxHotWords;
......@@ -36,7 +37,7 @@ public class WeiBoSearchBoxHotWordsCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 5; count++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博搜索框热词时出现解析错误,页面结构有问题",cause);
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.WeiBoSearchBoxHotWords;
......@@ -35,7 +36,7 @@ public class WeiBoSearchHotWordsCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 5; count++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -44,7 +45,7 @@ public class WeiShiHotSearchCrawler {
headerMap.put("Host","api.weishi.qq.com");
Request request = RequestUtils.wrapPost(url,headerMap,RequestBody.create(MediaType.get("application/json"),"{\"req_body\":{\"hotRankID\":\"\",\"attachInfo\":\"\",\"hotRankType\":1,\"sourceID\":\"WSSearchH5\"}}"));
for (int count = 0; count <=3; count++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微视热榜时出现连接失败", cause);
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -37,7 +38,7 @@ public class WeiboEntertainmentCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 5; count++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博娱乐榜时出现连接失败",cause);
......
......@@ -12,6 +12,7 @@ import java.util.stream.Collectors;
import com.alibaba.fastjson.JSON;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.*;
......@@ -67,7 +68,7 @@ public class WeiboHotSearchCrawler {
for (int i = 0; i < 3; i++) {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
if (i == 2) {
return list;
......@@ -263,7 +264,7 @@ public class WeiboHotSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
for (int count = 0; count <= 5; count++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博时热搜时出现连接失败",cause);
......@@ -361,7 +362,7 @@ public class WeiboHotSearchCrawler {
String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博热搜时出现连接失败",cause);
......@@ -419,7 +420,7 @@ public class WeiboHotSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 5; count++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博热搜详情页面时出现连接失败",cause);
......@@ -517,7 +518,7 @@ public class WeiboHotSearchCrawler {
//该cookie有效期一年,微博pc端获取游客cookie链接 https://s.weibo.com/top/summary?cate=realtimehot
headerMap.put("Cookie", "SUB=_2AkMUShJMf8NxqwJRmP0RyWvgb4RwwgnEieKiFuOXJRMxHRl-yT92qlQvtRB6P8o8oso9Ew-s6vf16fdCca-Xz6DwwAMH; SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFdAobr6HdAbgQQ9vbUQKDx");
Request request = RequestUtils.wrapGet(url,headerMap);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博时热搜时出现连接失败",cause);
......
......@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -48,7 +49,7 @@ public class WeiboNewsCrawler {
for (int count = 0; count <= 5; count++) {
List<HotSearchList> result = new ArrayList();
//发送第一次请求获取前20条数据
Response response = httpBoot.syncCall(request1, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request1, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("第一次请求解析微博要闻榜时出现连接失败", cause);
......@@ -70,7 +71,7 @@ public class WeiboNewsCrawler {
continue;
}
//发送第二次请求获取中间20条数据
Response response1 = httpBoot.syncCall(request2, ProxySupplier.NAT_HEAVY_PROXY);
Response response1 = httpBoot.syncCall(request2, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response1.hasCause()){
Throwable cause = response1.cause();
log.error("第二次请求解析微博要闻榜时出现连接失败",cause);
......@@ -93,7 +94,7 @@ public class WeiboNewsCrawler {
continue;
}
//发送第三次请求获取最后10条数据
Response response2 = httpBoot.syncCall(request3, ProxySupplier.NAT_HEAVY_PROXY);
Response response2 = httpBoot.syncCall(request3, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response2.hasCause()){
Throwable cause = response2.cause();
log.error("第三次请求解析微博要闻榜时出现连接失败",cause);
......
......@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.crawler;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -42,7 +43,7 @@ public class WeiboOutCircleCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url,headerMap);
for (int x = 0; x <= 2; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博出圈榜时出现连接失败", cause);
......
......@@ -10,6 +10,7 @@ import java.util.Objects;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.WeiboSuperTopic;
......@@ -64,7 +65,7 @@ public class WeiboSuperTopicCrawler {
String htmlBody = null;
//重试三次
for(int retryTimes = 1; retryTimes<=3; retryTimes++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("获取榜单列表页面时出现错误,错误为:{}",cause);
......@@ -140,7 +141,7 @@ public class WeiboSuperTopicCrawler {
String url = "https://m.weibo.cn/api/container/getIndex?containerid="+ id;
Request request = RequestUtils.wrapGet(url);
String htmlBody = null;
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析榜单详情页面时出现错误,错误为:{}",cause);
......
......@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -139,7 +140,7 @@ public class WeiboTopicCrawler {
String htmlBody = null;
//重试三次
for(int retryTimes = 1; retryTimes<=5; retryTimes++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("下载榜单列表页面时出现错误,错误为:{}", cause);
......@@ -228,7 +229,7 @@ public class WeiboTopicCrawler {
String htmlBody = null;
//重试三次
for(int retryTimes = 1; retryTimes<=3; retryTimes++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("下载榜单列表页面时出现错误,错误为:{}", cause);
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -41,7 +42,7 @@ public class WeiboVideoCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int x = 0; x <= 2; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博视频榜时出现连接失败", cause);
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -39,7 +40,7 @@ public class XinLangHotSearchCrawler {
String htmlBody = null;
JSONObject jsonObject = null;
for(int t=0 ;t<3&&jsonObject==null; t++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("新浪热榜页面连接异常...",cause);
......@@ -115,7 +116,7 @@ public class XinLangHotSearchCrawler {
String htmlBody = null;
JSONArray dataJson = null;
for(int t=0 ;t<3&&dataJson==null; t++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("新浪热点页面连接异常...",cause);
......
......@@ -7,6 +7,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -41,7 +42,7 @@ public class ZhihuChildHotSearchCrawler {
Request request = RequestUtils.wrapGet(url, headerMap);
//采集为空最多重试3次
for (int t = 0; t < 3 && dataJson == null; t++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
response.cause().printStackTrace();
}else {
......
......@@ -5,6 +5,7 @@ import java.util.*;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import io.netty.handler.ssl.SslProvider;
......@@ -99,7 +100,7 @@ public class ZhihuHotSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
for (int x = 0; x <= 5; x++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.debug("获取知乎热搜时出现问题:{}", cause);
......@@ -170,7 +171,7 @@ public class ZhihuHotSearchCrawler {
Map.put("cookie", "_xsrf=7NFWM5qBcOutfs8MaW7bhQQH65t3Xia4");
Request request = RequestUtils.wrapGet(url, Map);
try {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()) {
Throwable cause = response.cause();
log.error("单条知乎热搜数据页面连接失败", cause);
......
......@@ -4,6 +4,7 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -25,7 +26,7 @@ public class ZhihuTopicSearchCrawler {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int t = 0; t < 3 && jsonObject == null; t++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("知乎热搜页面连接异常",cause);
......
package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.http.proxy.CynomysFactory;
import com.zhiwei.network.cynomys.consumer.CynomysConsumer;
import com.zhiwei.network.cynomys.consumer.CynomysConsumerFactory;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.*;
import com.zhiwei.tools.tools.ZhiWeiTools;
import org.apache.dubbo.config.ApplicationConfig;
import org.apache.dubbo.config.ConsumerConfig;
import org.apache.dubbo.config.RegistryConfig;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
......@@ -15,9 +19,34 @@ public class HotSearchRun {
public static void main(String[] args) {
ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
ApplicationConfig applicationConfig = new ApplicationConfig();
applicationConfig.setName("hot_search-project");
RegistryConfig registryConfig = new RegistryConfig();
ConsumerConfig consumerConfig = new ConsumerConfig();
String username = null;
String password = null;
if (ProxyConfig.isLocal) {
registryConfig.setAddress(ProxyConfig.localRegistry);
// 设置分组
consumerConfig.setGroup(ProxyConfig.localGroup);
username = ProxyConfig.localUsername;
password = ProxyConfig.localPassword;
} else {
registryConfig.setAddress(ProxyConfig.hangzhouRegistry);
// 设置分组
consumerConfig.setGroup(ProxyConfig.hangzhouGroup);
username = ProxyConfig.hangzhouUsername;
password = ProxyConfig.hangzhouPassword;
}
// 创建 consumer,applicationConfig 非必需参数
CynomysConsumer consumer = CynomysConsumerFactory.create(applicationConfig, registryConfig, consumerConfig, username, password);
// 初始化 http-boot 桥接
CynomysFactory.init(consumer);
new UpdateWechatUserRun().start();
ZhiWeiTools.sleep(10000);
......
......@@ -3,6 +3,7 @@ package com.zhiwei.searchhotcrawler.test;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -40,7 +41,7 @@ public class HotSearch36KrCrawlerTest {
String url = "https://www.36kr.com/hot-list/catalog";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析36Kr人气榜时出现解析错误,页面结构有问题", cause);
......
......@@ -2,7 +2,6 @@ package com.zhiwei.searchhotcrawler.test;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun;
......@@ -13,9 +12,9 @@ import java.text.ParseException;
public class HotSearchRunTest {
public static void main(String[] args) throws ParseException {
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
//微博热搜开始采集
// new WeiboHotSearchRun().start();
......
......@@ -2,6 +2,7 @@ package com.zhiwei.searchhotcrawler.test;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -39,7 +40,7 @@ public class HuXiuHotSearchCrawlerTest {
String url = "https://www.huxiu.com/";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析虎嗅热文推荐时出现解析错误,页面结构有问题", cause);
......
......@@ -7,7 +7,7 @@ import com.mongodb.client.MongoDatabase;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
......@@ -36,9 +36,9 @@ public class Job51Test {
public static void main(String[] args) {
// ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
HttpBoot httpBoot = HttpBoot.newBuilder().retryTimes(3).build();
// MongoDatabase mongoDBLocal = MongoDBLocalTemplate.getDB(DBConfig.dbName);
......@@ -64,7 +64,7 @@ public class Job51Test {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url,header);
for (int t = 0; t < 1 && jsonObject == null; t++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("知乎热搜页面连接异常", cause);
......
......@@ -6,6 +6,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -37,7 +38,7 @@ public class KuaiShouHotSearchCrawlerTest {
String url = "https://video.kuaishou.com/?utm_source=aa&utm_medium=05&utm_campaign=aa_05_pp_yr&plan_id=138090084&unit_id=5205658029&creative_id=43661481717&keyword_id=202928529242&keyword=202928529242&bd_vid=11937382025080724791";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析快手热榜时出现解析错误,页面结构有问题", cause);
......
......@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -37,7 +38,7 @@ public class TaoBaoHotSearchCrawlerTest {
String urls = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=" + time + "&sign=&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
Request request1 = RequestUtils.wrapGet(urls);
String token = null;
Response response1 = httpBoot.syncCall(request1, ProxySupplier.NAT_HEAVY_PROXY);
Response response1 = httpBoot.syncCall(request1, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response1.hasCause()){
Throwable cause = response1.cause();
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", cause);
......@@ -56,7 +57,7 @@ public class TaoBaoHotSearchCrawlerTest {
String sign = MD5Util.getMD5(signs).toLowerCase();
String url = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=" + time + "&sign=" + sign + "&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
Request request = RequestUtils.wrapGet(url, headerMap);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", cause);
......
package com.zhiwei.searchhotcrawler.test;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun;
......@@ -13,9 +12,9 @@ import java.text.ParseException;
public class TaoBaoRunTest {
public static void main(String[] args) throws ParseException {
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
//微博热搜开始采集
// new WeiboHotSearchRun().start();
......
......@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......@@ -41,7 +42,7 @@ public class WeiboEntertainmentCrawlerTest {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 5; count++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博娱乐榜时出现连接失败", cause);
......
......@@ -113,8 +113,13 @@ public class GatherTimer {
log.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), toutiaoList != null ? toutiaoList.size() : 0);
TipsUtils.addHotList(HotSearchType.今日头条热搜.name(),toutiaoList);
log.info("今日头条热搜采集结束...");
log.info("今日头条热搜详情趋势阅读量更新...");
TouTiaoExecutor.countTouTiaoReadCount(toutiaoList);
//暂停今日头条阅读量更新
// log.info("今日头条热搜详情趋势阅读量更新开始...");
// //TouTiaoExecutor.countTouTiaoReadCount(toutiaoList);
// for (HotSearchList hotSearchList : toutiaoList) {
// ToutiaoHotSearchCrawler.toutiaoReadCount(hotSearchList);
// }
// log.info("今日头条热搜详情趋势阅读量更新结束...");
}
/**
......@@ -362,7 +367,7 @@ public class GatherTimer {
* 知乎热搜数码分类采集
*/
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
//@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuDigital() {
    // Zhihu hot-search "digital" category: hand off to the shared child-category crawl.
    crawlerZhiHuChild(DIGITAL);
}
......@@ -428,7 +433,7 @@ public class GatherTimer {
* 微博超话的采集
*/
@Async(value = "myScheduler")
@Scheduled(cron = "0 0 0/3 * * ? ")
//@Scheduled(cron = "0 0 0/3 * * ? ")
public void crawlerWeiBoSuperTopic(){
log.info("微博超话采集开始........");
Date date = DateUtils.getMillSecondTime(new Date());
......@@ -675,7 +680,7 @@ public class GatherTimer {
*微博热词采集
*/
@Async(value = "myScheduler")
@Scheduled(cron = "0 0 0/1 * * ? ")
//@Scheduled(cron = "0 0 0/1 * * ? ")
public void WeiBoSearchHotWordsCrawler(){
log.info("微博热词采集开始........");
Date date = DateUtils.getMillSecondTime(new Date());
......
registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182&timeout=60000
group=hangzhou
isLocal = false
hangzhou.registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182&timeout=60000
hangzhou.group=hangzhou
hangzhou.username=hot-search
hangzhou.password=gRG9QJ6QghuLcCC9
########################################################
#registry=zookeeper://192.168.0.35:2181?backup=192.168.0.30:2181,192.168.0.11:2181&timeout=60000
#group=local
local.registry=zookeeper://192.168.0.35:2181?backup=192.168.0.30:2181,192.168.0.11:2181&timeout=60000
local.group=local
#local.username=15139460980
#local.password=lllq2w3e4r
local.username=15757871020
local.password=Cwt1q2w3e4r@
package InfoTest;
import com.mongodb.client.MongoCollection;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
......@@ -36,9 +36,9 @@ public class InfoTest {
@Test
public void testMaimai(){
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
List<HotSearchList> hotSearchLists = MaiMaiHotSearchCrawler.getMaiMaiHotData(new Date());
......
package baiduTest;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
......@@ -62,9 +62,9 @@ public class BaiduTest {
@Test
public void test(){
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
List<HotSearchList> hotSearchLists = baiduHotSearch(new Date());
......@@ -122,7 +122,7 @@ public class BaiduTest {
// headers.put("Content-type","text/html; charset=gb2312");
// Request request = RequestUtils.wrapGet(url, HeadersUtils.convertRepeatably(headers, Charset.forName("gb2312")));
Request request = RequestUtils.wrapGet(url);
// try(Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY)) {
// try(Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY)) {
// htmlBody = response.body().string();
// } catch (Exception e) {
// log.error("解析百度风云榜时出现解析错误,页面结构有问题", e);
......
......@@ -5,7 +5,7 @@ import com.mongodb.client.MongoCollection;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
......@@ -51,9 +51,9 @@ public class HotSearchTest {
@Test
public void kuaiShouTestCrawler() {
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
List<HotSearchList> hotSearchLists = KuaiShouHotSearchCrawlerTest.KuaiShouHotSearchCrawler(new Date());
System.out.println(hotSearchLists);
......@@ -64,15 +64,15 @@ public class HotSearchTest {
@Test
public void WeiBoUpdate() {
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
Document document = new Document();
//String url = "https://m.weibo.cn/api/container/getIndex?containerid=100103type%3D1%26t%3D10%26q%3D%23我国新冠疫苗接种剂次超9亿%23";
String url = "https://m.weibo.cn/api/container/getIndex?containerid=231522type%3D1%26q%3D%23可口可乐回应C罗拒绝与可乐同框%23";
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博热搜详情页面时出现连接失败", cause);
......@@ -135,9 +135,9 @@ public class HotSearchTest {
@Test
public void taoBaoTestCrawler() {
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
List<HotSearchList> hotSearchLists = TaoBaoHotSearchCrawlerTest.taoBaoHotSearch(new Date());
......@@ -152,9 +152,9 @@ public class HotSearchTest {
@Test
public void baiDuTestCrawler() {
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
List<HotSearchList> hotSearchLists = BaiDuHotSearchCrawler.baiduHotSearch(new Date());
System.out.println(hotSearchLists);
......
package proxy;
import com.zhiwei.http.proxy.CynomysFactory;
import com.zhiwei.network.cynomys.consumer.CynomysConsumer;
import com.zhiwei.network.cynomys.consumer.CynomysConsumerFactory;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.crawler.HotSearch36KrCrawler;
import lombok.extern.log4j.Log4j2;
import org.apache.dubbo.config.ApplicationConfig;
import org.apache.dubbo.config.ConsumerConfig;
import org.apache.dubbo.config.RegistryConfig;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import java.util.Date;
import java.util.List;
/**
 * Manual checks for the Cynomys proxy bridge: one test wires up the consumer and runs a
 * single 36Kr crawl through it, the other prints values loaded by {@link ProxyConfig}.
 *
 * @author cwt
 * @date 2022/11/8 10:22
 */
@Log4j2
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations =
{"classpath:applicationContext.xml"})
public class ProxyTest {

    /** How long {@link #initTest()} pauses between bridge initialisation and the crawl. */
    private static final long POST_INIT_PAUSE_MILLIS = 1000L;

    /**
     * Builds a {@link CynomysConsumer} from the {@link ProxyConfig} values selected by
     * {@code ProxyConfig.isLocal}, initialises the http-boot bridge with it, pauses briefly,
     * then runs the 36Kr hot-list crawl and prints every returned entry.
     */
    @Test
    public void initTest() {
        ApplicationConfig applicationConfig = new ApplicationConfig();
        applicationConfig.setName("hot_search-project");
        RegistryConfig registryConfig = new RegistryConfig();
        ConsumerConfig consumerConfig = new ConsumerConfig();

        // Both branches assign these, so no placeholder null initialiser is needed.
        final String username;
        final String password;
        if (ProxyConfig.isLocal) {
            registryConfig.setAddress(ProxyConfig.localRegistry);
            // Set the consumer group.
            consumerConfig.setGroup(ProxyConfig.localGroup);
            username = ProxyConfig.localUsername;
            password = ProxyConfig.localPassword;
        } else {
            registryConfig.setAddress(ProxyConfig.hangzhouRegistry);
            // Set the consumer group.
            consumerConfig.setGroup(ProxyConfig.hangzhouGroup);
            username = ProxyConfig.hangzhouUsername;
            password = ProxyConfig.hangzhouPassword;
        }

        // Create the consumer; applicationConfig is an optional argument.
        CynomysConsumer consumer = CynomysConsumerFactory.create(
                applicationConfig, registryConfig, consumerConfig, username, password);
        // Initialise the http-boot bridge.
        CynomysFactory.init(consumer);
        log.info("桥接初始化完成");

        // NOTE(review): presumably gives the freshly initialised bridge time to settle before
        // the first request — confirm whether this pause is actually required.
        try {
            Thread.sleep(POST_INIT_PAUSE_MILLIS);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the test runner can still observe the interruption,
            // and report through the logger instead of printing to stderr.
            Thread.currentThread().interrupt();
            log.warn("Interrupted while pausing after bridge initialisation", e);
        }

        List<HotSearchList> hotSearchLists = HotSearch36KrCrawler.hotSearch36Kr(new Date());
        hotSearchLists.forEach(System.out::println);
    }

    /** Prints two loaded {@link ProxyConfig} values so the properties file can be eyeballed. */
    @Test
    public void configTest() {
        System.out.println(ProxyConfig.isLocal);
        System.out.println(ProxyConfig.hangzhouGroup);
    }
}
......@@ -6,7 +6,8 @@ import com.alibaba.fastjson.JSONObject;
import com.zhiwei.http.boot.HttpBoot;
import com.zhiwei.http.boot.Response;
import com.zhiwei.http.proxy.ProxyFactory;
import com.zhiwei.http.proxy.ProxyServerSupplier;
import com.zhiwei.http.proxy.ProxySupplier;
import com.zhiwei.http.util.RequestUtils;
import com.zhiwei.proxy.config.SimpleConfig;
......@@ -69,9 +70,9 @@ public class WeiboHotSearchTest {
@Test
public void testHotWeibo() {
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
while(true) {
try {
......@@ -110,9 +111,9 @@ public class WeiboHotSearchTest {
//org.bson.Document document
// @Test
public void test12(org.bson.Document document) {
SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
.group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
ProxyFactory.init(simpleConfig);
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig);
// org.bson.Document document = new org.bson.Document();
// document.put("name","新疆人讲述真实的新疆");
// document.put("url","https://m.weibo.cn/search?containerid=100103type%3D1%26t%3D10%26q%3D%23%E6%96%B0%E7%96%86%E4%BA%BA%E8%AE%B2%E8%BF%B0%E7%9C%9F%E5%AE%9E%E7%9A%84%E6%96%B0%E7%96%86%23&isnewpage=1&extparam=seat%3D1%26filter_type%3Drealtimehot%26dgr%3D0%26cate%3D0%26pos%3D1%26realpos%3D2%26flag%3D1%26c_type%3D31%26display_time%3D1622705918&luicode=10000011&lfid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot");
......@@ -123,7 +124,7 @@ public class WeiboHotSearchTest {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 2; count++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博热搜详情页面时出现连接失败", cause);
......@@ -216,7 +217,7 @@ public class WeiboHotSearchTest {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url);
for (int count = 0; count <= 2; count++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博热搜详情页面时出现连接失败", cause);
......@@ -532,7 +533,7 @@ public class WeiboHotSearchTest {
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
for (int count = 0; count <= 5; count++) {
Response response = httpBoot.syncCall(request, ProxySupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
if (response.hasCause()){
Throwable cause = response.cause();
log.error("解析微博时热搜时出现连接失败", cause);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment