Commit 1d1a7503 by leiliangliang

暂停知乎数码,微博超话,微博热词定时任务及头条阅读量更新

parent 2450a48a
...@@ -166,34 +166,29 @@ public class ToutiaoHotSearchCrawler { ...@@ -166,34 +166,29 @@ public class ToutiaoHotSearchCrawler {
String htmlBody = null; String htmlBody = null;
String url = hotSearchList.getUrl(); String url = hotSearchList.getUrl();
Request request = RequestUtils.wrapGet(url); Request request = RequestUtils.wrapGet(url);
for (int i = 0; i <= 5; i++) { Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY);
Response response = httpBoot.syncCall(request, ProxyServerSupplier.NAT_HEAVY_PROXY); if (response.hasCause()) {
if (response.hasCause()) { Throwable cause = response.cause();
Throwable cause = response.cause(); log.error("解析今日头条热搜详情页面出现连接失败", cause);
log.error("解析今日头条热搜详情页面出现连接失败", cause); } else {
} else { htmlBody = response.bodyString();
htmlBody = response.bodyString(); }
} if (StringUtils.isNotBlank(htmlBody)&&htmlBody.contains("data")) {
if (StringUtils.isNotBlank(htmlBody)) { try {
Document document = Jsoup.parse(htmlBody); String substring = htmlBody.substring(htmlBody.indexOf("read_count")+12, htmlBody.indexOf("search_bar_controll"));
Elements elements = document.select(".result-content .cs-view .cs-topone-tail .cs-view .margin-bottom-m .margin-left-m"); String s = substring.split(",")[0];
if (Objects.nonNull(elements) && !elements.isEmpty()) { Long commentCount = Long.valueOf(s);
Element element = elements.first(); hotSearchList.setCommentCount(commentCount);
String readCount = element.text().replaceAll("阅读", ""); hotSearchListDAO.updateTouTiaoReadCount(hotSearchList);
Long count = TipsUtils.getHotCount(readCount); return hotSearchList;
log.info("{},阅读量:{}", hotSearchList.getName(), count); } catch (Exception e) {
hotSearchList.setCommentCount(count); e.printStackTrace();
hotSearchListDAO.updateTouTiaoReadCount(hotSearchList);
return hotSearchList;
}
} }
ZhiWeiTools.sleep(1000L);
} }
} }
return hotSearchList; return hotSearchList;
} }
/** /**
* 热搜类型 * 热搜类型
* *
......
...@@ -4,6 +4,7 @@ package com.zhiwei.searchhotcrawler.run; ...@@ -4,6 +4,7 @@ package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.http.proxy.CynomysFactory; import com.zhiwei.http.proxy.CynomysFactory;
import com.zhiwei.network.cynomys.consumer.CynomysConsumer; import com.zhiwei.network.cynomys.consumer.CynomysConsumer;
import com.zhiwei.network.cynomys.consumer.CynomysConsumerFactory; import com.zhiwei.network.cynomys.consumer.CynomysConsumerFactory;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.*; import com.zhiwei.searchhotcrawler.timer.*;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import org.apache.dubbo.config.ApplicationConfig; import org.apache.dubbo.config.ApplicationConfig;
...@@ -18,21 +19,28 @@ public class HotSearchRun { ...@@ -18,21 +19,28 @@ public class HotSearchRun {
public static void main(String[] args) { public static void main(String[] args) {
ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml"); ApplicationContext context = new ClassPathXmlApplicationContext("applicationContext.xml");
// SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry) // SimpleConfig simpleConfig = SimpleConfig.builder().registry(ProxyConfig.registry)
// .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build(); // .group(ProxyConfig.group).appId(10000013).appName("hotsearch").build();
// ProxyFactory.init(simpleConfig); // ProxyFactory.init(simpleConfig);
ApplicationConfig applicationConfig = new ApplicationConfig(); ApplicationConfig applicationConfig = new ApplicationConfig();
applicationConfig.setName("xxx-project"); applicationConfig.setName("hot_search-project");
RegistryConfig registryConfig = new RegistryConfig(); RegistryConfig registryConfig = new RegistryConfig();
registryConfig.setAddress("zookeeper://192.168.0.30:2181?timeout=30000");
ConsumerConfig consumerConfig = new ConsumerConfig(); ConsumerConfig consumerConfig = new ConsumerConfig();
// 设置分组 String username = null;
consumerConfig.setGroup("test"); String password = null;
String username = "your cool username"; if (ProxyConfig.isLocal) {
String password = "your cool password"; registryConfig.setAddress(ProxyConfig.localRegistry);
// 设置分组
consumerConfig.setGroup(ProxyConfig.localGroup);
username = ProxyConfig.localUsername;
password = ProxyConfig.localPassword;
} else {
registryConfig.setAddress(ProxyConfig.hangzhouRegistry);
// 设置分组
consumerConfig.setGroup(ProxyConfig.hangzhouGroup);
username = ProxyConfig.hangzhouUsername;
password = ProxyConfig.hangzhouPassword;
}
// 创建 consumer,applicationConfig 非必需参数 // 创建 consumer,applicationConfig 非必需参数
CynomysConsumer consumer = CynomysConsumerFactory.create(applicationConfig, registryConfig, consumerConfig, username, password); CynomysConsumer consumer = CynomysConsumerFactory.create(applicationConfig, registryConfig, consumerConfig, username, password);
......
...@@ -113,8 +113,13 @@ public class GatherTimer { ...@@ -113,8 +113,13 @@ public class GatherTimer {
log.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), toutiaoList != null ? toutiaoList.size() : 0); log.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), toutiaoList != null ? toutiaoList.size() : 0);
TipsUtils.addHotList(HotSearchType.今日头条热搜.name(),toutiaoList); TipsUtils.addHotList(HotSearchType.今日头条热搜.name(),toutiaoList);
log.info("今日头条热搜采集结束..."); log.info("今日头条热搜采集结束...");
log.info("今日头条热搜详情趋势阅读量更新..."); //暂停今日头条阅读量更新
TouTiaoExecutor.countTouTiaoReadCount(toutiaoList); // log.info("今日头条热搜详情趋势阅读量更新开始...");
// //TouTiaoExecutor.countTouTiaoReadCount(toutiaoList);
// for (HotSearchList hotSearchList : toutiaoList) {
// ToutiaoHotSearchCrawler.toutiaoReadCount(hotSearchList);
// }
// log.info("今日头条热搜详情趋势阅读量更新结束...");
} }
/** /**
...@@ -362,7 +367,7 @@ public class GatherTimer { ...@@ -362,7 +367,7 @@ public class GatherTimer {
* 知乎热搜数码分类采集 * 知乎热搜数码分类采集
*/ */
@Async(value = "myScheduler") @Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ") //@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuDigital(){ public void crawlerZhiHuDigital(){
this.crawlerZhiHuChild(DIGITAL); this.crawlerZhiHuChild(DIGITAL);
} }
...@@ -428,7 +433,7 @@ public class GatherTimer { ...@@ -428,7 +433,7 @@ public class GatherTimer {
* 微博超话的采集 * 微博超话的采集
*/ */
@Async(value = "myScheduler") @Async(value = "myScheduler")
@Scheduled(cron = "0 0 0/3 * * ? ") //@Scheduled(cron = "0 0 0/3 * * ? ")
public void crawlerWeiBoSuperTopic(){ public void crawlerWeiBoSuperTopic(){
log.info("微博超话采集开始........"); log.info("微博超话采集开始........");
Date date = DateUtils.getMillSecondTime(new Date()); Date date = DateUtils.getMillSecondTime(new Date());
...@@ -675,7 +680,7 @@ public class GatherTimer { ...@@ -675,7 +680,7 @@ public class GatherTimer {
*微博热词采集 *微博热词采集
*/ */
@Async(value = "myScheduler") @Async(value = "myScheduler")
@Scheduled(cron = "0 0 0/1 * * ? ") //@Scheduled(cron = "0 0 0/1 * * ? ")
public void WeiBoSearchHotWordsCrawler(){ public void WeiBoSearchHotWordsCrawler(){
log.info("微博热词采集开始........"); log.info("微博热词采集开始........");
Date date = DateUtils.getMillSecondTime(new Date()); Date date = DateUtils.getMillSecondTime(new Date());
......
isLocal = true isLocal = false
hangzhou.registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182&timeout=60000 hangzhou.registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182&timeout=60000
hangzhou.group=hangzhou hangzhou.group=hangzhou
...@@ -7,5 +7,7 @@ hangzhou.password=gRG9QJ6QghuLcCC9 ...@@ -7,5 +7,7 @@ hangzhou.password=gRG9QJ6QghuLcCC9
######################################################## ########################################################
local.registry=zookeeper://192.168.0.35:2181?backup=192.168.0.30:2181,192.168.0.11:2181&timeout=60000 local.registry=zookeeper://192.168.0.35:2181?backup=192.168.0.30:2181,192.168.0.11:2181&timeout=60000
local.group=local local.group=local
#local.username=15139460980
#local.password=lllq2w3e4r
local.username=15757871020 local.username=15757871020
local.password=Cwt1q2w3e4r@ local.password=Cwt1q2w3e4r@
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment