Commit 96ffc323 by chenweitao

Revert "Merge branch 'working' of D:\IdeaProjects\searchhotcrawler with conflicts."

This reverts commit f986b5c8
parent 37d43810
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" version="4"> <module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="FacetManager"> <component name="FacetManager">
<facet type="Spring" name="Spring"> <facet type="Spring" name="Spring">
<configuration /> <configuration />
...@@ -10,8 +10,8 @@ ...@@ -10,8 +10,8 @@
<output-test url="file://$MODULE_DIR$/target/test-classes" /> <output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$"> <content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" /> <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" /> <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" /> <excludeFolder url="file://$MODULE_DIR$/target" />
</content> </content>
<orderEntry type="inheritedJdk" /> <orderEntry type="inheritedJdk" />
...@@ -77,8 +77,5 @@ ...@@ -77,8 +77,5 @@
<orderEntry type="library" name="Maven: org.apache.commons:commons-pool2:2.4.2" level="project" /> <orderEntry type="library" name="Maven: org.apache.commons:commons-pool2:2.4.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.12" level="project" /> <orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.12" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" /> <orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5.6" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.10" level="project" />
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.10" level="project" />
</component> </component>
</module> </module>
\ No newline at end of file
package com.zhiwei.searchhotcrawler.bean; package com.zhiwei.searchhotcrawler.bean;
/**
 * Identifies the ranking board a hot-search record was collected from.
 *
 * <p>Crawlers in this project pass {@code name()} of a constant into the {@code HotSearchList}
 * constructor, so renaming a constant changes the label those records carry.
 */
public enum HotSearchType {
    // Baidu hot search
    百度热搜,
    // Weibo hot search
    微博热搜,
    // Zhihu hot search
    知乎热搜,
    // Douyin hot search
    抖音热搜,
    // Sogou WeChat hot search
    搜狗微信热搜,
    // Sogou WeChat client hot search
    搜狗微信客户端热搜,
    // Weibo topics
    微博话题,
    // Toutiao hot search
    今日头条热搜,
    // Zhihu hot-search board
    知乎热搜榜单,
    // Tencent News
    腾讯新闻,
    // Sina hot list
    新浪热榜,
    // Sina hot spots
    新浪热点,
    // Sohu topics
    搜狐话题,
    // ifeng News hot list
    凤凰新闻热榜,
    // ifeng News hot search
    凤凰新闻热搜,
    // NetEase hot list
    网易热榜,
    // NetEase hot comment threads
    网易跟帖热议,
    // Weibo pre-heat (rising) list
    微博预热榜,
    // Tencent fact-check list
    腾讯较真榜,
    // Maimai hot list
    脉脉热榜,
    // Bilibili ranking
    B站排行榜,
    // Bilibili hot search
    B站热搜,
    // presumably the 36Kr popularity list -- TODO confirm
    人气榜36,
    // Huxiu recommended hot articles
    虎嗅热文推荐,
    // Kuaishou hot list
    快手热榜,
    // Taobao hot search
    淘宝热搜,
}
...@@ -53,11 +53,6 @@ public class WeiBoUser implements Serializable { ...@@ -53,11 +53,6 @@ public class WeiBoUser implements Serializable {
* 头像地址 * 头像地址
*/ */
private String profileImageUrl; private String profileImageUrl;
/**
* 类型
*/
private String type;
public WeiBoUser() { public WeiBoUser() {
} }
......
...@@ -14,6 +14,8 @@ import org.jsoup.Jsoup; ...@@ -14,6 +14,8 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.searchhotcrawler.bean.HotSearchList; import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......
package com.zhiwei.searchhotcrawler.crawler; package com.zhiwei.searchhotcrawler.crawler;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.ParseException; import java.text.ParseException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
import com.mongodb.client.result.UpdateResult;
import com.zhiwei.searchhotcrawler.bean.*; import com.zhiwei.searchhotcrawler.bean.*;
import com.zhiwei.searchhotcrawler.config.RedisConfig; import com.zhiwei.searchhotcrawler.config.RedisConfig;
import com.zhiwei.searchhotcrawler.dao.RedisDao; import com.zhiwei.searchhotcrawler.dao.RedisDao;
...@@ -20,12 +17,6 @@ import lombok.extern.log4j.Log4j2; ...@@ -20,12 +17,6 @@ import lombok.extern.log4j.Log4j2;
import okhttp3.Request; import okhttp3.Request;
import okhttp3.Response; import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.bson.Document; import org.bson.Document;
import org.checkerframework.checker.units.qual.C; import org.checkerframework.checker.units.qual.C;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
...@@ -46,26 +37,26 @@ import org.springframework.beans.factory.annotation.Autowired; ...@@ -46,26 +37,26 @@ import org.springframework.beans.factory.annotation.Autowired;
import static java.util.Objects.nonNull; import static java.util.Objects.nonNull;
/** /**
* @author hero
* @ClassName: WeiboHotSearch * @ClassName: WeiboHotSearch
* @Description: 微博实时热搜采集 * @Description: 微博实时热搜采集
* @author hero
* @date 2017年9月15日 上午10:54:31 * @date 2017年9月15日 上午10:54:31
*/ */
@Log4j2 @Log4j2
public class WeiboHotSearchCrawler { public class WeiboHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static RedisDao redisDao = new RedisDao(); private static RedisDao redisDao = new RedisDao();
static WeiBoUserDao weiBoUserDao = new WeiBoUserDao(); static WeiBoUserDao weiBoUserDao = new WeiBoUserDao();
static WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao(); static WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao();
/** /**
* @Title: weiboHotSearchTest * @Title: weiboHotSearchTest
* @author hero * @author hero
* @Description: TODO(PC端微博热搜采集) * @Description: TODO(PC端微博热搜采集)
* @return void 返回类型 * @return void 返回类型
*/ */
// public static List<HotSearchList> weiboHotSearch(){ // public static List<HotSearchList> weiboHotSearch(){
// String url = "https://s.weibo.com/top/summary?cate=realtimehot"; // String url = "https://s.weibo.com/top/summary?cate=realtimehot";
// //
...@@ -122,528 +113,444 @@ public class WeiboHotSearchCrawler { ...@@ -122,528 +113,444 @@ public class WeiboHotSearchCrawler {
// } // }
/**
* @return void 返回类型
* @Title: weiboHotSearchByPhoneTest /**
* @author hero * @Title: weiboHotSearchByPhoneTest
* @Description: TODO(手机端Iphone 微博热搜采集) * @author hero
*/ * @Description: TODO(手机端Iphone 微博热搜采集)
public static List<HotSearchList> weiboHotSearchByPhone(Date date) { * @return void 返回类型
String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"; */
Map<String, String> headerMap = new HashMap<>(); public static List<HotSearchList> weiboHotSearchByPhone(Date date){
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"); String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
String htmlBody = null; Map<String,String> headerMap = new HashMap<>();
Request request = RequestUtils.wrapGet(url, headerMap); headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
for (int count = 0; count <= 5; count++) { String htmlBody = null;
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) { Request request = RequestUtils.wrapGet(url, headerMap);
htmlBody = response.body().string(); for(int count =0; count<=5; count++){
} catch (IOException e) { try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
log.error("解析微博时热搜时出现连接失败", e); htmlBody = response.body().string();
} } catch (IOException e) {
List<HotSearchList> result = new ArrayList<HotSearchList>(); log.error("解析微博时热搜时出现连接失败",e);
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) { }
try { List<HotSearchList> result = new ArrayList<HotSearchList>();
JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data"); if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
JSONArray cards = json.getJSONArray("cards"); try {
int rank = 0; JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data");
JSONArray cards = json.getJSONArray("cards");
int rank = 0;
// for (int i = 0; i < cards.size(); i++) { // for (int i = 0; i < cards.size(); i++) {
try { try {
JSONObject card = cards.getJSONObject(0); JSONObject card = cards.getJSONObject(0);
JSONArray cardGroup = card.getJSONArray("card_group"); JSONArray cardGroup = card.getJSONArray("card_group");
JSONObject topCard = cardGroup.getJSONObject(0); JSONObject topCard =cardGroup.getJSONObject(0);
if (!topCard.containsKey("pic")) { if(!topCard.containsKey("pic")){
rank = 1; rank = 1;
} }
if (Objects.nonNull(cardGroup) && !cardGroup.isEmpty()) { if (Objects.nonNull(cardGroup) && !cardGroup.isEmpty()) {
// String title = card.getString("title"); // String title = card.getString("title");
boolean hot = true; boolean hot = true;
// if (Objects.nonNull(title) && title.contains("实时上升热点")) { // if (Objects.nonNull(title) && title.contains("实时上升热点")) {
// hot = false; // hot = false;
// rank = 51; // rank = 51;
// } // }
for (int j = 0; j < cardGroup.size(); j++) { for (int j = 0; j < cardGroup.size(); j++) {
JSONObject cardInfo = cardGroup.getJSONObject(j); JSONObject cardInfo = cardGroup.getJSONObject(j);
String name = cardInfo.getString("desc"); String name = cardInfo.getString("desc");
long hotCount = cardInfo.getLongValue("desc_extr"); long hotCount = cardInfo.getLongValue("desc_extr");
String icon = cardInfo.getString("icon"); String icon = cardInfo.getString("icon");
if (StringUtils.isNotBlank(icon)) { if (StringUtils.isNotBlank(icon)) {
icon = icon.split("_")[1].split(".png")[0]; icon = icon.split("_")[1].split(".png")[0];
} }
String rankPic = cardInfo.getString("pic");
// String id = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top"; // String id = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
String id = cardInfo.getString("scheme"); String urlScheme = cardInfo.getString("scheme");
HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name(), icon, date); HotSearchList hotSearch = new HotSearchList(urlScheme, name, hotCount, hot, rank, HotSearchType.微博热搜.name(), icon, date);
result.add(hotSearch); hotSearch.setRankPic(rankPic);
rank++; result.add(hotSearch);
redisDao.addDataToSet(RedisConfig.WEIBO_HOTSEARCHIDS, name + "_微博热搜"); rank++;
} redisDao.addDataToSet(RedisConfig.WEIBO_HOTSEARCHIDS,name+"_微博热搜");
} else { }
log.info("card 数据结构为:{}", card); } else {
} log.info("card 数据结构为:{}", card);
} catch (Exception e) { }
log.error("解析微博时热搜时出现解析错误", e); } catch (Exception e) {
continue; log.error("解析微博时热搜时出现解析错误", e);
} continue;
}
// } // }
return result; return result;
} catch (Exception e) { } catch (Exception e) {
log.error("解析微博时热搜时出现解析错误,数据不是json结构", e); log.error("解析微博时热搜时出现解析错误,数据不是json结构", e);
} }
} else { } else {
log.info("解析微博时热搜时出现解析错误,页面结构有问题"); log.info("解析微博时热搜时出现解析错误,页面结构有问题");
} }
} }
return Collections.emptyList(); return Collections.emptyList();
} }
/** /**
* 微博预热榜(实时上升热点采集) * 微博预热榜(实时上升热点采集)
* * @param date
* @param date * @return
* @return */
*/ public static List<HotSearchList> weiboPreheatSearch(Date date){
public static List<HotSearchList> weiboPreheatSearch(Date date) { String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot"; String htmlBody = null;
String htmlBody = null; Request request = RequestUtils.wrapGet(url);
Request request = RequestUtils.wrapGet(url); try(Response response = httpBoot.syncCall(request,ProxyHolder.NAT_HEAVY_PROXY)) {
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) { htmlBody = response.body().string();
htmlBody = response.body().string(); } catch (IOException e) {
} catch (IOException e) { log.error("解析微博热搜时出现连接失败",e);
log.error("解析微博热搜时出现连接失败", e); }
} List<HotSearchList> result = new ArrayList<>();
List<HotSearchList> result = new ArrayList<>(); if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")){
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) { JSONArray cardArray = JSON.parseObject(htmlBody).getJSONArray("cards");
JSONArray cardArray = JSON.parseObject(htmlBody).getJSONArray("cards"); if(cardArray.size() > 1) {
if (cardArray.size() > 1) { JSONObject jsonObject = cardArray.getJSONObject(1);
JSONObject jsonObject = cardArray.getJSONObject(1); if ("实时上升热点".equals(jsonObject.getString("title")) &&
if ("实时上升热点".equals(jsonObject.getString("title")) && jsonObject.containsKey("card_group")) {
jsonObject.containsKey("card_group")) { JSONArray jsonArray = jsonObject.getJSONArray("card_group");
JSONArray jsonArray = jsonObject.getJSONArray("card_group"); for(int i=0; i<jsonArray.size(); i++){
for (int i = 0; i < jsonArray.size(); i++) { JSONObject cardInfo = jsonArray.getJSONObject(i);
JSONObject cardInfo = jsonArray.getJSONObject(i); String name = cardInfo.getString("desc");
String name = cardInfo.getString("desc"); long hotCount = cardInfo.getIntValue("desc_extr");
long hotCount = cardInfo.getIntValue("desc_extr"); String weiboUrl = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
String weiboUrl = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top"; HotSearchList hotSearchList = new HotSearchList(weiboUrl,name,hotCount,null,HotSearchType.微博预热榜.name(),date);
HotSearchList hotSearchList = new HotSearchList(weiboUrl, name, hotCount, null, HotSearchType.微博预热榜.name(), date); result.add(hotSearchList);
result.add(hotSearchList); }
} //根据热度排序,赋值排名
//根据热度排序,赋值排名 result = result.stream().sorted(Comparator.comparing(HotSearchList::getCount).reversed()).collect(Collectors.toList());
result = result.stream().sorted(Comparator.comparing(HotSearchList::getCount).reversed()).collect(Collectors.toList()); int rank =1;
int rank = 1; for(HotSearchList hotSearchList : result){
for (HotSearchList hotSearchList : result) { hotSearchList.setRank(rank);
hotSearchList.setRank(rank); rank++;
rank++; }
} }
} }
} }
} return result;
return result; }
}
/**
/** * 微博热搜数据更新导语,阅读量,讨论量
* 微博热搜数据更新导语,阅读量,讨论量 * @param document
* * @return
* @param document */
* @return public static Document weiboUpdate(Document document) {
*/ log.info("更新微博热搜{}导语阅读量和讨论量",document.getString("name"));
public static Document weiboUpdate(Document document) { String url = "https://m.weibo.cn/api/container/getIndex?"+ document.getString("url").substring(
log.info("更新微博热搜{}导语阅读量和讨论量", document.getString("name")); document.getString("url").indexOf("?")+1,document.getString("url").indexOf("&"));
String url = "https://m.weibo.cn/api/container/getIndex?" + document.getString("url").substring( String htmlBody = null;
document.getString("url").indexOf("?") + 1, document.getString("url").indexOf("&")); Request request = RequestUtils.wrapGet(url);
String htmlBody = null; for(int count =0; count<=5; count++) {
Request request = RequestUtils.wrapGet(url); try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
for (int count = 0; count <= 5; count++) { htmlBody = response.body().string();
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) { } catch (IOException e) {
htmlBody = response.body().string(); log.error("解析微博热搜详情页面时出现连接失败", e);
} catch (IOException e) { }
log.error("解析微博热搜详情页面时出现连接失败", e); if (htmlBody != null && htmlBody.contains("data")) {
} JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONObject("cardlistInfo");
if (htmlBody != null && htmlBody.contains("data")) { List<JSONObject> cardsJsons = (List<JSONObject>)JSONObject.parseObject(htmlBody).getJSONObject("data").get("cards");
JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data").getJSONObject("cardlistInfo"); if(json.containsKey("desc")){
List<JSONObject> cardsJsons = (List<JSONObject>) JSONObject.parseObject(htmlBody).getJSONObject("data").get("cards"); String topicLead = json.getString("desc");
if (json.containsKey("desc")) { if(!"".equals(topicLead)) {
String topicLead = json.getString("desc"); document.put("topicLead", topicLead);
if (!"".equals(topicLead)) { }
document.put("topicLead", topicLead); }
} if(json.containsKey("cardlist_head_cards")){
} JSONObject readJson = json.getJSONArray("cardlist_head_cards").getJSONObject(0);
if (json.containsKey("cardlist_head_cards")) { if (readJson.containsKey("head_data")) {
JSONObject readJson = json.getJSONArray("cardlist_head_cards").getJSONObject(0); String midText = readJson.getJSONObject("head_data").getString("midtext");
if (readJson.containsKey("head_data")) { String read = midText.replaceAll("阅读", "").replaceAll("讨论.*", "").trim();
String midText = readJson.getJSONObject("head_data").getString("midtext"); String discussCount = midText.replaceAll(".*讨论", "").replaceAll("详情.*", "").trim();
String read = midText.replaceAll("阅读", "").replaceAll("讨论.*", "").trim(); String pictureUrl = readJson.getJSONObject("head_data").getString("portrait_url");
String discussCount = midText.replaceAll(".*讨论", "").replaceAll("详情.*", "").trim(); document.put("readCount", TipsUtils.getHotCount(read));
String pictureUrl = readJson.getJSONObject("head_data").getString("portrait_url"); document.put("discussCount", TipsUtils.getHotCount(discussCount));
document.put("readCount", TipsUtils.getHotCount(read)); document.put("pictureUrl",pictureUrl);
document.put("discussCount", TipsUtils.getHotCount(discussCount)); if (readJson.getJSONObject("head_data").containsKey("downtext")){
document.put("pictureUrl", pictureUrl); String downtext = readJson.getJSONObject("head_data").getString("downtext");
if (readJson.getJSONObject("head_data").containsKey("downtext")) { if(!"".equals(downtext)) {
String downtext = readJson.getJSONObject("head_data").getString("downtext"); document.put("downtext",downtext.replaceAll("主持人:",""));
if (!"".equals(downtext)) { }
document.put("downtext", downtext.replaceAll("主持人:", "")); }
} }
} }
}
} try {
//解析cards,获取热门微博、人物
try { if (Objects.isNull(weiBoMassageDao)){
//解析cards,获取热门微博、人物 weiBoMassageDao = new WeiBoMassageDao();
if (Objects.isNull(weiBoMassageDao)) { }
weiBoMassageDao = new WeiBoMassageDao(); if (Objects.isNull(weiBoUserDao)){
} weiBoUserDao = new WeiBoUserDao();
if (Objects.isNull(weiBoUserDao)) { }
weiBoUserDao = new WeiBoUserDao(); for (JSONObject jsonObject : cardsJsons) {
} if (nonNull(jsonObject) && !jsonObject.isEmpty()) {
for (JSONObject jsonObject : cardsJsons) { if (jsonObject.containsKey("mblog")) {
if (nonNull(jsonObject) && !jsonObject.isEmpty()) { if (jsonObject.getJSONObject("mblog").containsKey("title")) {
if (jsonObject.containsKey("mblog")) { WeiBoMassage weiBoMassage = analysisWeiboMBlog(jsonObject, document.getString("name"));
if (jsonObject.getJSONObject("mblog").containsKey("title")) { if (Objects.nonNull(weiBoMassage)) {
WeiBoMassage weiBoMassage = analysisWeiboMBlog(jsonObject, document.getString("name")); weiBoMassageDao.addWeiBoMassage(weiBoMassage);
if (Objects.nonNull(weiBoMassage)) { }
weiBoMassageDao.addWeiBoMassage(weiBoMassage); }
} } else if (jsonObject.containsKey("card_group")) {
} JSONArray cardGroup = jsonObject.getJSONArray("card_group");
} else if (jsonObject.containsKey("card_group")) { WeiBoMassage weiBoMassage = analysisWeiboMassage(cardGroup, document.getString("name"));
JSONArray cardGroup = jsonObject.getJSONArray("card_group"); if (Objects.nonNull(weiBoMassage)) {
WeiBoMassage weiBoMassage = analysisWeiboMassage(cardGroup, document.getString("name")); weiBoMassageDao.addWeiBoMassage(weiBoMassage);
if (Objects.nonNull(weiBoMassage)) { }
weiBoMassageDao.addWeiBoMassage(weiBoMassage); List<WeiBoUser> weiBoUserList = analysisWeiBoUsers(cardGroup, document.getString("name"));
} if (!weiBoUserList.isEmpty()){
List<WeiBoUser> weiBoUserList = analysisWeiBoUsers(cardGroup, document.getString("name")); for (int i = 0; i < weiBoUserList.size(); i++) {
if (!weiBoUserList.isEmpty()) { weiBoUserDao.addWeiBoUser(weiBoUserList.get(i));
for (int i = 0; i < weiBoUserList.size(); i++) { }
weiBoUserDao.addWeiBoUser(weiBoUserList.get(i)); }
} }
} }
} }
} } catch (Exception e) {
} log.error("解析cards失败,未获得热门微博,人物信息",e);
} catch (Exception e) { }
log.error("解析cards失败,未获得热门微博,人物信息", e); return document;
} }
return document; }
} return null;
} }
return null;
} /**
* 解析微博信息
/** *
* 微博热搜数据更新话题贡献者排行,阅读量,讨论量,关于 * @param cardGroup
* @param document * @param topic
* @return * @return
*/ */
public static Document weiboUpdatePC(Document document) {
document.getString("name"); public static WeiBoMassage analysisWeiboMassage(JSONArray cardGroup, String topic) {
String name = document.getString("name"); for (int i = 0; i < cardGroup.size(); i++) {
String gb = "#" + name + "#"; if (cardGroup.getJSONObject(i).containsKey("mblog")) {
String encode =null; if (cardGroup.getJSONObject(i).getJSONObject("mblog").containsKey("title")) {
try { WeiBoMassage weiBoMassage = analysisWeiboMBlog(cardGroup.getJSONObject(i), topic);
encode = URLEncoder.encode(gb, "utf-8"); return weiBoMassage;
} catch (UnsupportedEncodingException e) { }
log.error("字符解析成URl模式异常", e); }
} }
String url = "https://s.weibo.com/weibo?q=" + encode; return null;
String htmlBody = null; }
Request request = RequestUtils.wrapGet(url);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) { /**
htmlBody = response.body().string(); * 解析用户信息
} catch (IOException e) { *
log.error("解析微博时热搜时出现连接失败", e); * @param cardGroup
} * @param topic
if (htmlBody != null && htmlBody.contains("m-main")) { * @return
Document docm = new Document(); */
try { public static List<WeiBoUser> analysisWeiBoUsers(JSONArray cardGroup, String topic) {
org.jsoup.nodes.Document documen = Jsoup.parse(htmlBody); List<WeiBoUser> weiBoUserList = new ArrayList();
//获取贡献者信息 //解析weibo人物信息
try { Date date = new Date();
Elements li = documen.select("ul.card-user-list-a").select("li"); for (int i = 0; i < cardGroup.size(); i++) {
if (Objects.isNull(weiBoUserDao)) { if (3==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
weiBoUserDao = new WeiBoUserDao(); if (cardGroup.getJSONObject(i).containsKey("users")){
} JSONArray users = cardGroup.getJSONObject(i).getJSONArray("users");
if (Objects.nonNull(li)) { for (int i1 = 0; i1 < users.size(); i1++) {
Date date = new Date(); //获取用户id
for (Element element : li) { String userId = users.getJSONObject(i1).getString("id");
WeiBoUser weiBoUser = new WeiBoUser(); //获取用户名
//获取用户名 String userName = users.getJSONObject(i1).getString("screen_name");
String userName = element.select("a.name").text(); //获取认证信息
//获取用户id String attestationMassage = users.getJSONObject(i1).getString("verified_reason");
String attr = element.select("span.avator").select("a").first().attr("href");
String userId = attr.substring(14); //获取粉丝数量
String type = "话题贡献者"; String followers_count = users.getJSONObject(i1).getString("followers_count");
String id = userId + "_" + type + "_" + name; Long followerCount =null;
weiBoUser.setType(type); if (!followers_count.contains("万")){
weiBoUser.setId(id); followerCount = Long.valueOf(followers_count);
weiBoUser.setUserName(userName); }else {
weiBoUser.setUserId(userId); String[] split = followers_count.split("万");
weiBoUser.setTopic(name); followerCount = Long.valueOf(split[0])*10000;
weiBoUser.setTime(date); }
weiBoUserDao.addWeiBoUser(weiBoUser); //用户头像地址
} String profileImageUrl = users.getJSONObject(i1).getString("profile_image_url");
} WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount,profileImageUrl);
} catch (Exception e) { weiBoUserList.add(weiBoUser);
log.error("话题贡献者排行采集异常",e); }
} }
Elements dt = documen.select("div.card-about").select("dt"); return weiBoUserList;
if (Objects.nonNull(dt)) { } else if (10==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
Elements dd = documen.select("div.card-about").select("dd"); if (cardGroup.getJSONObject(i).containsKey("user")){
Document dtDocument = new Document(); JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user");
Document ddDocument = new Document(); //获取用户id
for (int i = 0; i < dt.size(); i++) { String userId = user.getString("id");
String dtText = dt.get(i).text().replaceAll(":", "").trim(); //获取用户名
dtDocument.put(String.valueOf(i), dtText); String userName = user.getString("screen_name");
} //获取认证信息
for (int i1 = 0; i1 < dd.size(); i1++) { String attestationMassage = user.getString("verified_reason");
Elements a = dd.get(i1).select("a"); //获取粉丝数
List<String> str = new ArrayList<>(); String followers_count = user.getString("followers_count");
for (int b = 0; b < a.size(); b++) { Long followerCount =null;
String text1 = a.get(b).text(); if (followers_count.contains("万")){
str.add(text1); String[] split = followers_count.split("万");
} followerCount = Long.valueOf(split[0])*10000;
ddDocument.put(String.valueOf(i1), str); }else {
} followerCount = Long.valueOf(followers_count);
for (int a = 0; a < dt.size(); a++) { }
docm.put(dtDocument.getString(String.valueOf(a)), ddDocument.get(String.valueOf(a))); //用户头像地址
} String profileImageUrl = user.getString("profile_image_url");
} WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount,profileImageUrl);
return docm; weiBoUserList.add(weiBoUser);
} catch (Exception e) { }
log.error("解析微博话题时出现解析错误",e); return weiBoUserList;
} }
} }
return document; return Collections.emptyList();
} }
/** /**
* 解析微博信息 * 解析微博类型
* *
* @param cardGroup * @param jsonObject
* @param topic * @param topic
* @return * @return
*/ */
public static WeiBoMassage analysisWeiboMBlog(JSONObject jsonObject, String topic) {
public static WeiBoMassage analysisWeiboMassage(JSONArray cardGroup, String topic) { JSONObject mblog = jsonObject.getJSONObject("mblog");
for (int i = 0; i < cardGroup.size(); i++) { String type = mblog.getJSONObject("title").getString("text");
if (cardGroup.getJSONObject(i).containsKey("mblog")) { String card_type = jsonObject.getString("card_type");
if (cardGroup.getJSONObject(i).getJSONObject("mblog").containsKey("title")) { Integer cardType = Integer.valueOf(card_type);
WeiBoMassage weiBoMassage = analysisWeiboMBlog(cardGroup.getJSONObject(i), topic); String show_type = jsonObject.getString("show_type");
return weiBoMassage; Integer showType = Integer.valueOf(show_type);
} //点赞数
} String attitudes_count = mblog.getString("attitudes_count");
} Long attitudeCount = null;
return null; if (attitudes_count.contains("万")) {
} String[] split = attitudes_count.split("万");
attitudeCount = Long.valueOf(split[0]) * 10000;
/** } else {
* 解析用户信息 attitudeCount = Long.valueOf(attitudes_count);
* }
* @param cardGroup
* @param topic //评论数
* @return String comments_count = mblog.getString("comments_count");
*/ Long commentCount = null;
public static List<WeiBoUser> analysisWeiBoUsers(JSONArray cardGroup, String topic) { if (comments_count.contains("万")) {
List<WeiBoUser> weiBoUserList = new ArrayList(); String[] split = comments_count.split("万");
//解析weibo人物信息 commentCount = Long.valueOf(split[0]) * 10000;
Date date = new Date(); } else {
for (int i = 0; i < cardGroup.size(); i++) { commentCount = Long.valueOf(comments_count);
if (3 == Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) { }
if (cardGroup.getJSONObject(i).containsKey("users")) {
JSONArray users = cardGroup.getJSONObject(i).getJSONArray("users"); //转发数
for (int i1 = 0; i1 < users.size(); i1++) { String reposts_count = mblog.getString("reposts_count");
//获取用户id Long repostCount =null;
String userId = users.getJSONObject(i1).getString("id"); if (reposts_count.contains("万")){
//获取用户名 String[] split = reposts_count.split("万");
String userName = users.getJSONObject(i1).getString("screen_name"); repostCount = Long.valueOf(split[0]) * 10000;
//获取认证信息 }else {
String attestationMassage = users.getJSONObject(i1).getString("verified_reason"); repostCount = Long.valueOf(reposts_count);
}
//获取粉丝数量 Date createTime = null;
String followers_count = users.getJSONObject(i1).getString("followers_count"); Date editTime = null;
Long followerCount = null;
if (!followers_count.contains("万")) { try {
followerCount = Long.valueOf(followers_count); SimpleDateFormat simpleDateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy", java.util.Locale.US);
} else { //创建时间
String[] split = followers_count.split("万"); String created_at = mblog.getString("created_at");
followerCount = Long.valueOf(split[0]) * 10000;
} createTime = simpleDateFormat.parse(created_at);
//用户头像地址 //编辑时间
String profileImageUrl = users.getJSONObject(i1).getString("profile_image_url"); if (mblog.containsKey("edit_at")){
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic, date, followerCount, profileImageUrl); String edit_at = mblog.getString("edit_at");
weiBoUserList.add(weiBoUser); editTime = simpleDateFormat.parse(edit_at);
} }
} } catch (ParseException e) {
return weiBoUserList; log.error("创建时间和编辑时间解析异常",e);
} else if (10 == Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) { }
if (cardGroup.getJSONObject(i).containsKey("user")) {
JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user"); String mid = mblog.getString("mid");
//获取用户id //用户id
String userId = user.getString("id"); String userId = mblog.getJSONObject("user").getString("id");
//获取用户名 //用户名
String userName = user.getString("screen_name"); String userName = mblog.getJSONObject("user").getString("screen_name");
//获取认证信息 //来源
String attestationMassage = user.getString("verified_reason"); String source = mblog.getString("source");
//获取粉丝数 //用户头像地址
String followers_count = user.getString("followers_count"); String profileImageUrl = mblog.getJSONObject("user").getString("profile_image_url");
Long followerCount = null; //内容
if (followers_count.contains("万")) { String content = null;
String[] split = followers_count.split("万"); if (mblog.getString("text").contains("<")) {
followerCount = Long.valueOf(split[0]) * 10000; String text = mblog.getString("text");
} else { org.jsoup.nodes.Document parse = Jsoup.parse(text);
followerCount = Long.valueOf(followers_count); content = parse.text();
}
//用户头像地址 } else {
String profileImageUrl = user.getString("profile_image_url"); content = mblog.getString("text");
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic, date, followerCount, profileImageUrl); }
weiBoUserList.add(weiBoUser);
} WeiBoMassage weiBoMassage = new WeiBoMassage(userId, content, userName, mid, createTime, editTime, cardType, showType,
return weiBoUserList; repostCount, commentCount, attitudeCount, source, type, topic,profileImageUrl);
} //默认不转发为0
} weiBoMassage.setForward(0);
return Collections.emptyList();
} JSONObject weiboJson = null;
//微博实体 是否转发
if (mblog.containsKey("retweeted_status")) {
/** weiboJson = mblog.getJSONObject("retweeted_status");
* 解析微博类型 //处理转发特有的
* //weiBoMassage.set
* @param jsonObject //源mid
* @param topic String rootMid = weiboJson.getString("mid");
* @return //源来源
*/ String rootSource = weiboJson.getString("source");
public static WeiBoMassage analysisWeiboMBlog(JSONObject jsonObject, String topic) { //源text
JSONObject mblog = jsonObject.getJSONObject("mblog"); String text = weiboJson.getString("text");
String type = mblog.getJSONObject("title").getString("text"); //解析
String card_type = jsonObject.getString("card_type"); org.jsoup.nodes.Document parse = Jsoup.parse(text);
Integer cardType = Integer.valueOf(card_type); String rootText = parse.text();
String show_type = jsonObject.getString("show_type"); //源用户id
Integer showType = Integer.valueOf(show_type); String rootId = weiboJson.getJSONObject("user").getString("id");
//点赞数 //源用户名
String attitudes_count = mblog.getString("attitudes_count"); String rootName = weiboJson.getJSONObject("user").getString("screen_name");
Long attitudeCount = null; //数据保存到对象中
if (attitudes_count.contains("万")) { weiBoMassage.setRoot_mid(rootMid);
String[] split = attitudes_count.split("万"); weiBoMassage.setRoot_id(rootId);
attitudeCount = Long.valueOf(split[0]) * 10000; weiBoMassage.setRoot_source(rootSource);
} else { weiBoMassage.setRoot_text(rootText);
attitudeCount = Long.valueOf(attitudes_count); weiBoMassage.setRoot_name(rootName);
} //转发为1
weiBoMassage.setForward(1);
//评论数 } else {
String comments_count = mblog.getString("comments_count"); weiboJson = mblog;
Long commentCount = null; }
if (comments_count.contains("万")) { List<String> pictureUrlList = new ArrayList();
String[] split = comments_count.split("万"); Long playCount = null;
commentCount = Long.valueOf(split[0]) * 10000; //获取播放量和图片链接
} else { if (weiboJson.getJSONArray("pic_ids").size() > 0) {
commentCount = Long.valueOf(comments_count); JSONArray jsonArray = weiboJson.getJSONArray("pics");
} for (int i = 0; i < jsonArray.size(); i++) {
String picUrl = jsonArray.getJSONObject(i).getString("url");
//转发数 pictureUrlList.add(picUrl);
String reposts_count = mblog.getString("reposts_count"); }
Long repostCount = null; } else if (weiboJson.containsKey("page_info")) {
if (reposts_count.contains("万")) { if (weiboJson.getJSONObject("page_info").containsKey("play_count")){
String[] split = reposts_count.split("万"); String play = weiboJson.getJSONObject("page_info").getString("play_count");
repostCount = Long.valueOf(split[0]) * 10000; if (play.contains("万")) {
} else { String[] split = play.split("万");
repostCount = Long.valueOf(reposts_count); playCount = Long.valueOf(split[0]) * 10000;
} }else if(play.contains("次")){
Date createTime = null; String[] split = play.split("次");
Date editTime = null; playCount = Long.valueOf(split[0]);
}
try { }
SimpleDateFormat simpleDateFormat = new SimpleDateFormat("EEE MMM dd HH:mm:ss z yyyy", java.util.Locale.US); }
//创建时间 weiBoMassage.setPlayCount(playCount);
String created_at = mblog.getString("created_at"); weiBoMassage.setPictureUrlList(pictureUrlList);
return weiBoMassage;
createTime = simpleDateFormat.parse(created_at); }
//编辑时间
if (mblog.containsKey("edit_at")) {
String edit_at = mblog.getString("edit_at");
editTime = simpleDateFormat.parse(edit_at);
}
} catch (ParseException e) {
log.error("创建时间和编辑时间解析异常", e);
}
String mid = mblog.getString("mid");
//用户id
String userId = mblog.getJSONObject("user").getString("id");
//用户名
String userName = mblog.getJSONObject("user").getString("screen_name");
//来源
String source = mblog.getString("source");
//用户头像地址
String profileImageUrl = mblog.getJSONObject("user").getString("profile_image_url");
//内容
String content = null;
if (mblog.getString("text").contains("<")) {
String text = mblog.getString("text");
org.jsoup.nodes.Document parse = Jsoup.parse(text);
content = parse.text();
} else {
content = mblog.getString("text");
}
WeiBoMassage weiBoMassage = new WeiBoMassage(userId, content, userName, mid, createTime, editTime, cardType, showType,
repostCount, commentCount, attitudeCount, source, type, topic, profileImageUrl);
//默认不转发为0
weiBoMassage.setForward(0);
JSONObject weiboJson = null;
//微博实体 是否转发
if (mblog.containsKey("retweeted_status")) {
weiboJson = mblog.getJSONObject("retweeted_status");
//处理转发特有的
//weiBoMassage.set
//源mid
String rootMid = weiboJson.getString("mid");
//源来源
String rootSource = weiboJson.getString("source");
//源text
String text = weiboJson.getString("text");
//解析
org.jsoup.nodes.Document parse = Jsoup.parse(text);
String rootText = parse.text();
//源用户id
String rootId = weiboJson.getJSONObject("user").getString("id");
//源用户名
String rootName = weiboJson.getJSONObject("user").getString("screen_name");
//数据保存到对象中
weiBoMassage.setRoot_mid(rootMid);
weiBoMassage.setRoot_id(rootId);
weiBoMassage.setRoot_source(rootSource);
weiBoMassage.setRoot_text(rootText);
weiBoMassage.setRoot_name(rootName);
//转发为1
weiBoMassage.setForward(1);
} else {
weiboJson = mblog;
}
List<String> pictureUrlList = new ArrayList();
Long playCount = null;
//获取播放量和图片链接
if (weiboJson.getJSONArray("pic_ids").size() > 0) {
JSONArray jsonArray = weiboJson.getJSONArray("pics");
for (int i = 0; i < jsonArray.size(); i++) {
String picUrl = jsonArray.getJSONObject(i).getString("url");
pictureUrlList.add(picUrl);
}
} else if (weiboJson.containsKey("page_info")) {
if (weiboJson.getJSONObject("page_info").containsKey("play_count")) {
String play = weiboJson.getJSONObject("page_info").getString("play_count");
if (play.contains("万")) {
String[] split = play.split("万");
playCount = Long.valueOf(split[0]) * 10000;
} else if (play.contains("次")) {
String[] split = play.split("次");
playCount = Long.valueOf(split[0]);
}
}
}
weiBoMassage.setPlayCount(playCount);
weiBoMassage.setPictureUrlList(pictureUrlList);
return weiBoMassage;
}
// /** // /**
// * 微博更新历史数据 // * 微博更新历史数据
......
...@@ -208,16 +208,6 @@ public class HotSearchCacheDAO { ...@@ -208,16 +208,6 @@ public class HotSearchCacheDAO {
} }
if("微博热搜".equals(type)){ if("微博热搜".equals(type)){
nowDoc = WeiboHotSearchCrawler.weiboUpdate(nowDoc); nowDoc = WeiboHotSearchCrawler.weiboUpdate(nowDoc);
Document documentPC = WeiboHotSearchCrawler.weiboUpdatePC(nowDoc);
if (documentPC.containsKey("分类")) {
nowDoc.put("classify",documentPC.get("分类"));
}
if (documentPC.containsKey("地区")) {
nowDoc.put("region", documentPC.get("地区"));
}
if (documentPC.containsKey("标签")) {
nowDoc.put("label", documentPC.get("标签"));
}
if(nowDoc.containsKey("topicLead")){ if(nowDoc.containsKey("topicLead")){
nowDoc.put("topicLead", nowDoc.getString("topicLead")); nowDoc.put("topicLead", nowDoc.getString("topicLead"));
} }
......
...@@ -42,15 +42,8 @@ public class WeiBoUserDao { ...@@ -42,15 +42,8 @@ public class WeiBoUserDao {
document.put("userName",weiBoUser.getUserName()); document.put("userName",weiBoUser.getUserName());
document.put("topic",weiBoUser.getTopic()); document.put("topic",weiBoUser.getTopic());
document.put("time",weiBoUser.getTime()); document.put("time",weiBoUser.getTime());
if (Objects.nonNull(weiBoUser.getType())){ document.put("followerCount",weiBoUser.getFollowerCount());
document.put("type",weiBoUser.getType()); document.put("profileImageUrl",weiBoUser.getProfileImageUrl());
}
if (Objects.nonNull(weiBoUser.getFollowerCount())){
document.put("followerCount",weiBoUser.getFollowerCount());
}
if (Objects.nonNull(weiBoUser.getProfileImageUrl())){
document.put("profileImageUrl",weiBoUser.getProfileImageUrl());
}
try { try {
mongoCollection.insertOne(document); mongoCollection.insertOne(document);
} catch (Exception e) { } catch (Exception e) {
......
...@@ -2,12 +2,17 @@ package com.zhiwei.searchhotcrawler.run; ...@@ -2,12 +2,17 @@ package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.crawler.core.proxy.ProxyFactory; import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig; import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.config.ProxyConfig; import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.*; import com.zhiwei.searchhotcrawler.timer.*;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import org.springframework.context.ApplicationContext; import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext; import org.springframework.context.support.ClassPathXmlApplicationContext;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
public class HotSearchRun { public class HotSearchRun {
......
...@@ -9,6 +9,12 @@ import lombok.extern.log4j.Log4j2; ...@@ -9,6 +9,12 @@ import lombok.extern.log4j.Log4j2;
import okhttp3.Request; import okhttp3.Request;
import okhttp3.Response; import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
......
...@@ -4,9 +4,6 @@ package com.zhiwei.searchhotcrawler.test; ...@@ -4,9 +4,6 @@ package com.zhiwei.searchhotcrawler.test;
import com.zhiwei.crawler.core.proxy.ProxyFactory; import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig; import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.config.ProxyConfig; import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun;
import java.text.ParseException; import java.text.ParseException;
public class HotSearchRunTest { public class HotSearchRunTest {
...@@ -20,7 +17,5 @@ public class HotSearchRunTest { ...@@ -20,7 +17,5 @@ public class HotSearchRunTest {
// new WeiboHotSearchRun().start(); // new WeiboHotSearchRun().start();
//快手热榜开始采集 //快手热榜开始采集
// new KuaiShouHotSearchRun().start(); // new KuaiShouHotSearchRun().start();
//百度热搜
new BaiduHotSearchRun().run();
} }
} }
package com.zhiwei.searchhotcrawler.test;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TaoBaoUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import java.io.IOException;
import java.time.Duration;
import java.util.*;
/**
 * Test-side crawler for the Taobao hot-search list.
 * <p>
 * Requests the mtop {@code WirelessRecommend.recommend} JSONP endpoint and turns the
 * entries found in the response into {@link HotSearchList} records.
 *
 * @author ll
 * @date 2021-06-18 16:33:31
 */
@Log4j2
public class TaoBaoHotSearchCrawlerTest {

    /** HTTP client built with throwException(false), 3 retries and a 60-second connect timeout. */
    private static HttpBoot httpBoot = new HttpBoot.Builder()
            .throwException(false)
            .retryTimes(3)
            .connectTimeout(Duration.ofSeconds(60))
            .build();

    /**
     * Fetches the Taobao hot-search response and parses it.
     *
     * @param date value passed through to every {@link HotSearchList} that is created
     * @return the parsed entries; an empty list when no body was read or the body
     *         does not contain the text {@code data}
     */
    public static List<HotSearchList> taoBaoHotSearch(Date date) {
        long now = new Date().getTime();
        String signPayload = "undefined&"+now+"&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
        // NOTE(review): the signature is computed but not substituted into the request URL,
        // which carries a fixed t/sign pair matching the fixed cookie below.
        String sign = TaoBaoUtils.parsJSFunction(signPayload);
        String url = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=1624929605260&sign=ada01d783dc9772d2f84124d293bac26&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";

        Map<String, String> headers = new HashMap<>();
        headers.put("cookie", "_m_h5_tk=975fb07b671f12a689d4ec36cf2e9047_1624937028814; _m_h5_tk_enc=ffb83d60b283eee5992d5e32429c2597;");
        Request request = RequestUtils.wrapGet(url, headers);

        String body = null;
        try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
            body = response.body().string();
            System.out.println(body);
        } catch (Exception e) {
            log.error("解析淘宝热搜时出现解析错误,页面结构有问题", e);
        }

        if (body == null || !body.contains("data")) {
            log.info("解析淘宝热搜时出现解析错误,页面结构有问题");
            return Collections.emptyList();
        }
        return ansysData(body, date);
    }

    /**
     * Cuts the hot-search JSON array out of the raw response text and maps each
     * element to a {@link HotSearchList}.
     * <p>
     * A failure while locating or parsing the array is logged and yields whatever was
     * collected so far; a failure on a single element is logged and that element is skipped.
     *
     * @param htmlBody raw response text
     * @param date     value passed through to every created record
     * @return the records that could be built, possibly empty
     */
    private static List<HotSearchList> ansysData(String htmlBody, Date date) {
        List<HotSearchList> results = new ArrayList<>();
        try {
            // Narrow to the text between the "searchdoor" and "searchdoorFrom" markers.
            String doorSection = htmlBody.substring(htmlBody.indexOf("searchdoor"), htmlBody.indexOf("searchdoorFrom"));
            // Fixed offsets around the "showReminder" / "multi_bangdan_flag" markers delimit the array text.
            int from = doorSection.indexOf("showReminder") + 27;
            int to = doorSection.indexOf("multi_bangdan_flag") - 2;
            String payload = doorSection.substring(from, to).trim();

            JSONArray entries = JSONObject.parseArray(payload)
                    .getJSONObject(0)
                    .getJSONObject("result")
                    .getJSONArray("text");

            for (int idx = 0; idx < entries.size(); idx++) {
                try {
                    JSONObject entry = entries.getJSONObject(idx);
                    String name = entry.getString("showtext");
                    Integer rank = Integer.valueOf(entry.getString("showmark"));
                    String url = "https://s.m.taobao.com/h5?q=" + entry.getString("searchtext");
                    String tagText = entry.getString("tagText");
                    Long count = 0L;
                    results.add(new HotSearchList(url, name, count, true, rank, HotSearchType.淘宝热搜.name(), tagText, date));
                } catch (Exception e) {
                    log.error("解析淘宝热搜时出现解析错误",e);
                }
            }
            System.out.println(entries.size());
        } catch (Exception e) {
            log.error("解析淘宝热搜时出现解析错误,数据不是json结构", e);
        }
        return results;
    }
}
...@@ -56,18 +56,18 @@ public class BaiduHotSearchRun extends Thread{ ...@@ -56,18 +56,18 @@ public class BaiduHotSearchRun extends Thread{
// } // }
TipsUtils.addHotList("百度热搜",baiduList); TipsUtils.addHotList("百度热搜",baiduList);
log.info("百度风云榜采集结束........"); log.info("百度风云榜采集结束........");
// ZhiWeiTools.sleep(2000L); ZhiWeiTools.sleep(2000L);
// log.info("搜狗微信采集开始........"); log.info("搜狗微信采集开始........");
// List<HotSearchList> sougouList = SougoHotSearchCrawler.sougoHotSearch(new Date()); List<HotSearchList> sougouList = SougoHotSearchCrawler.sougoHotSearch(new Date());
// log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(sougouList != null ? sougouList.size() : 0)); log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(sougouList != null ? sougouList.size() : 0));
// TipsUtils.addHotList("搜狗微信热搜",sougouList); TipsUtils.addHotList("搜狗微信热搜",sougouList);
// log.info("搜狗微信采集结束........"); log.info("搜狗微信采集结束........");
// ZhiWeiTools.sleep(2000L); ZhiWeiTools.sleep(2000L);
// log.info("知乎话题采集开始........"); log.info("知乎话题采集开始........");
// List<HotSearchList> zhihuList = ZhihuHotSearchCrawler.getMobileZhihuHotList(new Date()); List<HotSearchList> zhihuList = ZhihuHotSearchCrawler.getMobileZhihuHotList(new Date());
// log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuList != null ? zhihuList.size() : 0)); log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuList != null ? zhihuList.size() : 0));
// TipsUtils.addHotList("知乎热搜",zhihuList); TipsUtils.addHotList("知乎热搜",zhihuList);
// log.info("知乎话题采集结束........"); log.info("知乎话题采集结束........");
} }
} }
\ No newline at end of file
package com.zhiwei.searchhotcrawler.timer; package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO; import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
......
...@@ -29,496 +29,496 @@ import java.util.*; ...@@ -29,496 +29,496 @@ import java.util.*;
@EnableScheduling @EnableScheduling
@EnableAsync @EnableAsync
public class GatherTimer { public class GatherTimer {
//
// private Logger logger = LoggerFactory.getLogger(GatherTimer.class); private Logger logger = LoggerFactory.getLogger(GatherTimer.class);
//
// private RedisDao redisDao = new RedisDao(); private RedisDao redisDao = new RedisDao();
// /** 知乎数码子分类 */ /** 知乎数码子分类 */
// private String DIGITAL = "digital"; private String DIGITAL = "digital";
// /** 知乎国际子分类 */ /** 知乎国际子分类 */
// private String FOCUS = "focus"; private String FOCUS = "focus";
// /** 知乎时事子分类 */ /** 知乎时事子分类 */
// private String DEPTH = "depth"; private String DEPTH = "depth";
//
//
// /** /**
// * 虎嗅热文推荐的采集 * 虎嗅热文推荐的采集
// */ */
// @Async(value = "myScheduler") @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ?") @Scheduled(cron = "0 * * * * ?")
// public void crawlerHuXiu() { public void crawlerHuXiu() {
// logger.info("虎嗅热文推荐开始采集..."); logger.info("虎嗅热文推荐开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date()); Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> huXiuList = HuXiuHotSearchCrawler.HuXiuHotArticleRecommended(date); List<HotSearchList> huXiuList = HuXiuHotSearchCrawler.HuXiuHotArticleRecommended(date);
// logger.info("{}, 虎嗅热文推荐此轮采集到的数据量为:{}", new Date(), Integer.valueOf(huXiuList != null ? huXiuList.size() : 0)); logger.info("{}, 虎嗅热文推荐此轮采集到的数据量为:{}", new Date(), Integer.valueOf(huXiuList != null ? huXiuList.size() : 0));
// TipsUtils.addHotList(HotSearchType.虎嗅热文推荐.name(), huXiuList); TipsUtils.addHotList(HotSearchType.虎嗅热文推荐.name(), huXiuList);
// logger.info("虎嗅热文推荐采集结束..."); logger.info("虎嗅热文推荐采集结束...");
//
// /** /**
// * 36氪人气榜的采集 * 36氪人气榜的采集
// */ */
// logger.info("36氪人气榜开始采集..."); logger.info("36氪人气榜开始采集...");
// List<HotSearchList> list36Kr = HotSearch36KrCrawler.hotSearch36Kr(date); List<HotSearchList> list36Kr = HotSearch36KrCrawler.hotSearch36Kr(date);
// logger.info("{}, 36氪人气榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list36Kr != null ? list36Kr.size() : 0)); logger.info("{}, 36氪人气榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list36Kr != null ? list36Kr.size() : 0));
// TipsUtils.addHotList(HotSearchType.人气榜36氪.name(), list36Kr); TipsUtils.addHotList(HotSearchType.人气榜36.name(), list36Kr);
// logger.info("36氪人气榜采集结束..."); logger.info("36氪人气榜采集结束...");
// } }
//
// /** /**
// * 微博热搜的采集 * 微博热搜的采集
// */ */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerWeiBo(){
    // Crawl the Weibo hot-search list and hand it to TipsUtils.addHotList.
    logger.info("微博热搜开始采集...");
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotList = WeiboHotSearchCrawler.weiboHotSearchByPhone(crawlTime);
    // A null result is reported as zero collected entries.
    int collected = (hotList == null) ? 0 : hotList.size();
    logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), collected);
    TipsUtils.addHotList(HotSearchType.微博热搜.name(), hotList);
    logger.info("微博热搜采集结束...");
}
/**
 * Refreshes the lead text, read count and discussion count of Weibo hot-search entries.
 * <p>
 * Takes the id set stored under RedisConfig.WEIBO_HOTSEARCHIDS, removes that key, then
 * re-crawls every id that still resolves to a stored document and persists the result
 * when it carries at least one of topicLead / readCount / discussCount.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "45 0/10 * * * ? ")
public void updateWeiBo(){
    logger.info("微博热搜导语更新...");
    HotSearchCacheDAO cacheDao = new HotSearchCacheDAO();
    Set<String> pendingIds = redisDao.getRedisSetData(RedisConfig.WEIBO_HOTSEARCHIDS);
    redisDao.removeRedis(RedisConfig.WEIBO_HOTSEARCHIDS);
    for (String id : pendingIds) {
        Document stored = cacheDao.getHotSearchById(id);
        if (stored == null) {
            // Unknown id: nothing to refresh, and no pause is taken.
            continue;
        }
        Document refreshed = WeiboHotSearchCrawler.weiboUpdate(stored);
        boolean hasRefreshedField = refreshed.containsKey("topicLead")
                || refreshed.containsKey("readCount")
                || refreshed.containsKey("discussCount");
        if (hasRefreshedField) {
            cacheDao.updateWeibo(refreshed, id);
        }
        // Pause after each processed document before moving to the next id.
        ZhiWeiTools.sleep(3000L);
    }
    logger.info("微博热搜导语更新结束...");
}
/**
 * Collects the Toutiao hot-search list, hands it to TipsUtils.addHotList and then
 * passes the same list to TouTiaoExecutor.countTouTiaoReadCount.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerTouTiao(){
    logger.info("今日头条热搜开始采集...");
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotList = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(crawlTime);
    // A null result is reported as zero collected entries.
    int collected = (hotList == null) ? 0 : hotList.size();
    logger.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), collected);
    TipsUtils.addHotList(HotSearchType.今日头条热搜.name(), hotList);
    logger.info("今日头条热搜采集结束...");

    logger.info("今日头条热搜详情趋势阅读量更新...");
    TouTiaoExecutor.countTouTiaoReadCount(hotList);
}
/**
 * Collects the Baidu hot-search list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerBaiDu(){
    logger.info("百度热搜开始采集...");
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotList = BaiDuHotSearchCrawler.baiduHotSearch(crawlTime);
    // A null result is reported as zero collected entries.
    int collected = (hotList == null) ? 0 : hotList.size();
    logger.info("{}, 百度热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(collected));
    TipsUtils.addHotList(HotSearchType.百度热搜.name(), hotList);
    logger.info("百度热搜采集结束...");
}
/**
 * Collects the Douyin hot-search list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerDouYin(){
    logger.info("抖音热搜开始采集...");
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotList = DouyinHotSearchCrawler.getMobileDouyinHotList(crawlTime);
    // A null result is reported as zero collected entries.
    int collected = (hotList == null) ? 0 : hotList.size();
    logger.info("{}, 抖音热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(collected));
    TipsUtils.addHotList(HotSearchType.抖音热搜.name(), hotList);
    logger.info("抖音热搜采集结束...");
}
/**
 * Updates the stored URL of each entry in DouyinHotSearchCrawler.list.
 * <p>
 * For every entry the URL is looked up by hot word; when a URL is returned, a document
 * holding the entry id ("name_type") and the URL is passed to
 * HotSearchCacheDAO.updateDouyinUrl. A null or empty list only produces a failure log.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 0/5 * * * ? ")
public void updateDouYinUrl(){
    logger.info("抖音链接更新开始...");
    HotSearchCacheDAO cacheDao = new HotSearchCacheDAO();
    List<HotSearchList> hotList = DouyinHotSearchCrawler.list;
    if (hotList == null || hotList.isEmpty()) {
        logger.info("抖音链接更新失败,抖音热搜列表获取为空。");
        return;
    }
    for (int idx = 0; idx < hotList.size(); idx++) {
        HotSearchList entry = hotList.get(idx);
        String name = entry.getName();
        String id = name + "_" + entry.getType();
        String url = DouyinHotSearchCrawler.getDouyinUrl("https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword=" + name);
        if (url == null) {
            // No URL resolved for this hot word; leave the stored entry untouched.
            continue;
        }
        Document update = new Document();
        update.put("id", id);
        update.put("url", url);
        cacheDao.updateDouyinUrl(update);
    }
    logger.info("抖音链接更新结束");
}
/**
 * Collects the Zhihu hot list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerZhihu(){
    logger.info("知乎热搜开始采集...");
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotList = ZhihuHotSearchCrawler.getMobileZhihuHotList(crawlTime);
    // A null result is reported as zero collected entries.
    int collected = (hotList == null) ? 0 : hotList.size();
    logger.info("{}, 知乎热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(collected));
    TipsUtils.addHotList(HotSearchType.知乎热搜.name(), hotList);
    logger.info("知乎热搜采集结束...");
}
/**
 * Collects the Sogou WeChat hot words and hands them to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerWeChat(){
    logger.info("搜狗微信热词开始采集...");
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotWords = SougoHotSearchCrawler.sougoHotSearch(crawlTime);
    // A null result is reported as zero collected entries.
    int collected = (hotWords == null) ? 0 : hotWords.size();
    logger.info("{}, 搜狗微信热词采集到的数据量为:{}", new Date(), Integer.valueOf(collected));
    TipsUtils.addHotList(HotSearchType.搜狗微信热搜.name(), hotWords);
    logger.info("搜狗微信热词采集结束...");
}
/**
 * Collects the Sogou WeChat hot-search list (the links are collected from the app side)
 * and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void ceawlerSougouHotData(){
    logger.info("搜狗微信热搜开始采集...");
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotList = SougoHotSearchCrawler.sougouHotDataCrawler(crawlTime);
    // A null result is reported as zero collected entries.
    int collected = (hotList == null) ? 0 : hotList.size();
    logger.info("{}, 搜狗微信热搜此轮采集到的数据量为:{}", new Date(), collected);
    TipsUtils.addHotList(HotSearchType.搜狗微信客户端热搜.name(), hotList);
    logger.info("搜狗微信热搜采集结束...");
}
/**
 * Collects the Weibo topic list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerWeiBoTopic(){
    logger.info("微博话题开始采集...");
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> topics = WeiboTopicCrawler.startCrawlerByPhone(crawlTime);
    // A null result is reported as zero collected entries.
    int collected = (topics == null) ? 0 : topics.size();
    logger.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(collected));
    TipsUtils.addHotList(HotSearchType.微博话题.name(), topics);
    logger.info("微博话题采集结束...");
}
/**
 * Collects the Tencent News hot list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerTengXun(){
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotList = TengXunCrawler.getTengXunHotList(crawlTime);
    String typeName = HotSearchType.腾讯新闻.name();
    TipsUtils.addHotList(typeName, hotList);
}
/**
 * Collects the Sina hot-spot list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerXinLangHotSpot(){
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotSpots = XinLangHotSearchCrawler.getXinLangHotSpot(crawlTime);
    String typeName = HotSearchType.新浪热点.name();
    TipsUtils.addHotList(typeName, hotSpots);
}
/**
 * Collects the Sina hot-ranking list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerXinLangHotSearch(){
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> ranking = XinLangHotSearchCrawler.getXinLangHotSearch(crawlTime);
    String typeName = HotSearchType.新浪热榜.name();
    TipsUtils.addHotList(typeName, ranking);
}
/**
 * Collects the NetEase News hot-ranking list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerWangYiHotSearch(){
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> ranking = WangYiHotSearchCrawler.getWangYiHotSearch(crawlTime);
    String typeName = HotSearchType.网易热榜.name();
    TipsUtils.addHotList(typeName, ranking);
}
/**
 * Collects the NetEase News hot-comment list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerWangYiHotComment(){
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotComments = WangYiHotSearchCrawler.getWangYicomment(crawlTime);
    String typeName = HotSearchType.网易跟帖热议.name();
    TipsUtils.addHotList(typeName, hotComments);
}
/**
 * Collects the Ifeng News hot-ranking list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "10 * * * * ? ")
public void crawlerFengHuangHotData(){
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> ranking = FengHuangSearchCrawler.getFengHuangHotData(crawlTime);
    String typeName = HotSearchType.凤凰新闻热榜.name();
    TipsUtils.addHotList(typeName, ranking);
}
/**
* 凤凰新闻热搜的采集
*/
// @Async(value = "myScheduler") // @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ") // @Scheduled(cron = "10 * * * * ? ")
// public void crawlerWeiBo(){ public void crawlerFengHuangHotSearch(){
// logger.info("微博热搜开始采集..."); Date date = DateUtils.getMillSecondTime(new Date());
// Date date = DateUtils.getMillSecondTime(new Date()); List<HotSearchList> list = FengHuangSearchCrawler.getFengHuangHotSearch(date);
// List<HotSearchList> weiboList = WeiboHotSearchCrawler.weiboHotSearchByPhone(date); TipsUtils.addHotList(HotSearchType.凤凰新闻热搜.name(),list);
// logger.info("{}, 微博此轮采集到的数据量为:{}", new Date(), weiboList != null ? weiboList.size() : 0); }
// TipsUtils.addHotList(HotSearchType.微博热搜.name(),weiboList);
// logger.info("微博热搜采集结束..."); /**
// } * 腾讯较真辟谣榜采集
// */
// /** @Async(value = "myScheduler")
// * 微博热搜导语,阅读量,讨论量更新 @Scheduled(cron = "10 * * * * ? ")
// */ public void crawlerTengXunVerificationHotSearch(){
logger.info("{},腾讯较真辟谣榜开始采集", new Date());
Date date = DateUtils.getMillSecondTime(new Date());
List<HotSearchList> list = TengXunCrawler.getTengXunVerificationList(date);
logger.info("腾讯较真辟谣榜本轮采集数量:{}",list.size());
TipsUtils.addHotList(HotSearchType.腾讯较真榜.name(), list);
logger.info("{},腾讯较真辟谣榜采集结束", new Date());
}
/**
 * Collects the Sohu topic list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerSouHuTopic(){
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> topics = SouhuTopicCrawler.getSouhuTopic(crawlTime);
    String typeName = HotSearchType.搜狐话题.name();
    TipsUtils.addHotList(typeName, topics);
}
/**
 * Collects the Zhihu hot-search topics and hands them to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhihuHotTopic(){
    logger.info("知乎热搜话题开始采集...");
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> topics = ZhihuTopicSearchCrawler.getZhihuTopicSearch(crawlTime);
    // A null result is reported as zero collected entries.
    int collected = (topics == null) ? 0 : topics.size();
    logger.info("{}, 知乎热搜话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(collected));
    TipsUtils.addHotList(HotSearchType.知乎热搜榜单.name(), topics);
    logger.info("知乎热搜话题采集结束...");
}
/**
 * Collects the Weibo pre-heat ranking and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerWeiBoPreheat(){
    logger.info("微博预热榜开始采集...");
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> ranking = WeiboHotSearchCrawler.weiboPreheatSearch(crawlTime);
    // A null result is reported as zero collected entries.
    int collected = (ranking == null) ? 0 : ranking.size();
    logger.info("{},微博预热榜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(collected));
    TipsUtils.addHotList(HotSearchType.微博预热榜.name(), ranking);
    logger.info("微博预热榜采集结束...");
}
/**
 * Scheduled entry point for the Zhihu "digital" sub-category: delegates to
 * crawlerZhiHuChild with the DIGITAL key.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuDigital(){
    crawlerZhiHuChild(DIGITAL);
}
/**
 * Scheduled entry point for the Zhihu "international" sub-category: delegates to
 * crawlerZhiHuChild with the FOCUS key.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuFocus(){
    crawlerZhiHuChild(FOCUS);
}
/**
 * Scheduled entry point for the Zhihu "current affairs" sub-category: delegates to
 * crawlerZhiHuChild with the DEPTH key.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "20 * * * * ? ")
public void crawlerZhiHuDepth(){
    crawlerZhiHuChild(DEPTH);
}
/**
 * Collects the Maimai hot list and hands it to TipsUtils.addHotList.
 * <p>
 * When a crawl yields nothing it is repeated up to 10 more times, calling
 * ZhiWeiTools.sleep(5000L) before each repeat. A null result is treated like an empty
 * one, so it triggers a retry instead of a NullPointerException; sibling crawler
 * methods in this class already treat crawler results as nullable.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "30 0/30 * * * ? ")
public void crawlerMaiMaiHotSearch(){
    Date date = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
    int retries = 0;
    // Retry while nothing was collected; every attempt reuses the same timestamp.
    while ((list == null || list.isEmpty()) && retries < 10) {
        ZhiWeiTools.sleep(5000L);
        list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
        retries++;
    }
    // The final result is forwarded as-is, even if it is still null or empty.
    TipsUtils.addHotList(HotSearchType.脉脉热榜.name(), list);
}
/**
 * Collects the Bilibili ranking list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "30 * * * * ? ")
public void crawlerBilibiliHotSearch(){
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> ranking = BililiCrawler.getBilibiliHotSearch(crawlTime);
    String typeName = HotSearchType.B站排行榜.name();
    TipsUtils.addHotList(typeName, ranking);
}
/**
 * Collects the Bilibili hot-search list and hands it to TipsUtils.addHotList.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "30 * * * * ? ")
public void crawlerBilibiliHotData() {
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotList = BililiCrawler.getBiHotData(crawlTime);
    String typeName = HotSearchType.B站热搜.name();
    TipsUtils.addHotList(typeName, hotList);
}
/**
 * Collects the Weibo super-topic list and stores it through WeiboSuperTopicDAO.
 * <p>
 * Every crawled WeiboSuperTopic is converted into a Document (keys: _id, name, rank,
 * score_num, fensi_num, post_num, type, day, time, url) and the resulting batch is
 * passed to addTopicList. A null crawl result is handled like an empty one: the count
 * log already allowed for null, but the conversion loop previously did not.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 0 0/3 * * ? ")
public void crawlerWeiBoSuperTopic(){
    logger.info("微博超话采集开始........");
    WeiboSuperTopicDAO weiboTopicDAO = new WeiboSuperTopicDAO();
    List<WeiboSuperTopic> list = WeiboSuperTopicCrawler.startCrawler();
    logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
    List<Document> data = new ArrayList<>();
    if (list != null) {
        for (WeiboSuperTopic topic : list) {
            logger.info("topic::::{}", topic);
            Document doc = new Document();
            doc.put("_id", topic.getId());
            doc.put("name", topic.getTopicName());
            doc.put("rank", topic.getRank());
            doc.put("score_num", topic.getScore());
            doc.put("fensi_num", topic.getFensi());
            doc.put("post_num", topic.getPostNum());
            doc.put("type", topic.getType());
            doc.put("day", topic.getDay());
            doc.put("time", topic.getTime());
            doc.put("url", topic.getUrl());
            data.add(doc);
        }
    }
    weiboTopicDAO.addTopicList(data);
    // The closing message names the super-topic job, matching the opening message.
    logger.info("微博超话采集结束........");
}
// @Async(value = "myScheduler") // @Async(value = "myScheduler")
// @Scheduled(cron = "45 0/10 * * * ? ") // @Scheduled(cron = "0 05 09 * * ? ")
// public void updateWeiBo(){ // public void updateWeiboHistory(){
// logger.info("微博热搜导语更新...");
// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO(); // HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
// Set<String> hotSearchIdSet = redisDao.getRedisSetData(RedisConfig.WEIBO_HOTSEARCHIDS); // List<Document> documentList = hotSearchCacheDAO.getHotSearchList();
// redisDao.removeRedis(RedisConfig.WEIBO_HOTSEARCHIDS); // int i=0;
// Iterator<String> hotSearchIterator = hotSearchIdSet.iterator(); // for (Document document : documentList){
// while (hotSearchIterator.hasNext()){ // document = WeiboHotSearchCrawler.updateWeiBoTopic(document);
// String id = hotSearchIterator.next();
// Document document = hotSearchCacheDAO.getHotSearchById(id);
// if(document != null){ // if(document != null){
// document = WeiboHotSearchCrawler.weiboUpdate(document); // hotSearchCacheDAO.updateWeibo(document,document.getString("_id"));
// if(document.containsKey("topicLead") || document.containsKey("readCount") || document.containsKey("discussCount")) { // ZhiWeiTools.sleep(500L);
// hotSearchCacheDAO.updateWeibo(document, id);
// }
// ZhiWeiTools.sleep(3000L);
// }
// }
// logger.info("微博热搜导语更新结束...");
// }
//
// /**
// * 今日头条热搜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerTouTiao(){
// logger.info("今日头条热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> toutiaoList = ToutiaoHotSearchCrawler.toutiaoHotSearchByPhone(date);
// logger.info("{}, 今日头条此轮采集到的数据量为:{}", new Date(), toutiaoList != null ? toutiaoList.size() : 0);
// TipsUtils.addHotList(HotSearchType.今日头条热搜.name(),toutiaoList);
// logger.info("今日头条热搜采集结束...");
// logger.info("今日头条热搜详情趋势阅读量更新...");
// TouTiaoExecutor.countTouTiaoReadCount(toutiaoList);
// }
//
// /**
// * 百度热搜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerBaiDu(){
// logger.info("百度热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> baiduList = BaiDuHotSearchCrawler.baiduHotSearch(date);
// logger.info("{}, 百度热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(baiduList != null ? baiduList.size() : 0));
// TipsUtils.addHotList(HotSearchType.百度热搜.name(),baiduList);
// logger.info("百度热搜采集结束...");
// }
//
// /**
// * 抖音热搜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerDouYin(){
// logger.info("抖音热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> douyinList = DouyinHotSearchCrawler.getMobileDouyinHotList(date);
// logger.info("{}, 抖音热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(douyinList != null ? douyinList.size() : 0));
// TipsUtils.addHotList(HotSearchType.抖音热搜.name(),douyinList);
// logger.info("抖音热搜采集结束...");
// }
//
// /**
// * 抖音链接的更新
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 0/5 * * * ? ")
// public void updateDouYinUrl(){
// logger.info("抖音链接更新开始...");
// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
// List<HotSearchList> douyinList = DouyinHotSearchCrawler.list;
// if(douyinList!=null && douyinList.size()>0){
// for(int i=0; i<douyinList.size(); i++){
// String name = douyinList.get(i).getName();
// String id = name+"_"+douyinList.get(i).getType();
// String url = DouyinHotSearchCrawler.getDouyinUrl("https://aweme-hl.snssdk.com/aweme/v1/hot/search/video/list/?hotword="+name);
// if(url != null) {
// Document document = new Document();
// document.put("id", id);
// document.put("url", url);
// hotSearchCacheDAO.updateDouyinUrl(document);
// }
// } // }
// logger.info("抖音链接更新结束");
// }else{
// logger.info("抖音链接更新失败,抖音热搜列表获取为空。");
// }
// }
//
// /**
// * 知乎热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerZhihu(){
// logger.info("知乎热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> zhihuList = ZhihuHotSearchCrawler.getMobileZhihuHotList(date);
// logger.info("{}, 知乎热搜此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuList != null ? zhihuList.size() : 0));
// TipsUtils.addHotList(HotSearchType.知乎热搜.name(),zhihuList);
// logger.info("知乎热搜采集结束...");
// }
//
// /**
// * 搜狗微信热词的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerWeChat(){
// logger.info("搜狗微信热词开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = SougoHotSearchCrawler.sougoHotSearch(date);
// logger.info("{}, 搜狗微信热词采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.搜狗微信热搜.name(),list);
// logger.info("搜狗微信热词采集结束...");
// }
//
// /**
// * 搜狗微信热搜的采集(app端采集链接)
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void ceawlerSougouHotData(){
// logger.info("搜狗微信热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = SougoHotSearchCrawler.sougouHotDataCrawler(date);
// logger.info("{}, 搜狗微信热搜此轮采集到的数据量为:{}", new Date(), list != null ? list.size() : 0);
// TipsUtils.addHotList(HotSearchType.搜狗微信客户端热搜.name(),list);
// logger.info("搜狗微信热搜采集结束...");
// }
//
// /**
// * 微博话题的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerWeiBoTopic(){
// logger.info("微博话题开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WeiboTopicCrawler.startCrawlerByPhone(date);
// logger.info("{}, 微博话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.微博话题.name(),list);
// logger.info("微博话题采集结束...");
// }
//
// /**
// * 腾讯新闻热点的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerTengXun(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = TengXunCrawler.getTengXunHotList(date);
// TipsUtils.addHotList(HotSearchType.腾讯新闻.name(),list);
// }
//
// /**
// * 新浪热点的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerXinLangHotSpot(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = XinLangHotSearchCrawler.getXinLangHotSpot(date);
// TipsUtils.addHotList(HotSearchType.新浪热点.name(),list);
// }
//
// /**
// * 新浪热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerXinLangHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = XinLangHotSearchCrawler.getXinLangHotSearch(date);
// TipsUtils.addHotList(HotSearchType.新浪热榜.name(),list);
// }
//
// /**
// * 网易新闻热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerWangYiHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WangYiHotSearchCrawler.getWangYiHotSearch(date);
// TipsUtils.addHotList(HotSearchType.网易热榜.name(),list);
// }
//
// /**
// * 网易新闻跟帖热议的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerWangYiHotComment(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WangYiHotSearchCrawler.getWangYicomment(date);
// TipsUtils.addHotList(HotSearchType.网易跟帖热议.name(),list);
// }
//
// /**
// * 凤凰新闻热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerFengHuangHotData(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = FengHuangSearchCrawler.getFengHuangHotData(date);
// TipsUtils.addHotList(HotSearchType.凤凰新闻热榜.name(),list);
// }
//
// /**
// * 凤凰新闻热搜的采集
// */
//// @Async(value = "myScheduler")
//// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerFengHuangHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = FengHuangSearchCrawler.getFengHuangHotSearch(date);
// TipsUtils.addHotList(HotSearchType.凤凰新闻热搜.name(),list);
// }
//
// /**
// * 腾讯较真辟谣榜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "10 * * * * ? ")
// public void crawlerTengXunVerificationHotSearch(){
// logger.info("{},腾讯较真辟谣榜开始采集", new Date());
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = TengXunCrawler.getTengXunVerificationList(date);
// logger.info("腾讯较真辟谣榜本轮采集数量:{}",list.size());
// TipsUtils.addHotList(HotSearchType.腾讯较真榜.name(), list);
// logger.info("{},腾讯较真辟谣榜采集结束", new Date());
// }
//
// /**
// * 搜狐话题的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerSouHuTopic(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = SouhuTopicCrawler.getSouhuTopic(date);
// TipsUtils.addHotList(HotSearchType.搜狐话题.name(),list);
// }
//
// /**
// * 知乎热搜话题的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhihuHotTopic(){
// logger.info("知乎热搜话题开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = ZhihuTopicSearchCrawler.getZhihuTopicSearch(date);
// logger.info("{}, 知乎热搜话题此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.知乎热搜榜单.name(),list);
// logger.info("知乎热搜话题采集结束...");
// }
//
// /**
// * 微博预热榜的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerWeiBoPreheat(){
// logger.info("微博预热榜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = WeiboHotSearchCrawler.weiboPreheatSearch(date);
// logger.info("{},微博预热榜此轮采集到的数据量为:{}", new Date(),Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(HotSearchType.微博预热榜.name(),list);
// logger.info("微博预热榜采集结束...");
// }
//
// /**
// * 知乎热搜数码分类采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhiHuDigital(){
// this.crawlerZhiHuChild(DIGITAL);
// }
//
// /**
// * 知乎热搜国际分类采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhiHuFocus(){
// this.crawlerZhiHuChild(FOCUS);
// }
//
// /**
// * 知乎热搜时事分类采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "20 * * * * ? ")
// public void crawlerZhiHuDepth(){
// this.crawlerZhiHuChild(DEPTH);
// }
//
// /**
// * maimai采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "30 0/30 * * * ? ")
// public void crawlerMaiMaiHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
// int i=0;
// while (list.size()==0 && i<10){
// ZhiWeiTools.sleep(5000L);
// list = MaiMaiHotSearchCrawler.getMaiMaiHotData(date);
// i++; // i++;
// logger.info("更新进度:{}",i*100/documentList.size());
// } // }
// TipsUtils.addHotList(HotSearchType.脉脉热榜.name(),list); // logger.info("更新结束");
// }
//
// /**
// * B站排行榜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "30 * * * * ? ")
// public void crawlerBilibiliHotSearch(){
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list =BililiCrawler.getBilibiliHotSearch(date);
// TipsUtils.addHotList(HotSearchType.B站排行榜.name(),list);
// }
//
// /**
// * B站热搜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "30 * * * * ? ")
// public void crawlerBilibiliHotData() {
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> list = BililiCrawler.getBiHotData(date);
// TipsUtils.addHotList(HotSearchType.B站热搜.name(),list);
// }
//
// /**
// * 微博超话的采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 0 0/3 * * ? ")
// public void crawlerWeiBoSuperTopic(){
// logger.info("微博超话采集开始........");
// Date date = DateUtils.getMillSecondTime(new Date());
// WeiboSuperTopicDAO weiboTopicDAO = new WeiboSuperTopicDAO();
// List<WeiboSuperTopic> list = WeiboSuperTopicCrawler.startCrawler();
// logger.info("{}, 微博超话此轮采集到的数据量为:{}", new Date(), Integer.valueOf(list != null ? list.size() : 0));
// List<Document> data = new ArrayList<>();
// for(WeiboSuperTopic topic : list){
// logger.info("topic::::{}", topic);
// Document doc = new Document();
// doc.put("_id", topic.getId());
// doc.put("name", topic.getTopicName());
// doc.put("rank", topic.getRank());
// doc.put("score_num", topic.getScore());
// doc.put("fensi_num", topic.getFensi());
// doc.put("post_num", topic.getPostNum());
// doc.put("type", topic.getType());
// doc.put("day", topic.getDay());
// doc.put("time", topic.getTime());
// doc.put("url", topic.getUrl());
// data.add(doc);
// }
// weiboTopicDAO.addTopicList(data);
// logger.info("微博话题采集结束........");
// }
//
//
//// @Async(value = "myScheduler")
//// @Scheduled(cron = "0 05 09 * * ? ")
//// public void updateWeiboHistory(){
//// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
//// List<Document> documentList = hotSearchCacheDAO.getHotSearchList();
//// int i=0;
//// for (Document document : documentList){
//// document = WeiboHotSearchCrawler.updateWeiBoTopic(document);
//// if(document != null){
//// hotSearchCacheDAO.updateWeibo(document,document.getString("_id"));
//// ZhiWeiTools.sleep(500L);
//// }
//// i++;
//// logger.info("更新进度:{}",i*100/documentList.size());
//// }
//// logger.info("更新结束");
//// }
//
// /**
// * 知乎子类采集函数
// * @param type
// */
// private void crawlerZhiHuChild(String type){
// Date date = DateUtils.getMillSecondTime(new Date());
// String name = this.getTypeName(type);
// logger.info("知乎{}话题热榜采集开始...", name);
// List<HotSearchList> list = ZhihuChildHotSearchCrawler.getZhihuTopicSearch(type,name,date);
// logger.info("{}, 知乎{}话题此轮采集到的数据量为:{}", new Date(),name, Integer.valueOf(list != null ? list.size() : 0));
// TipsUtils.addHotList(name,list);
// logger.info("知乎{}话题热榜采集结束...", name);
// }
//
// private String getTypeName(String type){
// String name;
// switch (type) {
// case "digital":
// name = "数码";
// break;
// case "focus":
// name = "国际";
// break;
// case "depth":
// name = "时事";
// break;
// default:
// name = "";
// }
// return name;
// }
// /**
// *快手热榜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerKuaiShou(){
// logger.info("快手热榜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> kuaiShouList = KuaiShouHotSearchCrawler.KuaiShouHotSearchCrawler(date);
// logger.info("{}, 快手此轮采集到的数据量为:{}", new Date(), kuaiShouList != null ? kuaiShouList.size() : 0);
// TipsUtils.addHotList(HotSearchType.快手热榜.name(), kuaiShouList);
// logger.info("快手热榜采集结束...");
// } // }
/**
 * Shared crawl routine behind the Zhihu per-category scheduled jobs.
 *
 * @param type category key; it is passed to the crawler and also resolved to
 *             a display name via {@code getTypeName}, which is used both in
 *             the log lines and as the list name given to {@code TipsUtils.addHotList}
 */
private void crawlerZhiHuChild(String type) {
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final String categoryName = getTypeName(type);
    logger.info("知乎{}话题热榜采集开始...", categoryName);

    final List<HotSearchList> topics =
            ZhihuChildHotSearchCrawler.getZhihuTopicSearch(type, categoryName, crawlTime);
    // A null crawler result is reported as zero collected entries.
    final int collected = (topics == null) ? 0 : topics.size();
    logger.info("{}, 知乎{}话题此轮采集到的数据量为:{}", new Date(), categoryName, collected);

    TipsUtils.addHotList(categoryName, topics);
    logger.info("知乎{}话题热榜采集结束...", categoryName);
}
/**
 * Maps a Zhihu category key to its display name.
 *
 * @param type category key: {@code "digital"}, {@code "focus"} or {@code "depth"}
 * @return the matching display name, or an empty string for any other key
 */
private String getTypeName(String type) {
    switch (type) {
        case "digital":
            return "数码";
        case "focus":
            return "国际";
        case "depth":
            return "时事";
        default:
            return "";
    }
}
/**
 * Scheduled crawl of the Kuaishou hot list; the result is passed to
 * {@code TipsUtils.addHotList} under the Kuaishou hot-list type name.
 */
@Async("myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerKuaiShou() {
    logger.info("快手热榜开始采集...");
    final Date crawlTime = DateUtils.getMillSecondTime(new Date());
    final List<HotSearchList> hotList = KuaiShouHotSearchCrawler.KuaiShouHotSearchCrawler(crawlTime);
    // A null crawler result is reported as zero collected entries.
    final int collected = (hotList == null) ? 0 : hotList.size();
    logger.info("{}, 快手此轮采集到的数据量为:{}", new Date(), collected);
    TipsUtils.addHotList(HotSearchType.快手热榜.name(), hotList);
    logger.info("快手热榜采集结束...");
}
} }
package com.zhiwei.searchhotcrawler.util;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import lombok.extern.log4j.Log4j2;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
@Log4j2
public class TaoBaoUtils {
public static String parsJSFunction(String sign) {
String scriptResult ="";//脚本的执行结果
ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");//1.得到脚本引擎
//ScriptEngine engine = new ScriptEngineManager().getEngineByName("nashorn");//1.得到脚本引擎
try {
//2.引擎读取 脚本字符串
//engine.eval(new StringReader(routeScript));
//如果js存在文件里
Resource aesJs = new ClassPathResource("taobao.js");
engine.eval(new FileReader(aesJs.getFile()));
//3.将引擎转换为Invocable,这样才可以掉用js的方法
Invocable invocable = (Invocable) engine;
//4.使用 invocable.invokeFunction掉用js脚本里的方法,第一個参数为方法名,后面的参数为被调用的js方法的入参
scriptResult = (String) invocable.invokeFunction("h", sign);
}catch(Exception e){
log.error("Error executing script: ",e.getMessage());
}
return scriptResult;
}
}
...@@ -4,11 +4,11 @@ ...@@ -4,11 +4,11 @@
#线上old #线上old
#mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@192.168.0.101:30000,192.168.0.106:30000,192.168.0.108:30000/hot_search_list?authSource=admin&authMechanism=SCRAM-SHA-1 #mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@192.168.0.101:30000,192.168.0.106:30000,192.168.0.108:30000/hot_search_list?authSource=admin&authMechanism=SCRAM-SHA-1
#线上new #线上new
#mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@192.168.0.150:27017,192.168.0.151:27017,192.168.0.152:27017/hot_search_list?authSource=admin&authMechanism=SCRAM-SHA-1 mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@192.168.0.150:27017,192.168.0.151:27017,192.168.0.152:27017/hot_search_list?authSource=admin&authMechanism=SCRAM-SHA-1
#local #local
#mongoLocalUri=mongodb://192.168.0.81:27017/istarshine_data #mongoLocalUri=mongodb://192.168.0.81:27017/istarshine_data
#service #service
mongoUri=mongodb://127.0.0.1:27017/ #mongoUri=mongodb://127.0.0.1:27017/
#备用库 #备用库
#mongoUri=mongodb://202.107.192.94:37017/hot_search_list #mongoUri=mongodb://202.107.192.94:37017/hot_search_list
#mongoUri=mongodb://192.168.0.66:27017/ #mongoUri=mongodb://192.168.0.66:27017/
......
#registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182&timeout=60000 registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182&timeout=60000
#group=hangzhou group=hangzhou
######################################################## ########################################################
registry=zookeeper://192.168.0.35:2181?backup=192.168.0.30:2181,192.168.0.11:2181&timeout=60000 #registry=zookeeper://192.168.0.35:2181?backup=192.168.0.30:2181,192.168.0.11:2181&timeout=60000
group=local #group=local
redis.host=127.0.0.1 #redis.host=115.236.59.91
redis.port=6379 #redis.port=7382
#redis.password= #redis.password=
#redis #redis
#redis.host = 192.168.0.39 #redis.host = 192.168.0.39
#redis.port = 7382 #redis.port = 7382
#redis.database = 3 #redis.database = 3
#redis #redis
#redis.host = 192.168.0.39 redis.host = 192.168.0.39
#redis.port = 6379 redis.port = 6379
redis.database = 1 redis.database = 1
#maxIdle #maxIdle
......
/**
 * Hashes the string `a` and returns the digest as 32 lowercase hex characters.
 *
 * The four initial state words and the per-step additive constants used below
 * are the values RFC 1321 defines for MD5, so this appears to be a minified
 * MD5 implementation (NOTE(review): confirm against a reference MD5 if the
 * exact algorithm matters to callers).
 *
 * @param {string} a text to hash; it is first re-encoded to bytes by n()
 * @returns {string} lowercase hexadecimal digest
 */
function h(a) {
// Rotate the 32-bit value a left by b bits.
function b(a, b) {
return a << b | a >>> 32 - b
}
// Add two values as unsigned 32-bit integers: the low 30 bits are summed
// directly and the top two bits (masks 0x80000000 / 0x40000000) are patched
// in by hand so the carry out of bit 31 is discarded.
function c(a, b) {
var c, d, e, f, g;
return e = 2147483648 & a,
f = 2147483648 & b,
c = 1073741824 & a,
d = 1073741824 & b,
g = (1073741823 & a) + (1073741823 & b),
c & d ? 2147483648 ^ g ^ e ^ f : c | d ? 1073741824 & g ? 3221225472 ^ g ^ e ^ f : 1073741824 ^ g ^ e ^ f : g ^ e ^ f
}
// d, e, f, g: the four bitwise mixing functions, one per round.
function d(a, b, c) {
return a & b | ~a & c
}
function e(a, b, c) {
return a & c | b & ~c
}
function f(a, b, c) {
return a ^ b ^ c
}
function g(a, b, c) {
return b ^ (a | ~c)
}
// h, i, j, k: one step of rounds 1-4 respectively. Each adds the mixing
// function result, a message word and a constant into the first argument,
// rotates it left, then adds the second argument.
// Note: this inner h shadows the outer function's name inside the outer body.
function h(a, e, f, g, h, i, j) {
return a = c(a, c(c(d(e, f, g), h), j)),
c(b(a, i), e)
}
function i(a, d, f, g, h, i, j) {
return a = c(a, c(c(e(d, f, g), h), j)),
c(b(a, i), d)
}
function j(a, d, e, g, h, i, j) {
return a = c(a, c(c(f(d, e, g), h), j)),
c(b(a, i), d)
}
function k(a, d, e, f, h, i, j) {
return a = c(a, c(c(g(d, e, f), h), j)),
c(b(a, i), d)
}
// Pack the characters of a into an array of 32-bit words (four char codes per
// word, lowest byte first), append the 0x80 padding marker, and store the
// message length in bits in the last two words. The array length is a
// multiple of 16 words.
function l(a) {
for (var b, c = a.length, d = c + 8, e = (d - d % 64) / 64, f = 16 * (e + 1), g = new Array(f - 1), h = 0, i = 0; c > i;)
b = (i - i % 4) / 4,
h = i % 4 * 8,
g[b] = g[b] | a.charCodeAt(i) << h,
i++;
return b = (i - i % 4) / 4,
h = i % 4 * 8,
g[b] = g[b] | 128 << h,
g[f - 2] = c << 3,
g[f - 1] = c >>> 29,
g
}
// Format a 32-bit word as 8 hex digits, emitting its lowest byte first.
function m(a) {
var b, c, d = "", e = "";
for (c = 0; 3 >= c; c++)
b = a >>> 8 * c & 255,
e = "0" + b.toString(16),
d += e.substr(e.length - 2, 2);
return d
}
// Normalise CRLF to LF, then expand every char code into one, two or three
// byte-valued characters using the UTF-8 bit layout (codes < 128, < 2048, and
// everything else respectively).
function n(a) {
a = a.replace(/\r\n/g, "\n");
for (var b = "", c = 0; c < a.length; c++) {
var d = a.charCodeAt(c);
128 > d ? b += String.fromCharCode(d) : d > 127 && 2048 > d ? (b += String.fromCharCode(d >> 6 | 192),
b += String.fromCharCode(63 & d | 128)) : (b += String.fromCharCode(d >> 12 | 224),
b += String.fromCharCode(d >> 6 & 63 | 128),
b += String.fromCharCode(63 & d | 128))
}
return b
}
// t, u, v, w hold the running state; p, q, r, s keep a copy of it for the
// current block; x is the padded word array; y..B, C..F, G..J and K..N are
// the rotation amounts for rounds 1, 2, 3 and 4.
var o, p, q, r, s, t, u, v, w, x = [], y = 7, z = 12, A = 17, B = 22, C = 5, D = 9, E = 14, F = 20, G = 4,
H = 11, I = 16, J = 23, K = 6, L = 10, M = 15, N = 21;
// Initialise the state, then process the message 16 words at a time. The
// whole loop body is a single comma expression: save the state, run the 64
// steps (16 per round), and add the saved state back in.
for (a = n(a),
x = l(a),
t = 1732584193,
u = 4023233417,
v = 2562383102,
w = 271733878,
o = 0; o < x.length; o += 16)
p = t,
q = u,
r = v,
s = w,
t = h(t, u, v, w, x[o + 0], y, 3614090360),
w = h(w, t, u, v, x[o + 1], z, 3905402710),
v = h(v, w, t, u, x[o + 2], A, 606105819),
u = h(u, v, w, t, x[o + 3], B, 3250441966),
t = h(t, u, v, w, x[o + 4], y, 4118548399),
w = h(w, t, u, v, x[o + 5], z, 1200080426),
v = h(v, w, t, u, x[o + 6], A, 2821735955),
u = h(u, v, w, t, x[o + 7], B, 4249261313),
t = h(t, u, v, w, x[o + 8], y, 1770035416),
w = h(w, t, u, v, x[o + 9], z, 2336552879),
v = h(v, w, t, u, x[o + 10], A, 4294925233),
u = h(u, v, w, t, x[o + 11], B, 2304563134),
t = h(t, u, v, w, x[o + 12], y, 1804603682),
w = h(w, t, u, v, x[o + 13], z, 4254626195),
v = h(v, w, t, u, x[o + 14], A, 2792965006),
u = h(u, v, w, t, x[o + 15], B, 1236535329),
t = i(t, u, v, w, x[o + 1], C, 4129170786),
w = i(w, t, u, v, x[o + 6], D, 3225465664),
v = i(v, w, t, u, x[o + 11], E, 643717713),
u = i(u, v, w, t, x[o + 0], F, 3921069994),
t = i(t, u, v, w, x[o + 5], C, 3593408605),
w = i(w, t, u, v, x[o + 10], D, 38016083),
v = i(v, w, t, u, x[o + 15], E, 3634488961),
u = i(u, v, w, t, x[o + 4], F, 3889429448),
t = i(t, u, v, w, x[o + 9], C, 568446438),
w = i(w, t, u, v, x[o + 14], D, 3275163606),
v = i(v, w, t, u, x[o + 3], E, 4107603335),
u = i(u, v, w, t, x[o + 8], F, 1163531501),
t = i(t, u, v, w, x[o + 13], C, 2850285829),
w = i(w, t, u, v, x[o + 2], D, 4243563512),
v = i(v, w, t, u, x[o + 7], E, 1735328473),
u = i(u, v, w, t, x[o + 12], F, 2368359562),
t = j(t, u, v, w, x[o + 5], G, 4294588738),
w = j(w, t, u, v, x[o + 8], H, 2272392833),
v = j(v, w, t, u, x[o + 11], I, 1839030562),
u = j(u, v, w, t, x[o + 14], J, 4259657740),
t = j(t, u, v, w, x[o + 1], G, 2763975236),
w = j(w, t, u, v, x[o + 4], H, 1272893353),
v = j(v, w, t, u, x[o + 7], I, 4139469664),
u = j(u, v, w, t, x[o + 10], J, 3200236656),
t = j(t, u, v, w, x[o + 13], G, 681279174),
w = j(w, t, u, v, x[o + 0], H, 3936430074),
v = j(v, w, t, u, x[o + 3], I, 3572445317),
u = j(u, v, w, t, x[o + 6], J, 76029189),
t = j(t, u, v, w, x[o + 9], G, 3654602809),
w = j(w, t, u, v, x[o + 12], H, 3873151461),
v = j(v, w, t, u, x[o + 15], I, 530742520),
u = j(u, v, w, t, x[o + 2], J, 3299628645),
t = k(t, u, v, w, x[o + 0], K, 4096336452),
w = k(w, t, u, v, x[o + 7], L, 1126891415),
v = k(v, w, t, u, x[o + 14], M, 2878612391),
u = k(u, v, w, t, x[o + 5], N, 4237533241),
t = k(t, u, v, w, x[o + 12], K, 1700485571),
w = k(w, t, u, v, x[o + 3], L, 2399980690),
v = k(v, w, t, u, x[o + 10], M, 4293915773),
u = k(u, v, w, t, x[o + 1], N, 2240044497),
t = k(t, u, v, w, x[o + 8], K, 1873313359),
w = k(w, t, u, v, x[o + 15], L, 4264355552),
v = k(v, w, t, u, x[o + 6], M, 2734768916),
u = k(u, v, w, t, x[o + 13], N, 1309151649),
t = k(t, u, v, w, x[o + 4], K, 4149444226),
w = k(w, t, u, v, x[o + 11], L, 3174756917),
v = k(v, w, t, u, x[o + 2], M, 718787259),
u = k(u, v, w, t, x[o + 9], N, 3951481745),
t = c(t, p),
u = c(u, q),
v = c(v, r),
w = c(w, s);
// Concatenate the four state words as hex to form the digest.
var O = m(t) + m(u) + m(v) + m(w);
return O.toLowerCase()
}
\ No newline at end of file
...@@ -21,6 +21,6 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; ...@@ -21,6 +21,6 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
@RunWith(SpringJUnit4ClassRunner.class) @RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = @ContextConfiguration(locations =
{ "classpath:applicationContext.xml" }) { "classpath:applicationContext.xml" })
public abstract class ObjectTest extends AbstractJUnit4SpringContextTests { public abstract class ObjectTest extends AbstractJUnit4SpringContextTests
{
} }
...@@ -14,7 +14,6 @@ import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler; ...@@ -14,7 +14,6 @@ import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate; import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.searchhotcrawler.test.KuaiShouHotSearchCrawlerTest; import com.zhiwei.searchhotcrawler.test.KuaiShouHotSearchCrawlerTest;
import com.zhiwei.searchhotcrawler.test.TaoBaoHotSearchCrawlerTest; import com.zhiwei.searchhotcrawler.test.TaoBaoHotSearchCrawlerTest;
import com.zhiwei.searchhotcrawler.util.TaoBaoUtils;
import com.zhiwei.searchhotcrawler.util.TipsUtils; import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import okhttp3.Request; import okhttp3.Request;
...@@ -104,28 +103,27 @@ public class HotSearchTest { ...@@ -104,28 +103,27 @@ public class HotSearchTest {
} }
} }
ad(document); ad(document);
System.out.println(document); System.out.println(document);
} }
private void ad(Document nowDoc) { private void ad(Document nowDoc) {
MongoCollection collection = MongoDBTemplate.getCollection(DBConfig.dbName, DBConfig.searchCacheCollName); MongoCollection collection = MongoDBTemplate.getCollection(DBConfig.dbName, DBConfig.searchCacheCollName);
if (nowDoc.containsKey("topicLead")) { if(nowDoc.containsKey("topicLead")){
nowDoc.put("topicLead", nowDoc.getString("topicLead")); nowDoc.put("topicLead", nowDoc.getString("topicLead"));
} }
if (nowDoc.containsKey("readCount") && nowDoc.containsKey("discussCount")) { if(nowDoc.containsKey("readCount") && nowDoc.containsKey("discussCount")) {
nowDoc.put("readCount", nonNull(nowDoc.get("readCount")) ? Long.valueOf(nowDoc.get("readCount").toString()) : null); nowDoc.put("readCount", nonNull(nowDoc.get("readCount"))?Long.valueOf(nowDoc.get("readCount").toString()):null);
nowDoc.put("discussCount", nonNull(nowDoc.get("discussCount")) ? Long.valueOf(nowDoc.get("discussCount").toString()) : null); nowDoc.put("discussCount", nonNull(nowDoc.get("discussCount"))?Long.valueOf(nowDoc.get("discussCount").toString()):null);
} }
if (nowDoc.containsKey("pictureUrl")) { if (nowDoc.containsKey("pictureUrl")) {
nowDoc.put("pictureUrl", nowDoc.getString("pictureUrl")); nowDoc.put("pictureUrl",nowDoc.getString("pictureUrl"));
} }
if (nowDoc.containsKey("downtext")) { if (nowDoc.containsKey("downtext")) {
nowDoc.put("downtext", nowDoc.getString("downtext")); nowDoc.put("downtext",nowDoc.getString("downtext"));
} }
collection.insertOne(nowDoc); collection.insertOne(nowDoc);
} }
/** /**
* 测试淘宝热搜采集 * 测试淘宝热搜采集
*/ */
...@@ -156,21 +154,10 @@ public class HotSearchTest { ...@@ -156,21 +154,10 @@ public class HotSearchTest {
List<HotSearchList> hotSearchLists = BaiDuHotSearchCrawler.baiduHotSearch(new Date()); List<HotSearchList> hotSearchLists = BaiDuHotSearchCrawler.baiduHotSearch(new Date());
System.out.println(hotSearchLists); System.out.println(hotSearchLists);
System.out.println(hotSearchLists.size()); System.out.println(hotSearchLists.size());
}
/**
* 测试解析淘宝js文件
*/
@Test
public void taoBaoJSTest() throws IOException {
long time = new Date().getTime();
String signs="undefined&1625624820156&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
// https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=1624930984092&sign=acf994dbcee6c0c1d7a8a566a6b8ff0a&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D
String s = TaoBaoUtils.parsJSFunction(signs);
System.out.println(s);
} }
} }
package leiliangliangTest;
import com.zhiwei.searchhotcrawler.util.TaoBaoUtils;
import lombok.extern.log4j.Log4j2;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import java.util.HashMap;
/**
 * Scratch test class that prints two hand-written variants of the same JSON
 * payload string so their escaping can be compared by eye. It runs under the
 * Spring JUnit4 runner with {@code applicationContext.xml} loaded.
 */
@Log4j2
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = {"classpath:applicationContext.xml"})
public class StrTest {

    /**
     * Prints the payload whose inner quotes are not backslash-escaped, a
     * separator line, and then the payload whose inner quotes are.
     */
    @Test
    public void test() {
        final String plainPayload = "{\"appId\":\"10211\",\"params\":\"{\"multi_hintq_show\":\"on\",\"src\":\"c2c\",\"area\":\"active_page\",\"sversion\":\"7.5\",\"bangdan_src\":\"list\"}\"}";
        final String escapedPayload = "{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";

        System.out.println(plainPayload);
        System.out.println("-----");
        System.out.println(escapedPayload);
    }

    /** Placeholder test with an empty body. */
    @Test
    public void test1() {
    }
}
function h(a) {
function b(a, b) {
return a << b | a >>> 32 - b
}
function c(a, b) {
var c, d, e, f, g;
return e = 2147483648 & a,
f = 2147483648 & b,
c = 1073741824 & a,
d = 1073741824 & b,
g = (1073741823 & a) + (1073741823 & b),
c & d ? 2147483648 ^ g ^ e ^ f : c | d ? 1073741824 & g ? 3221225472 ^ g ^ e ^ f : 1073741824 ^ g ^ e ^ f : g ^ e ^ f
}
function d(a, b, c) {
return a & b | ~a & c
}
function e(a, b, c) {
return a & c | b & ~c
}
function f(a, b, c) {
return a ^ b ^ c
}
function g(a, b, c) {
return b ^ (a | ~c)
}
function h(a, e, f, g, h, i, j) {
return a = c(a, c(c(d(e, f, g), h), j)),
c(b(a, i), e)
}
function i(a, d, f, g, h, i, j) {
return a = c(a, c(c(e(d, f, g), h), j)),
c(b(a, i), d)
}
function j(a, d, e, g, h, i, j) {
return a = c(a, c(c(f(d, e, g), h), j)),
c(b(a, i), d)
}
function k(a, d, e, f, h, i, j) {
return a = c(a, c(c(g(d, e, f), h), j)),
c(b(a, i), d)
}
function l(a) {
for (var b, c = a.length, d = c + 8, e = (d - d % 64) / 64, f = 16 * (e + 1), g = new Array(f - 1), h = 0, i = 0; c > i;)
b = (i - i % 4) / 4,
h = i % 4 * 8,
g[b] = g[b] | a.charCodeAt(i) << h,
i++;
return b = (i - i % 4) / 4,
h = i % 4 * 8,
g[b] = g[b] | 128 << h,
g[f - 2] = c << 3,
g[f - 1] = c >>> 29,
g
}
function m(a) {
var b, c, d = "", e = "";
for (c = 0; 3 >= c; c++)
b = a >>> 8 * c & 255,
e = "0" + b.toString(16),
d += e.substr(e.length - 2, 2);
return d
}
function n(a) {
a = a.replace(/\r\n/g, "\n");
for (var b = "", c = 0; c < a.length; c++) {
var d = a.charCodeAt(c);
128 > d ? b += String.fromCharCode(d) : d > 127 && 2048 > d ? (b += String.fromCharCode(d >> 6 | 192),
b += String.fromCharCode(63 & d | 128)) : (b += String.fromCharCode(d >> 12 | 224),
b += String.fromCharCode(d >> 6 & 63 | 128),
b += String.fromCharCode(63 & d | 128))
}
return b
}
var o, p, q, r, s, t, u, v, w, x = [], y = 7, z = 12, A = 17, B = 22, C = 5, D = 9, E = 14, F = 20, G = 4,
H = 11, I = 16, J = 23, K = 6, L = 10, M = 15, N = 21;
for (a = n(a),
x = l(a),
t = 1732584193,
u = 4023233417,
v = 2562383102,
w = 271733878,
o = 0; o < x.length; o += 16)
p = t,
q = u,
r = v,
s = w,
t = h(t, u, v, w, x[o + 0], y, 3614090360),
w = h(w, t, u, v, x[o + 1], z, 3905402710),
v = h(v, w, t, u, x[o + 2], A, 606105819),
u = h(u, v, w, t, x[o + 3], B, 3250441966),
t = h(t, u, v, w, x[o + 4], y, 4118548399),
w = h(w, t, u, v, x[o + 5], z, 1200080426),
v = h(v, w, t, u, x[o + 6], A, 2821735955),
u = h(u, v, w, t, x[o + 7], B, 4249261313),
t = h(t, u, v, w, x[o + 8], y, 1770035416),
w = h(w, t, u, v, x[o + 9], z, 2336552879),
v = h(v, w, t, u, x[o + 10], A, 4294925233),
u = h(u, v, w, t, x[o + 11], B, 2304563134),
t = h(t, u, v, w, x[o + 12], y, 1804603682),
w = h(w, t, u, v, x[o + 13], z, 4254626195),
v = h(v, w, t, u, x[o + 14], A, 2792965006),
u = h(u, v, w, t, x[o + 15], B, 1236535329),
t = i(t, u, v, w, x[o + 1], C, 4129170786),
w = i(w, t, u, v, x[o + 6], D, 3225465664),
v = i(v, w, t, u, x[o + 11], E, 643717713),
u = i(u, v, w, t, x[o + 0], F, 3921069994),
t = i(t, u, v, w, x[o + 5], C, 3593408605),
w = i(w, t, u, v, x[o + 10], D, 38016083),
v = i(v, w, t, u, x[o + 15], E, 3634488961),
u = i(u, v, w, t, x[o + 4], F, 3889429448),
t = i(t, u, v, w, x[o + 9], C, 568446438),
w = i(w, t, u, v, x[o + 14], D, 3275163606),
v = i(v, w, t, u, x[o + 3], E, 4107603335),
u = i(u, v, w, t, x[o + 8], F, 1163531501),
t = i(t, u, v, w, x[o + 13], C, 2850285829),
w = i(w, t, u, v, x[o + 2], D, 4243563512),
v = i(v, w, t, u, x[o + 7], E, 1735328473),
u = i(u, v, w, t, x[o + 12], F, 2368359562),
t = j(t, u, v, w, x[o + 5], G, 4294588738),
w = j(w, t, u, v, x[o + 8], H, 2272392833),
v = j(v, w, t, u, x[o + 11], I, 1839030562),
u = j(u, v, w, t, x[o + 14], J, 4259657740),
t = j(t, u, v, w, x[o + 1], G, 2763975236),
w = j(w, t, u, v, x[o + 4], H, 1272893353),
v = j(v, w, t, u, x[o + 7], I, 4139469664),
u = j(u, v, w, t, x[o + 10], J, 3200236656),
t = j(t, u, v, w, x[o + 13], G, 681279174),
w = j(w, t, u, v, x[o + 0], H, 3936430074),
v = j(v, w, t, u, x[o + 3], I, 3572445317),
u = j(u, v, w, t, x[o + 6], J, 76029189),
t = j(t, u, v, w, x[o + 9], G, 3654602809),
w = j(w, t, u, v, x[o + 12], H, 3873151461),
v = j(v, w, t, u, x[o + 15], I, 530742520),
u = j(u, v, w, t, x[o + 2], J, 3299628645),
t = k(t, u, v, w, x[o + 0], K, 4096336452),
w = k(w, t, u, v, x[o + 7], L, 1126891415),
v = k(v, w, t, u, x[o + 14], M, 2878612391),
u = k(u, v, w, t, x[o + 5], N, 4237533241),
t = k(t, u, v, w, x[o + 12], K, 1700485571),
w = k(w, t, u, v, x[o + 3], L, 2399980690),
v = k(v, w, t, u, x[o + 10], M, 4293915773),
u = k(u, v, w, t, x[o + 1], N, 2240044497),
t = k(t, u, v, w, x[o + 8], K, 1873313359),
w = k(w, t, u, v, x[o + 15], L, 4264355552),
v = k(v, w, t, u, x[o + 6], M, 2734768916),
u = k(u, v, w, t, x[o + 13], N, 1309151649),
t = k(t, u, v, w, x[o + 4], K, 4149444226),
w = k(w, t, u, v, x[o + 11], L, 3174756917),
v = k(v, w, t, u, x[o + 2], M, 718787259),
u = k(u, v, w, t, x[o + 9], N, 3951481745),
t = c(t, p),
u = c(u, q),
v = c(v, r),
w = c(w, s);
var O = m(t) + m(u) + m(v) + m(w);
return O.toLowerCase()
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment