Commit f986b5c8 by leiliangliang

Merge branch 'working' of D:\IdeaProjects\searchhotcrawler with conflicts.

parent 241bc05a
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" version="4">
<component name="FacetManager">
<facet type="Spring" name="Spring">
<configuration />
......@@ -10,8 +10,8 @@
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
......@@ -77,5 +77,8 @@
<orderEntry type="library" name="Maven: org.apache.commons:commons-pool2:2.4.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.12" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5.6" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.10" level="project" />
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.10" level="project" />
</component>
</module>
\ No newline at end of file
package com.zhiwei.searchhotcrawler.bean;
/**
 * Identifies the hot-search / trending list that a crawled record belongs to.
 *
 * <p>The constant names themselves are the labels: elsewhere in the project
 * {@code HotSearchType.X.name()} is passed along as the record's type string, so
 * constants must not be renamed. New sources are appended at the end so existing
 * ordinals stay stable. Each constant is declared exactly once (a repeated name
 * is a compile error).
 */
public enum HotSearchType {
    百度热搜,            // Baidu hot search
    微博热搜,            // Weibo hot search
    知乎热搜,            // Zhihu hot search
    抖音热搜,            // Douyin hot search
    搜狗微信热搜,        // Sogou WeChat hot search
    搜狗微信客户端热搜,  // Sogou WeChat client hot search
    微博话题,            // Weibo topics
    今日头条热搜,        // Toutiao hot search
    知乎热搜榜单,        // Zhihu hot-search ranking list
    腾讯新闻,            // Tencent News
    新浪热榜,            // Sina hot list
    新浪热点,            // Sina hot topics
    搜狐话题,            // Sohu topics
    凤凰新闻热榜,        // Phoenix (ifeng) News hot list
    凤凰新闻热搜,        // Phoenix (ifeng) News hot search
    网易热榜,            // NetEase hot list
    网易跟帖热议,        // NetEase comment-thread hot discussions
    微博预热榜,          // Weibo warm-up list
    腾讯较真榜,          // Tencent "Jiaozhen" list
    脉脉热榜,            // Maimai hot list
    B站排行榜,           // Bilibili ranking
    B站热搜,             // Bilibili hot search
    人气榜36,            // popularity list "36" (presumably 36Kr - confirm)
    虎嗅热文推荐,        // Huxiu recommended hot articles
    快手热榜,            // Kuaishou hot list
    淘宝热搜,            // Taobao hot search
}
......@@ -53,6 +53,11 @@ public class WeiBoUser implements Serializable {
* 头像地址
*/
private String profileImageUrl;
/**
* 类型
*/
private String type;
public WeiBoUser() {
}
......
......@@ -14,8 +14,6 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
......
......@@ -208,6 +208,16 @@ public class HotSearchCacheDAO {
}
if("微博热搜".equals(type)){
nowDoc = WeiboHotSearchCrawler.weiboUpdate(nowDoc);
Document documentPC = WeiboHotSearchCrawler.weiboUpdatePC(nowDoc);
if (documentPC.containsKey("分类")) {
nowDoc.put("classify",documentPC.get("分类"));
}
if (documentPC.containsKey("地区")) {
nowDoc.put("region", documentPC.get("地区"));
}
if (documentPC.containsKey("标签")) {
nowDoc.put("label", documentPC.get("标签"));
}
if(nowDoc.containsKey("topicLead")){
nowDoc.put("topicLead", nowDoc.getString("topicLead"));
}
......
......@@ -42,8 +42,15 @@ public class WeiBoUserDao {
document.put("userName",weiBoUser.getUserName());
document.put("topic",weiBoUser.getTopic());
document.put("time",weiBoUser.getTime());
document.put("followerCount",weiBoUser.getFollowerCount());
document.put("profileImageUrl",weiBoUser.getProfileImageUrl());
if (Objects.nonNull(weiBoUser.getType())){
document.put("type",weiBoUser.getType());
}
if (Objects.nonNull(weiBoUser.getFollowerCount())){
document.put("followerCount",weiBoUser.getFollowerCount());
}
if (Objects.nonNull(weiBoUser.getProfileImageUrl())){
document.put("profileImageUrl",weiBoUser.getProfileImageUrl());
}
try {
mongoCollection.insertOne(document);
} catch (Exception e) {
......
......@@ -2,17 +2,12 @@ package com.zhiwei.searchhotcrawler.run;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.cache.CacheListener;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.*;
import com.zhiwei.tools.tools.ZhiWeiTools;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
public class HotSearchRun {
......
......@@ -9,12 +9,6 @@ import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
......
......@@ -4,6 +4,9 @@ package com.zhiwei.searchhotcrawler.test;
import com.zhiwei.crawler.core.proxy.ProxyFactory;
import com.zhiwei.proxy.config.SimpleConfig;
import com.zhiwei.searchhotcrawler.config.ProxyConfig;
import com.zhiwei.searchhotcrawler.timer.BaiduHotSearchRun;
import com.zhiwei.searchhotcrawler.timer.WeiboHotSearchRun;
import java.text.ParseException;
public class HotSearchRunTest {
......@@ -17,5 +20,7 @@ public class HotSearchRunTest {
// new WeiboHotSearchRun().start();
//快手热榜开始采集
// new KuaiShouHotSearchRun().start();
//百度热搜
new BaiduHotSearchRun().run();
}
}
package com.zhiwei.searchhotcrawler.test;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import com.zhiwei.searchhotcrawler.bean.HotSearchType;
import com.zhiwei.searchhotcrawler.util.TaoBaoUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
import okhttp3.Response;
import java.io.IOException;
import java.time.Duration;
import java.util.*;
/**
* @author ll
* @ClassName:TaoBaoHotSearchCrawler
* @Description:
* @date 2021年6月18日 下午16:33:31
*/
@Log4j2
public class TaoBaoHotSearchCrawlerTest {
private static HttpBoot httpBoot = new HttpBoot.Builder().throwException(false).retryTimes(3).connectTimeout(Duration.ofSeconds(60)).build();
public static List<HotSearchList> taoBaoHotSearch(Date date) {
long time = new Date().getTime();
String signs="undefined&"+time+"&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
//String signs="undefined&1624862377708&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
String sign = TaoBaoUtils.parsJSFunction(signs);
//String url = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t="+time+"&sign="+sign+"&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
String url = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=1624929605260&sign=ada01d783dc9772d2f84124d293bac26&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
Map<String, String> headerMap = new HashMap<>();
headerMap.put("cookie", "_m_h5_tk=975fb07b671f12a689d4ec36cf2e9047_1624937028814; _m_h5_tk_enc=ffb83d60b283eee5992d5e32429c2597;");
String htmlBody = null;
Request request = RequestUtils.wrapGet(url, headerMap);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
htmlBody = response.body().string();
System.out.println(htmlBody);
} catch (Exception e) {
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", e);
}
if (htmlBody != null && htmlBody.contains("data")) {
return ansysData(htmlBody, date);
} else {
log.info("解析淘宝热搜时出现解析错误,页面结构有问题");
}
return Collections.emptyList();
}
private static List<HotSearchList> ansysData(String htmlBody, Date date) {
List<HotSearchList> list = new ArrayList<>();
try {
String sub = htmlBody.substring(htmlBody.indexOf("searchdoor"), htmlBody.indexOf("searchdoorFrom"));
String substring = sub.substring(sub.indexOf("showReminder") + 27, sub.indexOf("multi_bangdan_flag") - 2).trim();
JSONArray objects = JSONObject.parseArray(substring);
JSONArray jsonArray = objects.getJSONObject(0).getJSONObject("result").getJSONArray("text");
for (int i = 0; i < jsonArray.size(); i++) {
try {
JSONObject jsonObject = jsonArray.getJSONObject(i);
String name = jsonObject.getString("showtext");
String showmark = jsonObject.getString("showmark");
Integer rank = Integer.valueOf(showmark);
String searchtext = jsonObject.getString("searchtext");
String url = "https://s.m.taobao.com/h5?q=" + searchtext;
String tagText = jsonObject.getString("tagText");
Long count = 0L;
HotSearchList hotSearchList = new HotSearchList(url,name,count,true,rank, HotSearchType.淘宝热搜.name(),tagText,date);
list.add(hotSearchList);
} catch (Exception e) {
log.error("解析淘宝热搜时出现解析错误",e);
}
}
System.out.println(jsonArray.size());
} catch (Exception e) {
log.error("解析淘宝热搜时出现解析错误,数据不是json结构", e);
}
return list;
}
}
......@@ -56,18 +56,18 @@ public class BaiduHotSearchRun extends Thread{
// }
TipsUtils.addHotList("百度热搜",baiduList);
log.info("百度风云榜采集结束........");
ZhiWeiTools.sleep(2000L);
log.info("搜狗微信采集开始........");
List<HotSearchList> sougouList = SougoHotSearchCrawler.sougoHotSearch(new Date());
log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(sougouList != null ? sougouList.size() : 0));
TipsUtils.addHotList("搜狗微信热搜",sougouList);
log.info("搜狗微信采集结束........");
ZhiWeiTools.sleep(2000L);
log.info("知乎话题采集开始........");
List<HotSearchList> zhihuList = ZhihuHotSearchCrawler.getMobileZhihuHotList(new Date());
log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuList != null ? zhihuList.size() : 0));
TipsUtils.addHotList("知乎热搜",zhihuList);
log.info("知乎话题采集结束........");
// ZhiWeiTools.sleep(2000L);
// log.info("搜狗微信采集开始........");
// List<HotSearchList> sougouList = SougoHotSearchCrawler.sougoHotSearch(new Date());
// log.info("{}, 此轮采集到的数据量为:{}", new Date(), Integer.valueOf(sougouList != null ? sougouList.size() : 0));
// TipsUtils.addHotList("搜狗微信热搜",sougouList);
// log.info("搜狗微信采集结束........");
// ZhiWeiTools.sleep(2000L);
// log.info("知乎话题采集开始........");
// List<HotSearchList> zhihuList = ZhihuHotSearchCrawler.getMobileZhihuHotList(new Date());
// log.info("{}, 知乎此轮采集到的数据量为:{}", new Date(), Integer.valueOf(zhihuList != null ? zhihuList.size() : 0));
// TipsUtils.addHotList("知乎热搜",zhihuList);
// log.info("知乎话题采集结束........");
}
}
\ No newline at end of file
package com.zhiwei.searchhotcrawler.timer;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2;
......
package com.zhiwei.searchhotcrawler.util;
import lombok.extern.log4j.Log4j2;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
@Log4j2
public class TaoBaoUtils {
public static String parsJSFunction(String sign) {
String scriptResult ="";//脚本的执行结果
ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");//1.得到脚本引擎
//ScriptEngine engine = new ScriptEngineManager().getEngineByName("nashorn");//1.得到脚本引擎
try {
//2.引擎读取 脚本字符串
//engine.eval(new StringReader(routeScript));
//如果js存在文件里
Resource aesJs = new ClassPathResource("taobao.js");
engine.eval(new FileReader(aesJs.getFile()));
//3.将引擎转换为Invocable,这样才可以掉用js的方法
Invocable invocable = (Invocable) engine;
//4.使用 invocable.invokeFunction掉用js脚本里的方法,第一個参数为方法名,后面的参数为被调用的js方法的入参
scriptResult = (String) invocable.invokeFunction("h", sign);
}catch(Exception e){
log.error("Error executing script: ",e.getMessage());
}
return scriptResult;
}
}
......@@ -4,11 +4,11 @@
#线上old
#mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@192.168.0.101:30000,192.168.0.106:30000,192.168.0.108:30000/hot_search_list?authSource=admin&authMechanism=SCRAM-SHA-1
#线上new
mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@192.168.0.150:27017,192.168.0.151:27017,192.168.0.152:27017/hot_search_list?authSource=admin&authMechanism=SCRAM-SHA-1
#mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@192.168.0.150:27017,192.168.0.151:27017,192.168.0.152:27017/hot_search_list?authSource=admin&authMechanism=SCRAM-SHA-1
#local
#mongoLocalUri=mongodb://192.168.0.81:27017/istarshine_data
#service
#mongoUri=mongodb://127.0.0.1:27017/
mongoUri=mongodb://127.0.0.1:27017/
#备用库
#mongoUri=mongodb://202.107.192.94:37017/hot_search_list
#mongoUri=mongodb://192.168.0.66:27017/
......
registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182&timeout=60000
group=hangzhou
#registry=zookeeper://192.168.0.203:2182?backup=192.168.0.104:2182,192.168.0.105:2182&timeout=60000
#group=hangzhou
########################################################
#registry=zookeeper://192.168.0.35:2181?backup=192.168.0.30:2181,192.168.0.11:2181&timeout=60000
#group=local
registry=zookeeper://192.168.0.35:2181?backup=192.168.0.30:2181,192.168.0.11:2181&timeout=60000
group=local
#redis.host=115.236.59.91
#redis.port=7382
redis.host=127.0.0.1
redis.port=6379
#redis.password=
#redis
#redis.host = 192.168.0.39
#redis.port = 7382
#redis.database = 3
#redis
redis.host = 192.168.0.39
redis.port = 6379
#redis.host = 192.168.0.39
#redis.port = 6379
redis.database = 1
#maxIdle
......
/**
 * Computes a 128-bit digest of the string `a` and returns it as 32 lowercase
 * hexadecimal characters.
 *
 * The initial state words and the per-step constants match the MD5 algorithm
 * (RFC 1321), so this appears to be a minified MD5 implementation.
 * The input is first UTF-8 encoded (helper `n`), then padded into 32-bit words
 * (helper `l`), then processed 16 words at a time.
 */
function h(a) {
// Rotates the 32-bit value `a` left by `b` bits.
function b(a, b) {
return a << b | a >>> 32 - b
}
// Adds two 32-bit values with wrap-around: the low 30 bits are summed normally and
// the top two bits are patched in with XOR so the result stays within 32 bits.
function c(a, b) {
var c, d, e, f, g;
return e = 2147483648 & a,
f = 2147483648 & b,
c = 1073741824 & a,
d = 1073741824 & b,
g = (1073741823 & a) + (1073741823 & b),
c & d ? 2147483648 ^ g ^ e ^ f : c | d ? 1073741824 & g ? 3221225472 ^ g ^ e ^ f : 1073741824 ^ g ^ e ^ f : g ^ e ^ f
}
// Bitwise mixing function used by the first group of 16 steps (MD5 "F").
function d(a, b, c) {
return a & b | ~a & c
}
// Bitwise mixing function used by the second group of 16 steps (MD5 "G").
function e(a, b, c) {
return a & c | b & ~c
}
// Bitwise mixing function used by the third group of 16 steps (MD5 "H").
function f(a, b, c) {
return a ^ b ^ c
}
// Bitwise mixing function used by the fourth group of 16 steps (MD5 "I").
function g(a, b, c) {
return b ^ (a | ~c)
}
// Step of the first group: a = e + rotl(a + d(e, f, g) + h + j, i).
// Note: this inner `h` shadows the outer function's name inside its body.
function h(a, e, f, g, h, i, j) {
return a = c(a, c(c(d(e, f, g), h), j)),
c(b(a, i), e)
}
// Step of the second group: same shape as above, using mixing function `e`.
function i(a, d, f, g, h, i, j) {
return a = c(a, c(c(e(d, f, g), h), j)),
c(b(a, i), d)
}
// Step of the third group: same shape as above, using mixing function `f`.
function j(a, d, e, g, h, i, j) {
return a = c(a, c(c(f(d, e, g), h), j)),
c(b(a, i), d)
}
// Step of the fourth group: same shape as above, using mixing function `g`.
function k(a, d, e, f, h, i, j) {
return a = c(a, c(c(g(d, e, f), h), j)),
c(b(a, i), d)
}
// Packs the characters of `a` into an array of 32-bit words (lowest byte first),
// appends the 0x80 padding byte, and stores the message length in bits in the
// last two words. The array length is a multiple of 16 words.
function l(a) {
for (var b, c = a.length, d = c + 8, e = (d - d % 64) / 64, f = 16 * (e + 1), g = new Array(f - 1), h = 0, i = 0; c > i;)
b = (i - i % 4) / 4,
h = i % 4 * 8,
g[b] = g[b] | a.charCodeAt(i) << h,
i++;
return b = (i - i % 4) / 4,
h = i % 4 * 8,
g[b] = g[b] | 128 << h,
g[f - 2] = c << 3,
g[f - 1] = c >>> 29,
g
}
// Formats the 32-bit word `a` as 8 hex characters, lowest byte first,
// zero-padding each byte to two digits.
function m(a) {
var b, c, d = "", e = "";
for (c = 0; 3 >= c; c++)
b = a >>> 8 * c & 255,
e = "0" + b.toString(16),
d += e.substr(e.length - 2, 2);
return d
}
// Replaces "\r\n" with "\n" and then UTF-8 encodes the string: each UTF-16 code
// unit becomes one, two or three characters holding the encoded byte values.
function n(a) {
a = a.replace(/\r\n/g, "\n");
for (var b = "", c = 0; c < a.length; c++) {
var d = a.charCodeAt(c);
128 > d ? b += String.fromCharCode(d) : d > 127 && 2048 > d ? (b += String.fromCharCode(d >> 6 | 192),
b += String.fromCharCode(63 & d | 128)) : (b += String.fromCharCode(d >> 12 | 224),
b += String.fromCharCode(d >> 6 & 63 | 128),
b += String.fromCharCode(63 & d | 128))
}
return b
}
// t, u, v, w hold the running state; p, q, r, s hold a copy of it for the current
// 16-word block; x is the padded word array; y..N are the rotation amounts used by
// the four groups of steps.
var o, p, q, r, s, t, u, v, w, x = [], y = 7, z = 12, A = 17, B = 22, C = 5, D = 9, E = 14, F = 20, G = 4,
H = 11, I = 16, J = 23, K = 6, L = 10, M = 15, N = 21;
// Main loop: the initialiser encodes and pads the input and sets the initial state;
// each iteration saves the state, runs 64 steps over x[o..o+15], then adds the saved
// state back in. The whole body is one comma expression, so statement order matters.
for (a = n(a),
x = l(a),
t = 1732584193,
u = 4023233417,
v = 2562383102,
w = 271733878,
o = 0; o < x.length; o += 16)
p = t,
q = u,
r = v,
s = w,
t = h(t, u, v, w, x[o + 0], y, 3614090360),
w = h(w, t, u, v, x[o + 1], z, 3905402710),
v = h(v, w, t, u, x[o + 2], A, 606105819),
u = h(u, v, w, t, x[o + 3], B, 3250441966),
t = h(t, u, v, w, x[o + 4], y, 4118548399),
w = h(w, t, u, v, x[o + 5], z, 1200080426),
v = h(v, w, t, u, x[o + 6], A, 2821735955),
u = h(u, v, w, t, x[o + 7], B, 4249261313),
t = h(t, u, v, w, x[o + 8], y, 1770035416),
w = h(w, t, u, v, x[o + 9], z, 2336552879),
v = h(v, w, t, u, x[o + 10], A, 4294925233),
u = h(u, v, w, t, x[o + 11], B, 2304563134),
t = h(t, u, v, w, x[o + 12], y, 1804603682),
w = h(w, t, u, v, x[o + 13], z, 4254626195),
v = h(v, w, t, u, x[o + 14], A, 2792965006),
u = h(u, v, w, t, x[o + 15], B, 1236535329),
t = i(t, u, v, w, x[o + 1], C, 4129170786),
w = i(w, t, u, v, x[o + 6], D, 3225465664),
v = i(v, w, t, u, x[o + 11], E, 643717713),
u = i(u, v, w, t, x[o + 0], F, 3921069994),
t = i(t, u, v, w, x[o + 5], C, 3593408605),
w = i(w, t, u, v, x[o + 10], D, 38016083),
v = i(v, w, t, u, x[o + 15], E, 3634488961),
u = i(u, v, w, t, x[o + 4], F, 3889429448),
t = i(t, u, v, w, x[o + 9], C, 568446438),
w = i(w, t, u, v, x[o + 14], D, 3275163606),
v = i(v, w, t, u, x[o + 3], E, 4107603335),
u = i(u, v, w, t, x[o + 8], F, 1163531501),
t = i(t, u, v, w, x[o + 13], C, 2850285829),
w = i(w, t, u, v, x[o + 2], D, 4243563512),
v = i(v, w, t, u, x[o + 7], E, 1735328473),
u = i(u, v, w, t, x[o + 12], F, 2368359562),
t = j(t, u, v, w, x[o + 5], G, 4294588738),
w = j(w, t, u, v, x[o + 8], H, 2272392833),
v = j(v, w, t, u, x[o + 11], I, 1839030562),
u = j(u, v, w, t, x[o + 14], J, 4259657740),
t = j(t, u, v, w, x[o + 1], G, 2763975236),
w = j(w, t, u, v, x[o + 4], H, 1272893353),
v = j(v, w, t, u, x[o + 7], I, 4139469664),
u = j(u, v, w, t, x[o + 10], J, 3200236656),
t = j(t, u, v, w, x[o + 13], G, 681279174),
w = j(w, t, u, v, x[o + 0], H, 3936430074),
v = j(v, w, t, u, x[o + 3], I, 3572445317),
u = j(u, v, w, t, x[o + 6], J, 76029189),
t = j(t, u, v, w, x[o + 9], G, 3654602809),
w = j(w, t, u, v, x[o + 12], H, 3873151461),
v = j(v, w, t, u, x[o + 15], I, 530742520),
u = j(u, v, w, t, x[o + 2], J, 3299628645),
t = k(t, u, v, w, x[o + 0], K, 4096336452),
w = k(w, t, u, v, x[o + 7], L, 1126891415),
v = k(v, w, t, u, x[o + 14], M, 2878612391),
u = k(u, v, w, t, x[o + 5], N, 4237533241),
t = k(t, u, v, w, x[o + 12], K, 1700485571),
w = k(w, t, u, v, x[o + 3], L, 2399980690),
v = k(v, w, t, u, x[o + 10], M, 4293915773),
u = k(u, v, w, t, x[o + 1], N, 2240044497),
t = k(t, u, v, w, x[o + 8], K, 1873313359),
w = k(w, t, u, v, x[o + 15], L, 4264355552),
v = k(v, w, t, u, x[o + 6], M, 2734768916),
u = k(u, v, w, t, x[o + 13], N, 1309151649),
t = k(t, u, v, w, x[o + 4], K, 4149444226),
w = k(w, t, u, v, x[o + 11], L, 3174756917),
v = k(v, w, t, u, x[o + 2], M, 718787259),
u = k(u, v, w, t, x[o + 9], N, 3951481745),
t = c(t, p),
u = c(u, q),
v = c(v, r),
w = c(w, s);
// Concatenate the four state words as hex to form the 32-character digest.
var O = m(t) + m(u) + m(v) + m(w);
return O.toLowerCase()
}
\ No newline at end of file
......@@ -21,6 +21,6 @@ import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations =
{ "classpath:applicationContext.xml" })
public abstract class ObjectTest extends AbstractJUnit4SpringContextTests
{
public abstract class ObjectTest extends AbstractJUnit4SpringContextTests {
}
......@@ -14,6 +14,7 @@ import com.zhiwei.searchhotcrawler.crawler.BaiDuHotSearchCrawler;
import com.zhiwei.searchhotcrawler.dbtemplate.MongoDBTemplate;
import com.zhiwei.searchhotcrawler.test.KuaiShouHotSearchCrawlerTest;
import com.zhiwei.searchhotcrawler.test.TaoBaoHotSearchCrawlerTest;
import com.zhiwei.searchhotcrawler.util.TaoBaoUtils;
import com.zhiwei.searchhotcrawler.util.TipsUtils;
import lombok.extern.log4j.Log4j2;
import okhttp3.Request;
......@@ -103,27 +104,28 @@ public class HotSearchTest {
}
}
ad(document);
ad(document);
System.out.println(document);
}
private void ad(Document nowDoc) {
MongoCollection collection = MongoDBTemplate.getCollection(DBConfig.dbName, DBConfig.searchCacheCollName);
if(nowDoc.containsKey("topicLead")){
MongoCollection collection = MongoDBTemplate.getCollection(DBConfig.dbName, DBConfig.searchCacheCollName);
if (nowDoc.containsKey("topicLead")) {
nowDoc.put("topicLead", nowDoc.getString("topicLead"));
}
if(nowDoc.containsKey("readCount") && nowDoc.containsKey("discussCount")) {
nowDoc.put("readCount", nonNull(nowDoc.get("readCount"))?Long.valueOf(nowDoc.get("readCount").toString()):null);
nowDoc.put("discussCount", nonNull(nowDoc.get("discussCount"))?Long.valueOf(nowDoc.get("discussCount").toString()):null);
if (nowDoc.containsKey("readCount") && nowDoc.containsKey("discussCount")) {
nowDoc.put("readCount", nonNull(nowDoc.get("readCount")) ? Long.valueOf(nowDoc.get("readCount").toString()) : null);
nowDoc.put("discussCount", nonNull(nowDoc.get("discussCount")) ? Long.valueOf(nowDoc.get("discussCount").toString()) : null);
}
if (nowDoc.containsKey("pictureUrl")) {
nowDoc.put("pictureUrl",nowDoc.getString("pictureUrl"));
nowDoc.put("pictureUrl", nowDoc.getString("pictureUrl"));
}
if (nowDoc.containsKey("downtext")) {
nowDoc.put("downtext",nowDoc.getString("downtext"));
nowDoc.put("downtext", nowDoc.getString("downtext"));
}
collection.insertOne(nowDoc);
}
/**
* 测试淘宝热搜采集
*/
......@@ -154,9 +156,20 @@ public class HotSearchTest {
List<HotSearchList> hotSearchLists = BaiDuHotSearchCrawler.baiduHotSearch(new Date());
System.out.println(hotSearchLists);
System.out.println(hotSearchLists.size());
}
/**
 * Smoke test for evaluating the Taobao JavaScript file: passes a fixed payload to
 * {@code TaoBaoUtils.parsJSFunction} and prints the result.
 */
@Test
public void taoBaoJSTest() throws IOException {
    // The timestamp inside the payload is fixed so the printed value is the same on every run.
    String signs = "undefined&1625624820156&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
    // Captured request kept for reference (it carries a different t/sign pair than the payload above):
    // https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=1624930984092&sign=acf994dbcee6c0c1d7a8a566a6b8ff0a&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D
    String s = TaoBaoUtils.parsJSFunction(signs);
    System.out.println(s);
}
}
......
package leiliangliangTest;
import com.zhiwei.searchhotcrawler.util.TaoBaoUtils;
import lombok.extern.log4j.Log4j2;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import java.util.HashMap;
/**
 * Scratch test that prints two spellings of the same request payload so their
 * quoting can be compared by eye.
 */
@Log4j2
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = {"classpath:applicationContext.xml"})
public class StrTest {

    /**
     * Prints the payload with plainly quoted inner JSON, a separator line, and then the
     * payload whose inner quotes are backslash-escaped.
     */
    @Test
    public void test() {
        final String plainQuoted = "{\"appId\":\"10211\",\"params\":\"{\"multi_hintq_show\":\"on\",\"src\":\"c2c\",\"area\":\"active_page\",\"sversion\":\"7.5\",\"bangdan_src\":\"list\"}\"}";
        final String backslashEscaped = "{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
        for (String line : new String[] {plainQuoted, "-----", backslashEscaped}) {
            System.out.println(line);
        }
    }

    /** Empty placeholder test. */
    @Test
    public void test1() {
    }
}
/**
 * Computes a 128-bit digest of the string `a` and returns it as 32 lowercase
 * hexadecimal characters.
 *
 * The initial state words and the per-step constants match the MD5 algorithm
 * (RFC 1321), so this appears to be a minified MD5 implementation.
 * The input is first UTF-8 encoded (helper `n`), then padded into 32-bit words
 * (helper `l`), then processed 16 words at a time.
 */
function h(a) {
// Rotates the 32-bit value `a` left by `b` bits.
function b(a, b) {
return a << b | a >>> 32 - b
}
// Adds two 32-bit values with wrap-around: the low 30 bits are summed normally and
// the top two bits are patched in with XOR so the result stays within 32 bits.
function c(a, b) {
var c, d, e, f, g;
return e = 2147483648 & a,
f = 2147483648 & b,
c = 1073741824 & a,
d = 1073741824 & b,
g = (1073741823 & a) + (1073741823 & b),
c & d ? 2147483648 ^ g ^ e ^ f : c | d ? 1073741824 & g ? 3221225472 ^ g ^ e ^ f : 1073741824 ^ g ^ e ^ f : g ^ e ^ f
}
// Bitwise mixing function used by the first group of 16 steps (MD5 "F").
function d(a, b, c) {
return a & b | ~a & c
}
// Bitwise mixing function used by the second group of 16 steps (MD5 "G").
function e(a, b, c) {
return a & c | b & ~c
}
// Bitwise mixing function used by the third group of 16 steps (MD5 "H").
function f(a, b, c) {
return a ^ b ^ c
}
// Bitwise mixing function used by the fourth group of 16 steps (MD5 "I").
function g(a, b, c) {
return b ^ (a | ~c)
}
// Step of the first group: a = e + rotl(a + d(e, f, g) + h + j, i).
// Note: this inner `h` shadows the outer function's name inside its body.
function h(a, e, f, g, h, i, j) {
return a = c(a, c(c(d(e, f, g), h), j)),
c(b(a, i), e)
}
// Step of the second group: same shape as above, using mixing function `e`.
function i(a, d, f, g, h, i, j) {
return a = c(a, c(c(e(d, f, g), h), j)),
c(b(a, i), d)
}
// Step of the third group: same shape as above, using mixing function `f`.
function j(a, d, e, g, h, i, j) {
return a = c(a, c(c(f(d, e, g), h), j)),
c(b(a, i), d)
}
// Step of the fourth group: same shape as above, using mixing function `g`.
function k(a, d, e, f, h, i, j) {
return a = c(a, c(c(g(d, e, f), h), j)),
c(b(a, i), d)
}
// Packs the characters of `a` into an array of 32-bit words (lowest byte first),
// appends the 0x80 padding byte, and stores the message length in bits in the
// last two words. The array length is a multiple of 16 words.
function l(a) {
for (var b, c = a.length, d = c + 8, e = (d - d % 64) / 64, f = 16 * (e + 1), g = new Array(f - 1), h = 0, i = 0; c > i;)
b = (i - i % 4) / 4,
h = i % 4 * 8,
g[b] = g[b] | a.charCodeAt(i) << h,
i++;
return b = (i - i % 4) / 4,
h = i % 4 * 8,
g[b] = g[b] | 128 << h,
g[f - 2] = c << 3,
g[f - 1] = c >>> 29,
g
}
// Formats the 32-bit word `a` as 8 hex characters, lowest byte first,
// zero-padding each byte to two digits.
function m(a) {
var b, c, d = "", e = "";
for (c = 0; 3 >= c; c++)
b = a >>> 8 * c & 255,
e = "0" + b.toString(16),
d += e.substr(e.length - 2, 2);
return d
}
// Replaces "\r\n" with "\n" and then UTF-8 encodes the string: each UTF-16 code
// unit becomes one, two or three characters holding the encoded byte values.
function n(a) {
a = a.replace(/\r\n/g, "\n");
for (var b = "", c = 0; c < a.length; c++) {
var d = a.charCodeAt(c);
128 > d ? b += String.fromCharCode(d) : d > 127 && 2048 > d ? (b += String.fromCharCode(d >> 6 | 192),
b += String.fromCharCode(63 & d | 128)) : (b += String.fromCharCode(d >> 12 | 224),
b += String.fromCharCode(d >> 6 & 63 | 128),
b += String.fromCharCode(63 & d | 128))
}
return b
}
// t, u, v, w hold the running state; p, q, r, s hold a copy of it for the current
// 16-word block; x is the padded word array; y..N are the rotation amounts used by
// the four groups of steps.
var o, p, q, r, s, t, u, v, w, x = [], y = 7, z = 12, A = 17, B = 22, C = 5, D = 9, E = 14, F = 20, G = 4,
H = 11, I = 16, J = 23, K = 6, L = 10, M = 15, N = 21;
// Main loop: the initialiser encodes and pads the input and sets the initial state;
// each iteration saves the state, runs 64 steps over x[o..o+15], then adds the saved
// state back in. The whole body is one comma expression, so statement order matters.
for (a = n(a),
x = l(a),
t = 1732584193,
u = 4023233417,
v = 2562383102,
w = 271733878,
o = 0; o < x.length; o += 16)
p = t,
q = u,
r = v,
s = w,
t = h(t, u, v, w, x[o + 0], y, 3614090360),
w = h(w, t, u, v, x[o + 1], z, 3905402710),
v = h(v, w, t, u, x[o + 2], A, 606105819),
u = h(u, v, w, t, x[o + 3], B, 3250441966),
t = h(t, u, v, w, x[o + 4], y, 4118548399),
w = h(w, t, u, v, x[o + 5], z, 1200080426),
v = h(v, w, t, u, x[o + 6], A, 2821735955),
u = h(u, v, w, t, x[o + 7], B, 4249261313),
t = h(t, u, v, w, x[o + 8], y, 1770035416),
w = h(w, t, u, v, x[o + 9], z, 2336552879),
v = h(v, w, t, u, x[o + 10], A, 4294925233),
u = h(u, v, w, t, x[o + 11], B, 2304563134),
t = h(t, u, v, w, x[o + 12], y, 1804603682),
w = h(w, t, u, v, x[o + 13], z, 4254626195),
v = h(v, w, t, u, x[o + 14], A, 2792965006),
u = h(u, v, w, t, x[o + 15], B, 1236535329),
t = i(t, u, v, w, x[o + 1], C, 4129170786),
w = i(w, t, u, v, x[o + 6], D, 3225465664),
v = i(v, w, t, u, x[o + 11], E, 643717713),
u = i(u, v, w, t, x[o + 0], F, 3921069994),
t = i(t, u, v, w, x[o + 5], C, 3593408605),
w = i(w, t, u, v, x[o + 10], D, 38016083),
v = i(v, w, t, u, x[o + 15], E, 3634488961),
u = i(u, v, w, t, x[o + 4], F, 3889429448),
t = i(t, u, v, w, x[o + 9], C, 568446438),
w = i(w, t, u, v, x[o + 14], D, 3275163606),
v = i(v, w, t, u, x[o + 3], E, 4107603335),
u = i(u, v, w, t, x[o + 8], F, 1163531501),
t = i(t, u, v, w, x[o + 13], C, 2850285829),
w = i(w, t, u, v, x[o + 2], D, 4243563512),
v = i(v, w, t, u, x[o + 7], E, 1735328473),
u = i(u, v, w, t, x[o + 12], F, 2368359562),
t = j(t, u, v, w, x[o + 5], G, 4294588738),
w = j(w, t, u, v, x[o + 8], H, 2272392833),
v = j(v, w, t, u, x[o + 11], I, 1839030562),
u = j(u, v, w, t, x[o + 14], J, 4259657740),
t = j(t, u, v, w, x[o + 1], G, 2763975236),
w = j(w, t, u, v, x[o + 4], H, 1272893353),
v = j(v, w, t, u, x[o + 7], I, 4139469664),
u = j(u, v, w, t, x[o + 10], J, 3200236656),
t = j(t, u, v, w, x[o + 13], G, 681279174),
w = j(w, t, u, v, x[o + 0], H, 3936430074),
v = j(v, w, t, u, x[o + 3], I, 3572445317),
u = j(u, v, w, t, x[o + 6], J, 76029189),
t = j(t, u, v, w, x[o + 9], G, 3654602809),
w = j(w, t, u, v, x[o + 12], H, 3873151461),
v = j(v, w, t, u, x[o + 15], I, 530742520),
u = j(u, v, w, t, x[o + 2], J, 3299628645),
t = k(t, u, v, w, x[o + 0], K, 4096336452),
w = k(w, t, u, v, x[o + 7], L, 1126891415),
v = k(v, w, t, u, x[o + 14], M, 2878612391),
u = k(u, v, w, t, x[o + 5], N, 4237533241),
t = k(t, u, v, w, x[o + 12], K, 1700485571),
w = k(w, t, u, v, x[o + 3], L, 2399980690),
v = k(v, w, t, u, x[o + 10], M, 4293915773),
u = k(u, v, w, t, x[o + 1], N, 2240044497),
t = k(t, u, v, w, x[o + 8], K, 1873313359),
w = k(w, t, u, v, x[o + 15], L, 4264355552),
v = k(v, w, t, u, x[o + 6], M, 2734768916),
u = k(u, v, w, t, x[o + 13], N, 1309151649),
t = k(t, u, v, w, x[o + 4], K, 4149444226),
w = k(w, t, u, v, x[o + 11], L, 3174756917),
v = k(v, w, t, u, x[o + 2], M, 718787259),
u = k(u, v, w, t, x[o + 9], N, 3951481745),
t = c(t, p),
u = c(u, q),
v = c(v, r),
w = c(w, s);
// Concatenate the four state words as hex to form the 32-character digest.
var O = m(t) + m(u) + m(v) + m(w);
return O.toLowerCase()
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment