Commit a1a13c88 by chenweitao

Merge branch 'working' into 'master'

更新淘宝爬取程序

See merge request !113
parents d26cf984 ae578372
......@@ -124,11 +124,6 @@
<artifactId>httpclient</artifactId>
<version>4.5.6</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.7</version>
</dependency>
</dependencies>
......
......@@ -46,19 +46,14 @@ public class TaoBaoHotSearchCrawler {
String[] splitEnc = enc.split(";");
String _m_h5_tk_enc = splitEnc[0];
headerMap.put("cookie", _m_h5_tk + ";" + _m_h5_tk_enc);
log.info("获取cookie: {}",_m_h5_tk + ";" + _m_h5_tk_enc);
log.info("获取第一次响应结果: {}",response);
} catch (Exception e) {
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", e);
}
String signs = token + "&" + time + "&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
String sign = TaoBaoUtils.parsJSFunction(signs);
String url = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=" + time + "&sign=" + sign + "&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
log.info("获取time: {}",time);
log.info("获取sign: {}",sign);
Request request = RequestUtils.wrapGet(url, headerMap);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
log.info("获取第二次请求访问结果: {}",response);
htmlBody = response.body().string();
ht = !htmlBody.contains("非法请求");
} catch (Exception e) {
......
......@@ -520,17 +520,17 @@ public class GatherTimer {
TipsUtils.addHotList(HotSearchType.快手热榜.name(), kuaiShouList);
logger.info("快手热榜采集结束...");
}
// /**
// *淘宝热搜采集
// */
// @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ")
// public void crawlerTaoBao(){
// logger.info("淘宝热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> taoBaoList = TaoBaoHotSearchCrawler.taoBaoHotSearch(date);
// logger.info("{}, 淘宝热搜此轮采集到的数据量为:{}", new Date(), taoBaoList != null ? taoBaoList.size() : 0);
// TipsUtils.addHotList(HotSearchType.淘宝热搜.name(), taoBaoList);
// logger.info("淘宝热搜采集结束...");
// }
/**
 * Scheduled collection of the Taobao hot-search list.
 *
 * <p>Fired by the cron expression below (second 0 of every minute) and run
 * asynchronously on the executor named "myScheduler". Each round asks
 * {@code TaoBaoHotSearchCrawler} for the current list, logs how many entries
 * came back (0 when the crawler returned {@code null}), and passes the list
 * to {@code TipsUtils.addHotList} under the Taobao hot-search type name.
 */
@Async(value = "myScheduler")
@Scheduled(cron = "0 * * * * ? ")
public void crawlerTaoBao() {
    logger.info("淘宝热搜开始采集...");

    // Timestamp for this round, passed through DateUtils.getMillSecondTime before crawling.
    Date crawlTime = DateUtils.getMillSecondTime(new Date());
    List<HotSearchList> hotSearches = TaoBaoHotSearchCrawler.taoBaoHotSearch(crawlTime);

    // The crawler result may be null; report that as zero collected entries.
    Date loggedAt = new Date();
    int collected;
    if (hotSearches == null) {
        collected = 0;
    } else {
        collected = hotSearches.size();
    }
    logger.info("{}, 淘宝热搜此轮采集到的数据量为:{}", loggedAt, collected);

    // The list is forwarded as-is (possibly null), exactly as received.
    String hotListKey = HotSearchType.淘宝热搜.name();
    TipsUtils.addHotList(hotListKey, hotSearches);

    logger.info("淘宝热搜采集结束...");
}
}
package com.zhiwei.searchhotcrawler.util;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.io.FileUtils;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;
import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import java.io.File;
import java.io.FileReader;
import java.io.InputStream;
import java.io.*;
@Log4j2
public class TaoBaoUtils {
......@@ -18,22 +14,13 @@ public class TaoBaoUtils {
public static String parsJSFunction(String sign) {
//脚本的执行结果
String scriptResult = "";
ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");//1.得到脚本引擎
//ScriptEngine engine = new ScriptEngineManager().getEngineByName("nashorn");//1.得到脚本引擎
//1.得到脚本引擎
ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");
try {
//2.引擎读取 脚本字符串
//engine.eval(new StringReader(routeScript));
//如果js存在文件里
Resource aesJs = new ClassPathResource("taobao.js");
InputStream inputStream = aesJs.getInputStream();
File somethingFile = File.createTempFile("taobao", ".js");
try {
FileUtils.copyInputStreamToFile(inputStream, somethingFile);
} finally {
IOUtils.closeQuietly(inputStream);
}
engine.eval(new FileReader(somethingFile));
engine.eval(new BufferedReader(new InputStreamReader(inputStream)));
//3.将引擎转换为Invocable,这样才可以掉用js的方法
Invocable invocable = (Invocable) engine;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment