Commit ae578372 by leiliangliang

更新淘宝爬取程序

parent c43ead7f
...@@ -124,11 +124,6 @@ ...@@ -124,11 +124,6 @@
<artifactId>httpclient</artifactId> <artifactId>httpclient</artifactId>
<version>4.5.6</version> <version>4.5.6</version>
</dependency> </dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.7</version>
</dependency>
</dependencies> </dependencies>
......
...@@ -46,19 +46,14 @@ public class TaoBaoHotSearchCrawler { ...@@ -46,19 +46,14 @@ public class TaoBaoHotSearchCrawler {
String[] splitEnc = enc.split(";"); String[] splitEnc = enc.split(";");
String _m_h5_tk_enc = splitEnc[0]; String _m_h5_tk_enc = splitEnc[0];
headerMap.put("cookie", _m_h5_tk + ";" + _m_h5_tk_enc); headerMap.put("cookie", _m_h5_tk + ";" + _m_h5_tk_enc);
log.info("获取cookie: {}",_m_h5_tk + ";" + _m_h5_tk_enc);
log.info("获取第一次响应结果: {}",response);
} catch (Exception e) { } catch (Exception e) {
log.error("解析淘宝热搜时出现解析错误,页面结构有问题", e); log.error("解析淘宝热搜时出现解析错误,页面结构有问题", e);
} }
String signs = token + "&" + time + "&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}"; String signs = token + "&" + time + "&12574478&{\"appId\":\"10211\",\"params\":\"{\\\"multi_hintq_show\\\":\\\"on\\\",\\\"src\\\":\\\"c2c\\\",\\\"area\\\":\\\"active_page\\\",\\\"sversion\\\":\\\"7.5\\\",\\\"bangdan_src\\\":\\\"list\\\"}\"}";
String sign = TaoBaoUtils.parsJSFunction(signs); String sign = TaoBaoUtils.parsJSFunction(signs);
String url = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=" + time + "&sign=" + sign + "&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D"; String url = "https://acs.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/?appKey=12574478&t=" + time + "&sign=" + sign + "&api=mtop.relationrecommend.WirelessRecommend.recommend&v=2.0&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22appId%22%3A%2210211%22%2C%22params%22%3A%22%7B%5C%22multi_hintq_show%5C%22%3A%5C%22on%5C%22%2C%5C%22src%5C%22%3A%5C%22c2c%5C%22%2C%5C%22area%5C%22%3A%5C%22active_page%5C%22%2C%5C%22sversion%5C%22%3A%5C%227.5%5C%22%2C%5C%22bangdan_src%5C%22%3A%5C%22list%5C%22%7D%22%7D";
log.info("获取time: {}",time);
log.info("获取sign: {}",sign);
Request request = RequestUtils.wrapGet(url, headerMap); Request request = RequestUtils.wrapGet(url, headerMap);
try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) { try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
log.info("获取第二次请求访问结果: {}",response);
htmlBody = response.body().string(); htmlBody = response.body().string();
ht = !htmlBody.contains("非法请求"); ht = !htmlBody.contains("非法请求");
} catch (Exception e) { } catch (Exception e) {
......
...@@ -520,17 +520,17 @@ public class GatherTimer { ...@@ -520,17 +520,17 @@ public class GatherTimer {
TipsUtils.addHotList(HotSearchType.快手热榜.name(), kuaiShouList); TipsUtils.addHotList(HotSearchType.快手热榜.name(), kuaiShouList);
logger.info("快手热榜采集结束..."); logger.info("快手热榜采集结束...");
} }
// /** /**
// *淘宝热搜采集 *淘宝热搜采集
// */ */
// @Async(value = "myScheduler") @Async(value = "myScheduler")
// @Scheduled(cron = "0 * * * * ? ") @Scheduled(cron = "0 * * * * ? ")
// public void crawlerTaoBao(){ public void crawlerTaoBao(){
// logger.info("淘宝热搜开始采集..."); logger.info("淘宝热搜开始采集...");
// Date date = DateUtils.getMillSecondTime(new Date()); Date date = DateUtils.getMillSecondTime(new Date());
// List<HotSearchList> taoBaoList = TaoBaoHotSearchCrawler.taoBaoHotSearch(date); List<HotSearchList> taoBaoList = TaoBaoHotSearchCrawler.taoBaoHotSearch(date);
// logger.info("{}, 淘宝热搜此轮采集到的数据量为:{}", new Date(), taoBaoList != null ? taoBaoList.size() : 0); logger.info("{}, 淘宝热搜此轮采集到的数据量为:{}", new Date(), taoBaoList != null ? taoBaoList.size() : 0);
// TipsUtils.addHotList(HotSearchType.淘宝热搜.name(), taoBaoList); TipsUtils.addHotList(HotSearchType.淘宝热搜.name(), taoBaoList);
// logger.info("淘宝热搜采集结束..."); logger.info("淘宝热搜采集结束...");
// } }
} }
package com.zhiwei.searchhotcrawler.util; package com.zhiwei.searchhotcrawler.util;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.io.FileUtils;
import org.springframework.core.io.ClassPathResource; import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource; import org.springframework.core.io.Resource;
import javax.script.Invocable; import javax.script.Invocable;
import javax.script.ScriptEngine; import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager; import javax.script.ScriptEngineManager;
import java.io.File; import java.io.*;
import java.io.FileReader;
import java.io.InputStream;
@Log4j2 @Log4j2
public class TaoBaoUtils { public class TaoBaoUtils {
...@@ -18,22 +14,13 @@ public class TaoBaoUtils { ...@@ -18,22 +14,13 @@ public class TaoBaoUtils {
public static String parsJSFunction(String sign) { public static String parsJSFunction(String sign) {
//脚本的执行结果 //脚本的执行结果
String scriptResult = ""; String scriptResult = "";
ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");//1.得到脚本引擎 //1.得到脚本引擎
//ScriptEngine engine = new ScriptEngineManager().getEngineByName("nashorn");//1.得到脚本引擎 ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");
try { try {
//2.引擎读取 脚本字符串
//engine.eval(new StringReader(routeScript));
//如果js存在文件里 //如果js存在文件里
Resource aesJs = new ClassPathResource("taobao.js"); Resource aesJs = new ClassPathResource("taobao.js");
InputStream inputStream = aesJs.getInputStream(); InputStream inputStream = aesJs.getInputStream();
File somethingFile = File.createTempFile("taobao", ".js"); engine.eval(new BufferedReader(new InputStreamReader(inputStream)));
try {
FileUtils.copyInputStreamToFile(inputStream, somethingFile);
} finally {
IOUtils.closeQuietly(inputStream);
}
engine.eval(new FileReader(somethingFile));
//3.将引擎转换为Invocable,这样才可以掉用js的方法 //3.将引擎转换为Invocable,这样才可以掉用js的方法
Invocable invocable = (Invocable) engine; Invocable invocable = (Invocable) engine;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment