Commit 9e236728 by [zhangzhiwei]

微头条添加头信息中cookie,不然无数据

parent fdb8e380
...@@ -175,8 +175,10 @@ public class TouTiaoArticleParse { ...@@ -175,8 +175,10 @@ public class TouTiaoArticleParse {
} }
Map<String, String> headerMap = Tools.getTouTiaoHeader(); Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + user_id + "/"); headerMap.put("Referer", "https://www.toutiao.com/c/user/" + user_id + "/");
System.out.println(url);
try { try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
System.out.println(htmlBody);
if (htmlBody != null) { if (htmlBody != null) {
Map<String, Object> dataMap = parseHtmlByMicroAccount(htmlBody, endDate); Map<String, Object> dataMap = parseHtmlByMicroAccount(htmlBody, endDate);
if(dataMap!=null && dataMap.size()>0){ if(dataMap!=null && dataMap.size()>0){
...@@ -189,7 +191,6 @@ public class TouTiaoArticleParse { ...@@ -189,7 +191,6 @@ public class TouTiaoArticleParse {
logger.info("获取数据出错::{},数据为null", e.fillInStackTrace()); logger.info("获取数据出错::{},数据为null", e.fillInStackTrace());
return null; return null;
} }
return null; return null;
} }
...@@ -245,9 +246,10 @@ public class TouTiaoArticleParse { ...@@ -245,9 +246,10 @@ public class TouTiaoArticleParse {
continue; continue;
} }
} }
System.out.println(source+"========="+dataList.size());
/**验证是否有下一页数据**/ /**验证是否有下一页数据**/
if(more){ if(more){
if(max_behot_time!=null && !"0".equals(max_behot_time)){ if(max_behot_time!=null && max_behot_time!=0){
if(endDate.after(date)){ if(endDate.after(date)){
max_behot_time = null; max_behot_time = null;
} }
...@@ -259,7 +261,7 @@ public class TouTiaoArticleParse { ...@@ -259,7 +261,7 @@ public class TouTiaoArticleParse {
e.printStackTrace(); e.printStackTrace();
} }
map.put("max_behot_time", max_behot_time); map.put("max_behot_time", max_behot_time+"");
map.put("data", dataList); map.put("data", dataList);
return map; return map;
......
...@@ -161,6 +161,7 @@ public class Tools { ...@@ -161,6 +161,7 @@ public class Tools {
headerMap.put("Upgrade-Insecure-Requests", "1"); headerMap.put("Upgrade-Insecure-Requests", "1");
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"); headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36");
headerMap.put("Accept-Encoding", "deflate, br"); headerMap.put("Accept-Encoding", "deflate, br");
headerMap.put("cookie", "");
return headerMap; return headerMap;
} }
......
///** /**
// * @Title: TouTiaoExample.java * @Title: TouTiaoExample.java
// * @Package com.zhiwei.toutiao.test * @Package com.zhiwei.toutiao.test
// * @Description: * @Description:
// * @author hero * @author hero
// * @date 2016年9月2日 上午11:48:51 * @date 2016年9月2日 上午11:48:51
// * @version V1.0 * @version V1.0
// */ */
///** /**
//* *
//*/ */
//package com.zhiwei.toutiao.test; package com.zhiwei.toutiao.test;
//
//import java.util.ArrayList; import java.util.ArrayList;
//import java.util.Date; import java.util.Date;
//import java.util.List; import java.util.List;
//import java.util.Map; import java.util.Map;
//
//import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
//import com.zhiwei.toutiao.bean.TouTiaoArticle; import com.zhiwei.toutiao.bean.TouTiaoArticle;
//import com.zhiwei.toutiao.parse.TouTiaoArticleParse; import com.zhiwei.toutiao.parse.TouTiaoArticleParse;
//
///** /**
// * @Description: * @Description:
// * @author hero * @author hero
// * @date 2016年9月2日 上午11:48:51 * @date 2016年9月2日 上午11:48:51
// */ */
//public class TouTiaoExample { public class TouTiaoExample {
//
// @SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
// public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
// long a = System.currentTimeMillis(); long a = System.currentTimeMillis();
// List<String> urlList = new ArrayList<String>(); List<String> urlList = new ArrayList<String>();
// urlList.add("6859134443"); urlList.add("5757091251");
//
// System.out.println(urlList.size()); System.out.println(urlList.size());
//
// Date endTime = TimeParse.stringFormartDate("2018-04-01"); Date endTime = TimeParse.stringFormartDate("2018-04-01");
//
// for (String url : urlList) { for (String url : urlList) {
// String mid = url; String mid = url;
// String max_behot_time = "0"; String max_behot_time = "0";
// while (true) { while (true) {
// Map<String, Object> dataMap = null; Map<String, Object> dataMap = null;
// dataMap = TouTiaoArticleParse.getTouTiaoList(mid, max_behot_time, endTime,null); dataMap = TouTiaoArticleParse.getMicroTouTiaoCrawler(mid, endTime, null, max_behot_time);
// if (dataMap != null) { if (dataMap != null) {
// List<TouTiaoArticle> ttlist = (List<TouTiaoArticle>) dataMap.get("data"); List<TouTiaoArticle> ttlist = (List<TouTiaoArticle>) dataMap.get("data");
// max_behot_time = (String) dataMap.get("max_behot_time"); max_behot_time = (String) dataMap.get("max_behot_time");
// System.out.println(max_behot_time + "=======" + ttlist.size()); System.out.println(max_behot_time + "=======" + ttlist.size());
// if (max_behot_time == null || ttlist.isEmpty()) { if (max_behot_time == null || ttlist.isEmpty()) {
// break; break;
// } else { } else {
// if (ttlist.size() > 0) { if (ttlist.size() > 0) {
// for (TouTiaoArticle tt : ttlist) { for (TouTiaoArticle tt : ttlist) {
// System.out.println(tt); System.out.println(tt);
// } }
// } }
// } }
// } }
// } }
// } }
// long b = System.currentTimeMillis(); long b = System.currentTimeMillis();
// System.out.println("一轮的采集时间为:" + (b - a) / 1000); System.out.println("一轮的采集时间为:" + (b - a) / 1000);
// } }
//
//} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment