Commit 9e236728 by [zhangzhiwei]

微头条添加头信息中cookie,不然无数据

parent fdb8e380
......@@ -175,8 +175,10 @@ public class TouTiaoArticleParse {
}
Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + user_id + "/");
System.out.println(url);
try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
System.out.println(htmlBody);
if (htmlBody != null) {
Map<String, Object> dataMap = parseHtmlByMicroAccount(htmlBody, endDate);
if(dataMap!=null && dataMap.size()>0){
......@@ -189,7 +191,6 @@ public class TouTiaoArticleParse {
logger.info("获取数据出错::{},数据为null", e.fillInStackTrace());
return null;
}
return null;
}
......@@ -245,9 +246,10 @@ public class TouTiaoArticleParse {
continue;
}
}
System.out.println(source+"========="+dataList.size());
/**验证是否有下一页数据**/
if(more){
if(max_behot_time!=null && !"0".equals(max_behot_time)){
if(max_behot_time!=null && max_behot_time!=0){
if(endDate.after(date)){
max_behot_time = null;
}
......@@ -259,7 +261,7 @@ public class TouTiaoArticleParse {
e.printStackTrace();
}
map.put("max_behot_time", max_behot_time);
map.put("max_behot_time", max_behot_time+"");
map.put("data", dataList);
return map;
......
......@@ -161,6 +161,7 @@ public class Tools {
headerMap.put("Upgrade-Insecure-Requests", "1");
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36");
headerMap.put("Accept-Encoding", "deflate, br");
headerMap.put("cookie", "");
return headerMap;
}
......
///**
// * @Title: TouTiaoExample.java
// * @Package com.zhiwei.toutiao.test
// * @Description:
// * @author hero
// * @date 2016年9月2日 上午11:48:51
// * @version V1.0
// */
///**
//*
//*/
//package com.zhiwei.toutiao.test;
//
//import java.util.ArrayList;
//import java.util.Date;
//import java.util.List;
//import java.util.Map;
//
//import com.zhiwei.tools.timeparse.TimeParse;
//import com.zhiwei.toutiao.bean.TouTiaoArticle;
//import com.zhiwei.toutiao.parse.TouTiaoArticleParse;
//
///**
// * @Description:
// * @author hero
// * @date 2016年9月2日 上午11:48:51
// */
//public class TouTiaoExample {
//
// @SuppressWarnings("unchecked")
// public static void main(String[] args) throws Exception {
// long a = System.currentTimeMillis();
// List<String> urlList = new ArrayList<String>();
// urlList.add("6859134443");
//
// System.out.println(urlList.size());
//
// Date endTime = TimeParse.stringFormartDate("2018-04-01");
//
// for (String url : urlList) {
// String mid = url;
// String max_behot_time = "0";
// while (true) {
// Map<String, Object> dataMap = null;
// dataMap = TouTiaoArticleParse.getTouTiaoList(mid, max_behot_time, endTime,null);
// if (dataMap != null) {
// List<TouTiaoArticle> ttlist = (List<TouTiaoArticle>) dataMap.get("data");
// max_behot_time = (String) dataMap.get("max_behot_time");
// System.out.println(max_behot_time + "=======" + ttlist.size());
// if (max_behot_time == null || ttlist.isEmpty()) {
// break;
// } else {
// if (ttlist.size() > 0) {
// for (TouTiaoArticle tt : ttlist) {
// System.out.println(tt);
// }
// }
// }
// }
// }
// }
// long b = System.currentTimeMillis();
// System.out.println("一轮的采集时间为:" + (b - a) / 1000);
// }
//
//}
/**
* @Title: TouTiaoExample.java
* @Package com.zhiwei.toutiao.test
* @Description:
* @author hero
* @date 2016年9月2日 上午11:48:51
* @version V1.0
*/
/**
*
*/
package com.zhiwei.toutiao.test;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.toutiao.bean.TouTiaoArticle;
import com.zhiwei.toutiao.parse.TouTiaoArticleParse;
/**
 * Manual driver that pages through the micro-Toutiao feed of a fixed list of
 * account ids via {@code TouTiaoArticleParse.getMicroTouTiaoCrawler} and prints
 * every article it receives.
 *
 * @author hero
 * @date 2016-09-02 11:48:51 AM
 */
public class TouTiaoExample {

    /** Consecutive failed fetches (null result) tolerated per account before giving up on it. */
    private static final int MAX_CONSECUTIVE_FAILURES = 3;

    /**
     * Crawls each configured account page by page and prints the results,
     * followed by the total elapsed time in seconds.
     *
     * <p>Paging for one account stops when the crawler reports no further cursor,
     * when a page contains no articles, or when the crawler has returned
     * {@code null} {@link #MAX_CONSECUTIVE_FAILURES} times in a row.
     *
     * @param args unused
     * @throws Exception propagated from the date parser or the crawler
     */
    @SuppressWarnings("unchecked")
    public static void main(String[] args) throws Exception {
        long a = System.currentTimeMillis();
        List<String> urlList = new ArrayList<String>();
        urlList.add("5757091251");
        System.out.println(urlList.size());
        // Handed to the crawler as its end-date argument.
        Date endTime = TimeParse.stringFormartDate("2018-04-01");
        for (String mid : urlList) {
            // "0" is the cursor value used for the first request.
            String max_behot_time = "0";
            int consecutiveFailures = 0;
            while (true) {
                Map<String, Object> dataMap =
                        TouTiaoArticleParse.getMicroTouTiaoCrawler(mid, endTime, null, max_behot_time);
                if (dataMap == null) {
                    // The cursor has not advanced, so retrying without a bound would
                    // repeat the same failing request forever.
                    consecutiveFailures++;
                    if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                        System.err.println("Giving up on " + mid + " after "
                                + consecutiveFailures + " consecutive failed fetches");
                        break;
                    }
                    continue;
                }
                consecutiveFailures = 0;
                List<TouTiaoArticle> ttlist = (List<TouTiaoArticle>) dataMap.get("data");
                max_behot_time = (String) dataMap.get("max_behot_time");
                // A missing "data" entry is handled like an empty page instead of throwing.
                int pageSize = (ttlist == null) ? 0 : ttlist.size();
                System.out.println(max_behot_time + "=======" + pageSize);
                // NOTE(review): the parser appears to build this value by string
                // concatenation, in which case an absent cursor would arrive as the
                // literal "null" rather than a null reference - confirm against the parser.
                boolean noMorePages = max_behot_time == null || "null".equals(max_behot_time);
                if (noMorePages || pageSize == 0) {
                    break;
                }
                for (TouTiaoArticle tt : ttlist) {
                    System.out.println(tt);
                }
            }
        }
        long b = System.currentTimeMillis();
        System.out.println("一轮的采集时间为:" + (b - a) / 1000);
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment