Commit ee3aa8bd by [zhangzhiwei]

修改并更新今日头条采集程序

parent 2191591c
...@@ -6,12 +6,13 @@ import java.util.Date; ...@@ -6,12 +6,13 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger; import org.apache.logging.log4j.LogManager;
import org.slf4j.LoggerFactory; import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.toutiao.bean.TouTiaoAccount; import com.zhiwei.toutiao.bean.TouTiaoAccount;
...@@ -27,7 +28,7 @@ public class TouTiaoAccountParse { ...@@ -27,7 +28,7 @@ public class TouTiaoAccountParse {
private TouTiaoAccountParse() {} private TouTiaoAccountParse() {}
private static Map<String, String> headerMap; private static Map<String, String> headerMap;
private static Logger logger = LoggerFactory.getLogger(TouTiaoAccountParse.class); private static Logger logger = LogManager.getLogger(TouTiaoAccountParse.class);
/** /**
* @Title: getTouTiaoAccountInfo * @Title: getTouTiaoAccountInfo
...@@ -44,13 +45,13 @@ public class TouTiaoAccountParse { ...@@ -44,13 +45,13 @@ public class TouTiaoAccountParse {
TouTiaoAccount tta = null; TouTiaoAccount tta = null;
try { try {
String htmlBody = null; String htmlBody = null;
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null ){ if(htmlBody != null ){
tta = parseHtmlByAccount(htmlBody, name, proxy); tta = parseHtmlByAccount(htmlBody, name, proxy);
if(tta == null){ if(tta == null){
url = "https://www.toutiao.com/search_content/?offset=0&format=json&keyword="+URLCodeUtil.getURLEncode(name, "utf-8")+"&autoload=true&count=20&cur_tab=4&from=media"; url = "https://www.toutiao.com/search_content/?offset=0&format=json&keyword="+URLCodeUtil.getURLEncode(name, "utf-8")+"&autoload=true&count=20&cur_tab=4&from=media";
headerMap.put("Referer","https://www.toutiao.com/search/?keyword="+URLCodeUtil.getURLEncode(name, "utf-8")); headerMap.put("Referer","https://www.toutiao.com/search/?keyword="+URLCodeUtil.getURLEncode(name, "utf-8"));
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null){ if(htmlBody != null){
tta = parseHtmlByAccount(htmlBody, name, proxy); tta = parseHtmlByAccount(htmlBody, name, proxy);
} }
...@@ -58,7 +59,7 @@ public class TouTiaoAccountParse { ...@@ -58,7 +59,7 @@ public class TouTiaoAccountParse {
}else { }else {
url = "https://www.toutiao.com/search_content/?offset=0&format=json&keyword="+URLCodeUtil.getURLEncode(name, "utf-8")+"&autoload=true&count=20&cur_tab=4&from=media"; url = "https://www.toutiao.com/search_content/?offset=0&format=json&keyword="+URLCodeUtil.getURLEncode(name, "utf-8")+"&autoload=true&count=20&cur_tab=4&from=media";
headerMap.put("Referer","https://www.toutiao.com/search/?keyword="+URLCodeUtil.getURLEncode(name, "utf-8")); headerMap.put("Referer","https://www.toutiao.com/search/?keyword="+URLCodeUtil.getURLEncode(name, "utf-8"));
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null){ if(htmlBody != null){
tta = parseHtmlByAccount(htmlBody, name, proxy); tta = parseHtmlByAccount(htmlBody, name, proxy);
} }
...@@ -79,7 +80,7 @@ public class TouTiaoAccountParse { ...@@ -79,7 +80,7 @@ public class TouTiaoAccountParse {
TouTiaoAccount tta = null; TouTiaoAccount tta = null;
try { try {
String htmlBody = null; String htmlBody = null;
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null){ if(htmlBody != null){
tta = parseAccountByUserId(htmlBody, user_id, proxy); tta = parseAccountByUserId(htmlBody, user_id, proxy);
} }
...@@ -112,7 +113,7 @@ public class TouTiaoAccountParse { ...@@ -112,7 +113,7 @@ public class TouTiaoAccountParse {
headerMap = Tools.getTouTiaoHeader(); headerMap = Tools.getTouTiaoHeader();
try { try {
String htmlBody = null; String htmlBody = null;
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null){ if(htmlBody != null){
JSONObject json = JSONObject.parseObject(htmlBody); JSONObject json = JSONObject.parseObject(htmlBody);
list.addAll(parseHtmlByWord(json, proxy)); list.addAll(parseHtmlByWord(json, proxy));
...@@ -152,7 +153,7 @@ public class TouTiaoAccountParse { ...@@ -152,7 +153,7 @@ public class TouTiaoAccountParse {
headerMap.put("Host", "is.snssdk.com"); headerMap.put("Host", "is.snssdk.com");
try { try {
String htmlBody = null; String htmlBody = null;
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null && htmlBody.contains("name")){ if(htmlBody != null && htmlBody.contains("name")){
JSONObject json = JSONObject.parseObject(htmlBody); JSONObject json = JSONObject.parseObject(htmlBody);
more = json.getJSONObject("data").getBooleanValue("has_more"); more = json.getJSONObject("data").getBooleanValue("has_more");
......
...@@ -19,12 +19,13 @@ import java.util.HashMap; ...@@ -19,12 +19,13 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger; import org.apache.logging.log4j.LogManager;
import org.slf4j.LoggerFactory; import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.toutiao.bean.TouTiaoArticle; import com.zhiwei.toutiao.bean.TouTiaoArticle;
import com.zhiwei.toutiao.util.Tools; import com.zhiwei.toutiao.util.Tools;
...@@ -36,7 +37,7 @@ import com.zhiwei.toutiao.util.Tools; ...@@ -36,7 +37,7 @@ import com.zhiwei.toutiao.util.Tools;
*/ */
public class TouTiaoArticleParse { public class TouTiaoArticleParse {
private TouTiaoArticleParse() {} private TouTiaoArticleParse() {}
private static Logger logger = LoggerFactory.getLogger(TouTiaoArticleParse.class); private static Logger logger = LogManager.getLogger(TouTiaoArticleParse.class);
/*** /***
* 获取头条数据 * 获取头条数据
...@@ -59,7 +60,7 @@ public class TouTiaoArticleParse { ...@@ -59,7 +60,7 @@ public class TouTiaoArticleParse {
headerMap.put("Referer", url); headerMap.put("Referer", url);
String htmlBody = null; String htmlBody = null;
try { try {
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if(htmlBody != null && htmlBody.contains("behot_time")){ if(htmlBody != null && htmlBody.contains("behot_time")){
Map<String, Object> ttList = parseHtmlByAccount(htmlBody, endData); Map<String, Object> ttList = parseHtmlByAccount(htmlBody, endData);
if(ttList!=null && ttList.size()>0){ if(ttList!=null && ttList.size()>0){
...@@ -154,7 +155,7 @@ public class TouTiaoArticleParse { ...@@ -154,7 +155,7 @@ public class TouTiaoArticleParse {
Map<String, String> headerMap = Tools.getTouTiaoHeader(); Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + user_id + "/"); headerMap.put("Referer", "https://www.toutiao.com/c/user/" + user_id + "/");
try { try {
String htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); String htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if (htmlBody != null) { if (htmlBody != null) {
Map<String, Object> dataMap = parseHtmlByMicroAccount(htmlBody, endDate); Map<String, Object> dataMap = parseHtmlByMicroAccount(htmlBody, endDate);
if(dataMap!=null && dataMap.size()>0){ if(dataMap!=null && dataMap.size()>0){
...@@ -182,9 +183,7 @@ public class TouTiaoArticleParse { ...@@ -182,9 +183,7 @@ public class TouTiaoArticleParse {
* @param @return 设定文件 * @param @return 设定文件
* @return Map<String,Object> 返回类型 * @return Map<String,Object> 返回类型
*/ */
@SuppressWarnings("unlikely-arg-type")
private static Map<String, Object> parseHtmlByMicroAccount(String htmlBody, Date endDate) { private static Map<String, Object> parseHtmlByMicroAccount(String htmlBody, Date endDate) {
Map<String, Object> map = new HashMap<String, Object>(); Map<String, Object> map = new HashMap<String, Object>();
Long max_behot_time = null; Long max_behot_time = null;
List<TouTiaoArticle> dataList = new ArrayList<TouTiaoArticle>(); List<TouTiaoArticle> dataList = new ArrayList<TouTiaoArticle>();
......
...@@ -7,8 +7,8 @@ import java.util.HashMap; ...@@ -7,8 +7,8 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger; import org.apache.logging.log4j.LogManager;
import org.slf4j.LoggerFactory; import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONException; import com.alibaba.fastjson.JSONException;
...@@ -26,7 +26,7 @@ import com.zhiwei.toutiao.util.Tools; ...@@ -26,7 +26,7 @@ import com.zhiwei.toutiao.util.Tools;
*/ */
public class TouTiaoChannelParse { public class TouTiaoChannelParse {
private static Map<String, String> headerMap; private static Map<String, String> headerMap;
private static Logger logger = LoggerFactory.getLogger(TouTiaoChannelParse.class); private static Logger logger = LogManager.getLogger(TouTiaoChannelParse.class);
/** /**
* @Title: touTiaoChannel * @Title: touTiaoChannel
......
...@@ -8,8 +8,8 @@ import java.util.Date; ...@@ -8,8 +8,8 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger; import org.apache.logging.log4j.LogManager;
import org.slf4j.LoggerFactory; import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
...@@ -26,7 +26,7 @@ import com.zhiwei.toutiao.util.Tools; ...@@ -26,7 +26,7 @@ import com.zhiwei.toutiao.util.Tools;
*/ */
public class TouTiaoCommentParse { public class TouTiaoCommentParse {
private static Logger logger = LoggerFactory.getLogger(TouTiaoCommentParse.class); private static Logger logger = LogManager.getLogger(TouTiaoCommentParse.class);
/** /**
......
...@@ -20,10 +20,10 @@ import java.util.HashMap; ...@@ -20,10 +20,10 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
...@@ -40,7 +40,7 @@ import com.zhiwei.toutiao.util.Tools; ...@@ -40,7 +40,7 @@ import com.zhiwei.toutiao.util.Tools;
public class TouTiaoParse { public class TouTiaoParse {
private Map<String, String> headerMap ; private Map<String, String> headerMap ;
private Logger logger = LoggerFactory.getLogger(TouTiaoCommentParse.class); private static Logger logger = LogManager.getLogger(TouTiaoCommentParse.class);
/*** /***
* 获取头条数据 * 获取头条数据
......
...@@ -7,10 +7,10 @@ import java.util.HashMap; ...@@ -7,10 +7,10 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
...@@ -28,7 +28,7 @@ import com.zhiwei.toutiao.util.Tools; ...@@ -28,7 +28,7 @@ import com.zhiwei.toutiao.util.Tools;
public class TouTiaoQuestionAnswerParse { public class TouTiaoQuestionAnswerParse {
private static Map<String, String> headerMap ; private static Map<String, String> headerMap ;
private static Logger logger = LoggerFactory.getLogger(TouTiaoQuestionAnswerParse.class); private static Logger logger = LogManager.getLogger(TouTiaoQuestionAnswerParse.class);
public static Map<String,Object> getAnserList(String questionId,int page,int req_type,Proxy proxy){ public static Map<String,Object> getAnserList(String questionId,int page,int req_type,Proxy proxy){
......
...@@ -6,8 +6,8 @@ import java.util.Date; ...@@ -6,8 +6,8 @@ import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger; import org.apache.logging.log4j.LogManager;
import org.slf4j.LoggerFactory; import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
...@@ -25,7 +25,7 @@ import com.zhiwei.toutiao.util.Tools; ...@@ -25,7 +25,7 @@ import com.zhiwei.toutiao.util.Tools;
public class TouTiaoQuestionParse { public class TouTiaoQuestionParse {
private static Map<String, String> headerMap; private static Map<String, String> headerMap;
private static Logger logger = LoggerFactory.getLogger(TouTiaoQuestionParse.class); private static Logger logger = LogManager.getLogger(TouTiaoQuestionParse.class);
/** /**
* @Title: getSearchTouTiaoQuestion * @Title: getSearchTouTiaoQuestion
......
...@@ -7,8 +7,8 @@ import java.util.HashMap; ...@@ -7,8 +7,8 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger; import org.apache.logging.log4j.LogManager;
import org.slf4j.LoggerFactory; import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONException; import com.alibaba.fastjson.JSONException;
...@@ -27,7 +27,7 @@ import com.zhiwei.toutiao.util.Tools; ...@@ -27,7 +27,7 @@ import com.zhiwei.toutiao.util.Tools;
public class TouTiaoSearchParse { public class TouTiaoSearchParse {
private static Map<String, String> headerMap; private static Map<String, String> headerMap;
private static Logger logger = LoggerFactory.getLogger(TouTiaoSearchParse.class); private static Logger logger = LogManager.getLogger(TouTiaoSearchParse.class);
/** /**
* @Title: touTiaoSearchByWord * @Title: touTiaoSearchByWord
......
package com.zhiwei.wangyi.parse; package com.zhiwei.wangyi.parse;
import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.slf4j.Logger; import org.apache.logging.log4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.toutiao.util.Tools; import com.zhiwei.toutiao.util.Tools;
import com.zhiwei.wangyi.bean.WangYiNews; import com.zhiwei.wangyi.bean.WangYiNews;
public class WangyiNewParse { public class WangyiNewParse {
private static Logger logger = LoggerFactory.getLogger(WangyiNewParse.class); private static Logger logger = LogManager.getLogger(WangyiNewParse.class);
private static boolean finish = true; private static boolean finish = true;
/** /**
* @Title: getWYHistory * @Title: getWYHistory
...@@ -27,7 +27,7 @@ public class WangyiNewParse { ...@@ -27,7 +27,7 @@ public class WangyiNewParse {
* @return List<WangYiNews> 返回类型 * @return List<WangYiNews> 返回类型
* @throws Exception * @throws Exception
*/ */
public static List<WangYiNews> getWYHistory(String tid,Date endTime) throws Exception public static List<WangYiNews> getWYHistory(String tid,Date endTime,Proxy proxy) throws Exception
{ {
List<WangYiNews> list = new ArrayList<WangYiNews>(); List<WangYiNews> list = new ArrayList<WangYiNews>();
Map<String,String> headerMap = Tools.getWangYiHeader(); Map<String,String> headerMap = Tools.getWangYiHeader();
...@@ -38,7 +38,7 @@ public class WangyiNewParse { ...@@ -38,7 +38,7 @@ public class WangyiNewParse {
{ {
String url = "http://c.m.163.com/nc/subscribe/list/"+tid+"/all/"+page*20+"-20.html"; String url = "http://c.m.163.com/nc/subscribe/list/"+tid+"/all/"+page*20+"-20.html";
System.out.println(url); System.out.println(url);
String htmlBody = HttpClientTemplateOK.get(url, null,headerMap); String htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody!=null) if(htmlBody!=null)
{ {
List<WangYiNews> wyList = analysis(htmlBody,endTime); List<WangYiNews> wyList = analysis(htmlBody,endTime);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment