Commit c3de2f29 by [zhangzhiwei]

因修改采集核心包版本(zhiwei-tools 0.0.5-SNAPSHOT → 0.0.8-SNAPSHOT),修改相应的方法:HttpBoot.syncCall 由静态调用改为通过 HttpBoot 实例调用

parent 48b45d95
......@@ -3,13 +3,13 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>toutiao</artifactId>
<version>0.2.2-SNAPSHOT</version>
<version>0.2.3-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId>
<version>0.0.5-SNAPSHOT</version>
<version>0.0.8-SNAPSHOT</version>
</dependency>
</dependencies>
......
......@@ -29,7 +29,7 @@ public class TouTiaoAccountParse {
private TouTiaoAccountParse() {}
private static Map<String, String> headerMap;
private static Logger logger = LogManager.getLogger(TouTiaoAccountParse.class);
private static HttpBoot httpBoot = new HttpBoot();
/**
* @Title: getTouTiaoAccountInfo
* @author hero
......@@ -45,13 +45,13 @@ public class TouTiaoAccountParse {
TouTiaoAccount tta = null;
try {
String htmlBody = null;
htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null ){
tta = parseHtmlByAccount(htmlBody, name, proxy);
if(tta == null){
url = "https://www.toutiao.com/search_content/?offset=0&format=json&keyword="+URLCodeUtil.getURLEncode(name, "utf-8")+"&autoload=true&count=20&cur_tab=4&from=media";
headerMap.put("Referer","https://www.toutiao.com/search/?keyword="+URLCodeUtil.getURLEncode(name, "utf-8"));
htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null){
tta = parseHtmlByAccount(htmlBody, name, proxy);
}
......@@ -59,7 +59,7 @@ public class TouTiaoAccountParse {
}else {
url = "https://www.toutiao.com/search_content/?offset=0&format=json&keyword="+URLCodeUtil.getURLEncode(name, "utf-8")+"&autoload=true&count=20&cur_tab=4&from=media";
headerMap.put("Referer","https://www.toutiao.com/search/?keyword="+URLCodeUtil.getURLEncode(name, "utf-8"));
htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null){
tta = parseHtmlByAccount(htmlBody, name, proxy);
}
......@@ -80,7 +80,7 @@ public class TouTiaoAccountParse {
TouTiaoAccount tta = null;
try {
String htmlBody = null;
htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null){
tta = parseAccountByUserId(htmlBody, user_id, proxy);
}
......@@ -113,7 +113,7 @@ public class TouTiaoAccountParse {
headerMap = Tools.getTouTiaoHeader();
try {
String htmlBody = null;
htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null){
JSONObject json = JSONObject.parseObject(htmlBody);
list.addAll(parseHtmlByWord(json, proxy));
......@@ -153,7 +153,7 @@ public class TouTiaoAccountParse {
headerMap.put("Host", "is.snssdk.com");
try {
String htmlBody = null;
htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody != null && htmlBody.contains("name")){
JSONObject json = JSONObject.parseObject(htmlBody);
more = json.getJSONObject("data").getBooleanValue("has_more");
......
......@@ -38,7 +38,7 @@ import com.zhiwei.toutiao.util.Tools;
public class TouTiaoArticleParse {
private TouTiaoArticleParse() {}
private static Logger logger = LogManager.getLogger(TouTiaoArticleParse.class);
private static HttpBoot httpBoot = new HttpBoot();
/***
* 获取头条数据
*
......@@ -60,7 +60,7 @@ public class TouTiaoArticleParse {
headerMap.put("Referer", url);
String htmlBody = null;
try {
htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if(htmlBody != null && htmlBody.contains("behot_time")){
Map<String, Object> ttList = parseHtmlByAccount(htmlBody, endData);
if(ttList!=null && ttList.size()>0){
......@@ -155,7 +155,7 @@ public class TouTiaoArticleParse {
Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + user_id + "/");
try {
String htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if (htmlBody != null) {
Map<String, Object> dataMap = parseHtmlByMicroAccount(htmlBody, endDate);
if(dataMap!=null && dataMap.size()>0){
......
......@@ -19,6 +19,7 @@ import com.zhiwei.wangyi.bean.WangYiNews;
public class WangyiNewParse {
private static Logger logger = LogManager.getLogger(WangyiNewParse.class);
private static boolean finish = true;
private static HttpBoot httpBoot = new HttpBoot();
/**
* @Title: getWYHistory
* @Description: TODO(根据文章地址解析网易号历史文章)
......@@ -38,7 +39,7 @@ public class WangyiNewParse {
{
String url = "http://c.m.163.com/nc/subscribe/list/"+tid+"/all/"+page*20+"-20.html";
System.out.println(url);
String htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
if(htmlBody!=null)
{
List<WangYiNews> wyList = analysis(htmlBody,endTime);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment