Commit 74dd753d by yangchen

头条 评论数采集修改

parent fca86187
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
<dependency> <dependency>
<groupId>com.zhiwei.tools</groupId> <groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId> <artifactId>zhiwei-tools</artifactId>
<version>0.0.2-SNAPSHOT</version> <version>0.0.5-SNAPSHOT</version>
</dependency> </dependency>
</dependencies> </dependencies>
......
...@@ -13,6 +13,8 @@ import org.slf4j.LoggerFactory; ...@@ -13,6 +13,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK; import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.toutiao.bean.TouTiaoComment; import com.zhiwei.toutiao.bean.TouTiaoComment;
...@@ -190,7 +192,34 @@ public class TouTiaoCommentParse { ...@@ -190,7 +192,34 @@ public class TouTiaoCommentParse {
return 0; return 0;
} }
/**
 * Fetches the page at {@code url} and extracts the Toutiao comment count from it.
 *
 * <p>The page is requested through {@code HttpBoot.syncCall(RequestUtils.wrapGet(url))}.
 * The count is taken from the text between the first {@code "comments_count: "} marker
 * and the following comma, and is only looked for when the page contains
 * {@code "commentInfo"}.
 *
 * @param url   address of the page to fetch
 * @param proxy not used by this method: the request is built from {@code url} alone.
 *              NOTE(review): presumably kept for signature parity with the other
 *              comment-count method in this class - confirm whether the proxy should
 *              be applied to the request.
 * @return the parsed comment count; {@code -1} when the page contains
 *         {@code "commentInfo"} but the count cannot be parsed; {@code 0} when the
 *         request fails or the page does not contain {@code "commentInfo"}
 */
public static int findCommentCountByNewProxy(String url, Proxy proxy) {
    final String htmlBody;
    try {
        htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
    } catch (Exception e) {
        // Still reported to the caller as 0 (unchanged contract), but no longer silent.
        logger.error("Failed to fetch Toutiao page for comment count:::{}", url, e);
        return 0;
    }

    if (htmlBody == null || !htmlBody.contains("commentInfo")) {
        return 0;
    }

    try {
        String rawCount = htmlBody.split("comments_count: ")[1].split(",")[0];
        return Integer.parseInt(rawCount.trim());
    } catch (Exception e) {
        // Covers a missing marker (index out of bounds) and non-numeric text.
        // The exception goes last so SLF4J logs its original stack trace; the URL fills "{}".
        logger.error("解析头条评论数错误:::{}", url, e);
        return -1;
    }
}
/** /**
* @Title: getCommentCount * @Title: getCommentCount
......
/** ///**
* @Title: TouTiaoExample.java // * @Title: TouTiaoExample.java
* @Package com.zhiwei.toutiao.test // * @Package com.zhiwei.toutiao.test
* @Description: // * @Description:
* @author hero // * @author hero
* @date 2016年9月2日 上午11:48:51 // * @date 2016年9月2日 上午11:48:51
* @version V1.0 // * @version V1.0
*/ // */
/** ///**
* //*
*/ //*/
package com.zhiwei.toutiao.test; //package com.zhiwei.toutiao.test;
//
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.Date; //import java.util.Date;
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
//
import com.zhiwei.toutiao.bean.TouTiaoArticle; //import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.toutiao.parse.TouTiaoArticleParse; //import com.zhiwei.toutiao.bean.TouTiaoArticle;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; //import com.zhiwei.toutiao.parse.TouTiaoArticleParse;
//
/**
 * Manual example that pages through {@code TouTiaoArticleParse.getTouTiaoList} for a
 * hard-coded list of ids and prints every article returned, followed by the elapsed
 * time in seconds.
 *
 * @author hero
 */
public class TouTiaoExample {

    public static void main(String[] args) throws Exception {
        final long startedAt = System.currentTimeMillis();

        List<String> mediaIds = new ArrayList<String>();
        mediaIds.add("6859134443");
        System.out.println(mediaIds.size());

        Date endTime = TimeParse.stringFormartDate("2018-04-01");

        for (String mid : mediaIds) {
            // presumably the paging cursor returned by the previous call - verify against getTouTiaoList
            String cursor = "0";
            for (;;) {
                Map<String, Object> page = TouTiaoArticleParse.getTouTiaoList(mid, cursor, endTime, null);
                if (page == null) {
                    // A null result repeats the same request immediately, with no limit or delay.
                    continue;
                }

                // The map is untyped; "data" is cast to the article list without a check.
                @SuppressWarnings("unchecked")
                List<TouTiaoArticle> articles = (List<TouTiaoArticle>) page.get("data");
                cursor = (String) page.get("max_behot_time");
                System.out.println(cursor + "=======" + articles.size());

                if (cursor == null || articles.isEmpty()) {
                    break;
                }
                for (TouTiaoArticle article : articles) {
                    System.out.println(article);
                }
            }
        }

        final long finishedAt = System.currentTimeMillis();
        System.out.println("一轮的采集时间为:" + (finishedAt - startedAt) / 1000);
    }

}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment