Commit 74dd753d by yangchen

头条 评论数采集修改

parent fca86187
......@@ -9,7 +9,7 @@
<dependency>
<groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId>
<version>0.0.2-SNAPSHOT</version>
<version>0.0.5-SNAPSHOT</version>
</dependency>
</dependencies>
......
......@@ -13,6 +13,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.toutiao.bean.TouTiaoComment;
......@@ -190,7 +192,34 @@ public class TouTiaoCommentParse {
return 0;
}
/**
 * Fetches a Toutiao page and extracts its comment count.
 *
 * <p>The page at {@code url} is downloaded with a plain GET request. When the body
 * contains the {@code "commentInfo"} marker, the number that follows
 * {@code "comments_count: "} (up to the next comma) is parsed and returned.
 *
 * @param url   address of the page to fetch
 * @param proxy not used by this method: the request is issued without it. The parameter
 *              is kept so existing callers continue to compile.
 * @return the parsed comment count; {@code -1} when the page contains
 *         {@code "commentInfo"} but the count cannot be extracted or parsed;
 *         {@code 0} when the request fails or the marker is absent
 */
public static int findCommentCountByNewProxy(String url, Proxy proxy)
{
    String htmlBody;
    try {
        htmlBody = HttpBoot.syncCall(RequestUtils.wrapGet(url)).body().string();
    } catch (Exception e) {
        // Best-effort contract is preserved (0 on failure), but the failure is no longer silent.
        logger.error("Failed to fetch Toutiao comment page: {}", url, e);
        return 0;
    }

    if (htmlBody == null || !htmlBody.contains("commentInfo")) {
        return 0;
    }

    try {
        String rawCount = htmlBody.split("comments_count: ")[1].split(",")[0];
        // trim() tolerates stray whitespace between the digits and the comma.
        return Integer.parseInt(rawCount.trim());
    } catch (RuntimeException e) {
        // Pass the exception itself as the last argument so the original stack trace is
        // logged; fillInStackTrace() would replace it with this call site.
        logger.error("解析头条评论数错误:::{}", url, e);
        return -1;
    }
}
/**
* @Title: getCommentCount
......
/**
* @Title: TouTiaoExample.java
* @Package com.zhiwei.toutiao.test
* @Description:
* @author hero
* @date 2016年9月2日 上午11:48:51
* @version V1.0
*/
/**
*
*/
package com.zhiwei.toutiao.test;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import com.zhiwei.toutiao.bean.TouTiaoArticle;
import com.zhiwei.toutiao.parse.TouTiaoArticleParse;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
/**
 * Manual example that pages through article lists returned by
 * {@code TouTiaoArticleParse.getTouTiaoList} and prints every article to standard output.
 *
 * @author hero
 * @date 2016-09-02 11:48:51
 */
public class TouTiaoExample {

    /**
     * Runs the example: for each configured id, requests pages until no further cursor or
     * no articles are returned, printing each article, then prints the elapsed seconds.
     *
     * @param args ignored
     * @throws Exception propagated from the date parsing or list fetching helpers
     */
    @SuppressWarnings("unchecked")
    public static void main(String[] args) throws Exception {
        long startMillis = System.currentTimeMillis();

        // Despite the name, the list holds ids that are passed as the first argument
        // of getTouTiaoList.
        List<String> urlList = new ArrayList<String>();
        urlList.add("6859134443");
        System.out.println(urlList.size());

        Date endTime = TimeParse.stringFormartDate("2018-04-01");

        for (String mid : urlList) {
            // Paging cursor; starts at "0" and is replaced by the value each page returns.
            String max_behot_time = "0";
            while (true) {
                Map<String, Object> dataMap =
                        TouTiaoArticleParse.getTouTiaoList(mid, max_behot_time, endTime, null);
                if (dataMap == null) {
                    // Without a result the cursor cannot advance; repeating the identical
                    // request would spin forever, so stop paging for this id.
                    break;
                }

                List<TouTiaoArticle> ttlist = (List<TouTiaoArticle>) dataMap.get("data");
                max_behot_time = (String) dataMap.get("max_behot_time");

                // Guard against a missing "data" entry instead of failing on size().
                int pageSize = (ttlist == null) ? 0 : ttlist.size();
                System.out.println(max_behot_time + "=======" + pageSize);

                if (max_behot_time == null || pageSize == 0) {
                    break;
                }
                for (TouTiaoArticle tt : ttlist) {
                    System.out.println(tt);
                }
            }
        }

        long endMillis = System.currentTimeMillis();
        System.out.println("一轮的采集时间为:" + (endMillis - startMillis) / 1000);
    }
}
///**
// * @Title: TouTiaoExample.java
// * @Package com.zhiwei.toutiao.test
// * @Description:
// * @author hero
// * @date 2016年9月2日 上午11:48:51
// * @version V1.0
// */
///**
//*
//*/
//package com.zhiwei.toutiao.test;
//
//import java.util.ArrayList;
//import java.util.Date;
//import java.util.List;
//import java.util.Map;
//
//import com.zhiwei.tools.timeparse.TimeParse;
//import com.zhiwei.toutiao.bean.TouTiaoArticle;
//import com.zhiwei.toutiao.parse.TouTiaoArticleParse;
//
///**
// * @Description:
// * @author hero
// * @date 2016年9月2日 上午11:48:51
// */
//public class TouTiaoExample {
//
// @SuppressWarnings("unchecked")
// public static void main(String[] args) throws Exception {
// long a = System.currentTimeMillis();
// List<String> urlList = new ArrayList<String>();
// urlList.add("6859134443");
//
// System.out.println(urlList.size());
//
// Date endTime = TimeParse.stringFormartDate("2018-04-01");
//
// for (String url : urlList) {
// String mid = url;
// String max_behot_time = "0";
// while (true) {
// Map<String, Object> dataMap = null;
// dataMap = TouTiaoArticleParse.getTouTiaoList(mid, max_behot_time, endTime,null);
// if (dataMap != null) {
// List<TouTiaoArticle> ttlist = (List<TouTiaoArticle>) dataMap.get("data");
// max_behot_time = (String) dataMap.get("max_behot_time");
// System.out.println(max_behot_time + "=======" + ttlist.size());
// if (max_behot_time == null || ttlist.isEmpty()) {
// break;
// } else {
// if (ttlist.size() > 0) {
// for (TouTiaoArticle tt : ttlist) {
// System.out.println(tt);
// }
// }
// }
// }
// }
// }
// long b = System.currentTimeMillis();
// System.out.println("一轮的采集时间为:" + (b - a) / 1000);
// }
//
//}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment