Commit 862fa762 by cwy

头条评论采集修改

parent e5fa93cf
...@@ -4,12 +4,11 @@ import java.io.IOException; ...@@ -4,12 +4,11 @@ import java.io.IOException;
import java.net.Proxy; import java.net.Proxy;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import com.zhiwei.toutiao.bean.TouTiaoArticle;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
...@@ -23,7 +22,6 @@ import com.zhiwei.toutiao.bean.TouTiaoComment; ...@@ -23,7 +22,6 @@ import com.zhiwei.toutiao.bean.TouTiaoComment;
import com.zhiwei.toutiao.util.Tools; import com.zhiwei.toutiao.util.Tools;
import okhttp3.Response; import okhttp3.Response;
import org.jsoup.Jsoup;
/** /**
* @ClassName: TouTiaoComment * @ClassName: TouTiaoComment
...@@ -36,6 +34,15 @@ public class TouTiaoCommentParse { ...@@ -36,6 +34,15 @@ public class TouTiaoCommentParse {
private static Logger logger = LogManager.getLogger(TouTiaoCommentParse.class); private static Logger logger = LogManager.getLogger(TouTiaoCommentParse.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
// public static void main(String[] args) {
// ProxyFactory.init(SimpleConfig.builder().registry("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181").group("local").appId(10000002).appName("toutiao").build());
// try {
// getTouTiaoComment("https://www.toutiao.com/a6774213907079758344/", 0, null);
// } catch (Exception e) {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
// }
/** /**
* *
...@@ -67,9 +74,8 @@ public class TouTiaoCommentParse { ...@@ -67,9 +74,8 @@ public class TouTiaoCommentParse {
String urlNew = "http://is.snssdk.com/article/v2/tab_comments/?app_name=news_article&offset=" String urlNew = "http://is.snssdk.com/article/v2/tab_comments/?app_name=news_article&offset="
+i*20+"&group_id="+group_id+"&aggr_type=1&count=20&fold=1&item_id="+group_id+"&ts="+System.currentTimeMillis(); +i*20+"&group_id="+group_id+"&aggr_type=1&count=20&fold=1&item_id="+group_id+"&ts="+System.currentTimeMillis();
//设置头信息 //设置头信息
Map<String,String> headerMap = Tools.getTouTiaoHeader(); Map<String,String> headerMap = new HashMap<>();
headerMap.put("User-Agent", "News 6.6.5 rv:6.6.5.03 (iPhone; iOS 11.3; zh_CN) Cronet"); headerMap.put("User-Agent", "News 6.6.5 rv:6.6.5.03 (iPhone; iOS 11.3; zh_CN) Cronet");
headerMap.put("Host", "is.snssdk.com");
for(int j=1; j<=3; j++){ for(int j=1; j<=3; j++){
try { try {
String htmlBody = downloadHtml(urlNew, proxy, headerMap); String htmlBody = downloadHtml(urlNew, proxy, headerMap);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment