天涯论坛添加采集开始时间，知乎添加图片量采集

88e4e8c0 · win 10 · ed4f527e · 88e4e8c0 · 88e4e8c0 · 88e4e8c0
Commit 88e4e8c0 authored Apr 13, 2020 by win 10
9 changed files
--- a/src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduInforCrawlerParse.java
+++ b/src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduInforCrawlerParse.java
@@ -95,6 +95,68 @@ public class BaiduInforCrawlerParse {
        return list;
    }
    
+    /**
+     * Collects Baidu news search results for one keyword by fetching ten result pages
+     * (page indices 0-9) concurrently.
+     *
+     * <p>Each page URL is built with {@code getUrl(word, i, endTime)}, downloaded with
+     * {@code downloadHtml(url)} and parsed with {@code analysisData(htmlBody, saveWord)}; the
+     * "data" entry of every parsed page is merged into one list. A page that fails is logged
+     * and skipped so that the remaining pages are still collected.
+     *
+     * @param word     keyword used to build the search URL
+     * @param endTime  time bound forwarded to {@code getUrl}
+     * @param saveWord keyword forwarded to {@code analysisData}
+     * @return records merged from every page that was downloaded and parsed; the order across
+     *         pages is not defined because the pages complete concurrently
+     * @throws Exception declared for callers; presumably raised by {@code getUrl} or while
+     *                   waiting for the page tasks -- TODO confirm
+     */
+    @SuppressWarnings("unchecked")
+    public static List<NewsData> getBaiduInforDataManyWord(String word,String endTime,String saveWord) throws Exception {
+        // Appended to from the asynchronous page tasks, so every access is guarded by its monitor.
+        List<NewsData> list = new ArrayList<>();
+        GroupSync groupSync = new GroupSync();
+        for (int i = 0; i < 10; i++) {
+            String url = getUrl(word, i, endTime);
+            groupSync.add();
+            TaskBoot.blockingAsync(() -> {
+                try {
+                    String htmlBody = downloadHtml(url);
+                    if (htmlBody != null) {
+                        Map<String, Object> dataMap = analysisData(htmlBody, saveWord);
+                        List<NewsData> dataList = (List<NewsData>) dataMap.get("data");
+                        System.out.println(url);
+                        if (dataList != null) {
+                            synchronized (list) {
+                                list.addAll(dataList);
+                            }
+                        }
+                    }
+                } catch (Exception e) {
+                    // Best effort: one failed page must not abort the others, but it must be visible.
+                    logger.error("百度新闻数据获取失败 {}", url, e);
+                } finally {
+                    groupSync.done();
+                }
+            });
+        }
+        groupSync.await();
+        // Snapshot under the same lock the tasks used so that every completed append is visible.
+        synchronized (list) {
+            return new ArrayList<>(list);
+        }
+    }
+    
   /**
    * @Title: downloadHtml
    * @author hero
@@ -303,7 +365,7 @@ public class BaiduInforCrawlerParse {
    //https://www.baidu.com/s?tn=news&rtt=4&bsst=1&cl=2&wd=%E6%B5%99%E6%B1%9F&medium=1&rn=50&gpc=stf%3D1546272000%2C1548950400%7Cstftype%3D2&tngroupname=organic_news&rsv_dl=news_b_pn&pn=0
 //    public static void main(String[] args) throws Exception {
 //        String url = "http://www.baidu.com/s?tn=news&rtt=4&bsst=1&cl=1&wd=%E5%A5%94%E9%A9%B0+%E6%BC%8F%E6%B1%BD%E6%B2%B9&medium=0&rn=50&gpc=stf%3D0%2C1496246399%7Cstftype%3D2&tngroupname=organic_news&rsv_dl=news_l_more&x_bfe_rqs=03E80&x_bfe_tjscore=0.332314&scs=2546086922&sortBy=0&pn=0";
-//        ProxyFactory.init("zookeeper://192.168.0.36:2181", "local", GroupType.PROVIDER);
+//        ProxyFactory.init("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181", "local", GroupType.PROVIDER, 10000008);//初始化代理
 //        List<NewsData> ndList = getBaiduInforData("马云","2019-07-04 23:59:59");
 //        System.out.println(ndList.size());
 //        String result = downloadHtml(url,0);

--- a/src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduTiebaCrawlerParse.java
+++ b/src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduTiebaCrawlerParse.java
@@ -17,24 +17,65 @@ import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;

 import com.zhiwei.crawler.core.HttpBoot;
+import com.zhiwei.crawler.proxy.ProxyFactory;
 import com.zhiwei.crawler.proxy.ProxyHolder;
 import com.zhiwei.crawler.utils.RequestUtils;
 import com.zhiwei.media_data_crawler.data.DataCrawler;
 import com.zhiwei.media_data_crawler.entity.TiebaData;
+import com.zhiwei.media_data_crawler.excelentity.DataExcel;
+import com.zhiwei.proxy.config.SimpleConfig;
 import com.zhiwei.tools.httpclient.HeaderTool;
+import com.zhiwei.tools.timeparse.TimeParse;
 import com.zhiwei.tools.tools.URLCodeUtil;
 import com.zhiwei.tools.tools.ZhiWeiTools;

 import okhttp3.Response;

+/**
+ * 百度贴吧采集
+ * @author xMx 
+ * @date 2019年10月31日 下午5:47:28
+ */
 public class BaiduTiebaCrawlerParse {
 	private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
 	private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class);
 	
+//	public static void main(String[] args) {
+//	    ProxyFactory.init(SimpleConfig.builder().registry("zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181")
+//	            .appName("xumiaoxin").appId(10000008).group("local").build());
+//	    
+//	    List<DataExcel> bodyList = new ArrayList<>();
+//	    
+//	    try {
+//	        List<String> wordList = WordsReadFile.getWords("D:\\crawlerdata\\关键词6.txt");
+//	        for(String s:wordList) {
+//    	        List<TiebaData> dataList = getBaiduTiebaData(s, null, null);
+//    	        dataList.forEach(data -> {
+//    	            DataExcel dataExcel = new DataExcel();
+//    	            dataExcel.setAuthor(data.getAuthor());
+//    	            dataExcel.setContent(data.getContent());
+//    	            dataExcel.setSource(data.getSource());
+//    	            dataExcel.setTid(data.getTid());
+//    	            dataExcel.setTime(data.getTime());
+//    	            dataExcel.setTitle(data.getTitle());
+//    	            dataExcel.setUrl(data.getUrl());
+//    	            dataExcel.setWord(data.getWord());
+//    	            
+//    	            bodyList.add(dataExcel);
+//    	        });
+//	        }
+//        } catch (Exception e) {
+//            e.toString();
+//        }
+//	    
+//	    EasyExcel.write("D:\\crawlerdata\\百度贴吧-花木兰2.xlsx", DataExcel.class).sheet("数据").doWrite(bodyList);
+//	    System.out.println("导出成功");
+//    }
+	
 	/**
 	 * @Title: getBaiduTiebaData 
 	 * @author hero 
-	 * @Description: 根據關鍵詞獲取百度貼吧數據（最多50頁）
+	 * @Description: 根据关键词获取百度贴吧数据
 	 * @param @param word
 	 * @param @param proxy
 	 * @param @param tiebaName
@@ -43,28 +84,29 @@ public class BaiduTiebaCrawlerParse {
 	 * @return List<TiebaData> 返回类型
 	 */
 	@SuppressWarnings("unchecked")
-	public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName) throws Exception {
+	public static List<TiebaData> getBaiduTiebaData(String word, Proxy proxy, String tiebaName, String startTime) throws Exception {
 		List<TiebaData> list = new ArrayList<TiebaData>();
-		int page = 0;
+		int page = 1;
+		// Number of consecutive failed pages; bounds the retry behaviour of the loop below.
+		int failures = 0;
 		boolean more = true;
 		while (more) {
-			// 最大页数为20
-			if (page > 50) {
-				more = false;
-			}
-			String htmlBody = downloadHtml(word, proxy, tiebaName, page);
-			if (htmlBody != null) {
-				Map<String, Object> dataMap = analysisData(htmlBody, proxy, word);
-				List<TiebaData> dataList = (List<TiebaData>) dataMap.get("data");
-				list.addAll(dataList);
-				more = (Boolean) dataMap.get("more");
-			} else {
-				more = false;
-			}
-			page++;
-			if(DataCrawler.sleepTime!=null){
-				 ZhiWeiTools.sleep(DataCrawler.sleepTime);
-			}
+			try {
+				String htmlBody = downloadHtml(word, proxy, tiebaName, page);
+				if (htmlBody == null) {
+					// Nothing was downloaded for this page, so stop instead of paging on blindly.
+					break;
+				}
+				Map<String, Object> dataMap = analysisData(htmlBody, proxy, word, startTime);
+				List<TiebaData> dataList = (List<TiebaData>) dataMap.get("data");
+				list.addAll(dataList);
+				// analysisData no longer clears "more" for a page without results, so a page
+				// that yields no records is treated here as the end of the result set.
+				more = (Boolean) dataMap.get("more") && !dataList.isEmpty();
+				page++;
+				failures = 0;
+			} catch (Exception e) {
+				logger.error("百度贴吧数据获取失败", e);
+				// The page counter is not advanced on failure; give up after a few consecutive
+				// errors rather than requesting the same page forever.
+				if (++failures >= 3) {
+					break;
+				}
+			}
 		}
 		return list;
 	}
@@ -85,7 +127,7 @@ public class BaiduTiebaCrawlerParse {
 	public static Map<String,Object> getBaiduTiebaData(String word, Proxy proxy, String tiebaName,int page) throws Exception {
 		String htmlBody = downloadHtml(word, proxy, tiebaName, page);
 		if (htmlBody != null) {
-			return analysisData(htmlBody, proxy, word);
+			return analysisData(htmlBody, proxy, word, null); // NOTE(review): startTime is null here -- confirm analysisData accepts a null start time
 		}
 		return null;
 	}
@@ -270,6 +312,9 @@ public class BaiduTiebaCrawlerParse {
 		Map<String, String> headerMap = HeaderTool.getCommonHead();
 		// 获取链接地址
 		String url = getUrl(word, tiebaName, page);
+		
+		logger.info("采集进度 {} === {}", word , url);
+		
 		headerMap.put("Host", "tieba.baidu.com");
 		headerMap.put("Referer", url);
 		// 下载数据页面
@@ -283,11 +328,9 @@ public class BaiduTiebaCrawlerParse {
 				}
 				return response.body().string();
 			} catch (Exception e) {
-				logger.error("获取数据时出现问题,问题为：{}", e.fillInStackTrace());
+				logger.error("获取数据时出现问题", e);
 				if(i==3){
 					throw e;
-				}else{
-					continue;
 				}
 			}
 		}
@@ -306,7 +349,7 @@ public class BaiduTiebaCrawlerParse {
 	 * @param @throws Exception 设定文件 
 	 * @return Map<String,Object> 返回类型
 	 */
-	private static Map<String, Object> analysisData(String htmlBody, Proxy proxy, String word) throws Exception{
+	private static Map<String, Object> analysisData(String htmlBody, Proxy proxy, String word, String startTime) throws Exception{
 		Map<String, Object> resultMap = new HashMap<String, Object>();
 		List<TiebaData> list = new ArrayList<TiebaData>();
 		boolean more = true;
@@ -338,16 +381,21 @@ public class BaiduTiebaCrawlerParse {
            try {
            	author = element.select("a").select("font.p_violet").text().split(" ")[1];
            	time = element.select("font.p_date").text();
+            	
+                long artTime = TimeParse.stringFormartDate(time).getTime();//文章时间
+                long star = TimeParse.stringFormartDate(startTime).getTime();//采集开始时间
+                if(artTime < star) {
+                    more = false;
+                    break;
+                }
+            	
                TiebaData tiebaData = new TiebaData(link, title, time, tid, source, author, content, word);
                list.add(tiebaData);
            }catch (Exception e) {
                logger.debug("无作者 或者 无来源");
-                continue;
            }
        }
-		if(elementes.size()==0){
-			more = false;
-		}
+		
 		resultMap.put("data", list);
 		resultMap.put("more", more);
 		return resultMap;

--- a/src/main/java/com/zhiwei/media_data_crawler/crawler/CrawlerTest.java
+++ b/src/main/java/com/zhiwei/media_data_crawler/crawler/CrawlerTest.java
+package com.zhiwei.media_data_crawler.crawler;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import com.zhiwei.crawler.proxy.ProxyFactory;
+import com.zhiwei.crawler.proxy.ProxyHolder;
+import com.zhiwei.excelpoi.excel.PoiExcelUtil;
+import com.zhiwei.media_data_crawler.entity.ZhihuAnswer;
+import com.zhiwei.proxy.config.SimpleConfig;
+import com.zhiwei.tools.timeparse.TimeParse;
+
+/**
+ * Manual export tool: collects Zhihu answers for every keyword of a local word file through
+ * {@code ZhihuAnwserCrawlerParse.getPictureCount} and writes them to an Excel sheet, including
+ * the per-answer image count and rank.
+ *
+ * @author xMx
+ * @date 2019-10-19 11:01:29
+ */
+public class CrawlerTest {
+
+    /**
+     * Column titles handed to the Excel export. The same strings key every row map, so the
+     * header list and the row keys are defined exactly once.
+     */
+    private static final String[] HEADERS = {
+        "地址", "问题地址", "标题", "时间", "发布者", "作者地址", "内容",
+        "回答点赞数", "回答评论数", "问题点赞数", "问题评论数", "图片数量", "排名"
+    };
+
+    /**
+     * Runs the export with the hard-coded proxy registry and file paths below.
+     *
+     * @param args unused
+     * @throws Exception propagated from the crawler or the Excel export calls
+     */
+    public static void main(String[] args) throws Exception {
+        // Proxy registry address
+        String address = "zookeeper://192.168.0.11:2181?backup=192.168.0.30:2181,192.168.0.35:2181";
+        // NOTE(review): spelled "xumaioxin" here -- confirm this is the intended application name
+        String appName = "xumaioxin";
+        long appId = 10000008L;
+        ProxyFactory.init(SimpleConfig.builder().registry(address).appName(appName).appId(appId).group("local").build());
+
+        String wordFileName = "D://crawlerdata/关键词5.txt";
+        String dataFileName = "D://crawlerdata/知乎2.xlsx";
+
+        List<Map<String, Object>> resultList = new ArrayList<>();
+        for (String word : WordsReadFile.getWords(wordFileName)) {
+            for (ZhihuAnswer answer : ZhihuAnwserCrawlerParse.getPictureCount(word)) {
+                resultList.add(toRow(answer));
+            }
+        }
+
+        List<String> headList = new ArrayList<>();
+        for (String header : HEADERS) {
+            headList.add(header);
+        }
+
+        PoiExcelUtil.getInstance().exportExcel(dataFileName, "数据", headList, resultList);
+        System.out.println("导出成功");
+    }
+
+    /** Converts one answer into a row map keyed by the entries of {@link #HEADERS}. */
+    private static Map<String, Object> toRow(ZhihuAnswer answer) {
+        // Values are listed in the same order as HEADERS.
+        Object[] values = {
+            answer.getUrl(), answer.getFrom_url(), answer.getTitle(), answer.getTime(),
+            answer.getAuthor(), answer.getAuthorUrl(), answer.getContent(),
+            answer.getAttitudes_count(), answer.getComment_count(), answer.getFollow_count(),
+            answer.getBord_count(), answer.getImgCount(), answer.getSort()
+        };
+        Map<String, Object> row = new HashMap<>();
+        for (int i = 0; i < HEADERS.length; i++) {
+            row.put(HEADERS[i], values[i]);
+        }
+        return row;
+    }
+}
--- a/src/main/java/com/zhiwei/media_data_crawler/crawler/JianshuCrawler.java
+++ b/src/main/java/com/zhiwei/media_data_crawler/crawler/JianshuCrawler.java
@@ -17,6 +17,7 @@ import com.zhiwei.crawler.utils.RequestUtils;
 import com.zhiwei.media_data_crawler.entity.JianshuUser;
 import com.zhiwei.tools.tools.URLCodeUtil;

+import okhttp3.MediaType;
 import okhttp3.Response;

 /**
@@ -43,7 +44,7 @@ public class JianshuCrawler {
            headers.put("origin", "https://www.jianshu.com");
            headers.put("accept", "application/json");
            headers.put("user-agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36");
-            try (Response response = httpBoot.syncCall(RequestUtils.wrapPost(url,headers,null), ProxyHolder.NAT_HEAVY_PROXY)){
+            try (Response response = httpBoot.syncCall(RequestUtils.wrapPost(url,okhttp3.RequestBody.create(MediaType.parse("application/json"), headers.toString())), ProxyHolder.NAT_HEAVY_PROXY)){
                String result = response.body().string();
                System.out.println(result);
                if(result.contains("搜索过于频繁")) {

--- a/src/main/java/com/zhiwei/media_data_crawler/crawler/WordsReadFile.java
+++ b/src/main/java/com/zhiwei/media_data_crawler/crawler/WordsReadFile.java
+package com.zhiwei.media_data_crawler.crawler;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Reads keyword lists from plain-text files.
+ */
+public class WordsReadFile {
+	
+	private static Logger logger = LoggerFactory.getLogger(WordsReadFile.class);
+	
+	/**
+	 * Reads keywords from a text file, one keyword per line, decoding the file as GBK.
+	 * Empty lines are skipped; every other line is returned exactly as read.
+	 *
+	 * @param wordFileName full path of the keyword file
+	 * @return the non-empty lines in file order, or an empty list when the file cannot be read
+	 */
+	public static List<String> getWords(String wordFileName) {
+		List<String> list = new ArrayList<String>();
+		// try-with-resources closes the stream and the reader even when opening or reading fails.
+		try (FileInputStream in = new FileInputStream(wordFileName);
+				BufferedReader br = new BufferedReader(new InputStreamReader(in, "GBK"))) {
+			String line;
+			while ((line = br.readLine()) != null) {
+				if (!line.isEmpty()) {
+					list.add(line);
+				}
+			}
+			return list;
+		} catch (IOException e) {
+			// Logged above debug level and with the stack trace: an unreadable keyword file
+			// otherwise looks like an empty one to the caller.
+			logger.warn("读取关键词文件失败 {}", e.getMessage(), e);
+			return Collections.emptyList();
+		}
+	}
+
+}
--- a/src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuUserAnswerCrawlerParse.java
+++ b/src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuUserAnswerCrawlerParse.java
@@ -22,6 +22,11 @@ import com.zhiwei.tools.tools.ZhiWeiTools;

 import okhttp3.Response;

+/**
+ * 获取用户的回答列表，https://www.zhihu.com/people/xie-yu-shi-29/answers
+ * @author xMx 
+ * @date 2020年3月3日 上午9:17:16
+ */
 public class ZhihuUserAnswerCrawlerParse {

    private static final Logger logger = LoggerFactory.getLogger(ZhihuUserAnswerCrawlerParse.class);

--- a/src/main/java/com/zhiwei/media_data_crawler/entity/ZhihuAnswer.java
+++ b/src/main/java/com/zhiwei/media_data_crawler/entity/ZhihuAnswer.java
@@ -29,11 +29,15 @@ public class ZhihuAnswer implements Serializable {

    private Integer bord_count;   //问题评论数

+    private Integer imgCount;   //图片数量
+    
+    private Integer sort;   //排名
+    
    public ZhihuAnswer(){}

    public ZhihuAnswer(String url, String from_url,String title, Date time, String author,
                     String authorUrl ,String content, Integer attitudes_count,
-                       Integer comment_count,Integer follow_count,Integer bord_count){
+                       Integer comment_count,Integer follow_count,Integer bord_count, Integer imgCount, Integer sort){
        this.url = url;
        this.from_url = from_url;
        this.title = title;
@@ -45,7 +49,8 @@ public class ZhihuAnswer implements Serializable {
        this.comment_count = comment_count;
        this.follow_count = follow_count;
        this.bord_count = bord_count;
-
+        this.imgCount = imgCount;
+        this.sort = sort;
    }

    @Override
@@ -62,6 +67,8 @@ public class ZhihuAnswer implements Serializable {
                ", comment_count=" + comment_count +
                ", follow_count=" + follow_count +
                ", bord_count=" + bord_count +
+                ", imgCount=" + imgCount +
+                ", sort=" + sort +
                '}';
    }

@@ -148,6 +155,22 @@ public class ZhihuAnswer implements Serializable {
    public void setBord_count(Integer bord_count) {
        this.bord_count = bord_count;
    }
+    
+    public Integer getImgCount() {
+        return imgCount;
+    }
+
+    public void setImgCount(Integer imgCount) {
+        this.imgCount = imgCount;
+    }
+    
+    public Integer getSort() {
+        return sort;
+    }
+
+    public void setSort(Integer sort) {
+        this.sort = sort;
+    }

    public void setComment_count(Integer comment_count) {
        this.comment_count = comment_count;

--- a/src/main/java/com/zhiwei/media_data_crawler/excelentity/DataExcel.java
+++ b/src/main/java/com/zhiwei/media_data_crawler/excelentity/DataExcel.java
+package com.zhiwei.media_data_crawler.excelentity;
+
+import com.alibaba.excel.annotation.ExcelProperty;
+
+/**
+ * Row model for EasyExcel export; the {@code @ExcelProperty} value on each field is its column title.
+ * @author xMx 
+ * @date 2019-10-29 09:15:40
+ */
+public class DataExcel {
+    
+    @ExcelProperty(value = "地址",index = 0)
+    private String url; // column "地址" (address); the only field with an explicit column index (0)
+    
+    @ExcelProperty("标题")
+    private String title; // column "标题" (title)
+    
+    @ExcelProperty("时间")
+    private String time; // column "时间" (time), kept as text
+    
+    @ExcelProperty("tid")
+    private String tid; // column "tid"
+    
+    @ExcelProperty("来源")
+    private String source; // column "来源" (source)
+    
+    @ExcelProperty("回复者或楼主")
+    private String author; // column "回复者或楼主" (replier or original poster)
+    
+    @ExcelProperty("回复内容")
+    private String content; // column "回复内容" (reply content)
+    
+    @ExcelProperty("关键词")
+    private String word; // column "关键词" (keyword)
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    public String getTime() {
+        return time;
+    }
+
+    public void setTime(String time) {
+        this.time = time;
+    }
+
+    public String getTid() {
+        return tid;
+    }
+
+    public void setTid(String tid) {
+        this.tid = tid;
+    }
+
+    public String getSource() {
+        return source;
+    }
+
+    public void setSource(String source) {
+        this.source = source;
+    }
+
+    public String getAuthor() {
+        return author;
+    }
+
+    public void setAuthor(String author) {
+        this.author = author;
+    }
+
+    public String getContent() {
+        return content;
+    }
+
+    public void setContent(String content) {
+        this.content = content;
+    }
+
+    public String getWord() {
+        return word;
+    }
+
+    public void setWord(String word) {
+        this.word = word;
+    }
+    
+}
--- a/src/test/java/com/zhiwei/media_data_crawler/test/DataCrawlerTest.java
+++ b/src/test/java/com/zhiwei/media_data_crawler/test/DataCrawlerTest.java
@@ -7,8 +7,10 @@
 //import java.util.List;
 //import java.util.Map;
 //
+//import com.zhiwei.crawler.proxy.ProxyHolder;
 //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
 //import com.zhiwei.media_data_crawler.data.DataCrawler;
+//import com.zhiwei.media_data_crawler.entity.LunTanData;
 //import com.zhiwei.media_data_crawler.entity.ZhiHuData;
 //import com.zhiwei.tools.timeparse.TimeParse;
 //
@@ -24,7 +26,7 @@
 //		String word = "58同城";     //关键词
 //		String startTime = "2018-10-23 23:00:00";  //开始时间
 //		String endTime = "2018-10-23 23:59:59";    //结束时间
-//		Proxy proxy = null;      //代理IP，不用可不填写
+//		ProxyHolder proxy = null;      //代理IP，不用可不填写
 //		try {
 ////			//百度新闻采集demo
 ////			List<NewsData> list = DataCrawler.getBaiduNewsData(word, startTime, endTime, proxy);
@@ -35,8 +37,8 @@
 ////			//Baidu貼吧採集
 ////			String tiebaName = "京东";  //贴吧名称，指定贴吧内采集，无则为null
 ////			List<TiebaData> tiebaList = DataCrawler.getBaiduTiebaData(word, proxy, tiebaName);
-////			//天涯论坛采集
-////			List<LunTanData> list = DataCrawler.getLunTanData(word, proxy, endTime);
+//			//天涯论坛采集
+//			List<LunTanData> list = DataCrawler.getLunTanData(word, proxy, startTime, endTime);
 //			//豆瓣采集
 ////			String type = "topic";   //topic 为指定话题采集，note为指定日记采集
 ////			List<DouBanData> list = DataCrawler.getDouBanData(word, type, proxy);
@@ -62,7 +64,7 @@
 //			
 //			for(int i=0;i<words.length;i++){
 //				System.out.println(words[i]+"   开始采集");
-//				List<ZhiHuData> zhihuList = DataCrawler.getZhihuByWord(words[i], "a_week",endDate, proxy);
+//				List<ZhiHuData> zhihuList = DataCrawler.getZhihuByWord(words[i], "a_week",endDate, null);
 //				System.out.println(words[i]+"=============="+zhihuList.size());
 //				for(ZhiHuData zhiHuData : zhihuList) {
 //					Map<String,Object> map = new HashMap<String,Object>();
@@ -90,14 +92,4 @@
 //		}
 //	}
 //	
-//	
-//	
-//	
-//	
-//	
-//	
-//	
-//	
-//	
-//	
 //}