Merge branch 'master' of http://git.zhiweidata.top/zhangzhiwei/media_data_crawler.git

b8eef493 · zhiwei · f0ddce27 · 59dd3601 · b8eef493 · b8eef493
Commit b8eef493 authored Aug 11, 2018 by zhiwei
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 3 additions and 2 deletions

src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduNewsCrawlerParse.java
+0 -1

src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduTiebaCrawlerParse.java
+3 -1

No files found.
--- a/src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduNewsCrawlerParse.java
+++ b/src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduNewsCrawlerParse.java
@@ -93,7 +93,6 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
    public static int getBaiduNewsCount(String word, String startTime, String endTime, Proxy proxy,String cookie) throws Exception {
        try {
            String result = downloadHtml(word, startTime, endTime, proxy, "newsdy", 1,cookie);
-            System.out.println(result);
            String s = result.split("找到相关新闻")[1];
            String s1 = s.split("篇")[0];
            s1 = s1.replace(",", "").replace("约", "");

--- a/src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduTiebaCrawlerParse.java
+++ b/src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduTiebaCrawlerParse.java
@@ -138,6 +138,8 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
        if(title == null || title.length() < 1) {
            title = document.select("#j_core_title_wrap > h3").text();
        }
+        String source = null;
+        source = document.select("div.card_top.clearfix > div.card_title > a").text();
        System.out.println(title);
        for(Element element : elementes) {
            String time = null;
@@ -159,7 +161,7 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
            }
            if(time != null && time.length() > 1) {
-                TiebaData tbd = new TiebaData("http://tieba.baidu.com/p/"+aid, title, time, tid, null, author, content, aid);
+                TiebaData tbd = new TiebaData("http://tieba.baidu.com/p/"+aid, title, time, tid, source, author, content, aid);
                System.out.println(tbd.toString());
                list.add(tbd);
            }