Commit b8eef493 by zhiwei
parents f0ddce27 59dd3601
......@@ -93,7 +93,6 @@ public class BaiduNewsCrawlerParse extends HttpClientTemplateOK {
public static int getBaiduNewsCount(String word, String startTime, String endTime, Proxy proxy,String cookie) throws Exception {
try {
String result = downloadHtml(word, startTime, endTime, proxy, "newsdy", 1,cookie);
System.out.println(result);
String s = result.split("找到相关新闻")[1];
String s1 = s.split("篇")[0];
s1 = s1.replace(",", "").replace("约", "");
......
......@@ -138,6 +138,8 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
if(title == null || title.length() < 1) {
title = document.select("#j_core_title_wrap > h3").text();
}
String source = null;
source = document.select("div.card_top.clearfix > div.card_title > a").text();
System.out.println(title);
for(Element element : elementes) {
String time = null;
......@@ -159,7 +161,7 @@ public class BaiduTiebaCrawlerParse extends HttpClientTemplateOK {
}
if(time != null && time.length() > 1) {
TiebaData tbd = new TiebaData("http://tieba.baidu.com/p/"+aid, title, time, tid, null, author, content, aid);
TiebaData tbd = new TiebaData("http://tieba.baidu.com/p/"+aid, title, time, tid, source, author, content, aid);
System.out.println(tbd.toString());
list.add(tbd);
}
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment