Commit 0a584d05 by yangchen

搜狗修改

parent f81fdcab
......@@ -6,7 +6,6 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
......@@ -57,7 +56,6 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
more = false;
}
ZhiWeiTools.sleep(5000);
System.out.println(list.size());
page++;
}
return list;
......@@ -106,7 +104,6 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
Map<String,String> headerMap = HeaderTool.getCommonHead();
//获取链接地址
String url = getMediaNewsUrl(word, mode, page);
// System.out.println("搜狗主页=="+url);
headerMap.put("Host", "news.sogou.com");
headerMap.put("Referer", url.split("&page=")[0]+"&page="+(page-1));
//下载数据页面
......@@ -125,8 +122,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
//获取通用请求头
Map<String,String> headerMap = HeaderTool.getCommonHead();
//获取链接地址
url = url + "&page=" + page;
// System.out.println("搜狗相似新闻=="+url);
url = url + "&page" + page;
headerMap.put("Host", "news.sogou.com");
headerMap.put("Referer", url);
//下载数据页面
......@@ -193,9 +189,6 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
}
if(time!=null && !time.equals("")){
/**文章发布时间处理**/
if(time.contains("\n")) {
time = time.split("\n")[0];
}
time = TimeParse.dateFormartString(TimeParse.stringFormartDate(time), "yyyy-MM-dd HH:mm:ss") ;
}
// 处理文章简介
......@@ -256,7 +249,6 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
}else{
more = false;
}
ZhiWeiTools.sleep(500);
page++;
}
return list;
......@@ -277,7 +269,7 @@ public class SougouNewsCrawlerParse extends HttpClientTemplateOK {
String url = null;
if(word!=null){
url = "http://news.sogou.com/news?mode="+ mode +"&media=&query="
+ URLCodeUtil.getURLEncode(word, "utf-8") + "&time=0&clusterId=&sort=1&dp=1&page="+page;
+ URLCodeUtil.getURLEncode(word, "utf-8") + "&time=0&clusterId=&sort=1&dp=1";
}
return url;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment