Commit 5f4ff8d3 by zhiwei

360网页采集

parent 491f1e25
......@@ -215,7 +215,7 @@ public class SoCrawlerParse extends HttpClientTemplateOK {
String time = dataJson.getJSONObject("data").getJSONArray("question").getJSONObject(0).getString("create_time");
String source = dataJson.getJSONObject("data").getJSONArray("question").getJSONObject(0).getJSONObject("user").getString("uname");
String user_id = dataJson.getJSONObject("data").getJSONArray("question").getJSONObject(0).getJSONObject("user").getString("user_id");
String link = "https://www.toutiao.com/a"+dataJson.getJSONObject("data").getJSONArray("question").getJSONObject(0).getString("qid")+"/";
String link = "https://www.toutiao.com/a"+dataJson.getJSONObject("data").getJSONArray("question").getJSONObject(0).getString("qid");
return new NewsData(link, title, source, time, content, "头条问答", word, user_id);
}else if(htmlBody.contains("var BASE_DATA = ")){
......@@ -226,7 +226,7 @@ public class SoCrawlerParse extends HttpClientTemplateOK {
String time = dataJson.getJSONObject("articleInfo").getJSONObject("subInfo").getString("time");
String source = dataJson.getJSONObject("mediaInfo").getString("name");
String user_id = dataJson.getJSONObject("mediaInfo").getString("uid");
String link = "https://www.toutiao.com/a"+dataJson.getJSONObject("articleInfo").getString("groupId")+"/";
String link = "https://www.toutiao.com/a"+dataJson.getJSONObject("articleInfo").getString("groupId");
return new NewsData(link, title, source, time, content, "今日头条", word, user_id);
}else if(htmlBody.contains("404错误页")){
logger.info("{}:::数据有问题,该文章已被删除}", url);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment