Commit 0709d18e by yangchen

新增 部分网站评论采集 验证版

parent 41208eb5
...@@ -35,12 +35,12 @@ public class QicheHome { ...@@ -35,12 +35,12 @@ public class QicheHome {
count = qicheHomeKwyWordAnalysis.getCount(result); count = qicheHomeKwyWordAnalysis.getCount(result);
} }
bodyList.addAll(qicheHomeKwyWordAnalysis.getData(result)); bodyList.addAll(qicheHomeKwyWordAnalysis.getData(result));
page++;
logger.info("采集 articleid {} 总页数 {} 第 {} 页 , 采集总数 {}",articleid,count,page,bodyList.size()); logger.info("采集 articleid {} 总页数 {} 第 {} 页 , 采集总数 {}",articleid,count,page,bodyList.size());
ZhiWeiTools.sleep(3000); ZhiWeiTools.sleep(3000);
if(page > count) { if(page > count) {
break; break;
} }
page++;
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
n++; n++;
......
...@@ -130,9 +130,9 @@ public class FenghuangCommentAnalysis { ...@@ -130,9 +130,9 @@ public class FenghuangCommentAnalysis {
private Map<String,Object> getcommentData2(JSONObject json) { private Map<String,Object> getcommentData2(JSONObject json) {
Map<String,Object> map = new HashMap<>(); Map<String,Object> map = new HashMap<>();
try { try {
map.put("nickname", json.getString("uname")); map.put("source", json.getString("uname"));
map.put("content", json.getString("comment_contents").replaceAll("&quot;", "")); map.put("content", json.getString("comment_contents").replaceAll("&quot;", ""));
map.put("id", json.getString("article_id")); map.put("id", json.getString("comment_temid"));
map.put("like", json.getString("uptimes")); map.put("like", json.getString("uptimes"));
map.put("from", json.getString("ip_from")); map.put("from", json.getString("ip_from"));
map.put("time", TimeParse.stringFormartDate(json.getString("comment_date"))); map.put("time", TimeParse.stringFormartDate(json.getString("comment_date")));
......
...@@ -33,7 +33,9 @@ public class PcautoCommentAnalysis { ...@@ -33,7 +33,9 @@ public class PcautoCommentAnalysis {
String time = element.select("div.cmtMain > div > div.thTB > span.cmtTime").text(); String time = element.select("div.cmtMain > div > div.thTB > span.cmtTime").text();
map.put("time", TimeParse.stringFormartDate(time)); map.put("time", TimeParse.stringFormartDate(time));
String like = element.select("span.cmtSupportNum").text(); String like = element.select("span.cmtSupportNum").text();
String id = element.select("a.cmtVote").attr("data-id");
map.put("like", like); map.put("like", like);
map.put("id", id);
dataList.add(map); dataList.add(map);
} }
boolean next = false; boolean next = false;
......
...@@ -31,6 +31,7 @@ public class QicheHomeKwyWordAnalysis { ...@@ -31,6 +31,7 @@ public class QicheHomeKwyWordAnalysis {
String time = data.getString("RReplyDate"); String time = data.getString("RReplyDate");
map.put("time", TimeParse.dateFormartString(TimeParse.stringFormartDate(time.split("Date\\(")[1].split("\\+")[0]),"yyyy-MM-dd HH:mm:ss")); map.put("time", TimeParse.dateFormartString(TimeParse.stringFormartDate(time.split("Date\\(")[1].split("\\+")[0]),"yyyy-MM-dd HH:mm:ss"));
map.put("like", data.getInteger("RUp")); map.put("like", data.getInteger("RUp"));
map.put("id", data.getString("RMemberId"));
bodyList.add(map); bodyList.add(map);
} }
return bodyList; return bodyList;
......
...@@ -29,6 +29,7 @@ public class SinaKejiCommentAnalysis { ...@@ -29,6 +29,7 @@ public class SinaKejiCommentAnalysis {
map.put("source", data.getString("nick")); map.put("source", data.getString("nick"));
map.put("like", data.getString("agree")); map.put("like", data.getString("agree"));
map.put("location", data.getString("area")); map.put("location", data.getString("area"));
map.put("id", data.getString("status_cmnt_mid"));
dataList.add(map); dataList.add(map);
} }
return dataList; return dataList;
......
...@@ -16,13 +16,13 @@ public class WangyiCommentAnalysis { ...@@ -16,13 +16,13 @@ public class WangyiCommentAnalysis {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public List<Map<String,Object>> getWangyiCommentData(String result,List<String> idList) { public List<Map<String,Object>> getWangyiCommentData(String result,List<String> idList) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>(); List<Map<String,Object>> dataList = new ArrayList<>();
try { try {
JSONObject json = JSONObject.parseObject(result); JSONObject json = JSONObject.parseObject(result);
Map<String,Object> dataMap = (Map<String, Object>) json.get("comments"); Map<String,Object> dataMap = (Map<String, Object>) json.get("comments");
for(Map.Entry<String, Object> entry : dataMap.entrySet()) { for(Map.Entry<String, Object> entry : dataMap.entrySet()) {
JSONObject data = JSONObject.parseObject(entry.getValue().toString()); JSONObject data = JSONObject.parseObject(entry.getValue().toString());
Map<String,Object> map = new HashMap<String,Object>(); Map<String,Object> map = new HashMap<>();
String id = data.getString("postId"); String id = data.getString("postId");
if(idList.contains(id)) { if(idList.contains(id)) {
continue; continue;
...@@ -38,7 +38,7 @@ public class WangyiCommentAnalysis { ...@@ -38,7 +38,7 @@ public class WangyiCommentAnalysis {
} }
return dataList; return dataList;
} catch (Exception e) { } catch (Exception e) {
logger.error("解析出错",e.getMessage()); logger.error("解析出错 {}",e);
return dataList; return dataList;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment