Commit 0709d18e by yangchen

新增 部分网站评论采集 验证版

parent 41208eb5
......@@ -35,12 +35,12 @@ public class QicheHome {
count = qicheHomeKwyWordAnalysis.getCount(result);
}
bodyList.addAll(qicheHomeKwyWordAnalysis.getData(result));
page++;
logger.info("采集 articleid {} 总页数 {} 第 {} 页 , 采集总数 {}",articleid,count,page,bodyList.size());
ZhiWeiTools.sleep(3000);
if(page > count) {
break;
}
page++;
} catch (IOException e) {
e.printStackTrace();
n++;
......
......@@ -130,9 +130,9 @@ public class FenghuangCommentAnalysis {
private Map<String,Object> getcommentData2(JSONObject json) {
Map<String,Object> map = new HashMap<>();
try {
map.put("nickname", json.getString("uname"));
map.put("source", json.getString("uname"));
map.put("content", json.getString("comment_contents").replaceAll("&quot;", ""));
map.put("id", json.getString("article_id"));
map.put("id", json.getString("comment_temid"));
map.put("like", json.getString("uptimes"));
map.put("from", json.getString("ip_from"));
map.put("time", TimeParse.stringFormartDate(json.getString("comment_date")));
......
......@@ -33,7 +33,9 @@ public class PcautoCommentAnalysis {
String time = element.select("div.cmtMain > div > div.thTB > span.cmtTime").text();
map.put("time", TimeParse.stringFormartDate(time));
String like = element.select("span.cmtSupportNum").text();
String id = element.select("a.cmtVote").attr("data-id");
map.put("like", like);
map.put("id", id);
dataList.add(map);
}
boolean next = false;
......
......@@ -31,6 +31,7 @@ public class QicheHomeKwyWordAnalysis {
String time = data.getString("RReplyDate");
map.put("time", TimeParse.dateFormartString(TimeParse.stringFormartDate(time.split("Date\\(")[1].split("\\+")[0]),"yyyy-MM-dd HH:mm:ss"));
map.put("like", data.getInteger("RUp"));
map.put("id", data.getString("RMemberId"));
bodyList.add(map);
}
return bodyList;
......
......@@ -29,6 +29,7 @@ public class SinaKejiCommentAnalysis {
map.put("source", data.getString("nick"));
map.put("like", data.getString("agree"));
map.put("location", data.getString("area"));
map.put("id", data.getString("status_cmnt_mid"));
dataList.add(map);
}
return dataList;
......
......@@ -16,13 +16,13 @@ public class WangyiCommentAnalysis {
@SuppressWarnings("unchecked")
public List<Map<String,Object>> getWangyiCommentData(String result,List<String> idList) {
List<Map<String,Object>> dataList = new ArrayList<Map<String,Object>>();
List<Map<String,Object>> dataList = new ArrayList<>();
try {
JSONObject json = JSONObject.parseObject(result);
Map<String,Object> dataMap = (Map<String, Object>) json.get("comments");
for(Map.Entry<String, Object> entry : dataMap.entrySet()) {
JSONObject data = JSONObject.parseObject(entry.getValue().toString());
Map<String,Object> map = new HashMap<String,Object>();
Map<String,Object> map = new HashMap<>();
String id = data.getString("postId");
if(idList.contains(id)) {
continue;
......@@ -38,7 +38,7 @@ public class WangyiCommentAnalysis {
}
return dataList;
} catch (Exception e) {
logger.error("解析出错",e.getMessage());
logger.error("解析出错 {}",e);
return dataList;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment