Commit 255db67e by [zhangzhiwei]

文章添加采集标签

parent cf30abcd
......@@ -12,6 +12,7 @@ package com.zhiwei.toutiao.bean;
import java.io.Serializable;
import java.util.Date;
import java.util.List;
/**
* @Description:
......@@ -33,6 +34,8 @@ public class TouTiaoArticle implements Serializable{
private String playCount;
private String readNum;
private String shareNum;
private List<String> labelList;
/**
 * Returns the comment count held by this article.
 *
 * @return the current value of the {@code commentCount} field
 */
public String getCommentCount() {
    return this.commentCount;
}
......@@ -82,8 +85,6 @@ public class TouTiaoArticle implements Serializable{
this.content = content;
}
public TouTiaoArticle(){}
/**
 * Returns the user id held by this article.
 *
 * @return the current value of the {@code user_id} field
 */
public String getUser_id() {
    return this.user_id;
}
......@@ -102,6 +103,14 @@ public class TouTiaoArticle implements Serializable{
/**
 * Stores the given share count on this article.
 *
 * @param shareNum value assigned to the {@code shareNum} field
 */
public void setShareNum(final String shareNum) {
    this.shareNum = shareNum;
}
/**
 * Returns the label list held by this article.
 *
 * <p>The stored reference is handed back as-is (no copy is made), and it
 * is whatever was last passed to {@link #setLabelList(List)}.
 *
 * @return the current value of the {@code labelList} field
 */
public List<String> getLabelList() {
    return this.labelList;
}
/**
 * Stores the given label list on this article.
 *
 * <p>The reference is kept directly; the list is not copied.
 *
 * @param labelList value assigned to the {@code labelList} field
 */
public void setLabelList(final List<String> labelList) {
    this.labelList = labelList;
}
/**
 * No-argument constructor; its body assigns no fields.
 */
public TouTiaoArticle() {
}
public TouTiaoArticle(String url,String title,String user_id,
String source,Date time,String content,String commentCount,
String playCount,String readNum, String shareNum,String type)
......@@ -133,7 +142,9 @@ public class TouTiaoArticle implements Serializable{
+ ", playCount = " + playCount
+ ", readNum = " + readNum
+ ", shareNum = " + shareNum
+ ", labelList = " + labelList
+ "]";
}
}
......@@ -191,37 +191,46 @@ public class TouTiaoAccountParse {
try {
JSONObject json = JSONObject.parseObject(htmlBody);
JSONArray jsonArray = json.getJSONArray("data");
Long user_id = null;
String name = null;
Long media_id = -1L;
String description = null;
Integer user_verified = -1;
String verify_content = null;
Integer follow_count = -1;
Date create_time = null;
String img_url = null;
String gender = null;
String user_type = null;
TouTiaoAccount tta = null;
for (int i = 0; i < jsonArray.size(); i++) {
try {
JSONObject data = jsonArray.getJSONObject(i);
if(data.getLong("id") == null) {
continue;
}
long user_id = data.getLong("id");
String name = data.getString("name");
long media_id = -1L;
user_id = data.getLong("id");
name = data.getString("name");
if(data.getLong("media_id") != null) {
media_id = data.getLong("media_id");
}
String description = data.getString("description");
int user_verified = -1;
description = data.getString("description");
if(data.getInteger("user_verified") != null) {
user_verified = data.getInteger("user_verified");
}
String verify_content = data.getString("verify_content");
int follow_count = -1;
verify_content = data.getString("verify_content");
if(data.getInteger("follow_count") != null) {
follow_count = data.getInteger("follow_count");
}
String img_url = "https:"+data.getString("avatar_url");
Date create_time = null;
img_url = "https:"+data.getString("avatar_url");
if(data.getString("create_time") != null) {
create_time = new Date(Long.valueOf(data.getString("create_time"))*1000);
}
String gender = data.getString("gender");
String user_type = data.getString("user_type");
gender = data.getString("gender");
user_type = data.getString("user_type");
if(name != null && name.equals(word)){
TouTiaoAccount tta = new TouTiaoAccount(user_id, name, media_id, description, user_verified,
tta = new TouTiaoAccount(user_id, name, media_id, description, user_verified,
verify_content, follow_count,img_url,create_time, gender, user_type);
ZhiWeiTools.sleep(1000);
TouTiaoAccount ttaUpdate = getTouTiaoAccountInfoByUserId(user_id+"", proxy);
......
......@@ -57,7 +57,7 @@ public class TouTiaoArticleParse {
if(max_behot_time!=null){
url = url + "&max_behot_time="+max_behot_time;
}
System.out.println(url);
System.out.println("url=========="+url);
Map<String,String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", url);
String htmlBody = null;
......@@ -96,28 +96,42 @@ public class TouTiaoArticleParse {
JSONObject json = JSONObject.parseObject(htmlBody);
JSONArray jsonArray = json.getJSONArray("data");
max_behot_time = json.getJSONObject("next").getString("max_behot_time");
String title = null;
String content = null;
String time = null;
Date date = null;
String readNum = null;
String commentNum = null;
String playNum = null;
String shareNum = null;
String source = null;
String user_id = null;
List<String> labelList = null;
for (int i = 0; i < jsonArray.size(); i++) {
try {
JSONObject data = jsonArray.getJSONObject(i);
String href = "https://www.toutiao.com/";
if(data.containsKey("group_id")){
href = href+"a"+data.getLongValue("group_id");
String title = data.getString("title");
String content = data.getString("abstract");
String time = data.getLongValue("behot_time")*1000+"";
Date date = TimeParse.stringFormartDate(time);
String readNum = data.getString("total_read_count");
title = data.getString("title");
content = data.getString("abstract");
time = data.getLongValue("behot_time")*1000+"";
date = TimeParse.stringFormartDate(time);
readNum = data.getString("total_read_count");
if(readNum == null) {
readNum = data.getInteger("internal_visit_count")+"";
}
String commentNum = data.getString("comments_count");
String playNum = data.getString("detail_play_effective_count");
String shareNum = data.getString("share_count");
String source = data.getString("source");
String user_id = data.getLong("creator_uid")+"";
commentNum = data.getString("comments_count");
playNum = data.getString("detail_play_effective_count");
shareNum = data.getString("share_count");
source = data.getString("source");
user_id = data.getLong("creator_uid")+"";
TouTiaoArticle tt = new TouTiaoArticle(href, title, user_id, source, date, content, commentNum, playNum, readNum, shareNum,"今日头条");
if(data.containsKey("label")){
labelList = data.getJSONArray("label").toJavaList(String.class);
tt.setLabelList(labelList);
}
dataList.add(tt);
System.out.println(tt.toString());
}
} catch (Exception e) {
logger.error("数据解析出现问题,{}", e.getMessage());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment