Commit cb6a0b84 by [zhangzhiwei]

添加文章类型

parent efe57d38
...@@ -35,7 +35,14 @@ public class TouTiaoArticle implements Serializable{ ...@@ -35,7 +35,14 @@ public class TouTiaoArticle implements Serializable{
private String readNum; private String readNum;
private String shareNum; private String shareNum;
private List<String> labelList; private List<String> labelList;
private String articleType;
/**
 * Returns the article type stored on this article.
 *
 * @return the article type, or {@code null} when none was supplied
 */
public String getArticleType() {
return articleType;
}
/**
 * Sets the article type for this article.
 *
 * @param articleType the value to store; it is assigned as-is, with no validation
 */
public void setArticleType(String articleType) {
this.articleType = articleType;
}
public String getCommentCount() { public String getCommentCount() {
return commentCount; return commentCount;
} }
...@@ -113,7 +120,8 @@ public class TouTiaoArticle implements Serializable{ ...@@ -113,7 +120,8 @@ public class TouTiaoArticle implements Serializable{
public TouTiaoArticle(){} public TouTiaoArticle(){}
public TouTiaoArticle(String url,String title,String user_id, public TouTiaoArticle(String url,String title,String user_id,
String source,Date time,String content,String commentCount, String source,Date time,String content,String commentCount,
String playCount,String readNum, String shareNum,String type) String playCount,String readNum, String shareNum,String type
,String articleType)
{ {
this.url = url ; this.url = url ;
this.title = title; this.title = title;
...@@ -126,6 +134,7 @@ public class TouTiaoArticle implements Serializable{ ...@@ -126,6 +134,7 @@ public class TouTiaoArticle implements Serializable{
this.playCount = playCount; this.playCount = playCount;
this.shareNum = shareNum; this.shareNum = shareNum;
this.commentCount = commentCount; this.commentCount = commentCount;
this.articleType = articleType;
} }
public String toString() public String toString()
...@@ -143,6 +152,7 @@ public class TouTiaoArticle implements Serializable{ ...@@ -143,6 +152,7 @@ public class TouTiaoArticle implements Serializable{
+ ", readNum = " + readNum + ", readNum = " + readNum
+ ", shareNum = " + shareNum + ", shareNum = " + shareNum
+ ", labelList = " + labelList + ", labelList = " + labelList
+ ", articleType = " + articleType
+ "]"; + "]";
} }
......
...@@ -39,7 +39,9 @@ import com.zhiwei.toutiao.util.Tools; ...@@ -39,7 +39,9 @@ import com.zhiwei.toutiao.util.Tools;
* @date 2016年9月2日 上午11:17:44 * @date 2016年9月2日 上午11:17:44
*/ */
public class TouTiaoArticleParse { public class TouTiaoArticleParse {
private TouTiaoArticleParse() {} private TouTiaoArticleParse() {
}
private static Logger logger = LogManager.getLogger(TouTiaoArticleParse.class); private static Logger logger = LogManager.getLogger(TouTiaoArticleParse.class);
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot();
...@@ -54,23 +56,25 @@ public class TouTiaoArticleParse { ...@@ -54,23 +56,25 @@ public class TouTiaoArticleParse {
* @throws Exception * @throws Exception
*/ */
@Deprecated @Deprecated
public static Map<String, Object> getTouTiaoList(String media_id, String max_behot_time,Date endData, Proxy proxy ) throws Exception{ public static Map<String, Object> getTouTiaoList(String media_id, String max_behot_time, Date endData, Proxy proxy)
throws Exception {
Signature signature = new Signature(); Signature signature = new Signature();
String url = "https://www.toutiao.com/pgc/ma/?page_type=1&media_id="+media_id+"&count=20&as="+signature.getAs()+"&cp="+signature.getCp(); String url = "https://www.toutiao.com/pgc/ma/?page_type=1&media_id=" + media_id + "&count=20&as="
if(max_behot_time!=null){ + signature.getAs() + "&cp=" + signature.getCp();
url = url + "&max_behot_time="+max_behot_time; if (max_behot_time != null) {
url = url + "&max_behot_time=" + max_behot_time;
} }
Map<String,String> headerMap = Tools.getTouTiaoHeader(); Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", url); headerMap.put("Referer", url);
String htmlBody = null; String htmlBody = null;
try { try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string(); htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if(htmlBody != null && htmlBody.contains("behot_time")){ if (htmlBody != null && htmlBody.contains("behot_time")) {
Map<String, Object> ttList = parseHtmlByAccount(htmlBody, endData); Map<String, Object> ttList = parseHtmlByAccount(htmlBody, endData);
if(ttList!=null && ttList.size()>0){ if (ttList != null && ttList.size() > 0) {
return ttList; return ttList;
} }
}else{ } else {
logger.info("数据为null"); logger.info("数据为null");
} }
} catch (Exception e) { } catch (Exception e) {
...@@ -81,25 +85,27 @@ public class TouTiaoArticleParse { ...@@ -81,25 +85,27 @@ public class TouTiaoArticleParse {
} }
@Deprecated @Deprecated
public static Map<String, Object> getTouTiaoList(String media_id, Long max_behot_time,Date endData, ProxyHolder proxy ) throws Exception{ public static Map<String, Object> getTouTiaoList(String media_id, Long max_behot_time, Date endData,
ProxyHolder proxy) throws Exception {
Signature signature = new Signature(); Signature signature = new Signature();
String as=signature.getAs(); String as = signature.getAs();
String cp=signature.getCp(); String cp = signature.getCp();
String url = "https://www.toutiao.com/pgc/ma/?page_type=1&media_id="+media_id+"&count=20&as="+as+"&cp="+cp; String url = "https://www.toutiao.com/pgc/ma/?page_type=1&media_id=" + media_id + "&count=20&as=" + as + "&cp="
if(max_behot_time!=null){ + cp;
url = url + "&max_behot_time="+max_behot_time; if (max_behot_time != null) {
url = url + "&max_behot_time=" + max_behot_time;
} }
Map<String,String> headerMap = Tools.getTouTiaoHeader(); Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", url); headerMap.put("Referer", url);
String htmlBody = null; String htmlBody = null;
try { try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string(); htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if(htmlBody != null && htmlBody.contains("behot_time")){ if (htmlBody != null && htmlBody.contains("behot_time")) {
Map<String, Object> ttList = parseHtmlByAccount(htmlBody, endData); Map<String, Object> ttList = parseHtmlByAccount(htmlBody, endData);
if(ttList!=null && ttList.size()>0){ if (ttList != null && ttList.size() > 0) {
return ttList; return ttList;
} }
}else{ } else {
logger.info("数据为null"); logger.info("数据为null");
} }
} catch (Exception e) { } catch (Exception e) {
...@@ -111,6 +117,7 @@ public class TouTiaoArticleParse { ...@@ -111,6 +117,7 @@ public class TouTiaoArticleParse {
/** /**
* 获取今日头条历史文章接口新 * 获取今日头条历史文章接口新
*
* @param user_id * @param user_id
* @param max_behot_time * @param max_behot_time
* @param endData * @param endData
...@@ -118,26 +125,29 @@ public class TouTiaoArticleParse { ...@@ -118,26 +125,29 @@ public class TouTiaoArticleParse {
* @return * @return
* @throws Exception * @throws Exception
*/ */
public static Map<String, Object> getTouTiaoHistory(String user_id, String max_behot_time,Date endData, Proxy proxy ) throws Exception{ public static Map<String, Object> getTouTiaoHistory(String user_id, String max_behot_time, Date endData,
for(int i=0;i<3;i++){ Proxy proxy) throws Exception {
for (int i = 0; i < 3; i++) {
Signature signature = new Signature(user_id, max_behot_time); Signature signature = new Signature(user_id, max_behot_time);
String as=signature.getAs(); String as = signature.getAs();
String cp=signature.getCp(); String cp = signature.getCp();
String _signature = signature.getSignature(); String _signature = signature.getSignature();
String url = "https://www.toutiao.com/c/user/article/?page_type=1&user_id="+user_id+"&max_behot_time="+max_behot_time+"&count=20&as="+as+"&cp="+cp+"&_signature="+_signature; String url = "https://www.toutiao.com/c/user/article/?page_type=1&user_id=" + user_id + "&max_behot_time="
+ max_behot_time + "&count=20&as=" + as + "&cp=" + cp + "&_signature=" + _signature;
System.out.println(url); System.out.println(url);
Map<String,String> headerMap = new HashMap<String,String>(); Map<String, String> headerMap = new HashMap<String, String>();
headerMap.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36"); headerMap.put("user-agent",
headerMap.put("referer","https://www.toutiao.com/c/user/"+user_id+"/"); "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36");
headerMap.put("referer", "https://www.toutiao.com/c/user/" + user_id + "/");
String htmlBody = null; String htmlBody = null;
try { try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string(); htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if(htmlBody != null && htmlBody.contains("behot_time")){ if (htmlBody != null && htmlBody.contains("behot_time")) {
Map<String, Object> ttList = parseHtmlByAccount(user_id, htmlBody, endData); Map<String, Object> ttList = parseHtmlByAccount(user_id, htmlBody, endData);
if(ttList!=null && ttList.size()>0){ if (ttList != null && ttList.size() > 0) {
return ttList; return ttList;
} }
}else{ } else {
logger.info("数据为null"); logger.info("数据为null");
continue; continue;
} }
...@@ -149,28 +159,31 @@ public class TouTiaoArticleParse { ...@@ -149,28 +159,31 @@ public class TouTiaoArticleParse {
return Collections.emptyMap(); return Collections.emptyMap();
} }
public static Map<String, Object> getTouTiaoHistory(String user_id, String max_behot_time,Date endData, ProxyHolder proxy ) throws Exception{ public static Map<String, Object> getTouTiaoHistory(String user_id, String max_behot_time, Date endData,
for(int i=0;i<3;i++){ ProxyHolder proxy) throws Exception {
for (int i = 0; i < 3; i++) {
Signature signature = new Signature(user_id, max_behot_time); Signature signature = new Signature(user_id, max_behot_time);
String as=signature.getAs(); String as = signature.getAs();
String cp=signature.getCp(); String cp = signature.getCp();
String _signature = signature.getSignature(); String _signature = signature.getSignature();
String url = "https://www.toutiao.com/c/user/article/?page_type=1&user_id="+user_id+"&max_behot_time="+max_behot_time+"&count=20&as="+as+"&cp="+cp+"&_signature="+_signature; String url = "https://www.toutiao.com/c/user/article/?page_type=1&user_id=" + user_id + "&max_behot_time="
+ max_behot_time + "&count=20&as=" + as + "&cp=" + cp + "&_signature=" + _signature;
logger.info("当前采集的历史文章链接:::{}", url); logger.info("当前采集的历史文章链接:::{}", url);
Map<String,String> headerMap = new HashMap<String,String>(); Map<String, String> headerMap = new HashMap<String, String>();
headerMap.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36"); headerMap.put("user-agent",
headerMap.put("referer","https://www.toutiao.com/c/user/"+user_id+"/"); "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36");
headerMap.put("referer", "https://www.toutiao.com/c/user/" + user_id + "/");
String htmlBody = null; String htmlBody = null;
try { try {
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string(); htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if(htmlBody != null && htmlBody.contains("behot_time")){ if (htmlBody != null && htmlBody.contains("behot_time")) {
Map<String, Object> ttList = parseHtmlByAccount(user_id, htmlBody, endData); Map<String, Object> ttList = parseHtmlByAccount(user_id, htmlBody, endData);
if(ttList!=null && ttList.size()>0){ if (ttList != null && ttList.size() > 0) {
return ttList; return ttList;
}else{ } else {
break; break;
} }
}else{ } else {
logger.info("数据为null,获取到的文本为:::{}", htmlBody); logger.info("数据为null,获取到的文本为:::{}", htmlBody);
continue; continue;
} }
...@@ -182,7 +195,6 @@ public class TouTiaoArticleParse { ...@@ -182,7 +195,6 @@ public class TouTiaoArticleParse {
return Collections.emptyMap(); return Collections.emptyMap();
} }
/*** /***
* 根据帐号解析历史文章地址 * 根据帐号解析历史文章地址
* *
...@@ -210,25 +222,28 @@ public class TouTiaoArticleParse { ...@@ -210,25 +222,28 @@ public class TouTiaoArticleParse {
String shareNum = null; String shareNum = null;
String source = null; String source = null;
String user_id = null; String user_id = null;
String articleType = null;
List<String> labelList = null; List<String> labelList = null;
for (int i = 0; i < jsonArray.size(); i++) { for (int i = 0; i < jsonArray.size(); i++) {
try { try {
JSONObject data = jsonArray.getJSONObject(i); JSONObject data = jsonArray.getJSONObject(i);
String href = "https://www.toutiao.com/"; String href = "https://www.toutiao.com/";
if(data.containsKey("group_id")){ if (data.containsKey("group_id")) {
href = href+"a"+data.getLongValue("group_id"); href = href + "a" + data.getLongValue("group_id");
title = data.getString("title"); title = data.getString("title");
content = data.getString("abstract"); content = data.getString("abstract");
time = data.getLongValue("behot_time")*1000+""; time = data.getLongValue("behot_time") * 1000 + "";
date = TimeParse.stringFormartDate(time); date = TimeParse.stringFormartDate(time);
readNum = data.getString("go_detail_count"); readNum = data.getString("go_detail_count");
commentNum = data.getString("comments_count"); commentNum = data.getString("comments_count");
playNum = data.getString("detail_play_effective_count"); playNum = data.getString("detail_play_effective_count");
shareNum = data.getString("share_count"); shareNum = data.getString("share_count");
source = data.getString("source"); source = data.getString("source");
user_id = data.getLong("creator_uid")+""; user_id = data.getLong("creator_uid") + "";
TouTiaoArticle tt = new TouTiaoArticle(href, title, user_id, source, date, content, commentNum, playNum, readNum, shareNum,"今日头条"); articleType = data.getString("chinese_tag");
if(data.containsKey("label")){ TouTiaoArticle tt = new TouTiaoArticle(href, title, user_id, source, date, content, commentNum,
playNum, readNum, shareNum, "今日头条", articleType);
if (data.containsKey("label")) {
labelList = data.getJSONArray("label").toJavaList(String.class); labelList = data.getJSONArray("label").toJavaList(String.class);
tt.setLabelList(labelList); tt.setLabelList(labelList);
} }
...@@ -244,10 +259,10 @@ public class TouTiaoArticleParse { ...@@ -244,10 +259,10 @@ public class TouTiaoArticleParse {
return null; return null;
} }
if(endDate!=null){ if (endDate != null) {
if(max_behot_time!=null && !"0".equals(max_behot_time)){ if (max_behot_time != null && !"0".equals(max_behot_time)) {
Date nextDate = new Date(Long.valueOf(max_behot_time+"000")); Date nextDate = new Date(Long.valueOf(max_behot_time + "000"));
if(endDate.after(nextDate)){ if (endDate.after(nextDate)) {
max_behot_time = null; max_behot_time = null;
} }
} }
...@@ -257,8 +272,6 @@ public class TouTiaoArticleParse { ...@@ -257,8 +272,6 @@ public class TouTiaoArticleParse {
return map; return map;
} }
private static Map<String, Object> parseHtmlByAccount(String user_id, String htmlBody, Date endDate) { private static Map<String, Object> parseHtmlByAccount(String user_id, String htmlBody, Date endDate) {
Map<String, Object> map = new HashMap<String, Object>(); Map<String, Object> map = new HashMap<String, Object>();
Long max_behot_time = null; Long max_behot_time = null;
...@@ -276,28 +289,30 @@ public class TouTiaoArticleParse { ...@@ -276,28 +289,30 @@ public class TouTiaoArticleParse {
String playNum = null; String playNum = null;
String shareNum = null; String shareNum = null;
String source = null; String source = null;
String articleType = null;
List<String> labelList = null; List<String> labelList = null;
for (int i = 0; i < jsonArray.size(); i++) { for (int i = 0; i < jsonArray.size(); i++) {
try { try {
JSONObject data = jsonArray.getJSONObject(i); JSONObject data = jsonArray.getJSONObject(i);
String href = "https://www.toutiao.com/"; String href = "https://www.toutiao.com/";
if(data.containsKey("group_id")){ if (data.containsKey("group_id")) {
href = href+"a"+data.getLongValue("group_id"); href = href + "a" + data.getLongValue("group_id");
title = data.getString("title"); title = data.getString("title");
content = data.getString("abstract"); content = data.getString("abstract");
time = data.getLongValue("behot_time")*1000+""; time = data.getLongValue("behot_time") * 1000 + "";
date = TimeParse.stringFormartDate(time); date = TimeParse.stringFormartDate(time);
readNum = data.getString("go_detail_count"); readNum = data.getString("go_detail_count");
commentNum = data.getString("comments_count"); commentNum = data.getString("comments_count");
playNum = data.getString("detail_play_effective_count"); playNum = data.getString("detail_play_effective_count");
shareNum = data.getString("share_count"); shareNum = data.getString("share_count");
source = data.getString("source"); source = data.getString("source");
TouTiaoArticle tt = new TouTiaoArticle(href, title, user_id, source, date, content, commentNum, playNum, readNum, shareNum,"今日头条"); articleType = data.getString("chinese_tag");
if(data.containsKey("label")){ TouTiaoArticle tt = new TouTiaoArticle(href, title, user_id, source, date, content, commentNum,
playNum, readNum, shareNum, "今日头条", articleType);
if (data.containsKey("label")) {
labelList = data.getJSONArray("label").toJavaList(String.class); labelList = data.getJSONArray("label").toJavaList(String.class);
tt.setLabelList(labelList); tt.setLabelList(labelList);
} }
System.out.println(tt.toString());
dataList.add(tt); dataList.add(tt);
} }
} catch (Exception e) { } catch (Exception e) {
...@@ -309,10 +324,10 @@ public class TouTiaoArticleParse { ...@@ -309,10 +324,10 @@ public class TouTiaoArticleParse {
logger.error("数据解析出现问题,{}", e.getMessage()); logger.error("数据解析出现问题,{}", e.getMessage());
return null; return null;
} }
if(endDate!=null){ if (endDate != null) {
if(max_behot_time!=null && !"0".equals(max_behot_time)){ if (max_behot_time != null && !"0".equals(max_behot_time)) {
Date nextDate = new Date(Long.valueOf(max_behot_time+"000")); Date nextDate = new Date(Long.valueOf(max_behot_time + "000"));
if(endDate.after(nextDate)){ if (endDate.after(nextDate)) {
max_behot_time = null; max_behot_time = null;
} }
} }
...@@ -322,35 +337,38 @@ public class TouTiaoArticleParse { ...@@ -322,35 +337,38 @@ public class TouTiaoArticleParse {
return map; return map;
} }
/** /**
* @Title: getMicroTouTiaoCrawler * @Title: getMicroTouTiaoCrawler
* @author hero * @author hero
* @Description: 根据用户user_id查询用户微头条数据 * @Description: 根据用户user_id查询用户微头条数据
* @param @param user_id * @param @param
* @param @param endDate * user_id
* @param @param proxy * @param @param
* endDate
* @param @param
* proxy
* @param @return * @param @return
* @param @throws IOException 设定文件 * @param @throws
* IOException 设定文件
* @return List<Map<String,Object>> 返回类型 * @return List<Map<String,Object>> 返回类型
*/ */
public static Map<String, Object> getMicroTouTiaoCrawler(String user_id, Date endDate, Proxy proxy, String max_behot_time) throws IOException { public static Map<String, Object> getMicroTouTiaoCrawler(String user_id, Date endDate, Proxy proxy,
String url = "https://www.toutiao.com/c/ugc/content/list/" + user_id+"/"; String max_behot_time) throws IOException {
if(max_behot_time!=null){ String url = "https://www.toutiao.com/c/ugc/content/list/" + user_id + "/";
if (max_behot_time != null) {
url = url + "?max_time=" + max_behot_time; url = url + "?max_time=" + max_behot_time;
} }
Map<String, String> headerMap = Tools.getTouTiaoHeader(); Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + user_id + "/"); headerMap.put("Referer", "https://www.toutiao.com/c/user/" + user_id + "/");
System.out.println(url); System.out.println(url);
try { try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if (htmlBody != null) { if (htmlBody != null) {
Map<String, Object> dataMap = parseHtmlByMicroAccount(htmlBody, endDate); Map<String, Object> dataMap = parseHtmlByMicroAccount(htmlBody, endDate);
if(dataMap!=null && dataMap.size()>0){ if (dataMap != null && dataMap.size() > 0) {
return dataMap; return dataMap;
} }
}else{ } else {
logger.info("数据为null"); logger.info("数据为null");
} }
} catch (Exception e) { } catch (Exception e) {
...@@ -360,24 +378,23 @@ public class TouTiaoArticleParse { ...@@ -360,24 +378,23 @@ public class TouTiaoArticleParse {
return null; return null;
} }
public static Map<String, Object> getMicroTouTiaoCrawler(String user_id, Date endDate, ProxyHolder proxy,
Long max_behot_time) throws IOException {
public static Map<String, Object> getMicroTouTiaoCrawler(String user_id, Date endDate, ProxyHolder proxy, Long max_behot_time) throws IOException { String url = "https://www.toutiao.com/c/ugc/content/list/" + user_id + "/";
String url = "https://www.toutiao.com/c/ugc/content/list/" + user_id+"/"; if (max_behot_time != null) {
if(max_behot_time!=null){
url = url + "?max_time=" + max_behot_time; url = url + "?max_time=" + max_behot_time;
} }
Map<String, String> headerMap = Tools.getTouTiaoHeader(); Map<String, String> headerMap = Tools.getTouTiaoHeader();
headerMap.put("Referer", "https://www.toutiao.com/c/user/" + user_id + "/"); headerMap.put("Referer", "https://www.toutiao.com/c/user/" + user_id + "/");
System.out.println(url); System.out.println(url);
try { try {
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
if (htmlBody != null) { if (htmlBody != null) {
Map<String, Object> dataMap = parseHtmlByMicroAccount(htmlBody, endDate); Map<String, Object> dataMap = parseHtmlByMicroAccount(htmlBody, endDate);
if(dataMap!=null && dataMap.size()>0){ if (dataMap != null && dataMap.size() > 0) {
return dataMap; return dataMap;
} }
}else{ } else {
logger.info("数据为null"); logger.info("数据为null");
} }
} catch (Exception e) { } catch (Exception e) {
...@@ -391,9 +408,12 @@ public class TouTiaoArticleParse { ...@@ -391,9 +408,12 @@ public class TouTiaoArticleParse {
* @Title: parseHtmlByMicroAccount * @Title: parseHtmlByMicroAccount
* @author hero * @author hero
* @Description: 解析微头条数据 * @Description: 解析微头条数据
* @param @param htmlBody * @param @param
* @param @param endDate * htmlBody
* @param @return 设定文件 * @param @param
* endDate
* @param @return
* 设定文件
* @return Map<String,Object> 返回类型 * @return Map<String,Object> 返回类型
*/ */
private static Map<String, Object> parseHtmlByMicroAccount(String htmlBody, Date endDate) { private static Map<String, Object> parseHtmlByMicroAccount(String htmlBody, Date endDate) {
...@@ -413,38 +433,40 @@ public class TouTiaoArticleParse { ...@@ -413,38 +433,40 @@ public class TouTiaoArticleParse {
String commentNum = null; String commentNum = null;
String playNum = null; String playNum = null;
String user_id = null; String user_id = null;
String articleType = null;
int count = 16; int count = 16;
for (int i = 0; i < jsonArray.size(); i++) { for (int i = 0; i < jsonArray.size(); i++) {
try { try {
JSONObject data = jsonArray.getJSONObject(i); JSONObject data = jsonArray.getJSONObject(i);
max_behot_time = data.getLongValue("create_time"); max_behot_time = data.getLongValue("create_time");
date = new Date(max_behot_time*1000); date = new Date(max_behot_time * 1000);
href = "https://www.toutiao.com/a" + data.getString("thread_id"); href = "https://www.toutiao.com/a" + data.getString("thread_id");
source = data.getJSONObject("ugc_user").getString("name"); source = data.getJSONObject("ugc_user").getString("name");
content = data.getString("content"); content = data.getString("content");
readNum = data.getInteger("read_count")+""; readNum = data.getInteger("read_count") + "";
commentNum = data.getInteger("comment_count")+""; commentNum = data.getInteger("comment_count") + "";
user_id = data.getJSONObject("ugc_user").getString("user_id"); user_id = data.getJSONObject("ugc_user").getString("user_id");
if(content!=null && !"".equals(content)){ if (content != null && !"".equals(content)) {
if(content.length()<16){ if (content.length() < 16) {
count = content.length(); count = content.length();
} }
title = content.substring(0, count); title = content.substring(0, count);
} }
TouTiaoArticle tt = new TouTiaoArticle(href, title, user_id, source,date, content, commentNum, playNum, readNum, "0","微头条"); TouTiaoArticle tt = new TouTiaoArticle(href, title, user_id, source, date, content, commentNum,
playNum, readNum, "0", "微头条", articleType);
dataList.add(tt); dataList.add(tt);
} catch (Exception e) { } catch (Exception e) {
continue; continue;
} }
} }
/**验证是否有下一页数据**/ /** 验证是否有下一页数据 **/
if(more){ if (more) {
if(max_behot_time!=null && max_behot_time!=0){ if (max_behot_time != null && max_behot_time != 0) {
if(endDate.after(date)){ if (endDate.after(date)) {
max_behot_time = null; max_behot_time = null;
} }
} }
}else{ } else {
max_behot_time = null; max_behot_time = null;
} }
} catch (Exception e) { } catch (Exception e) {
...@@ -457,7 +479,4 @@ public class TouTiaoArticleParse { ...@@ -457,7 +479,4 @@ public class TouTiaoArticleParse {
return map; return map;
} }
} }
...@@ -98,7 +98,7 @@ public class TouTiaoChannelParse { ...@@ -98,7 +98,7 @@ public class TouTiaoChannelParse {
} }
url = getUrl(url); url = getUrl(url);
date = TimeParse.stringFormartDate(time); date = TimeParse.stringFormartDate(time);
TouTiaoArticle tt = new TouTiaoArticle(url, title, null,source, date, content, comment_count, "-1", "-1", "-1","今日头条"); TouTiaoArticle tt = new TouTiaoArticle(url, title, null,source, date, content, comment_count, "-1", "-1", "-1","今日头条",null);
ttList.add(tt); ttList.add(tt);
} catch (JSONException e) { } catch (JSONException e) {
continue; continue;
......
...@@ -152,7 +152,7 @@ public class TouTiaoParse { ...@@ -152,7 +152,7 @@ public class TouTiaoParse {
String shareNum = data.getString("share_count"); String shareNum = data.getString("share_count");
if (endData.before(date)) { if (endData.before(date)) {
TouTiaoArticle tt = new TouTiaoArticle(href, title, null,source, date, content, commentNum, playNum, readNum, shareNum,"今日头条"); TouTiaoArticle tt = new TouTiaoArticle(href, title, null,source, date, content, commentNum, playNum, readNum, shareNum,"今日头条",null);
dataList.add(tt); dataList.add(tt);
}else }else
{ {
......
...@@ -90,7 +90,7 @@ public class TouTiaoSearchParse { ...@@ -90,7 +90,7 @@ public class TouTiaoSearchParse {
String user_id = jso.getString("user_id"); String user_id = jso.getString("user_id");
Date date = TimeParse.stringFormartDate(time); Date date = TimeParse.stringFormartDate(time);
TouTiaoArticle tt = new TouTiaoArticle(url, title, user_id,source, date, content, comment_count, "-1", "-1", "-1","今日头条"); TouTiaoArticle tt = new TouTiaoArticle(url, title, user_id,source, date, content, comment_count, "-1", "-1", "-1","今日头条",null);
ttList.add(tt); ttList.add(tt);
} catch (JSONException e) { } catch (JSONException e) {
logger.debug("解析数据出现问题", e.fillInStackTrace()); logger.debug("解析数据出现问题", e.fillInStackTrace());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment