Commit f061889a by zhiwei

处理账号匹配规则

parent eedbd670
...@@ -44,13 +44,13 @@ public class TouTiaoAccountParse { ...@@ -44,13 +44,13 @@ public class TouTiaoAccountParse {
try { try {
String htmlBody = null; String htmlBody = null;
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap);
if(htmlBody != null && htmlBody.contains("media_id")){ if(htmlBody != null ){
tta = parseHtmlByAccount(htmlBody, name, proxy); tta = parseHtmlByAccount(htmlBody, name, proxy);
if(tta == null){ if(tta == null){
url = "https://www.toutiao.com/search_content/?offset=0&format=json&keyword="+URLCodeUtil.getURLEncode(name, "utf-8")+"&autoload=true&count=20&cur_tab=4&from=media"; url = "https://www.toutiao.com/search_content/?offset=0&format=json&keyword="+URLCodeUtil.getURLEncode(name, "utf-8")+"&autoload=true&count=20&cur_tab=4&from=media";
headerMap.put("Referer","https://www.toutiao.com/search/?keyword="+URLCodeUtil.getURLEncode(name, "utf-8")); headerMap.put("Referer","https://www.toutiao.com/search/?keyword="+URLCodeUtil.getURLEncode(name, "utf-8"));
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap);
if(htmlBody != null && htmlBody.contains("media_id")){ if(htmlBody != null){
tta = parseHtmlByAccount(htmlBody, name, proxy); tta = parseHtmlByAccount(htmlBody, name, proxy);
} }
} }
...@@ -58,7 +58,7 @@ public class TouTiaoAccountParse { ...@@ -58,7 +58,7 @@ public class TouTiaoAccountParse {
url = "https://www.toutiao.com/search_content/?offset=0&format=json&keyword="+URLCodeUtil.getURLEncode(name, "utf-8")+"&autoload=true&count=20&cur_tab=4&from=media"; url = "https://www.toutiao.com/search_content/?offset=0&format=json&keyword="+URLCodeUtil.getURLEncode(name, "utf-8")+"&autoload=true&count=20&cur_tab=4&from=media";
headerMap.put("Referer","https://www.toutiao.com/search/?keyword="+URLCodeUtil.getURLEncode(name, "utf-8")); headerMap.put("Referer","https://www.toutiao.com/search/?keyword="+URLCodeUtil.getURLEncode(name, "utf-8"));
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap);
if(htmlBody != null && htmlBody.contains("media_id")){ if(htmlBody != null){
tta = parseHtmlByAccount(htmlBody, name, proxy); tta = parseHtmlByAccount(htmlBody, name, proxy);
} }
} }
...@@ -79,7 +79,7 @@ public class TouTiaoAccountParse { ...@@ -79,7 +79,7 @@ public class TouTiaoAccountParse {
try { try {
String htmlBody = null; String htmlBody = null;
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap);
if(htmlBody != null && htmlBody.contains("mediaId")){ if(htmlBody != null){
tta = parseAccountByUserId(htmlBody, user_id); tta = parseAccountByUserId(htmlBody, user_id);
} }
...@@ -113,7 +113,7 @@ public class TouTiaoAccountParse { ...@@ -113,7 +113,7 @@ public class TouTiaoAccountParse {
try { try {
String htmlBody = null; String htmlBody = null;
htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap); htmlBody = HttpClientTemplateOK.get(url, proxy, headerMap);
if(htmlBody != null && htmlBody.contains("media_id")){ if(htmlBody != null){
JSONObject json = JSONObject.parseObject(htmlBody); JSONObject json = JSONObject.parseObject(htmlBody);
list.addAll(parseHtmlByWord(json, proxy)); list.addAll(parseHtmlByWord(json, proxy));
if(json.getIntValue("has_more")==0){ if(json.getIntValue("has_more")==0){
...@@ -192,32 +192,29 @@ public class TouTiaoAccountParse { ...@@ -192,32 +192,29 @@ public class TouTiaoAccountParse {
JSONArray jsonArray = json.getJSONArray("data"); JSONArray jsonArray = json.getJSONArray("data");
for (int i = 0; i < jsonArray.size(); i++) { for (int i = 0; i < jsonArray.size(); i++) {
try { try {
JSONObject data = jsonArray.getJSONObject(i); JSONObject data = jsonArray.getJSONObject(i);
if(data.containsKey("media_id")){ long user_id = data.getLong("id");
long user_id = data.getLong("id"); String name = data.getString("name");
String name = data.getString("name"); long media_id = data.getLong("media_id");
long media_id = data.getLong("media_id"); String description = data.getString("description");
String description = data.getString("description"); int user_verified = data.getInteger("user_verified");
int user_verified = data.getInteger("user_verified"); String verify_content = data.getString("verify_content");
String verify_content = data.getString("verify_content"); int follow_count = data.getInteger("follow_count");
int follow_count = data.getInteger("follow_count"); String img_url = "https:"+data.getString("avatar_url");
String img_url = "https:"+data.getString("avatar_url"); System.out.println(data.getString("create_time"));
System.out.println(data.getString("create_time")); Date create_time = new Date(Long.valueOf(data.getString("create_time"))*1000);
Date create_time = new Date(Long.valueOf(data.getString("create_time"))*1000); String gender = data.getString("gender");
String gender = data.getString("gender"); String user_type = data.getString("user_type");
String user_type = data.getString("user_type"); if(name.equals(word)){
if(name.equals(word)){ TouTiaoAccount tta = new TouTiaoAccount(user_id, name, media_id, description, user_verified,
TouTiaoAccount tta = new TouTiaoAccount(user_id, name, media_id, description, user_verified, verify_content, follow_count,img_url,create_time, gender, user_type);
verify_content, follow_count,img_url,create_time, gender, user_type); ZhiWeiTools.sleep(1000);
ZhiWeiTools.sleep(1000); TouTiaoAccount ttaUpdate = getTouTiaoAccountInfoByUserId(user_id+"", proxy);
TouTiaoAccount ttaUpdate = getTouTiaoAccountInfoByUserId(user_id+"", proxy); if(ttaUpdate != null){
if(ttaUpdate != null){ tta.setFriend_count(ttaUpdate.getFriend_count());
tta.setFriend_count(ttaUpdate.getFriend_count()); tta.setUser_type(ttaUpdate.getUser_type());
tta.setUser_type(ttaUpdate.getUser_type());
}
return tta;
} }
return tta;
} }
} catch (Exception e) { } catch (Exception e) {
logger.error("数据解析出现问题,{}", e); logger.error("数据解析出现问题,{}", e);
...@@ -299,29 +296,27 @@ public class TouTiaoAccountParse { ...@@ -299,29 +296,27 @@ public class TouTiaoAccountParse {
for (int i = 0; i < jsonArray.size(); i++) { for (int i = 0; i < jsonArray.size(); i++) {
try { try {
JSONObject data = jsonArray.getJSONObject(i); JSONObject data = jsonArray.getJSONObject(i);
if(data.containsKey("media_id")){ user_id = data.getLong("id");
user_id = data.getLong("id"); name = data.getString("name");
name = data.getString("name"); media_id = data.getLong("media_id");
media_id = data.getLong("media_id"); description = data.getString("description");
description = data.getString("description"); user_verified = data.getInteger("user_verified");
user_verified = data.getInteger("user_verified"); verify_content = data.getString("verify_content");
verify_content = data.getString("verify_content"); follow_count = data.getInteger("follow_count");
follow_count = data.getInteger("follow_count"); img_url = "https:"+data.getString("avatar_url");
img_url = "https:"+data.getString("avatar_url"); create_time = new Date(Integer.valueOf(data.getString("create_time")+"000"));
create_time = new Date(Integer.valueOf(data.getString("create_time")+"000")); gender = data.getString("gender");
gender = data.getString("gender"); user_type = data.getString("user_type");
user_type = data.getString("user_type");
tta = new TouTiaoAccount(user_id, name, media_id, description, user_verified,
tta = new TouTiaoAccount(user_id, name, media_id, description, user_verified, verify_content, follow_count,img_url,create_time, gender, user_type);
verify_content, follow_count,img_url,create_time, gender, user_type); ZhiWeiTools.sleep(1000);
ZhiWeiTools.sleep(1000); TouTiaoAccount ttaUpdate = getTouTiaoAccountInfoByUserId(user_id+"", proxy);
TouTiaoAccount ttaUpdate = getTouTiaoAccountInfoByUserId(user_id+"", proxy); if(ttaUpdate != null){
if(ttaUpdate != null){ tta.setFriend_count(ttaUpdate.getFriend_count());
tta.setFriend_count(ttaUpdate.getFriend_count()); tta.setUser_type(ttaUpdate.getUser_type());
tta.setUser_type(ttaUpdate.getUser_type());
}
ttaList.add(tta);
} }
ttaList.add(tta);
} catch (Exception e) { } catch (Exception e) {
logger.error("数据解析出现问题,{}", e); logger.error("数据解析出现问题,{}", e);
continue; continue;
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment