Commit 37e3924e by zhiwei

添加知乎账号采集

parent f4b4b293
......@@ -5,6 +5,7 @@ import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -15,6 +16,8 @@ import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.media_data_crawler.entity.ZhihuAnswer;
import com.zhiwei.media_data_crawler.entity.ZhihuAuthor;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
......@@ -78,4 +81,53 @@ public class ZhihuUserAnswerCrawlerParse {
return dataList;
}
/**
 * Searches Zhihu for user accounts matching a keyword and collects every result page.
 *
 * <p>The people-search endpoint is requested 50 entries at a time; the {@code offset}
 * query parameter is advanced by 50 after each response that was read without an
 * exception. Paging stops when a response carries an empty (or null) {@code data}
 * array, or once four failures have accumulated. A failure is either a response
 * without a {@code data} key (the offset still advances) or an exception while
 * fetching/parsing (the same offset is retried).
 *
 * @param name  keyword to search for; URL-encoded as UTF-8 before being appended to the query
 * @param proxy proxy handed to {@code httpBoot.syncCall} for every request
 * @return the authors parsed from all fetched pages; never {@code null}, possibly empty
 */
public static List<ZhihuAuthor> getZhihuAuthorList(String name,ProxyHolder proxy) {
    final int pageSize = 50;
    final int maxFailures = 4;
    List<ZhihuAuthor> zhihuList = new ArrayList<>();
    String url = "https://www.zhihu.com/api/v4/search_v3?t=people&q=" + URLCodeUtil.getURLEncode(name, "UTF-8") + "&correction=1&limit=50&lc_idx=40&show_all_topics=0&offset=";
    int page = 0;
    int failures = 0;
    while (failures < maxFailures) {
        String pageUrl = url + page;
        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(pageUrl), proxy)) {
            String result = response.body().string();
            JSONObject json = JSONObject.parseObject(result);
            if (json.containsKey("data")) {
                JSONArray jsonArray = json.getJSONArray("data");
                if (Objects.isNull(jsonArray) || jsonArray.isEmpty()) {
                    // An empty page means there is nothing left to fetch. Without this
                    // exit the loop would keep requesting ever larger offsets forever.
                    break;
                }
                // Parse into a page-local list first so that an exception in the middle
                // of a page cannot leave partial entries that the retry would duplicate.
                List<ZhihuAuthor> pageAuthors = new ArrayList<>(jsonArray.size());
                for (int i = 0; i < jsonArray.size(); i++) {
                    JSONObject data = jsonArray.getJSONObject(i);
                    JSONObject object = Objects.isNull(data) ? null : data.getJSONObject("object");
                    if (Objects.nonNull(object)) {
                        pageAuthors.add(parseZhihuAuthor(object));
                    }
                }
                zhihuList.addAll(pageAuthors);
            } else {
                failures++;
            }
            ZhiWeiTools.sleep(50);
            page += pageSize;
        } catch (Exception e) {
            logger.error("Failed to fetch Zhihu author search page, url={}", pageUrl, e);
            failures++;
        }
    }
    return zhihuList;
}

/**
 * Builds a {@link ZhihuAuthor} from the {@code object} node of one search result entry.
 *
 * @param object the non-null {@code object} node of a search result entry
 * @return a newly populated author
 */
private static ZhihuAuthor parseZhihuAuthor(JSONObject object) {
    ZhihuAuthor za = new ZhihuAuthor();
    String rawName = object.getString("name");
    // The name can contain markup tags; strip anything that looks like <...>.
    za.setName(Objects.isNull(rawName) ? null : rawName.replaceAll("<.*?>", ""));
    za.setUrl("https://www.zhihu.com/people/" + object.getString("url_token"));
    za.setAuthentication(object.getString("headline"));
    // NOTE(review): follower_count is stored via setGuanzhu although ZhihuAuthor also
    // declares a separate fensi field — confirm this mapping is intended.
    za.setGuanzhu(object.getIntValue("follower_count"));
    za.setLike(object.getIntValue("voteup_count"));
    // getIntValue instead of getInteger: a missing key must not fail on null unboxing.
    za.setAnswerCount(object.getIntValue("answer_count"));
    za.setArticleCount(object.getIntValue("articles_count"));
    return za;
}
}
......@@ -18,6 +18,10 @@ public class ZhihuAuthor {
private int fensi; // follower count
private int answerCount; // answer count
private int articleCount; // article count
private int thank; // thanks count
private int collection; // favorites (collections) count
......@@ -32,6 +36,22 @@ public class ZhihuAuthor {
private String business; // industry the author works in
/**
 * Returns the article count stored on this author.
 *
 * @return current value of the {@code articleCount} field
 */
public int getArticleCount() {
    return this.articleCount;
}
/**
 * Stores the article count on this author.
 *
 * @param articleCount value assigned to the {@code articleCount} field
 */
public void setArticleCount(int articleCount) {
this.articleCount = articleCount;
}
/**
 * Returns the answer count stored on this author.
 *
 * @return current value of the {@code answerCount} field
 */
public int getAnswerCount() {
    return this.answerCount;
}
/**
 * Stores the answer count on this author.
 *
 * @param answerCount value assigned to the {@code answerCount} field
 */
public void setAnswerCount(int answerCount) {
this.answerCount = answerCount;
}
/**
 * Returns the industry stored on this author.
 *
 * @return current value of the {@code business} field, as last assigned
 */
public String getBusiness() {
    return this.business;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment