Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
media_data_crawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
media_data_crawler
Commits
37e3924e
Commit
37e3924e
authored
Feb 20, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加知乎账号采集
parent
f4b4b293
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
72 additions
and
0 deletions
+72
-0
src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuUserAnswerCrawlerParse.java
+52
-0
src/main/java/com/zhiwei/media_data_crawler/entity/ZhihuAuthor.java
+20
-0
No files found.
src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuUserAnswerCrawlerParse.java
View file @
37e3924e
...
@@ -5,6 +5,7 @@ import java.util.Date;
...
@@ -5,6 +5,7 @@ import java.util.Date;
import
java.util.HashMap
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Objects
;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
...
@@ -15,6 +16,8 @@ import com.zhiwei.crawler.core.HttpBoot;
...
@@ -15,6 +16,8 @@ import com.zhiwei.crawler.core.HttpBoot;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.media_data_crawler.entity.ZhihuAnswer
;
import
com.zhiwei.media_data_crawler.entity.ZhihuAnswer
;
import
com.zhiwei.media_data_crawler.entity.ZhihuAuthor
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
okhttp3.Response
;
...
@@ -78,4 +81,53 @@ public class ZhihuUserAnswerCrawlerParse {
...
@@ -78,4 +81,53 @@ public class ZhihuUserAnswerCrawlerParse {
return
dataList
;
return
dataList
;
}
}
/**
 * Searches Zhihu for user accounts matching {@code name} and collects the results
 * of every page into a list.
 *
 * <p>Pages are requested 50 entries at a time by increasing the {@code offset} query
 * parameter. Paging stops when a page carries an empty (or null) {@code data} array,
 * or after four failures in total (a response without a {@code data} key, or an
 * exception while fetching/parsing).
 *
 * @param name  search keyword; it is URL-encoded as UTF-8 before being sent
 * @param proxy proxy holder handed to the HTTP client for every request
 * @return the accounts parsed from all fetched pages; never {@code null}, possibly empty
 */
public static List<ZhihuAuthor> getZhihuAuthorList(String name, ProxyHolder proxy) {
    List<ZhihuAuthor> zhihuList = new ArrayList<>();
    String url = "https://www.zhihu.com/api/v4/search_v3?t=people&q="
            + URLCodeUtil.getURLEncode(name, "UTF-8")
            + "&correction=1&limit=50&lc_idx=40&show_all_topics=0&offset=";
    int page = 0;
    int failures = 0;
    while (failures < 4) {
        try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url + page), proxy)) {
            String result = response.body().string();
            JSONObject json = JSONObject.parseObject(result);
            if (json.containsKey("data")) {
                JSONArray jsonArray = json.getJSONArray("data");
                if (jsonArray == null || jsonArray.isEmpty()) {
                    // An empty page means there is nothing left to fetch. Without this
                    // exit the loop would keep requesting ever larger offsets forever.
                    break;
                }
                for (int i = 0; i < jsonArray.size(); i++) {
                    JSONObject item = jsonArray.getJSONObject(i);
                    JSONObject author = item == null ? null : item.getJSONObject("object");
                    if (author == null) {
                        // Skip entries without an "object" payload instead of failing
                        // the whole page (a retry would re-add the entries parsed so far).
                        continue;
                    }
                    ZhihuAuthor za = new ZhihuAuthor();
                    String rawName = author.getString("name");
                    // The name may contain markup tags (e.g. search highlighting); strip them.
                    za.setName(rawName == null ? null : rawName.replaceAll("<.*?>", ""));
                    za.setUrl("https://www.zhihu.com/people/" + author.getString("url_token"));
                    za.setAuthentication(author.getString("headline"));
                    // NOTE(review): follower_count is stored via setGuanzhu, while the entity
                    // also declares a "fensi" (follower count) field - confirm the mapping.
                    za.setGuanzhu(author.getIntValue("follower_count"));
                    za.setLike(author.getIntValue("voteup_count"));
                    // getIntValue yields 0 for a missing key; getInteger would return null
                    // and throw a NullPointerException when unboxed into the int setter.
                    za.setAnswerCount(author.getIntValue("answer_count"));
                    za.setArticleCount(author.getIntValue("articles_count"));
                    zhihuList.add(za);
                }
            } else {
                failures++;
            }
            ZhiWeiTools.sleep(50);
            page += 50;
        } catch (Exception e) {
            // The offset is intentionally not advanced here, so the same page is retried.
            logger.error("Failed to fetch Zhihu user search page, offset={}", page, e);
            failures++;
        }
    }
    return zhihuList;
}
}
}
src/main/java/com/zhiwei/media_data_crawler/entity/ZhihuAuthor.java
View file @
37e3924e
...
@@ -18,6 +18,10 @@ public class ZhihuAuthor {
...
@@ -18,6 +18,10 @@ public class ZhihuAuthor {
private
int
fensi
;
//粉丝数
private
int
fensi
;
//粉丝数
private
int
answerCount
;
// number of answers
private
int
articleCount
;
// number of articles
private
int
thank
;
//感谢数
private
int
thank
;
//感谢数
private
int
collection
;
//收藏数
private
int
collection
;
//收藏数
...
@@ -32,6 +36,22 @@ public class ZhihuAuthor {
...
@@ -32,6 +36,22 @@ public class ZhihuAuthor {
private
String
business
;
//所在行业
private
String
business
;
//所在行业
/**
 * Returns the number of articles recorded for this author.
 *
 * @return the article count
 */
public int getArticleCount() {
    return this.articleCount;
}
/**
 * Stores the number of articles recorded for this author.
 *
 * @param articleCount the article count to store
 */
public void setArticleCount(int articleCount) {
    this.articleCount = articleCount;
}
/**
 * Returns the number of answers recorded for this author.
 *
 * @return the answer count
 */
public int getAnswerCount() {
    return this.answerCount;
}
/**
 * Stores the number of answers recorded for this author.
 *
 * @param answerCount the answer count to store
 */
public void setAnswerCount(int answerCount) {
    this.answerCount = answerCount;
}
public
String
getBusiness
()
{
public
String
getBusiness
()
{
return
business
;
return
business
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment