Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
media_data_crawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
media_data_crawler
Commits
0537cf5a
Commit
0537cf5a
authored
Jun 15, 2018
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
111
parent
1b78ab01
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
148 additions
and
104 deletions
+148
-104
src/main/java/com/zhiwei/media_data_crawler/data/DataCrawler.java
+148
-104
No files found.
src/main/java/com/zhiwei/media_data_crawler/data/DataCrawler.java
View file @
0537cf5a
...
...
@@ -17,62 +17,80 @@ import com.zhiwei.media_data_crawler.entity.TiebaData;
import
com.zhiwei.media_data_crawler.entity.ZhiHuData
;
public
class
DataCrawler
{
public
static
Long
sleepTime
;
public
void
setSleepTime
(
Long
sleepTime
)
{
DataCrawler
.
sleepTime
=
sleepTime
;
}
/**
*
* @Title: getBaiduNewsData
* @author hero
* @Description: 根据关键词和时间,全文匹配百度新闻数据
* @param @param word
* @param @param startTime
* @param @param endTime
* @param @param proxy
* @param @return 设定文件
* @Title: getBaiduNewsData
* @author hero
* @Description: 根据关键词和时间,全文匹配百度新闻数据
* @param @param
* word
* @param @param
* startTime
* @param @param
* endTime
* @param @param
* proxy
* @param @return
* 设定文件
* @return List<NewsData> 返回类型
*/
public
static
List
<
NewsData
>
getBaiduNewsData
(
String
word
,
String
startTime
,
String
endTime
,
Proxy
proxy
,
Long
sleepTime
){
public
static
List
<
NewsData
>
getBaiduNewsData
(
String
word
,
String
startTime
,
String
endTime
,
Proxy
proxy
)
{
try
{
return
BaiduNewsCrawlerParse
.
getBaiduNewsData
(
word
,
startTime
,
endTime
,
proxy
,
sleepTime
);
return
BaiduNewsCrawlerParse
.
getBaiduNewsData
(
word
,
startTime
,
endTime
,
proxy
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
null
;
}
}
/**
*
* @Title: getBaiduNewsDataByTitle
* @author hero
* @Description: 根据关键词和时间,标题匹配百度新闻数据
* @param @param word
* @param @param startTime
* @param @param endTime
* @param @param proxy
* @param @return 设定文件
* @Title: getBaiduNewsDataByTitle
* @author hero
* @Description: 根据关键词和时间,标题匹配百度新闻数据
* @param @param
* word
* @param @param
* startTime
* @param @param
* endTime
* @param @param
* proxy
* @param @return
* 设定文件
* @return List<NewsData> 返回类型
*/
public
static
List
<
NewsData
>
getBaiduNewsDataByTitle
(
String
word
,
String
startTime
,
String
endTime
,
Proxy
proxy
,
Long
sleepTime
)
{
public
static
List
<
NewsData
>
getBaiduNewsDataByTitle
(
String
word
,
String
startTime
,
String
endTime
,
Proxy
proxy
)
{
try
{
return
BaiduNewsCrawlerParse
.
getBaiduNewsDataByTitle
(
word
,
startTime
,
endTime
,
proxy
,
sleepTime
);
return
BaiduNewsCrawlerParse
.
getBaiduNewsDataByTitle
(
word
,
startTime
,
endTime
,
proxy
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
null
;
}
}
/**
*
* @Title: getSoNewsData
* @author hero
* @Title: getSoNewsData
* @author hero
* @Description: 采集360新闻数据,按照全文匹配
* @param @param word
* @param @param proxy
* @param @return 设定文件
* @param @param
* word
* @param @param
* proxy
* @param @return
* 设定文件
* @return List<NewsData> 返回类型
*/
public
static
List
<
NewsData
>
getSoNewsData
(
String
word
,
Proxy
proxy
){
public
static
List
<
NewsData
>
getSoNewsData
(
String
word
,
Proxy
proxy
)
{
try
{
return
SoNewsCrawlerParse
.
getSoNewsData
(
word
,
proxy
);
}
catch
(
Exception
e
)
{
...
...
@@ -80,18 +98,21 @@ public class DataCrawler {
return
null
;
}
}
/**
*
* @Title: getSoNewsDataByTitle
* @author hero
* @Title: getSoNewsDataByTitle
* @author hero
* @Description: 采集360新闻数据 ,按照标题匹配
* @param @param word
* @param @param proxy
* @param @return 设定文件
* @param @param
* word
* @param @param
* proxy
* @param @return
* 设定文件
* @return List<NewsData> 返回类型
*/
public
static
List
<
NewsData
>
getSoNewsDataByTitle
(
String
word
,
Proxy
proxy
){
public
static
List
<
NewsData
>
getSoNewsDataByTitle
(
String
word
,
Proxy
proxy
)
{
try
{
return
SoNewsCrawlerParse
.
getSoNewsDataByTitle
(
word
,
proxy
);
}
catch
(
Exception
e
)
{
...
...
@@ -99,133 +120,156 @@ public class DataCrawler {
return
null
;
}
}
/**
*
* @Title: getSougouNewsData
* @author hero
* @Description: 搜狗新闻采集,全文匹配
* @param @param word
* @param @param proxy
* @param @return 设定文件
* @Title: getSougouNewsData
* @author hero
* @Description: 搜狗新闻采集,全文匹配
* @param @param
* word
* @param @param
* proxy
* @param @return
* 设定文件
* @return List<NewsData> 返回类型
*/
public
static
List
<
NewsData
>
getSougouNewsData
(
String
word
,
Proxy
proxy
,
Long
sleepTime
)
{
public
static
List
<
NewsData
>
getSougouNewsData
(
String
word
,
Proxy
proxy
)
{
try
{
return
SougouNewsCrawlerParse
.
getSougouNewsData
(
word
,
proxy
,
sleepTime
);
return
SougouNewsCrawlerParse
.
getSougouNewsData
(
word
,
proxy
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
null
;
}
}
/**
*
* @Title: getSougouNewsDataByTitle
* @author hero
* @Description: 搜狗新闻采集,标题匹配
* @param @param word
* @param @param proxy
* @param @return 设定文件
* @Title: getSougouNewsDataByTitle
* @author hero
* @Description: 搜狗新闻采集,标题匹配
* @param @param
* word
* @param @param
* proxy
* @param @return
* 设定文件
* @return List<NewsData> 返回类型
*/
public
static
List
<
NewsData
>
getSougouNewsDataByTitle
(
String
word
,
Proxy
proxy
,
Long
sleepTime
)
{
public
static
List
<
NewsData
>
getSougouNewsDataByTitle
(
String
word
,
Proxy
proxy
)
{
try
{
return
SougouNewsCrawlerParse
.
getSougouNewsDataByTitle
(
word
,
proxy
,
sleepTime
);
return
SougouNewsCrawlerParse
.
getSougouNewsDataByTitle
(
word
,
proxy
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
null
;
}
}
/**
* @Title: getSougouZhihuData
* @author hero
* @Title: getSougouZhihuData
* @author hero
* @Description: 根据关键词在搜狗知乎采集相应的知乎数据
* @param @param word
* @param @param proxy
* @param @return 设定文件
* @param @param
* word
* @param @param
* proxy
* @param @return
* 设定文件
* @return List<ZhiHuData> 返回类型
*/
public
static
List
<
ZhiHuData
>
getSougouZhihuData
(
String
word
,
Proxy
proxy
,
Long
sleepTime
)
{
public
static
List
<
ZhiHuData
>
getSougouZhihuData
(
String
word
,
Proxy
proxy
)
{
try
{
return
SougouZhihuCrawlerParse
.
getSougouZhihuData
(
word
,
proxy
,
sleepTime
);
return
SougouZhihuCrawlerParse
.
getSougouZhihuData
(
word
,
proxy
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
null
;
}
}
/**
* @Title: getBaiduTiebaData
* @author hero
* @Title: getBaiduTiebaData
* @author hero
* @Description: 根据关键词采集贴吧数据
* @param @param word
* @param @param proxy
* @param @return 设定文件
* @param @param
* word
* @param @param
* proxy
* @param @return
* 设定文件
* @return List<TiebaData> 返回类型
*/
public
static
List
<
TiebaData
>
getBaiduTiebaData
(
String
word
,
Proxy
proxy
,
Long
sleepTime
)
{
public
static
List
<
TiebaData
>
getBaiduTiebaData
(
String
word
,
Proxy
proxy
)
{
try
{
return
BaiduTiebaCrawlerParse
.
getBaiduTiebaData
(
word
,
proxy
,
null
,
sleepTime
);
return
BaiduTiebaCrawlerParse
.
getBaiduTiebaData
(
word
,
proxy
,
null
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
null
;
}
}
/**
* @Title: getBaiduTiebaData
* @author hero
* @Description: 根据关键词采集指定贴吧内数据
* @param @param word
* @param @param proxy
* @param @param tiebaName
* @param @return 设定文件
* @Title: getBaiduTiebaData
* @author hero
* @Description: 根据关键词采集指定贴吧内数据
* @param @param
* word
* @param @param
* proxy
* @param @param
* tiebaName
* @param @return
* 设定文件
* @return List<TiebaData> 返回类型
*/
public
static
List
<
TiebaData
>
getBaiduTiebaData
(
String
word
,
Proxy
proxy
,
String
tiebaName
,
Long
sleepTime
)
{
public
static
List
<
TiebaData
>
getBaiduTiebaData
(
String
word
,
Proxy
proxy
,
String
tiebaName
)
{
try
{
return
BaiduTiebaCrawlerParse
.
getBaiduTiebaData
(
word
,
proxy
,
tiebaName
,
sleepTime
);
return
BaiduTiebaCrawlerParse
.
getBaiduTiebaData
(
word
,
proxy
,
tiebaName
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
null
;
}
}
/**
* @Title: getLunTanData
* @author hero
* @Title: getLunTanData
* @author hero
* @Description: 根据关键词采集天涯论坛数据
* @param @param word
* @param @param proxy
* @param @param endTime
* @param @return 设定文件
* @param @param
* word
* @param @param
* proxy
* @param @param
* endTime
* @param @return
* 设定文件
* @return List<LunTanData> 返回类型
*/
public
static
List
<
LunTanData
>
getLunTanData
(
String
word
,
Proxy
proxy
,
String
endTime
,
Long
sleepTime
)
{
public
static
List
<
LunTanData
>
getLunTanData
(
String
word
,
Proxy
proxy
,
String
endTime
)
{
try
{
return
TianYaCrawlerParse
.
getLunTanData
(
word
,
proxy
,
endTime
,
sleepTime
);
return
TianYaCrawlerParse
.
getLunTanData
(
word
,
proxy
,
endTime
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
return
null
;
}
}
/**
* @Title: getDouBanData
* @author hero
* @Description: 根据关键词采集豆瓣数据
* @param @param word
* @param @param type type=topic,type=note
* @param @param proxy
* @param @param endTime
* @param @return 设定文件
* @Title: getDouBanData
* @author hero
* @Description: 根据关键词采集豆瓣数据
* @param @param
* word
* @param @param
* type type=topic,type=note
* @param @param
* proxy
* @param @param
* endTime
* @param @return
* 设定文件
* @return List<DouBanData> 返回类型
*/
public
static
List
<
DouBanData
>
getDouBanData
(
String
word
,
String
type
,
Proxy
proxy
){
public
static
List
<
DouBanData
>
getDouBanData
(
String
word
,
String
type
,
Proxy
proxy
)
{
try
{
return
DoubanCrawlerParse
.
getDoubanData
(
word
,
type
,
proxy
);
}
catch
(
Exception
e
)
{
...
...
@@ -233,5 +277,5 @@ public class DataCrawler {
return
null
;
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment