Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
media_data_crawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
media_data_crawler
Commits
d12dad92
Commit
d12dad92
authored
Mar 19, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
升级核心包版本
parent
76baa808
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
137 additions
and
115 deletions
+137
-115
pom.xml
+2
-2
src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduNewsCrawlerParse.java
+17
-15
src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduTiebaCrawlerParse.java
+17
-15
src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduZhidaoCrawlerParse.java
+1
-1
src/main/java/com/zhiwei/media_data_crawler/crawler/DoubanCrawlerParse.java
+15
-13
src/main/java/com/zhiwei/media_data_crawler/crawler/SoCrawlerParse.java
+15
-13
src/main/java/com/zhiwei/media_data_crawler/crawler/SoNewsCrawlerParse.java
+15
-13
src/main/java/com/zhiwei/media_data_crawler/crawler/SougouNewsCrawlerParse.java
+16
-15
src/main/java/com/zhiwei/media_data_crawler/crawler/SougouZhihuCrawlerParse.java
+19
-14
src/main/java/com/zhiwei/media_data_crawler/crawler/TianYaCrawlerParse.java
+16
-10
src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuAnswerCommentParse.java
+1
-1
src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuAnwserCrawlerParse.java
+1
-1
src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuCrawlerParse.java
+1
-1
src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuUserAnswerCrawlerParse.java
+1
-1
No files found.
pom.xml
View file @
d12dad92
...
...
@@ -2,7 +2,7 @@
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<artifactId>
media_data_crawler
</artifactId>
<version>
0.0.
8
-SNAPSHOT
</version>
<version>
0.0.
9
-SNAPSHOT
</version>
<name>
media_data_crawler
</name>
<description>
网媒数据抓取,包含百度新闻、搜狗新闻、360新闻、知乎回答列表等
</description>
...
...
@@ -16,7 +16,7 @@
<dependency>
<groupId>
com.zhiwei.crawler
</groupId>
<artifactId>
crawler-core
</artifactId>
<version>
0.
2.6
-RELEASE
</version>
<version>
0.
3.0
-RELEASE
</version>
<scope>
provided
</scope>
</dependency>
</dependencies>
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduNewsCrawlerParse.java
View file @
d12dad92
package
com
.
zhiwei
.
media_data_crawler
.
crawler
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.media_data_crawler.data.DataCrawler
;
import
com.zhiwei.media_data_crawler.entity.NewsData
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
okhttp3.Response
;
public
class
BaiduNewsCrawlerParse
{
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduTiebaCrawlerParse.java
View file @
d12dad92
package
com
.
zhiwei
.
media_data_crawler
.
crawler
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.media_data_crawler.data.DataCrawler
;
import
com.zhiwei.media_data_crawler.entity.TiebaData
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
okhttp3.Response
;
public
class
BaiduTiebaCrawlerParse
{
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/BaiduZhidaoCrawlerParse.java
View file @
d12dad92
...
...
@@ -15,8 +15,8 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
public
class
BaiduZhidaoCrawlerParse
{
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/DoubanCrawlerParse.java
View file @
d12dad92
package
com
.
zhiwei
.
media_data_crawler
.
crawler
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.media_data_crawler.data.DataCrawler
;
import
com.zhiwei.media_data_crawler.entity.DouBanData
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
okhttp3.Response
;
public
class
DoubanCrawlerParse
{
private
static
Logger
logger
=
LogManager
.
getLogger
(
BaiduTiebaCrawlerParse
.
class
);
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/SoCrawlerParse.java
View file @
d12dad92
package
com
.
zhiwei
.
media_data_crawler
.
crawler
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.media_data_crawler.data.DataCrawler
;
import
com.zhiwei.media_data_crawler.entity.NewsData
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
...
...
@@ -11,19 +24,8 @@ import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
okhttp3.Response
;
public
class
SoCrawlerParse
{
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/SoNewsCrawlerParse.java
View file @
d12dad92
package
com
.
zhiwei
.
media_data_crawler
.
crawler
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.media_data_crawler.data.DataCrawler
;
import
com.zhiwei.media_data_crawler.entity.NewsData
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
okhttp3.Response
;
public
class
SoNewsCrawlerParse
{
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/SougouNewsCrawlerParse.java
View file @
d12dad92
package
com
.
zhiwei
.
media_data_crawler
.
crawler
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.media_data_crawler.data.DataCrawler
;
import
com.zhiwei.media_data_crawler.entity.NewsData
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.logging.log4j.LogManager
;
...
...
@@ -19,11 +14,17 @@ import org.jsoup.nodes.Document;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.media_data_crawler.data.DataCrawler
;
import
com.zhiwei.media_data_crawler.entity.NewsData
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
SougouNewsCrawlerParse
{
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/SougouZhihuCrawlerParse.java
View file @
d12dad92
package
com
.
zhiwei
.
media_data_crawler
.
crawler
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.media_data_crawler.data.DataCrawler
;
import
com.zhiwei.media_data_crawler.entity.ZhiHuData
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.logging.log4j.LogManager
;
...
...
@@ -19,10 +17,17 @@ import org.jsoup.nodes.Document;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
java.net.Proxy
;
import
java.util.*
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.media_data_crawler.data.DataCrawler
;
import
com.zhiwei.media_data_crawler.entity.ZhiHuData
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
public
class
SougouZhihuCrawlerParse
{
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/TianYaCrawlerParse.java
View file @
d12dad92
package
com
.
zhiwei
.
media_data_crawler
.
crawler
;
import
java.net.Proxy
;
import
java.util.ArrayList
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.media_data_crawler.data.DataCrawler
;
import
com.zhiwei.media_data_crawler.entity.LunTanData
;
import
com.zhiwei.tools.httpclient.HeaderTool
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
import
okhttp3.Response
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
java.net.Proxy
;
import
java.util.*
;
import
okhttp3.Response
;
public
class
TianYaCrawlerParse
{
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuAnswerCommentParse.java
View file @
d12dad92
...
...
@@ -12,9 +12,9 @@ import org.apache.logging.log4j.Logger;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyFactory
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.media_data_crawler.entity.ZhihuAnswerComment
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuAnwserCrawlerParse.java
View file @
d12dad92
...
...
@@ -16,7 +16,7 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.media_data_crawler.entity.ZhihuAnswer
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuCrawlerParse.java
View file @
d12dad92
...
...
@@ -15,7 +15,7 @@ import org.apache.logging.log4j.Logger;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.
core
.RequestUtils
;
import
com.zhiwei.crawler.
utils
.RequestUtils
;
import
com.zhiwei.media_data_crawler.data.DataCrawler
;
import
com.zhiwei.media_data_crawler.entity.ZhiHuData
;
import
com.zhiwei.media_data_crawler.entity.ZhihuAuthor
;
...
...
src/main/java/com/zhiwei/media_data_crawler/crawler/ZhihuUserAnswerCrawlerParse.java
View file @
d12dad92
...
...
@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.RequestUtils
;
import
com.zhiwei.crawler.proxy.ProxyHolder
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.media_data_crawler.entity.ZhihuAnswer
;
import
com.zhiwei.media_data_crawler.entity.ZhihuAuthor
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment