Commit d12dad92 by zhiwei

升级核心包版本

parent 76baa808
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>media_data_crawler</artifactId> <artifactId>media_data_crawler</artifactId>
<version>0.0.8-SNAPSHOT</version> <version>0.0.9-SNAPSHOT</version>
<name>media_data_crawler</name> <name>media_data_crawler</name>
<description>网媒数据抓取,包含百度新闻、搜狗新闻、360新闻、知乎回答列表等</description> <description>网媒数据抓取,包含百度新闻、搜狗新闻、360新闻、知乎回答列表等</description>
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
<dependency> <dependency>
<groupId>com.zhiwei.crawler</groupId> <groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId> <artifactId>crawler-core</artifactId>
<version>0.2.6-RELEASE</version> <version>0.3.0-RELEASE</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
</dependencies> </dependencies>
......
package com.zhiwei.media_data_crawler.crawler; package com.zhiwei.media_data_crawler.crawler;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData; import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.net.Proxy; import okhttp3.Response;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class BaiduNewsCrawlerParse { public class BaiduNewsCrawlerParse {
......
package com.zhiwei.media_data_crawler.crawler; package com.zhiwei.media_data_crawler.crawler;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.TiebaData; import com.zhiwei.media_data_crawler.entity.TiebaData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.net.Proxy; import okhttp3.Response;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class BaiduTiebaCrawlerParse { public class BaiduTiebaCrawlerParse {
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot();
......
...@@ -15,8 +15,8 @@ import org.slf4j.Logger; ...@@ -15,8 +15,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
public class BaiduZhidaoCrawlerParse { public class BaiduZhidaoCrawlerParse {
......
package com.zhiwei.media_data_crawler.crawler; package com.zhiwei.media_data_crawler.crawler;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.DouBanData; import com.zhiwei.media_data_crawler.entity.DouBanData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.net.Proxy; import okhttp3.Response;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class DoubanCrawlerParse { public class DoubanCrawlerParse {
private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class); private static Logger logger = LogManager.getLogger(BaiduTiebaCrawlerParse.class);
......
package com.zhiwei.media_data_crawler.crawler; package com.zhiwei.media_data_crawler.crawler;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData; import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
...@@ -11,19 +24,8 @@ import com.zhiwei.tools.httpclient.HttpClientTemplateOK; ...@@ -11,19 +24,8 @@ import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.net.Proxy; import okhttp3.Response;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class SoCrawlerParse { public class SoCrawlerParse {
......
package com.zhiwei.media_data_crawler.crawler; package com.zhiwei.media_data_crawler.crawler;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.NewsData; import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.net.Proxy; import okhttp3.Response;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class SoNewsCrawlerParse { public class SoNewsCrawlerParse {
......
package com.zhiwei.media_data_crawler.crawler; package com.zhiwei.media_data_crawler.crawler;
import com.zhiwei.crawler.core.HttpBoot; import java.net.Proxy;
import com.zhiwei.crawler.core.RequestUtils; import java.util.ArrayList;
import com.zhiwei.crawler.proxy.ProxyHolder; import java.util.HashMap;
import com.zhiwei.media_data_crawler.data.DataCrawler; import java.util.List;
import com.zhiwei.media_data_crawler.entity.NewsData; import java.util.Map;
import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
...@@ -19,11 +14,17 @@ import org.jsoup.nodes.Document; ...@@ -19,11 +14,17 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import java.net.Proxy; import com.zhiwei.crawler.core.HttpBoot;
import java.util.ArrayList; import com.zhiwei.crawler.proxy.ProxyHolder;
import java.util.HashMap; import com.zhiwei.crawler.utils.RequestUtils;
import java.util.List; import com.zhiwei.media_data_crawler.data.DataCrawler;
import java.util.Map; import com.zhiwei.media_data_crawler.entity.NewsData;
import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
public class SougouNewsCrawlerParse { public class SougouNewsCrawlerParse {
......
package com.zhiwei.media_data_crawler.crawler; package com.zhiwei.media_data_crawler.crawler;
import com.zhiwei.crawler.core.HttpBoot; import java.net.Proxy;
import com.zhiwei.crawler.core.RequestUtils; import java.util.ArrayList;
import com.zhiwei.crawler.proxy.ProxyHolder; import java.util.Date;
import com.zhiwei.media_data_crawler.data.DataCrawler; import java.util.HashMap;
import com.zhiwei.media_data_crawler.entity.ZhiHuData; import java.util.List;
import com.zhiwei.tools.httpclient.HeaderTool; import java.util.Map;
import com.zhiwei.tools.timeparse.TimeParse; import java.util.regex.Matcher;
import com.zhiwei.tools.tools.URLCodeUtil; import java.util.regex.Pattern;
import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
...@@ -19,10 +17,17 @@ import org.jsoup.nodes.Document; ...@@ -19,10 +17,17 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import java.net.Proxy; import com.zhiwei.crawler.core.HttpBoot;
import java.util.*; import com.zhiwei.crawler.proxy.ProxyHolder;
import java.util.regex.Matcher; import com.zhiwei.crawler.utils.RequestUtils;
import java.util.regex.Pattern; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.ZhiHuData;
import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
public class SougouZhihuCrawlerParse{ public class SougouZhihuCrawlerParse{
......
package com.zhiwei.media_data_crawler.crawler; package com.zhiwei.media_data_crawler.crawler;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.LunTanData; import com.zhiwei.media_data_crawler.entity.LunTanData;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import okhttp3.Response;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.net.Proxy; import okhttp3.Response;
import java.util.*;
public class TianYaCrawlerParse { public class TianYaCrawlerParse {
......
...@@ -12,9 +12,9 @@ import org.apache.logging.log4j.Logger; ...@@ -12,9 +12,9 @@ import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyFactory; import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.entity.ZhihuAnswerComment; import com.zhiwei.media_data_crawler.entity.ZhihuAnswerComment;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -16,7 +16,7 @@ import org.slf4j.LoggerFactory; ...@@ -16,7 +16,7 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.entity.ZhihuAnswer; import com.zhiwei.media_data_crawler.entity.ZhihuAnswer;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
......
...@@ -15,7 +15,7 @@ import org.apache.logging.log4j.Logger; ...@@ -15,7 +15,7 @@ import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.data.DataCrawler; import com.zhiwei.media_data_crawler.data.DataCrawler;
import com.zhiwei.media_data_crawler.entity.ZhiHuData; import com.zhiwei.media_data_crawler.entity.ZhiHuData;
import com.zhiwei.media_data_crawler.entity.ZhihuAuthor; import com.zhiwei.media_data_crawler.entity.ZhihuAuthor;
......
...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory; ...@@ -13,8 +13,8 @@ import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.media_data_crawler.entity.ZhihuAnswer; import com.zhiwei.media_data_crawler.entity.ZhihuAnswer;
import com.zhiwei.media_data_crawler.entity.ZhihuAuthor; import com.zhiwei.media_data_crawler.entity.ZhihuAuthor;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment