Commit 3d198f56 by chenweiyang

爬虫核心包版本升级 版本升级0.4.4

parent 862fa762
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>toutiao</artifactId> <artifactId>toutiao</artifactId>
<version>0.4.3-SNAPSHOT</version> <version>0.4.4-SNAPSHOT</version>
<dependencies> <dependencies>
<dependency> <dependency>
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
<dependency> <dependency>
<groupId>com.zhiwei.crawler</groupId> <groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId> <artifactId>crawler-core</artifactId>
<version>0.5.6.2-RELEASE</version> <version>0.6.6.3-SNAPSHOT</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
...@@ -23,11 +23,6 @@ ...@@ -23,11 +23,6 @@
<artifactId>excelpoi</artifactId> <artifactId>excelpoi</artifactId>
<version>0.0.5-SNAPSHOT</version> <version>0.0.5-SNAPSHOT</version>
</dependency> </dependency>
<dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongo-java-driver</artifactId>
<version>3.8.1</version>
</dependency>
</dependencies> </dependencies>
<!-- 打包管理 --> <!-- 打包管理 -->
......
...@@ -16,8 +16,8 @@ import org.apache.logging.log4j.util.Strings; ...@@ -16,8 +16,8 @@ import org.apache.logging.log4j.util.Strings;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.toutiao.bean.Signature; import com.zhiwei.toutiao.bean.Signature;
......
...@@ -34,8 +34,8 @@ import org.jsoup.Jsoup; ...@@ -34,8 +34,8 @@ import org.jsoup.Jsoup;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.toutiao.bean.Signature; import com.zhiwei.toutiao.bean.Signature;
import com.zhiwei.toutiao.bean.TouTiaoArticle; import com.zhiwei.toutiao.bean.TouTiaoArticle;
......
...@@ -15,8 +15,8 @@ import org.apache.logging.log4j.Logger; ...@@ -15,8 +15,8 @@ import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.toutiao.bean.TouTiaoComment; import com.zhiwei.toutiao.bean.TouTiaoComment;
import com.zhiwei.toutiao.util.Tools; import com.zhiwei.toutiao.util.Tools;
......
...@@ -14,8 +14,8 @@ import org.jsoup.nodes.Document; ...@@ -14,8 +14,8 @@ import org.jsoup.nodes.Document;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.toutiao.bean.TouTiaoQuestionAnswer; import com.zhiwei.toutiao.bean.TouTiaoQuestionAnswer;
import com.zhiwei.toutiao.util.Tools; import com.zhiwei.toutiao.util.Tools;
......
...@@ -13,8 +13,8 @@ import org.apache.logging.log4j.Logger; ...@@ -13,8 +13,8 @@ import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.toutiao.bean.TouTiaoQuestion; import com.zhiwei.toutiao.bean.TouTiaoQuestion;
......
...@@ -14,8 +14,8 @@ import com.alibaba.fastjson.JSONArray; ...@@ -14,8 +14,8 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONException; import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.proxy.ProxyHolder; import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.httpclient.HeaderTool; import com.zhiwei.tools.httpclient.HeaderTool;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.toutiao.bean.TouTiaoArticle; import com.zhiwei.toutiao.bean.TouTiaoArticle;
......
...@@ -12,7 +12,7 @@ import org.apache.logging.log4j.Logger; ...@@ -12,7 +12,7 @@ import org.apache.logging.log4j.Logger;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.core.utils.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.ZhiWeiTools; import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.toutiao.util.Tools; import com.zhiwei.toutiao.util.Tools;
......
package com.zhiwei.toutiao.test; //package com.zhiwei.toutiao.test;
//
import java.util.List; //import java.util.List;
//
import com.zhiwei.common.config.GroupType; //import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory; //import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.crawler.proxy.ProxyHolder; //import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.toutiao.bean.TouTiaoAccount; //import com.zhiwei.toutiao.bean.TouTiaoAccount;
import com.zhiwei.toutiao.parse.TouTiaoAccountParse; //import com.zhiwei.toutiao.parse.TouTiaoAccountParse;
//
/** ///**
* @ClassName: TouTiaoAccountExample // * @ClassName: TouTiaoAccountExample
* @Description: TODO(今日头条帐号采集) // * @Description: TODO(今日头条帐号采集)
* @author hero // * @author hero
* @date 2017年10月17日 下午4:03:44 // * @date 2017年10月17日 下午4:03:44
*/ // */
public class TouTiaoAccountExample { //public class TouTiaoAccountExample {
//
private static final String registry = "zookeeper://192.168.0.36:2181"; // private static final String registry = "zookeeper://192.168.0.36:2181";
private static final String group = "local"; // private static final String group = "local";
//
//
public static void main(String[] args) { // public static void main(String[] args) {
ProxyFactory.init(registry, group, GroupType.PROVIDER); // ProxyFactory.init(registry, group, GroupType.PROVIDER);
touTiaoAccountFriendTest(); // touTiaoAccountFriendTest();
//
} // }
//
public void touTiaoAccountTest(){ // public void touTiaoAccountTest(){
String word = "华尔街瞭望"; // String word = "华尔街瞭望";
System.out.println("===================="+TouTiaoAccountParse.getTouTiaoAccountInfoByName(word, null)); // System.out.println("===================="+TouTiaoAccountParse.getTouTiaoAccountInfoByName(word, null));
} // }
//
//
//
public static void touTiaoAccountFriendTest(){ // public static void touTiaoAccountFriendTest(){
String userid = "3478445819704347"; // String userid = "3478445819704347";
List<TouTiaoAccount> userList = TouTiaoAccountParse.getFriendsList(userid, ProxyHolder.NAT_HEAVY_PROXY); // List<TouTiaoAccount> userList = TouTiaoAccountParse.getFriendsList(userid, ProxyHolder.NAT_HEAVY_PROXY);
for(TouTiaoAccount tta : userList){ // for(TouTiaoAccount tta : userList){
System.out.println(tta); // System.out.println(tta);
} // }
//
} // }
} //}
package com.zhiwei.toutiao.test; //package com.zhiwei.toutiao.test;
//
import java.util.List; //import java.util.List;
import java.util.Map; //import java.util.Map;
import com.alibaba.fastjson.JSONObject; //import com.alibaba.fastjson.JSONObject;
import com.mongodb.BasicDBObject; //import com.mongodb.BasicDBObject;
import com.mongodb.DB; //import com.mongodb.DB;
import com.mongodb.DBCollection; //import com.mongodb.DBCollection;
import com.mongodb.DBObject; //import com.mongodb.DBObject;
import com.mongodb.Mongo; //import com.mongodb.Mongo;
import com.zhiwei.common.config.GroupType; //import com.zhiwei.common.config.GroupType;
import com.zhiwei.crawler.proxy.ProxyFactory; //import com.zhiwei.crawler.proxy.ProxyFactory;
import com.zhiwei.excelpoi.bean.ExcelResult; //import com.zhiwei.excelpoi.bean.ExcelResult;
import com.zhiwei.excelpoi.excel.PoiExcelUtil; //import com.zhiwei.excelpoi.excel.PoiExcelUtil;
import com.zhiwei.toutiao.bean.TouTiaoQuestion; //import com.zhiwei.toutiao.bean.TouTiaoQuestion;
import com.zhiwei.toutiao.parse.TouTiaoQuestionParse; //import com.zhiwei.toutiao.parse.TouTiaoQuestionParse;
//
/** ///**
* @ClassName: TouTiaoQuestionExample // * @ClassName: TouTiaoQuestionExample
* @Description: TODO(头条问答采集测试类) // * @Description: TODO(头条问答采集测试类)
* @author hero // * @author hero
* @date 2017年7月20日 下午3:06:51 // * @date 2017年7月20日 下午3:06:51
*/ // */
public class TouTiaoQuestionExample { //public class TouTiaoQuestionExample {
//
private static final String registry = "zookeeper://192.168.0.36:2181"; // private static final String registry = "zookeeper://192.168.0.36:2181";
private static final String group = "local"; // private static final String group = "local";
//
public static void main(String[] args) { // public static void main(String[] args) {
ProxyFactory.init(registry, group, GroupType.PROVIDER); // ProxyFactory.init(registry, group, GroupType.PROVIDER);
//
Mongo mongo = new Mongo("192.168.0.81", 27017); // Mongo mongo = new Mongo("192.168.0.81", 27017);
DB db = mongo.getDB("wukong"); // DB db = mongo.getDB("wukong");
DBCollection coll = db.getCollection("wukong"); // DBCollection coll = db.getCollection("wukong");
//
touTiaoQuestionTest(coll); // touTiaoQuestionTest(coll);
//
} // }
//
public static void touTiaoQuestionTest(DBCollection coll) { // public static void touTiaoQuestionTest(DBCollection coll) {
PoiExcelUtil poi = PoiExcelUtil.getInstance(); // PoiExcelUtil poi = PoiExcelUtil.getInstance();
ExcelResult excelResult = poi.importExcelResult("C:\\Users\\qq859\\Desktop\\悟空问答关键词.xlsx", 0); // ExcelResult excelResult = poi.importExcelResult("C:\\Users\\qq859\\Desktop\\悟空问答关键词.xlsx", 0);
List<Map<String,Object>> dataList = excelResult.getBodyList(); // List<Map<String,Object>> dataList = excelResult.getBodyList();
for(Map<String,Object> data : dataList) { // for(Map<String,Object> data : dataList) {
String word = data.get("关键词").toString(); // String word = data.get("关键词").toString();
System.out.println("word================"+word); // System.out.println("word================"+word);
List<TouTiaoQuestion> list = TouTiaoQuestionParse.getSearchTouTiaoQuestion(word); // List<TouTiaoQuestion> list = TouTiaoQuestionParse.getSearchTouTiaoQuestion(word);
System.out.println(list.size()); // System.out.println(list.size());
for(TouTiaoQuestion question : list){ // for(TouTiaoQuestion question : list){
String jsonStr = JSONObject.toJSONString(question); // String jsonStr = JSONObject.toJSONString(question);
Map dataMap = JSONObject.toJavaObject(JSONObject.parseObject(jsonStr), Map.class); // Map dataMap = JSONObject.toJavaObject(JSONObject.parseObject(jsonStr), Map.class);
dataMap.put("word", word); // dataMap.put("word", word);
coll.save(new BasicDBObject(dataMap)); // coll.save(new BasicDBObject(dataMap));
} // }
} // }
} // }
//
} //}
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment