Commit 2c702467 by zhiwei

升级采集核心包

parent a9af9087
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>wechat</artifactId> <artifactId>wechat</artifactId>
<version>1.1.4-SNAPSHOT</version> <version>1.1.5-SNAPSHOT</version>
<description> <description>
知微微信采集程序,包含 知微微信采集程序,包含
1.微信历史文章采集 1.微信历史文章采集
...@@ -85,13 +85,13 @@ ...@@ -85,13 +85,13 @@
<dependency> <dependency>
<groupId>com.zhiwei.tools</groupId> <groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId> <artifactId>zhiwei-tools</artifactId>
<version>0.1.2-SNAPSHOT</version> <version>0.1.3-SNAPSHOT</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.zhiwei.crawler</groupId> <groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId> <artifactId>crawler-core</artifactId>
<version>0.3.0-RELEASE</version> <version>0.3.6-RELEASE</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
</dependencies> </dependencies>
......
...@@ -18,7 +18,8 @@ public class WechatAccountFans { ...@@ -18,7 +18,8 @@ public class WechatAccountFans {
// private static Logger logger = LoggerFactory.getLogger(WechatAccountFans.class); // private static Logger logger = LoggerFactory.getLogger(WechatAccountFans.class);
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private Map<String,String> headerMap; private Map<String,String> headerMap;
public WechatAccountFans() public WechatAccountFans()
......
...@@ -20,7 +20,8 @@ import com.zhiwei.wechat.entity.WechatAccount; ...@@ -20,7 +20,8 @@ import com.zhiwei.wechat.entity.WechatAccount;
public class WechatAccountInfo { public class WechatAccountInfo {
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static Logger logger = LoggerFactory.getLogger(WechatAccountInfo.class); private static Logger logger = LoggerFactory.getLogger(WechatAccountInfo.class);
/*** /***
......
...@@ -17,8 +17,8 @@ import org.jsoup.nodes.Document; ...@@ -17,8 +17,8 @@ import org.jsoup.nodes.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.wechat.comment.WechatCommentList; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.wechat.util.Tools; import com.zhiwei.wechat.util.Tools;
/** /**
...@@ -28,7 +28,8 @@ import com.zhiwei.wechat.util.Tools; ...@@ -28,7 +28,8 @@ import com.zhiwei.wechat.util.Tools;
*/ */
public class AriticleContent{ public class AriticleContent{
private static Logger logger = LoggerFactory.getLogger(WechatCommentList.class); private static Logger logger = LoggerFactory.getLogger(AriticleContent.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
...@@ -47,7 +48,7 @@ public class AriticleContent{ ...@@ -47,7 +48,7 @@ public class AriticleContent{
String content = null; String content = null;
String source = null; String source = null;
try { try {
String htmlBody = HttpClientTemplateOK.get(url, null,headerMap); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
content = document.select("div.rich_media_content").text(); content = document.select("div.rich_media_content").text();
if(htmlBody.contains("var nickname = ")){ if(htmlBody.contains("var nickname = ")){
...@@ -79,7 +80,7 @@ public class AriticleContent{ ...@@ -79,7 +80,7 @@ public class AriticleContent{
headerMap.put("Referer", url); headerMap.put("Referer", url);
String comment_id = null; String comment_id = null;
try { try {
String htmlBody = HttpClientTemplateOK.get(url, null,headerMap); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
if(htmlBody!=null) if(htmlBody!=null)
{ {
Document document = Jsoup.parse(htmlBody); Document document = Jsoup.parse(htmlBody);
......
...@@ -35,8 +35,8 @@ import com.zhiwei.wechat.entity.WechatAricle; ...@@ -35,8 +35,8 @@ import com.zhiwei.wechat.entity.WechatAricle;
public class WechatAritcleSearch { public class WechatAritcleSearch {
private static Logger logger = LogManager.getLogger(WechatAritcleSearch.class); private static Logger logger = LogManager.getLogger(WechatAritcleSearch.class);
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/** /**
* *
* @Title: wechatKeywordSearch * @Title: wechatKeywordSearch
......
...@@ -13,7 +13,7 @@ import com.zhiwei.crawler.utils.RequestUtils; ...@@ -13,7 +13,7 @@ import com.zhiwei.crawler.utils.RequestUtils;
public class WechatCount { public class WechatCount {
private static HttpBoot httpBoot = new HttpBoot(); private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
public static int getWechatCountByWord(String word, String cookie, public static int getWechatCountByWord(String word, String cookie,
String startTime, String endTime, Proxy proxy) { String startTime, String endTime, Proxy proxy) {
......
...@@ -5,7 +5,8 @@ import java.util.HashMap; ...@@ -5,7 +5,8 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse; import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil; import com.zhiwei.tools.tools.URLCodeUtil;
...@@ -17,7 +18,7 @@ import com.zhiwei.tools.tools.URLCodeUtil; ...@@ -17,7 +18,7 @@ import com.zhiwei.tools.tools.URLCodeUtil;
*/ */
public class WechatIndex { public class WechatIndex {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
...@@ -53,7 +54,7 @@ public class WechatIndex { ...@@ -53,7 +54,7 @@ public class WechatIndex {
headerMap.put("Accept","application/json, text/javascript, */*; q=0.01"); headerMap.put("Accept","application/json, text/javascript, */*; q=0.01");
headerMap.put("Cookie","mmsearch_user_key=AStrb5tD4ruSixIDu1cVpTA=; pass_ticket=bbP7ZT5xEUrYe+oOa6ACUw+mgR05TAGGA1P9xnC7fIyaaOnwkWyNQK8aYtva+Gxj; pgv_pvi=4102772736; pgv_si=s1607859200; pgv_pvid=153672700"); headerMap.put("Cookie","mmsearch_user_key=AStrb5tD4ruSixIDu1cVpTA=; pass_ticket=bbP7ZT5xEUrYe+oOa6ACUw+mgR05TAGGA1P9xnC7fIyaaOnwkWyNQK8aYtva+Gxj; pgv_pvi=4102772736; pgv_si=s1607859200; pgv_pvid=153672700");
String htmlBody = HttpClientTemplateOK.get(url, null,headerMap); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
System.out.println(htmlBody); System.out.println(htmlBody);
Thread.sleep(3000); Thread.sleep(3000);
......
/** ///**
* @Title: WechatDataFromHistoryExample.java // * @Title: WechatDataFromHistoryExample.java
* @Package com.zhiwei.wechat.example // * @Package com.zhiwei.wechat.example
* @Description:微信采集历史文章测试 // * @Description:微信采集历史文章测试
* @author hero // * @author hero
* @date 2016年5月20日 下午5:47:56 // * @date 2016年5月20日 下午5:47:56
* @version V1.0 // * @version V1.0
*/ // */
/** ///**
* // *
*/ // */
package com.zhiwei.wechat.example; //package com.zhiwei.wechat.example;
//
import java.util.ArrayList; //import java.util.ArrayList;
import java.util.List; //import java.util.List;
//
import com.zhiwei.wechat.entity.WechatAricle; //import com.zhiwei.wechat.entity.WechatAricle;
import com.zhiwei.wechat.history.WechatDataFromHistory; //import com.zhiwei.wechat.history.WechatDataFromHistory;
//
/** ///**
* @Description:微信采集历史文章测试 // * @Description:微信采集历史文章测试
* @author hero // * @author hero
* @date 2016年5月20日 下午5:47:56 // * @date 2016年5月20日 下午5:47:56
*/ // */
public class WechatDataFromHistoryExample { //public class WechatDataFromHistoryExample {
//
public static void main(String[] args) { // public static void main(String[] args) {
boolean updateLike = false; // boolean updateLike = false;
boolean follow = true; // boolean follow = true;
String endDate = "2017-01-27"; // String endDate = "2017-01-27";
try { // try {
List<String> urllist = new ArrayList<String>(); // List<String> urllist = new ArrayList<String>();
urllist.add("https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5NTU0MzI0MA==&scene=124&uin=MTE4OTQyMDc0MQ%3D%3D&key=df62f0a2a8b7732dca2d1f886b5bd15c398e1fe92940e352837738ea99e5ddc531fc24d5d57a5a43eab11df1e4db7db80aeeddfc06c8f410e159d80df4f822c07c555b4b536b52593f132f39c6868698&devicetype=Windows+8&version=6203005d&lang=zh_CN&a8scene=7&pass_ticket=nMJ5n97UE%2BxdJKqeKp3ovi8slnCMNSYF6Tu%2FgsQ4Phk%2Bc%2B%2BDM5AQy7LT6H%2BBQTc5&winzoom=1"); // urllist.add("https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5NTU0MzI0MA==&scene=124&uin=MTE4OTQyMDc0MQ%3D%3D&key=df62f0a2a8b7732dca2d1f886b5bd15c398e1fe92940e352837738ea99e5ddc531fc24d5d57a5a43eab11df1e4db7db80aeeddfc06c8f410e159d80df4f822c07c555b4b536b52593f132f39c6868698&devicetype=Windows+8&version=6203005d&lang=zh_CN&a8scene=7&pass_ticket=nMJ5n97UE%2BxdJKqeKp3ovi8slnCMNSYF6Tu%2FgsQ4Phk%2Bc%2B%2BDM5AQy7LT6H%2BBQTc5&winzoom=1");
System.out.println(urllist.size()); // System.out.println(urllist.size());
int i = 0; // int i = 0;
for (String s : urllist) { // for (String s : urllist) {
System.out.println("i===========" + i); // System.out.println("i===========" + i);
String url = s.split(",")[0]; // String url = s.split(",")[0];
// String source = s.split(",")[1]; //// String source = s.split(",")[1];
//
WechatDataFromHistory wdfh = new WechatDataFromHistory(updateLike,endDate,follow); // WechatDataFromHistory wdfh = new WechatDataFromHistory(updateLike,endDate,follow);
System.out.println(url); // System.out.println(url);
List<WechatAricle> list = wdfh.getWechatDataFromHistory(url,null); // List<WechatAricle> list = wdfh.getWechatDataFromHistory(url,null);
System.out.println("list size is :" + list.size()); // System.out.println("list size is :" + list.size());
//
} // }
} catch (Exception e) { // } catch (Exception e) {
e.printStackTrace(); // e.printStackTrace();
} // }
} // }
//
//
} //}
...@@ -40,13 +40,11 @@ public class WechatSearchExample{ ...@@ -40,13 +40,11 @@ public class WechatSearchExample{
public static void wechatSearchExample() throws UnknownHostException public static void wechatSearchExample() throws UnknownHostException
{ {
List<String> wordList = new ArrayList<String>(); List<String> wordList = new ArrayList<String>();
wordList.add("工业互联网"); wordList.add("京东");
String idOrName = "吴晓波频道";
for(String word : wordList) for(String word : wordList)
{ {
try { try {
List<WechatAricle> list = WechatAritcleSearch.wechatKeywordSearchByAccount(word, idOrName, "2017-12-01", "2018-12-01", ProxyHolder.SOUGOU_INNER_PROXY); List<WechatAricle> list = WechatAritcleSearch.wechatKeywordSearch(word, 5, null,"2019-04-08", "2019-04-08", ProxyHolder.SOUGOU_INNER_PROXY.getProxy());
System.out.println("======"+list.size()); System.out.println("======"+list.size());
for(WechatAricle wechat : list){ for(WechatAricle wechat : list){
System.out.println(wechat.getTitle()); System.out.println(wechat.getTitle());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment