Commit 2c702467 by zhiwei

升级采集核心包

parent a9af9087
......@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>wechat</artifactId>
<version>1.1.4-SNAPSHOT</version>
<version>1.1.5-SNAPSHOT</version>
<description>
知微微信采集程序,包含
1.微信历史文章采集
......@@ -85,13 +85,13 @@
<dependency>
<groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId>
<version>0.1.2-SNAPSHOT</version>
<version>0.1.3-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.3.0-RELEASE</version>
<version>0.3.6-RELEASE</version>
<scope>provided</scope>
</dependency>
</dependencies>
......
......@@ -18,7 +18,8 @@ public class WechatAccountFans {
// private static Logger logger = LoggerFactory.getLogger(WechatAccountFans.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private Map<String,String> headerMap;
public WechatAccountFans()
......
......@@ -20,7 +20,8 @@ import com.zhiwei.wechat.entity.WechatAccount;
public class WechatAccountInfo {
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static Logger logger = LoggerFactory.getLogger(WechatAccountInfo.class);
/***
......
......@@ -17,8 +17,8 @@ import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import com.zhiwei.wechat.comment.WechatCommentList;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.wechat.util.Tools;
/**
......@@ -28,7 +28,8 @@ import com.zhiwei.wechat.util.Tools;
*/
public class AriticleContent{
private static Logger logger = LoggerFactory.getLogger(WechatCommentList.class);
private static Logger logger = LoggerFactory.getLogger(AriticleContent.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
......@@ -47,7 +48,7 @@ public class AriticleContent{
String content = null;
String source = null;
try {
String htmlBody = HttpClientTemplateOK.get(url, null,headerMap);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
Document document = Jsoup.parse(htmlBody);
content = document.select("div.rich_media_content").text();
if(htmlBody.contains("var nickname = ")){
......@@ -79,7 +80,7 @@ public class AriticleContent{
headerMap.put("Referer", url);
String comment_id = null;
try {
String htmlBody = HttpClientTemplateOK.get(url, null,headerMap);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
if(htmlBody!=null)
{
Document document = Jsoup.parse(htmlBody);
......
......@@ -35,8 +35,8 @@ import com.zhiwei.wechat.entity.WechatAricle;
public class WechatAritcleSearch {
private static Logger logger = LogManager.getLogger(WechatAritcleSearch.class);
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
/**
*
* @Title: wechatKeywordSearch
......
......@@ -13,7 +13,7 @@ import com.zhiwei.crawler.utils.RequestUtils;
public class WechatCount {
private static HttpBoot httpBoot = new HttpBoot();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
public static int getWechatCountByWord(String word, String cookie,
String startTime, String endTime, Proxy proxy) {
......
......@@ -5,7 +5,8 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.tools.tools.URLCodeUtil;
......@@ -17,7 +18,7 @@ import com.zhiwei.tools.tools.URLCodeUtil;
*/
public class WechatIndex {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
public static void main(String[] args) throws Exception {
......@@ -53,7 +54,7 @@ public class WechatIndex {
headerMap.put("Accept","application/json, text/javascript, */*; q=0.01");
headerMap.put("Cookie","mmsearch_user_key=AStrb5tD4ruSixIDu1cVpTA=; pass_ticket=bbP7ZT5xEUrYe+oOa6ACUw+mgR05TAGGA1P9xnC7fIyaaOnwkWyNQK8aYtva+Gxj; pgv_pvi=4102772736; pgv_si=s1607859200; pgv_pvid=153672700");
String htmlBody = HttpClientTemplateOK.get(url, null,headerMap);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap)).body().string();
System.out.println(htmlBody);
Thread.sleep(3000);
......
/**
* @Title: WechatDataFromHistoryExample.java
* @Package com.zhiwei.wechat.example
* @Description:微信采集历史文章测试
* @author hero
* @date 2016年5月20日 下午5:47:56
* @version V1.0
*/
/**
*
*/
package com.zhiwei.wechat.example;
import java.util.ArrayList;
import java.util.List;
import com.zhiwei.wechat.entity.WechatAricle;
import com.zhiwei.wechat.history.WechatDataFromHistory;
/**
 * Manual example / smoke test for collecting the historical articles of a
 * WeChat account through {@link WechatDataFromHistory}.
 *
 * <p>For every entry in a hard-coded URL list it builds a
 * {@code WechatDataFromHistory}, fetches the articles for that URL and prints
 * the number of articles returned.
 *
 * @author hero
 * @date 2016-05-20 17:47:56
 */
public class WechatDataFromHistoryExample {

    /**
     * Runs the example against the hard-coded profile URL(s).
     *
     * <p>Any exception raised while fetching is caught and its stack trace is
     * printed, so the program never terminates with an uncaught exception.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // Values handed straight to the WechatDataFromHistory constructor;
        // NOTE(review): their exact meaning is defined by that class - confirm there.
        boolean updateLike = false;
        boolean follow = true;
        String endDate = "2017-01-27";
        try {
            List<String> urllist = new ArrayList<String>();
            urllist.add("https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5NTU0MzI0MA==&scene=124&uin=MTE4OTQyMDc0MQ%3D%3D&key=df62f0a2a8b7732dca2d1f886b5bd15c398e1fe92940e352837738ea99e5ddc531fc24d5d57a5a43eab11df1e4db7db80aeeddfc06c8f410e159d80df4f822c07c555b4b536b52593f132f39c6868698&devicetype=Windows+8&version=6203005d&lang=zh_CN&a8scene=7&pass_ticket=nMJ5n97UE%2BxdJKqeKp3ovi8slnCMNSYF6Tu%2FgsQ4Phk%2Bc%2B%2BDM5AQy7LT6H%2BBQTc5&winzoom=1");
            System.out.println(urllist.size());
            int i = 0;
            for (String s : urllist) {
                System.out.println("i===========" + i);
                // The counter was previously never advanced, so every
                // iteration reported index 0.
                i++;
                // An entry may carry extra comma-separated fields after the
                // URL; only the first field (the URL) is used here.
                String url = s.split(",")[0];
                // String source = s.split(",")[1];
                WechatDataFromHistory wdfh = new WechatDataFromHistory(updateLike, endDate, follow);
                System.out.println(url);
                // NOTE(review): the second argument is passed as null; its
                // purpose is not visible from this file - confirm against
                // WechatDataFromHistory.
                List<WechatAricle> list = wdfh.getWechatDataFromHistory(url, null);
                System.out.println("list size is :" + list.size());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
///**
// * @Title: WechatDataFromHistoryExample.java
// * @Package com.zhiwei.wechat.example
// * @Description:微信采集历史文章测试
// * @author hero
// * @date 2016年5月20日 下午5:47:56
// * @version V1.0
// */
///**
// *
// */
//package com.zhiwei.wechat.example;
//
//import java.util.ArrayList;
//import java.util.List;
//
//import com.zhiwei.wechat.entity.WechatAricle;
//import com.zhiwei.wechat.history.WechatDataFromHistory;
//
///**
// * @Description:微信采集历史文章测试
// * @author hero
// * @date 2016年5月20日 下午5:47:56
// */
//public class WechatDataFromHistoryExample {
//
// public static void main(String[] args) {
// boolean updateLike = false;
// boolean follow = true;
// String endDate = "2017-01-27";
// try {
// List<String> urllist = new ArrayList<String>();
// urllist.add("https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5NTU0MzI0MA==&scene=124&uin=MTE4OTQyMDc0MQ%3D%3D&key=df62f0a2a8b7732dca2d1f886b5bd15c398e1fe92940e352837738ea99e5ddc531fc24d5d57a5a43eab11df1e4db7db80aeeddfc06c8f410e159d80df4f822c07c555b4b536b52593f132f39c6868698&devicetype=Windows+8&version=6203005d&lang=zh_CN&a8scene=7&pass_ticket=nMJ5n97UE%2BxdJKqeKp3ovi8slnCMNSYF6Tu%2FgsQ4Phk%2Bc%2B%2BDM5AQy7LT6H%2BBQTc5&winzoom=1");
// System.out.println(urllist.size());
// int i = 0;
// for (String s : urllist) {
// System.out.println("i===========" + i);
// String url = s.split(",")[0];
//// String source = s.split(",")[1];
//
// WechatDataFromHistory wdfh = new WechatDataFromHistory(updateLike,endDate,follow);
// System.out.println(url);
// List<WechatAricle> list = wdfh.getWechatDataFromHistory(url,null);
// System.out.println("list size is :" + list.size());
//
// }
// } catch (Exception e) {
// e.printStackTrace();
// }
// }
//
//
//}
......@@ -40,13 +40,11 @@ public class WechatSearchExample{
public static void wechatSearchExample() throws UnknownHostException
{
List<String> wordList = new ArrayList<String>();
wordList.add("工业互联网");
String idOrName = "吴晓波频道";
wordList.add("京东");
for(String word : wordList)
{
try {
List<WechatAricle> list = WechatAritcleSearch.wechatKeywordSearchByAccount(word, idOrName, "2017-12-01", "2018-12-01", ProxyHolder.SOUGOU_INNER_PROXY);
List<WechatAricle> list = WechatAritcleSearch.wechatKeywordSearch(word, 5, null,"2019-04-08", "2019-04-08", ProxyHolder.SOUGOU_INNER_PROXY.getProxy());
System.out.println("======"+list.size());
for(WechatAricle wechat : list){
System.out.println(wechat.getTitle());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment