Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
W
wechat
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
wechat
Commits
2c702467
Commit
2c702467
authored
Apr 19, 2019
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
升级采集核心包
parent
a9af9087
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
915 additions
and
910 deletions
+915
-910
pom.xml
+3
-3
src/main/java/com/zhiwei/wechat/account/WechatAccountFans.java
+2
-1
src/main/java/com/zhiwei/wechat/account/WechatAccountInfo.java
+2
-1
src/main/java/com/zhiwei/wechat/comment/WechatCommentList.java
+170
-167
src/main/java/com/zhiwei/wechat/history/WechatDataFromHistory.java
+485
-485
src/main/java/com/zhiwei/wechat/readAndLike/AriticleContent.java
+6
-5
src/main/java/com/zhiwei/wechat/readAndLike/WeChatReadAndLike.java
+185
-185
src/main/java/com/zhiwei/wechat/search/WechatAritcleSearch.java
+2
-2
src/main/java/com/zhiwei/wechat/search/WechatCount.java
+1
-1
src/main/java/com/zhiwei/wechat/search/WechatIndex.java
+4
-3
src/test/java/com/zhiwei/wechat/example/WechatDataFromHistoryExample.java
+53
-53
src/test/java/com/zhiwei/wechat/example/WechatSearchExample.java
+2
-4
No files found.
pom.xml
View file @
2c702467
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
<modelVersion>
4.0.0
</modelVersion>
<modelVersion>
4.0.0
</modelVersion>
<groupId>
com.zhiwei
</groupId>
<groupId>
com.zhiwei
</groupId>
<artifactId>
wechat
</artifactId>
<artifactId>
wechat
</artifactId>
<version>
1.1.
4
-SNAPSHOT
</version>
<version>
1.1.
5
-SNAPSHOT
</version>
<description>
<description>
知微微信采集程序,包含
知微微信采集程序,包含
1.微信历史文章采集
1.微信历史文章采集
...
@@ -85,13 +85,13 @@
...
@@ -85,13 +85,13 @@
<dependency>
<dependency>
<groupId>
com.zhiwei.tools
</groupId>
<groupId>
com.zhiwei.tools
</groupId>
<artifactId>
zhiwei-tools
</artifactId>
<artifactId>
zhiwei-tools
</artifactId>
<version>
0.1.
2
-SNAPSHOT
</version>
<version>
0.1.
3
-SNAPSHOT
</version>
<scope>
provided
</scope>
<scope>
provided
</scope>
</dependency>
</dependency>
<dependency>
<dependency>
<groupId>
com.zhiwei.crawler
</groupId>
<groupId>
com.zhiwei.crawler
</groupId>
<artifactId>
crawler-core
</artifactId>
<artifactId>
crawler-core
</artifactId>
<version>
0.3.
0
-RELEASE
</version>
<version>
0.3.
6
-RELEASE
</version>
<scope>
provided
</scope>
<scope>
provided
</scope>
</dependency>
</dependency>
</dependencies>
</dependencies>
...
...
src/main/java/com/zhiwei/wechat/account/WechatAccountFans.java
View file @
2c702467
...
@@ -18,7 +18,8 @@ public class WechatAccountFans {
...
@@ -18,7 +18,8 @@ public class WechatAccountFans {
// private static Logger logger = LoggerFactory.getLogger(WechatAccountFans.class);
// private static Logger logger = LoggerFactory.getLogger(WechatAccountFans.class);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
private
Map
<
String
,
String
>
headerMap
;
private
Map
<
String
,
String
>
headerMap
;
public
WechatAccountFans
()
public
WechatAccountFans
()
...
...
src/main/java/com/zhiwei/wechat/account/WechatAccountInfo.java
View file @
2c702467
...
@@ -20,7 +20,8 @@ import com.zhiwei.wechat.entity.WechatAccount;
...
@@ -20,7 +20,8 @@ import com.zhiwei.wechat.entity.WechatAccount;
public
class
WechatAccountInfo
{
public
class
WechatAccountInfo
{
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WechatAccountInfo
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WechatAccountInfo
.
class
);
/***
/***
...
...
src/main/java/com/zhiwei/wechat/comment/WechatCommentList.java
View file @
2c702467
/**
///**
* 获取微信文章评论
// * 获取微信文章评论
* @Title: WechatComment.java
// * @Title: WechatComment.java
* @Package com.zhiwei.wechat.comment
// * @Package com.zhiwei.wechat.comment
* @Description:获取微信文章评论
// * @Description:获取微信文章评论
* @author hero
// * @author hero
* @date 2016年6月25日 上午8:17:37
// * @date 2016年6月25日 上午8:17:37
* @version V1.0
// * @version V1.0
*/
/**
// */ /**
*
// *
*/
// */
package
com
.
zhiwei
.
wechat
.
comment
;
//package com.zhiwei.wechat.comment;
//
import
java.io.IOException
;
//import java.io.IOException;
import
java.util.List
;
//import java.util.List;
import
java.util.Map
;
//import java.util.Map;
//
import
org.slf4j.Logger
;
//import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
//import org.slf4j.LoggerFactory;
//
import
com.alibaba.fastjson.JSON
;
//import com.alibaba.fastjson.JSON;
import
com.alibaba.fastjson.JSONObject
;
//import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.tools.httpclient.HttpClientTemplateOK
;
//import com.zhiwei.crawler.core.HttpBoot;
import
com.zhiwei.wechat.entity.WechatComment
;
//import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import
com.zhiwei.wechat.readAndLike.AriticleContent
;
//import com.zhiwei.wechat.entity.WechatComment;
import
com.zhiwei.wechat.util.Tools
;
//import com.zhiwei.wechat.readAndLike.AriticleContent;
//import com.zhiwei.wechat.util.Tools;
/**
//
* @Description:获取微信文章评论
///**
* @author hero
// * @Description:获取微信文章评论
* @date 2016年6月25日 上午8:17:37
// * @author hero
*/
// * @date 2016年6月25日 上午8:17:37
public
class
WechatCommentList
{
// */
//public class WechatCommentList {
private
static
WechatComment
wc
=
new
WechatComment
();
//
// private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WechatCommentList
.
class
);
//
/**
// private static WechatComment wc = new WechatComment();
* 根据文章url获取文章评论列表
//
* @Description:
// private static Logger logger = LoggerFactory.getLogger(WechatCommentList.class);
* @param @param url
// /**
* @param @return
// * 根据文章url获取文章评论列表
* @return List<WechatComment> 返回类型
// * @Description:
*/
// * @param @param url
public
static
List
<
WechatComment
>
getWechatCommentList
(
String
url
,
String
key
)
// * @param @return
{
// * @return List<WechatComment> 返回类型
List
<
WechatComment
>
wcList
=
null
;
// */
/*处理url*/
// public static List<WechatComment> getWechatCommentList(String url,String key)
String
urlcookie
=
url
;
// {
if
(!
url
.
contains
(
"key"
)){
// List<WechatComment> wcList = null;
urlcookie
=
Tools
.
getWechatCookieUrl
(
url
,
key
);
// /*处理url*/
}
// String urlcookie = url;
// 请求头信息
// if(!url.contains("key")){
Map
<
String
,
String
>
headerMap
=
Tools
.
getWechatHeader
();
// urlcookie = Tools.getWechatCookieUrl(url, key);
Map
<
String
,
String
>
cookieMap
;
// }
try
{
// // 请求头信息
cookieMap
=
HttpClientTemplateOK
.
getCookie
(
urlcookie
,
null
,
headerMap
);
// Map<String,String> headerMap = Tools.getWechatHeader();
headerMap
.
put
(
"Referer"
,
url
);
// Map<String, String> cookieMap;
if
(
cookieMap
.
get
(
"cookie"
).
length
()>
50
){
// try {
headerMap
.
put
(
"Cookie"
,
cookieMap
.
get
(
"cookie"
)+
""
);
// cookieMap = HttpClientTemplateOK.getCookie(urlcookie, null, headerMap);
}
// headerMap.put("Referer", url);
String
appmsg_token
=
Tools
.
getAppMsgToken
(
cookieMap
.
get
(
"htmlBody"
));
// if(cookieMap.get("cookie").length()>50){
// headerMap.put("Cookie", cookieMap.get("cookie")+"");
String
biz
=
url
.
split
(
"__biz="
)[
1
].
split
(
"&"
)[
0
];
// }
String
appmsgid
=
url
.
split
(
"mid="
)[
1
].
split
(
"&"
)[
0
];
// String appmsg_token = Tools.getAppMsgToken(cookieMap.get("htmlBody"));
String
comment_id
=
AriticleContent
.
getCommentId
(
url
,
key
);
//
if
(
comment_id
!=
null
&&
appmsg_token
!=
null
)
// String biz = url.split("__biz=")[1].split("&")[0];
{
// String appmsgid = url.split("mid=")[1].split("&")[0];
String
comment_url
=
"https://mp.weixin.qq.com/mp/appmsg_comment?action=getcomment&__biz="
+
biz
// String comment_id = AriticleContent.getCommentId(url,key);
+
"&appmsgid="
+
appmsgid
+
"&idx=1&comment_id="
+
comment_id
+
"&offset=0&limit=100"
+
key
// if(comment_id!=null && appmsg_token!=null)
+
"&appmsg_token="
+
appmsg_token
;
// {
/**解析相关数据*/
// String comment_url = "https://mp.weixin.qq.com/mp/appmsg_comment?action=getcomment&__biz=" +biz
System
.
out
.
println
(
comment_url
);
// + "&appmsgid=" + appmsgid +"&idx=1&comment_id="+comment_id+"&offset=0&limit=100"+key
if
(
"0"
.
equals
(
comment_id
))
// + "&appmsg_token=" + appmsg_token;
{
// /**解析相关数据*/
logger
.
info
(
"此条微信文章没有评论"
);
// System.out.println(comment_url);
}
else
// if("0".equals(comment_id))
{
// {
try
{
// logger.info("此条微信文章没有评论");
String
htmlBody
=
HttpClientTemplateOK
.
get
(
comment_url
,
null
,
headerMap
);
// }else
if
(
htmlBody
!=
null
)
// {
{
// try {
JSONObject
json
=
JSON
.
parseObject
(
htmlBody
);
// String htmlBody = HttpClientTemplateOK.get(comment_url, null, headerMap);
wcList
=
wc
.
constructWechatComment
(
json
.
getJSONArray
(
"elected_comment"
),
url
);
// if(htmlBody!=null)
return
wcList
;
// {
}
// JSONObject json = JSON.parseObject(htmlBody);
}
catch
(
Exception
e
)
{
// wcList = wc.constructWechatComment(json.getJSONArray("elected_comment"),url);
logger
.
info
(
"解析微信文章评论列表时出现问题:"
,
e
.
fillInStackTrace
());
// return wcList;
return
null
;
// }
}
// } catch (Exception e) {
}
// logger.info("解析微信文章评论列表时出现问题:", e.fillInStackTrace());
}
// return null;
}
catch
(
IOException
e1
)
{
// }
return
null
;
// }
}
catch
(
Exception
e1
)
{
// }
e1
.
printStackTrace
();
// } catch (IOException e1) {
}
// return null;
// } catch (Exception e1) {
return
null
;
// e1.printStackTrace();
}
// }
//
// return null;
/**
// }
* @Title: getWechatCommentCount
//
* @Description: TODO(根据微信文章地址更新微信评论数)
//
* @param @param url
// /**
* @param @param key
// * @Title: getWechatCommentCount
* @param @return 设定文件
// * @Description: TODO(根据微信文章地址更新微信评论数)
* @return int 返回类型
// * @param @param url
*/
// * @param @param key
public
static
int
getWechatCommentCount
(
String
url
,
String
key
)
// * @param @return 设定文件
{
// * @return int 返回类型
System
.
out
.
println
(
url
);
// */
/*处理url*/
// public static int getWechatCommentCount(String url,String key)
String
url_new
=
url
;
// {
if
(
url
.
contains
(
"#rd"
))
// System.out.println(url);
{
// /*处理url*/
url_new
=
url
.
split
(
"#rd"
)[
0
]
+
key
;
// String url_new = url;
}
else
if
(
url
.
contains
(
"#wechat_redirect"
))
// if(url.contains("#rd"))
{
// {
url_new
=
url
.
split
(
"#wechat_redirect"
)[
0
]
+
key
;
// url_new = url.split("#rd")[0] + key;
}
// }else if(url.contains("#wechat_redirect"))
String
biz
=
url
.
split
(
"__biz="
)[
1
].
split
(
"&"
)[
0
];
// {
String
appmsgid
=
url
.
split
(
"mid="
)[
1
].
split
(
"&"
)[
0
];
// url_new = url.split("#wechat_redirect")[0] + key;
// }
/**获取网页头信息**/
// String biz = url.split("__biz=")[1].split("&")[0];
Map
<
String
,
String
>
headerMap
=
Tools
.
getWechatHeader
();
// String appmsgid = url.split("mid=")[1].split("&")[0];
/*获取评论id*/
//
String
comment_id
=
AriticleContent
.
getCommentId
(
url
,
key
);
// /**获取网页头信息**/
if
(
comment_id
!=
null
)
// Map<String,String> headerMap = Tools.getWechatHeader();
{
// /*获取评论id*/
String
comment_url
=
"http://mp.weixin.qq.com/mp/appmsg_comment?action=getcomment&__biz="
+
biz
// String comment_id = AriticleContent.getCommentId(url,key);
+
"&appmsgid="
+
appmsgid
+
"&idx=1&comment_id="
+
comment_id
+
"&offset=0&limit=100"
+
key
;
// if(comment_id!=null)
/**解析相关数据*/
// {
// String comment_url = "http://mp.weixin.qq.com/mp/appmsg_comment?action=getcomment&__biz=" +biz
if
(
"0"
.
equals
(
comment_id
))
// + "&appmsgid=" + appmsgid +"&idx=1&comment_id="+comment_id+"&offset=0&limit=100"+key;
{
// /**解析相关数据*/
logger
.
info
(
"此条微信文章没有评论"
);
//
return
0
;
// if("0".equals(comment_id))
}
else
// {
{
// logger.info("此条微信文章没有评论");
try
{
// return 0;
Map
<
String
,
String
>
cookieMap
=
HttpClientTemplateOK
.
getCookie
(
url_new
,
null
,
headerMap
);
// }else
headerMap
.
put
(
"Cookie"
,
cookieMap
.
get
(
"cookie"
));
// {
String
htmlBody
=
HttpClientTemplateOK
.
get
(
comment_url
,
null
,
headerMap
);
// try {
System
.
out
.
println
(
htmlBody
);
// Map<String,String> cookieMap = HttpClientTemplateOK.getCookie(url_new, null,headerMap);
if
(
htmlBody
!=
null
)
// headerMap.put("Cookie", cookieMap.get("cookie"));
{
// String htmlBody = HttpClientTemplateOK.get(comment_url, null,headerMap);
JSONObject
json
=
JSON
.
parseObject
(
htmlBody
);
// System.out.println(htmlBody);
return
json
.
getIntValue
(
"elected_comment_total_cnt"
);
// if(htmlBody!=null)
}
// {
}
catch
(
Exception
e
)
{
// JSONObject json = JSON.parseObject(htmlBody);
logger
.
debug
(
"更新微信文章评论数时出现问题,问题信息:"
,
e
.
getMessage
());
// return json.getIntValue("elected_comment_total_cnt");
return
-
1
;
// }
}
// } catch (Exception e) {
}
// logger.debug("更新微信文章评论数时出现问题,问题信息:",e.getMessage());
}
else
// return -1;
{
// }
logger
.
info
(
"获取评论id失败"
);
// }
return
-
1
;
// }else
}
// {
return
-
1
;
// logger.info("获取评论id失败");
}
// return -1;
// }
// return -1;
// }
}
//
//
//
//}
src/main/java/com/zhiwei/wechat/history/WechatDataFromHistory.java
View file @
2c702467
/**
///**
* 抓取微信公号历史文章数据
// * 抓取微信公号历史文章数据
* @Title: WechatDataFromHistory.java
// * @Title: WechatDataFromHistory.java
* @Package com.zhiwei.wechat.history
// * @Package com.zhiwei.wechat.history
* @Description:抓取微信公号历史文章数据
// * @Description:抓取微信公号历史文章数据
* @author hero
// * @author hero
* @date 2016年5月20日 上午10:27:19
// * @date 2016年5月20日 上午10:27:19
* @version V1.0
// * @version V1.0
*/
/**
// */ /**
*
// *
*/
// */
package
com
.
zhiwei
.
wechat
.
history
;
//package com.zhiwei.wechat.history;
import
java.net.Proxy
;
//import java.net.Proxy;
import
java.util.ArrayList
;
//import java.util.ArrayList;
import
java.util.Date
;
//import java.util.Date;
import
java.util.List
;
//import java.util.List;
import
java.util.Map
;
//import java.util.Map;
//
import
org.apache.logging.log4j.LogManager
;
//import org.apache.logging.log4j.LogManager;
import
org.apache.logging.log4j.Logger
;
//import org.apache.logging.log4j.Logger;
//
import
com.alibaba.fastjson.JSONArray
;
//import com.alibaba.fastjson.JSONArray;
import
com.alibaba.fastjson.JSONObject
;
//import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.tools.httpclient.HttpClientTemplateOK
;
//import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import
com.zhiwei.tools.timeparse.TimeUtil
;
//import com.zhiwei.tools.timeparse.TimeUtil;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
//import com.zhiwei.tools.tools.ZhiWeiTools;
import
com.zhiwei.wechat.entity.WechatAricle
;
//import com.zhiwei.wechat.entity.WechatAricle;
import
com.zhiwei.wechat.entity.WechatReadLike
;
//import com.zhiwei.wechat.entity.WechatReadLike;
import
com.zhiwei.wechat.readAndLike.AriticleContent
;
//import com.zhiwei.wechat.readAndLike.AriticleContent;
import
com.zhiwei.wechat.readAndLike.WeChatReadAndLike
;
//import com.zhiwei.wechat.readAndLike.WeChatReadAndLike;
import
com.zhiwei.wechat.util.Tools
;
//import com.zhiwei.wechat.util.Tools;
//
/**
///**
* @Description:抓取微信公号历史文章数据
// * @Description:抓取微信公号历史文章数据
* @author Bewilder Z
// * @author Bewilder Z
* @date 2016年5月20日 上午10:27:19
// * @date 2016年5月20日 上午10:27:19
*/
// */
public
class
WechatDataFromHistory
{
//public class WechatDataFromHistory {
//
private
static
final
Logger
log
=
LogManager
.
getLogger
(
WechatDataFromHistory
.
class
);
// private static final Logger log = LogManager.getLogger(WechatDataFromHistory.class);
//
private
boolean
updateLike
=
false
;
//是否更新点赞阅读数
// private boolean updateLike = false; //是否更新点赞阅读数
//
private
Date
endDate
=
null
;
//采集的结束时间
// private Date endDate = null; //采集的结束时间
//
private
List
<
WechatAricle
>
result
;
//数据总集合
// private List<WechatAricle> result; //数据总集合
//
private
Map
<
String
,
String
>
headerMap
;
//请求头信息
// private Map<String,String> headerMap; //请求头信息
//
private
boolean
follow
=
false
;
//是否关注
// private boolean follow = false; //是否关注
//
private
String
nextId
;
//采集下一页id
// private String nextId; //采集下一页id
//
private
String
key
;
//更新点赞阅读的key
// private String key; //更新点赞阅读的key
//
private
boolean
next
=
true
;
//判断是否有下一页
// private boolean next = true; //判断是否有下一页
//
//
/**
// /**
*
// *
* @Description:
// * @Description:
* @param @param updateLike 是否更新点赞数和阅读数
// * @param @param updateLike 是否更新点赞数和阅读数
* @param @param endDate 采集结束时间
// * @param @param endDate 采集结束时间
* @return
// * @return
*/
// */
public
WechatDataFromHistory
(
boolean
updateLike
,
String
endDate
,
// public WechatDataFromHistory(boolean updateLike,String endDate,
boolean
follow
)
// boolean follow)
{
// {
this
.
updateLike
=
updateLike
;
// this.updateLike = updateLike;
result
=
new
ArrayList
<
WechatAricle
>();
// result = new ArrayList<WechatAricle>();
headerMap
=
Tools
.
getWechatHeader
();
// headerMap = Tools.getWechatHeader();
this
.
follow
=
follow
;
// this.follow = follow;
if
(
endDate
==
null
)
// if(endDate == null)
{
// {
endDate
=
"2011-12-30"
;
// endDate = "2011-12-30";
}
// }
this
.
endDate
=
TimeUtil
.
parseTime
(
endDate
,
"yyyy-MM-dd"
);
// this.endDate = TimeUtil.parseTime(endDate, "yyyy-MM-dd");
}
// }
//
public
WechatDataFromHistory
(){}
// public WechatDataFromHistory(){}
//
//
/**
// /**
* @Title: validateKey
// * @Title: validateKey
* @author hero
// * @author hero
* @Description: 验证链接是否有效
// * @Description: 验证链接是否有效
* @param @param key
// * @param @param key
* @param @return 设定文件
// * @param @return 设定文件
* @return boolean 返回类型
// * @return boolean 返回类型
*/
// */
public
static
boolean
validateKey
(
String
key
,
Proxy
proxy
){
// public static boolean validateKey(String key,Proxy proxy){
String
url
=
"http://mp.weixin.qq.com/s?__biz=MzIwNDk0NzEyOQ==&mid=2247484544&idx=2&sn=f64abc4b15badd77b70ca942bc5176d3&scene=0#wechat_redirect"
;
// String url = "http://mp.weixin.qq.com/s?__biz=MzIwNDk0NzEyOQ==&mid=2247484544&idx=2&sn=f64abc4b15badd77b70ca942bc5176d3&scene=0#wechat_redirect";
try
{
// try {
WechatReadLike
wrl
=
WeChatReadAndLike
.
getReadAndLike
(
url
,
key
,
proxy
);
// WechatReadLike wrl = WeChatReadAndLike.getReadAndLike(url, key,proxy);
if
(
wrl
.
getRead
()>
0
){
// if(wrl.getRead()>0){
return
true
;
// return true;
}
else
{
// }else{
return
false
;
// return false;
}
// }
}
catch
(
Exception
e
)
{
// } catch (Exception e) {
log
.
debug
(
"验证微信key有效性时出现问题,问题为:{}"
,
e
.
getMessage
());
// log.debug("验证微信key有效性时出现问题,问题为:{}",e.getMessage());
return
false
;
// return false;
}
// }
}
// }
//
//
/**
// /**
* @Title: getWechatDataFromHistory
// * @Title: getWechatDataFromHistory
* @author hero
// * @author hero
* @Description: 获取微信公众号历史文章
// * @Description: 获取微信公众号历史文章
* @param @param url
// * @param @param url
* @param @return 设定文件
// * @param @return 设定文件
* @return List<WechatAricle> 返回类型
// * @return List<WechatAricle> 返回类型
*/
// */
public
List
<
WechatAricle
>
getWechatDataFromHistory
(
String
url
,
Proxy
proxy
)
// public List<WechatAricle> getWechatDataFromHistory(String url,Proxy proxy)
{
// {
log
.
info
(
"url:::::::::{}"
,
url
);
// log.info("url:::::::::{}",url);
if
(
updateLike
)
// if(updateLike)
{
// {
key
=
"&uin"
+
url
.
split
(
"uin"
)[
1
].
split
(
"devicetype"
)[
0
];
// key = "&uin"+url.split("uin")[1].split("devicetype")[0];
}
// }
//
String
firstText
=
null
;
// String firstText = null;
try
{
// try {
Map
<
String
,
String
>
cookieMap
=
HttpClientTemplateOK
.
getCookie
(
url
,
proxy
,
headerMap
);
// Map<String,String> cookieMap = HttpClientTemplateOK.getCookie(url, proxy, headerMap);
//获取cookie
// //获取cookie
if
(
cookieMap
.
get
(
"cookie"
)!=
null
){
// if(cookieMap.get("cookie")!=null){
headerMap
.
put
(
"Referer"
,
url
);
//// headerMap.put("Referer", url);
headerMap
.
put
(
"Cookie"
,
cookieMap
.
get
(
"cookie"
));
// headerMap.put("Cookie", cookieMap.get("cookie"));
firstText
=
HttpClientTemplateOK
.
get
(
url
,
proxy
,
headerMap
);
// firstText = HttpClientTemplateOK.get(url, proxy,headerMap);
}
// }
}
catch
(
Exception
e
)
{
// } catch (Exception e) {
e
.
printStackTrace
();
// e.printStackTrace();
return
null
;
// return null;
}
// }
//采集下一页数据参数,并获取第一页数据
// //采集下一页数据参数,并获取第一页数据
if
(
firstText
!=
null
){
// if(firstText != null){
String
appToken
=
getFirst
(
firstText
,
proxy
);
// String appToken = getFirst(firstText,proxy);
if
(
follow
==
true
)
// if(follow == true)
{
// {
next
=
true
;
// next = true;
}
// }
//
//循环读取微信公号历史数据
// //循环读取微信公号历史数据
int
i
=
1
;
// int i = 1;
while
(
next
)
// while(next)
{
// {
String
nextUrl
=
url
.
replace
(
"home"
,
"getmsg"
)
+
"&f=json&&offset="
+
i
*
10
+
"&count=10&scene=123&is_ok=1&appmsg_token="
+
appToken
;
// String nextUrl = url.replace("home", "getmsg") + "&f=json&&offset=" + i*10 + "&count=10&scene=123&is_ok=1&appmsg_token="+appToken;
log
.
info
(
"下一页地址:{}"
,
nextUrl
);
// log.info("下一页地址:{}", nextUrl);
try
{
// try {
//采集下一页数据参数,并获取此页数据
// //采集下一页数据参数,并获取此页数据
headerMap
.
put
(
"Referer"
,
nextUrl
);
// headerMap.put("Referer", nextUrl);
String
nextJson
=
HttpClientTemplateOK
.
get
(
nextUrl
,
proxy
,
headerMap
);
// String nextJson = HttpClientTemplateOK.get(nextUrl, proxy,headerMap);
nextId
=
getNext
(
nextJson
,
proxy
);
// nextId = getNext(nextJson,proxy);
// System.out.println("nextId============"+nextId);
//// System.out.println("nextId============"+nextId);
// if(nextId.equals("1")){
//// if(nextId.equals("1")){
// next = true;
//// next = true;
// }else{
//// }else{
// next = false;
//// next = false;
//// }
// ZhiWeiTools.sleep(3000);
// } catch (Exception e) {
// e.printStackTrace();
// next = false;
// }
// i++;
// }
//
// }else{
// next = false;
// }
//
// return result;
// }
//
// /***
// * 获取公号历史文章
// * @Description:
// * @param @param url
// * @param @param source
// * @param @return
// * @return List<Wechat> 返回类型
// */
// @Deprecated
// public List<WechatAricle> getWechatDataFromHistoryOld(String url,Proxy proxy)
// {
// log.info("url:::::::::{}",url);
// if(updateLike)
// {
// key = "&uin"+url.split("uin")[1].split("devicetype")[0];
// }
//
// String firstText = null;
// try {
// Map<String,String> cookieMap = HttpClientTemplateOK.getCookie(url, proxy,headerMap);
// //获取cookie
// headerMap.put("Referer", url);
// headerMap.put("Cookie", cookieMap.get("cookie"));
// firstText = HttpClientTemplateOK.get(url, proxy,headerMap);
// } catch (Exception e) {
// e.printStackTrace();
// return null;
// }
// //采集下一页数据参数,并获取第一页数据
// nextId = getFirstOld(firstText,proxy);
// boolean next = false; //判断是否有下一页
// if(follow == true)
// {
// next = true;
// }
// //循环读取微信公号历史数据
// while(next)
// {
// //没有下一页数据,结束
// if(nextId==null)
// {
// next = false;
// }else //采集下一页数据
// {
// String nextUrl = url.replace("home", "getmsg") + "&f=json&frommsgid=" + nextId + "&count=10&scene=123&is_ok=1";
// log.info("下一页地址:{}", nextUrl);
// try {
// //采集下一页数据参数,并获取此页数据
// headerMap.put("Referer", nextUrl);
// String nextJson = HttpClientTemplateOK.get(nextUrl, null,headerMap);
// nextId = getNext(nextJson,proxy);
// System.out.println("nextId-============="+nextId);
// ZhiWeiTools.sleep(3000);
// } catch (Exception e) {
// e.printStackTrace();
// next = false;
// }
//
// }
// }
//
// return result;
// }
//
//
//
//
// /**
// * @Title: getFirst
// * @Description: TODO(解析第一页数据)
// * @param @param fristText
// * @param @param source
// * @param @return 设定文件
// * @return String 返回类型
// */
// @Deprecated
// public String getFirstOld(String fristText,Proxy proxy)
// {
// fristText = fristText
// .replace("\\", "")
// .replace("'", "")
// .replace(" ", " ")
// .replace(""", "\"")
// .replace("&", "&")
// .replace("amp;", "")
// .replace("'", "'")
// .replace(">", ">")
// .replace("<", "<")
// .replace("¥", "¥")
// ;
// log.info("开始解析第一页文章");
// // 截取HTML得到有用的JSON;替换掉转义字符
// if(fristText.contains("msgList ="))
// {
// fristText = fristText.split("msgList = ")[1].split("}}]};")[0]+"}}]}";
// return getNextIdAndAnalysis(fristText,proxy);
// }
// return null;
// }
//
// /**
// * @Title: getFirst
// * @author hero
// * @Description: 截取appmsg_token 值
// * @param @param fristText
// * @param @return 设定文件
// * @return String 返回类型
// */
// private String getFirst(String fristText,Proxy proxy)
// {
// String next = null;
//
// fristText = fristText
// .replace("\\", "")
// .replace("'", "")
// .replace(" ", " ")
// .replace(""", "\"")
// .replace("&", "&")
// .replace("amp;", "")
// .replace("'", "'")
// .replace(">", ">")
// .replace("<", "<")
// .replace("¥", "¥")
// ;
// log.info("开始解析第一页文章");
//
// if(fristText.contains("window.appmsg_token = ") && fristText.contains("msgList =")){
// try {
// next = fristText.split("window.appmsg_token = \"")[1].split("\";")[0];
// fristText = fristText.split("msgList = ")[1].split("}}]};")[0]+"}}]}";
// getNextIdAndAnalysis(fristText,proxy);
// return next;
// } catch (Exception e) {
// log.info("截取下一页数据参数出现问题:{}",fristText);
// return null;
// }
// }else{
// log.info("获取下一页数据参数出现问题....{}",fristText);
// }
// return null;
// }
//
//
// /***
// * 解析微信历史文章下一页数据
// * @Description:
// * @param @param nextJosn
// * @param @param key
// * @param @param source
// * @param @return
// * @return String 返回类型
// */
// private String getNext(String nextHtml,Proxy proxy)
// {
// try {
// JSONObject nextJosn = JSONObject.parseObject(nextHtml);
// String nextText = null;
// if(null != nextJosn.getString("general_msg_list"))
// {
// nextText = nextJosn.getString("general_msg_list");
// getNextIdAndAnalysis(nextText,proxy);
// }else
// {
// log.info("下一页数据解析出现问题:{}", nextHtml);
// next = false;
// return null;
// }
// return nextJosn.getInteger("can_msg_continue")+"";
//
// } catch (Exception e) {
// log.info("解析数据有问题:{}", nextHtml);
// next = false;
// return null;
// }
//
//
// }
//
// /**
// * @Title: getNextIdAndAnalysis
// * @Description: TODO(解析下一页所需字段,及数据解析)
// * @param @param text
// * @param @param source
// * @param @return 设定文件
// * @return String 返回类型
// */
// public String getNextIdAndAnalysis(String text,Proxy proxy)
// {
// JSONObject wechatData = JSONObject.parseObject(text);
// JSONArray dataList = wechatData.getJSONArray("list");
// if(dataList.size()==0)
// {
// nextId = null;
// next = false;
// }else
// {
// for(int i = 0;i<dataList.size();i++)
// {
// JSONObject data = dataList.getJSONObject(i);
// //解析时间
// JSONObject dateJson = data.getJSONObject("comm_msg_info");
// long dateTime = dateJson.getLong("datetime");
// Date time = new Date(dateTime*1000);
// nextId = dateJson.getString("id");
// if(time.before(endDate))
// {
// next = false;
// nextId = null;
// }
// //解析文本数据
// if(null != data.getJSONObject("app_msg_ext_info"))
// {
// //解析头条数据
// JSONObject first = data.getJSONObject("app_msg_ext_info");
// String content_url = first.getString("content_url");
// String content = first.getString("digest");
// String title = first.getString("title");
// String img_url = first.getString("cover");
//
// WechatAricle wechatFirst = setWechat(content_url,title
// , time, img_url, content,"true",proxy);
// result.add(wechatFirst);
// //解析其余数据
// JSONArray otherJSON = first.getJSONArray("multi_app_msg_item_list");
// if(otherJSON != null)
// {
// for(int j = 0;j<otherJSON.size();j++)
// {
// JSONObject other = otherJSON.getJSONObject(j);
// String other_content_url = other.getString("content_url");
// String other_content = other.getString("digest");
// String other_title = other.getString("title");
// String other_img_url = other.getString("cover");
//
// WechatAricle wechatOther = setWechat(other_content_url,other_title
// , time, other_img_url, other_content,"false",proxy);
// result.add(wechatOther);
// }
// }else
// {
// log.info("只有一条数据");
// }
// }
ZhiWeiTools
.
sleep
(
3000
);
// }else
}
catch
(
Exception
e
)
{
// {
e
.
printStackTrace
();
// log.info("不存在相关文章......");
next
=
false
;
// }
}
// }
i
++;
// }
}
// return nextId;
// }
}
else
{
//
next
=
false
;
//
}
//
// /**
return
result
;
// * 给实体类对象赋值
}
// * @Description:
// * @param @param url
/***
// * @param @param title
* 获取公号历史文章
// * @param @param source
* @Description:
// * @param @param datetime
* @param @param url
// * @param @param key
* @param @param source
// * @param @return
* @param @return
// * @return Wechat 返回类型
* @return List<Wechat> 返回类型
// */
*/
// private WechatAricle setWechat(String url,String title,
@Deprecated
// Date datetime,String imgUrl,String content,String isFirst,Proxy proxy)
public
List
<
WechatAricle
>
getWechatDataFromHistoryOld
(
String
url
,
Proxy
proxy
)
// {
{
// WechatAricle wechat = new WechatAricle();
log
.
info
(
"url:::::::::{}"
,
url
);
// wechat.setId(url);
if
(
updateLike
)
// wechat.setTitle(title);
{
// wechat.setTime(datetime);
key
=
"&uin"
+
url
.
split
(
"uin"
)[
1
].
split
(
"devicetype"
)[
0
];
// wechat.setImgUrl(imgUrl);
}
// wechat.setIsFirst(isFirst);
// //采集文章
String
firstText
=
null
;
// String source = null;
try
{
// Map<String,String> sacMap = AriticleContent.getAriticleContent(url);
Map
<
String
,
String
>
cookieMap
=
HttpClientTemplateOK
.
getCookie
(
url
,
proxy
,
headerMap
);
// if(sacMap!=null)
//获取cookie
// {
headerMap
.
put
(
"Referer"
,
url
);
// source = sacMap.get("source");
headerMap
.
put
(
"Cookie"
,
cookieMap
.
get
(
"cookie"
));
// content = sacMap.get("content");
firstText
=
HttpClientTemplateOK
.
get
(
url
,
proxy
,
headerMap
);
// }
}
catch
(
Exception
e
)
{
// //更新点赞阅读数
e
.
printStackTrace
();
// if(updateLike)
return
null
;
// {
}
// url = url.replaceAll("amp;", "").replaceAll("amp;", "");
//采集下一页数据参数,并获取第一页数据
// try {
nextId
=
getFirstOld
(
firstText
,
proxy
);
// Thread.sleep(2000);
boolean
next
=
false
;
//判断是否有下一页
// WechatReadLike wcrl = WeChatReadAndLike.getReadAndLike(url,key,proxy);
if
(
follow
==
true
)
// wechat.setLikeNum(wcrl.getLike());
{
// wechat.setReadNum(wcrl.getRead());
next
=
true
;
// } catch (InterruptedException e) {
}
// wechat.setLikeNum(-1);
//循环读取微信公号历史数据
// wechat.setReadNum(-1);
while
(
next
)
// log.error("获取点赞阅读数出现为题,问题:{}", e.getMessage());
{
// }
//没有下一页数据,结束
// }
if
(
nextId
==
null
)
//
{
// wechat.setContent(content);
next
=
false
;
// wechat.setSource(source);
}
else
//采集下一页数据
// return wechat;
{
// }
String
nextUrl
=
url
.
replace
(
"home"
,
"getmsg"
)
+
"&f=json&frommsgid="
+
nextId
+
"&count=10&scene=123&is_ok=1"
;
//
log
.
info
(
"下一页地址:{}"
,
nextUrl
);
//
try
{
// public static void main(String[] args) {
//采集下一页数据参数,并获取此页数据
// String url = "http:\\/\\/mp.weixin.qq.com\\/s?__biz=MjM5NTU0MzI0MA==&mid=2661648551&idx=1&sn=74397ab60184beb0abd4dd3f8c62f7d3&chksm=bda7c9008ad04016a5eac88c8dd18b6bc5797ae780c56e307e11781af257a68a52b7f87dfd8e&scene=27#wechat_redirect";
headerMap
.
put
(
"Referer"
,
nextUrl
);
// System.out.println(url.replaceAll("\\", ""));
String
nextJson
=
HttpClientTemplateOK
.
get
(
nextUrl
,
null
,
headerMap
);
//
nextId
=
getNext
(
nextJson
,
proxy
);
// }
System
.
out
.
println
(
"nextId-============="
+
nextId
);
//
ZhiWeiTools
.
sleep
(
3000
);
//
}
catch
(
Exception
e
)
{
//}
e
.
printStackTrace
();
next
=
false
;
}
}
}
return
result
;
}
/**
* @Title: getFirst
* @Description: TODO(解析第一页数据)
* @param @param fristText
* @param @param source
* @param @return 设定文件
* @return String 返回类型
*/
@Deprecated
public
String
getFirstOld
(
String
fristText
,
Proxy
proxy
)
{
fristText
=
fristText
.
replace
(
"\\"
,
""
)
.
replace
(
"'"
,
""
)
.
replace
(
" "
,
" "
)
.
replace
(
"""
,
"\""
)
.
replace
(
"&"
,
"&"
)
.
replace
(
"amp;"
,
""
)
.
replace
(
"'"
,
"'"
)
.
replace
(
">"
,
">"
)
.
replace
(
"<"
,
"<"
)
.
replace
(
"¥"
,
"¥"
)
;
log
.
info
(
"开始解析第一页文章"
);
// 截取HTML得到有用的JSON;替换掉转义字符
if
(
fristText
.
contains
(
"msgList ="
))
{
fristText
=
fristText
.
split
(
"msgList = "
)[
1
].
split
(
"}}]};"
)[
0
]+
"}}]}"
;
return
getNextIdAndAnalysis
(
fristText
,
proxy
);
}
return
null
;
}
/**
* @Title: getFirst
* @author hero
* @Description: 截取appmsg_token 值
* @param @param fristText
* @param @return 设定文件
* @return String 返回类型
*/
private
String
getFirst
(
String
fristText
,
Proxy
proxy
)
{
String
next
=
null
;
fristText
=
fristText
.
replace
(
"\\"
,
""
)
.
replace
(
"'"
,
""
)
.
replace
(
" "
,
" "
)
.
replace
(
"""
,
"\""
)
.
replace
(
"&"
,
"&"
)
.
replace
(
"amp;"
,
""
)
.
replace
(
"'"
,
"'"
)
.
replace
(
">"
,
">"
)
.
replace
(
"<"
,
"<"
)
.
replace
(
"¥"
,
"¥"
)
;
log
.
info
(
"开始解析第一页文章"
);
if
(
fristText
.
contains
(
"window.appmsg_token = "
)
&&
fristText
.
contains
(
"msgList ="
)){
try
{
next
=
fristText
.
split
(
"window.appmsg_token = \""
)[
1
].
split
(
"\";"
)[
0
];
fristText
=
fristText
.
split
(
"msgList = "
)[
1
].
split
(
"}}]};"
)[
0
]+
"}}]}"
;
getNextIdAndAnalysis
(
fristText
,
proxy
);
return
next
;
}
catch
(
Exception
e
)
{
log
.
info
(
"截取下一页数据参数出现问题:{}"
,
fristText
);
return
null
;
}
}
else
{
log
.
info
(
"获取下一页数据参数出现问题....{}"
,
fristText
);
}
return
null
;
}
/**
 * Parses one follow-up page of the history feed.
 *
 * <p>The response is read as JSON; its {@code general_msg_list} string is passed to
 * {@link #getNextIdAndAnalysis(String, Proxy)}. When that field is absent, or anything
 * throws, the {@code next} flag is cleared and {@code null} is returned.
 *
 * @param nextHtml response body of the follow-up request
 * @param proxy    proxy forwarded to {@code getNextIdAndAnalysis}
 * @return the {@code can_msg_continue} value rendered as a string, or {@code null} on failure
 */
private String getNext(String nextHtml, Proxy proxy) {
    try {
        JSONObject page = JSONObject.parseObject(nextHtml);
        String messageList = page.getString("general_msg_list");
        if (messageList == null) {
            log.info("下一页数据解析出现问题:{}", nextHtml);
            next = false;
            return null;
        }
        getNextIdAndAnalysis(messageList, proxy);
        // String.valueOf(Object) renders a missing value as "null", like the concatenation idiom.
        return String.valueOf(page.getInteger("can_msg_continue"));
    } catch (Exception e) {
        log.info("解析数据有问题:{}", nextHtml);
        next = false;
        return null;
    }
}
/**
 * Walks the {@code list} array of a history page, turning every message into
 * article entries and tracking the id used to request the following page.
 *
 * <p>For each message the headline article ({@code app_msg_ext_info}) and any entries of
 * {@code multi_app_msg_item_list} are built through {@code setWechat} and appended to
 * {@code result}. {@code nextId} is set to the message id, or cleared (together with the
 * {@code next} flag) when the list is empty or the message is older than {@code endDate}.
 *
 * @param text  JSON text containing a {@code list} array
 * @param proxy proxy forwarded to {@code setWechat}
 * @return the resulting {@code nextId}, possibly {@code null}
 */
public String getNextIdAndAnalysis(String text, Proxy proxy) {
    JSONArray messages = JSONObject.parseObject(text).getJSONArray("list");
    if (messages.isEmpty()) {
        nextId = null;
        next = false;
        return nextId;
    }

    for (int idx = 0; idx < messages.size(); idx++) {
        JSONObject message = messages.getJSONObject(idx);

        // Publication time arrives in epoch seconds.
        JSONObject info = message.getJSONObject("comm_msg_info");
        long epochSeconds = info.getLong("datetime");
        Date publishedAt = new Date(epochSeconds * 1000);
        nextId = info.getString("id");
        if (publishedAt.before(endDate)) {
            next = false;
            nextId = null;
        }

        JSONObject headline = message.getJSONObject("app_msg_ext_info");
        if (headline == null) {
            log.info("不存在相关文章......");
            continue;
        }

        // Headline article of the push.
        result.add(setWechat(
                headline.getString("content_url"),
                headline.getString("title"),
                publishedAt,
                headline.getString("cover"),
                headline.getString("digest"),
                "true",
                proxy));

        // Remaining articles of the same push, if any.
        JSONArray siblings = headline.getJSONArray("multi_app_msg_item_list");
        if (siblings == null) {
            log.info("只有一条数据");
            continue;
        }
        for (int k = 0; k < siblings.size(); k++) {
            JSONObject sibling = siblings.getJSONObject(k);
            result.add(setWechat(
                    sibling.getString("content_url"),
                    sibling.getString("title"),
                    publishedAt,
                    sibling.getString("cover"),
                    sibling.getString("digest"),
                    "false",
                    proxy));
        }
    }
    return nextId;
}
/**
 * Builds a {@code WechatAricle} from the fields parsed out of the history feed.
 *
 * <p>The article page is fetched through {@code AriticleContent.getAriticleContent};
 * when that returns a map, its {@code source} and {@code content} entries are used
 * (the {@code content} argument is then replaced). When {@code updateLike} is set,
 * read and like counts are fetched after a two second pause.
 *
 * @param url      article URL, also used as the entity id
 * @param title    article title
 * @param datetime publication time
 * @param imgUrl   cover image URL
 * @param content  digest used as content when the article page yields no map
 * @param isFirst  {@code "true"} for the headline article of a push, {@code "false"} otherwise
 * @param proxy    proxy used for the read/like request
 * @return the populated article; read and like counts are {@code -1} if the pause was interrupted
 */
private WechatAricle setWechat(String url, String title, Date datetime, String imgUrl,
        String content, String isFirst, Proxy proxy) {
    WechatAricle wechat = new WechatAricle();
    wechat.setId(url);
    wechat.setTitle(title);
    wechat.setTime(datetime);
    wechat.setImgUrl(imgUrl);
    wechat.setIsFirst(isFirst);

    // Fetch the article body and its source account.
    String source = null;
    Map<String, String> sacMap = AriticleContent.getAriticleContent(url);
    if (sacMap != null) {
        source = sacMap.get("source");
        content = sacMap.get("content");
    }

    // Refresh read and like counts.
    if (updateLike) {
        // Two literal passes, as before, strip leftovers of HTML-encoded ampersands.
        url = url.replace("amp;", "").replace("amp;", "");
        try {
            Thread.sleep(2000);
            WechatReadLike wcrl = WeChatReadAndLike.getReadAndLike(url, key, proxy);
            wechat.setLikeNum(wcrl.getLike());
            wechat.setReadNum(wcrl.getRead());
        } catch (InterruptedException e) {
            // Restore the flag so callers up the stack can observe the interruption.
            Thread.currentThread().interrupt();
            wechat.setLikeNum(-1);
            wechat.setReadNum(-1);
            log.error("获取点赞阅读数出现为题,问题:{}", e.getMessage(), e);
        }
    }

    wechat.setContent(content);
    wechat.setSource(source);
    return wechat;
}
/**
 * Ad-hoc manual check: prints a sample article URL with its JSON-style
 * backslash escapes removed.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String url = "http:\\/\\/mp.weixin.qq.com\\/s?__biz=MjM5NTU0MzI0MA==&mid=2661648551&idx=1&sn=74397ab60184beb0abd4dd3f8c62f7d3&chksm=bda7c9008ad04016a5eac88c8dd18b6bc5797ae780c56e307e11781af257a68a52b7f87dfd8e&scene=27#wechat_redirect";
    // String.replace matches the target literally. The former replaceAll call compiled a
    // lone backslash as a regular expression and failed with PatternSyntaxException.
    System.out.println(url.replace("\\", ""));
}
}
src/main/java/com/zhiwei/wechat/readAndLike/AriticleContent.java
View file @
2c702467
...
@@ -17,8 +17,8 @@ import org.jsoup.nodes.Document;
...
@@ -17,8 +17,8 @@ import org.jsoup.nodes.Document;
import
org.slf4j.Logger
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.
tools.httpclient.HttpClientTemplateOK
;
import
com.zhiwei.
crawler.core.HttpBoot
;
import
com.zhiwei.
wechat.comment.WechatCommentList
;
import
com.zhiwei.
crawler.utils.RequestUtils
;
import
com.zhiwei.wechat.util.Tools
;
import
com.zhiwei.wechat.util.Tools
;
/**
/**
...
@@ -28,7 +28,8 @@ import com.zhiwei.wechat.util.Tools;
...
@@ -28,7 +28,8 @@ import com.zhiwei.wechat.util.Tools;
*/
*/
public
class
AriticleContent
{
public
class
AriticleContent
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WechatCommentList
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
AriticleContent
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
/**
/**
...
@@ -47,7 +48,7 @@ public class AriticleContent{
...
@@ -47,7 +48,7 @@ public class AriticleContent{
String
content
=
null
;
String
content
=
null
;
String
source
=
null
;
String
source
=
null
;
try
{
try
{
String
htmlBody
=
HttpClientTemplateOK
.
get
(
url
,
null
,
headerMap
);
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
(
);
Document
document
=
Jsoup
.
parse
(
htmlBody
);
Document
document
=
Jsoup
.
parse
(
htmlBody
);
content
=
document
.
select
(
"div.rich_media_content"
).
text
();
content
=
document
.
select
(
"div.rich_media_content"
).
text
();
if
(
htmlBody
.
contains
(
"var nickname = "
)){
if
(
htmlBody
.
contains
(
"var nickname = "
)){
...
@@ -79,7 +80,7 @@ public class AriticleContent{
...
@@ -79,7 +80,7 @@ public class AriticleContent{
headerMap
.
put
(
"Referer"
,
url
);
headerMap
.
put
(
"Referer"
,
url
);
String
comment_id
=
null
;
String
comment_id
=
null
;
try
{
try
{
String
htmlBody
=
HttpClientTemplateOK
.
get
(
url
,
null
,
headerMap
);
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
(
);
if
(
htmlBody
!=
null
)
if
(
htmlBody
!=
null
)
{
{
Document
document
=
Jsoup
.
parse
(
htmlBody
);
Document
document
=
Jsoup
.
parse
(
htmlBody
);
...
...
src/main/java/com/zhiwei/wechat/readAndLike/WeChatReadAndLike.java
View file @
2c702467
/**
/
//
**
* @Title: WindowsClient.java
//
* @Title: WindowsClient.java
* @Package com.wcral.client
//
* @Package com.wcral.client
* @Description: TODO(用一句话描述该文件做什么)
//
* @Description: TODO(用一句话描述该文件做什么)
* @author Bewilder Z
//
* @author Bewilder Z
* @date 2015年8月6日 上午9:13:37
//
* @date 2015年8月6日 上午9:13:37
* @version V1.0
//
* @version V1.0
*/
//
*/
//
package
com
.
zhiwei
.
wechat
.
readAndLike
;
//
package com.zhiwei.wechat.readAndLike;
//
import
java.net.Proxy
;
//
import java.net.Proxy;
import
java.net.URLEncoder
;
//
import java.net.URLEncoder;
import
java.util.HashMap
;
//
import java.util.HashMap;
import
java.util.Map
;
//
import java.util.Map;
//
import
org.jsoup.Jsoup
;
//
import org.jsoup.Jsoup;
import
org.jsoup.nodes.Document
;
//
import org.jsoup.nodes.Document;
import
org.jsoup.nodes.Element
;
//
import org.jsoup.nodes.Element;
import
org.jsoup.select.Elements
;
//
import org.jsoup.select.Elements;
import
org.slf4j.Logger
;
//
import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
//
import org.slf4j.LoggerFactory;
//
import
com.alibaba.fastjson.JSONObject
;
//
import com.alibaba.fastjson.JSONObject;
import
com.zhiwei.tools.httpclient.HttpClientTemplateOK
;
//
import com.zhiwei.tools.httpclient.HttpClientTemplateOK;
import
com.zhiwei.wechat.entity.WechatReadLike
;
//
import com.zhiwei.wechat.entity.WechatReadLike;
import
com.zhiwei.wechat.search.WechatAritcleSearch
;
//
import com.zhiwei.wechat.search.WechatAritcleSearch;
import
com.zhiwei.wechat.util.Tools
;
//
import com.zhiwei.wechat.util.Tools;
//
/**
/
//
**
* @ClassName: WindowsClient
//
* @ClassName: WindowsClient
* @Description: TODO(利用windows客戶端進行点赞阅读抓取)
//
* @Description: TODO(利用windows客戶端進行点赞阅读抓取)
* @author Abner Liu
//
* @author Abner Liu
* @date 2015年8月6日 上午9:13:37
//
* @date 2015年8月6日 上午9:13:37
*/
//
*/
public
class
WeChatReadAndLike
{
//
public class WeChatReadAndLike {
//
//
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WeChatReadAndLike
.
class
);
//
private static Logger logger = LoggerFactory.getLogger(WeChatReadAndLike.class);
//
/**
//
/**
*
//
*
* @Title: getReadAndLike
//
* @Title: getReadAndLike
* @Description: 利用windows客戶端進行点赞阅读抓取
//
* @Description: 利用windows客戶端進行点赞阅读抓取
* @param url
//
* @param url
* 微信文章链接
//
* 微信文章链接
* @return WeChatReadLike 微信文章实体类
//
* @return WeChatReadLike 微信文章实体类
*
//
*
*/
//
*/
public
static
WechatReadLike
getReadAndLike
(
String
url
,
String
key
,
Proxy
proxy
){
//
public static WechatReadLike getReadAndLike(String url,String key,Proxy proxy){
WechatReadLike
wLike
=
new
WechatReadLike
();
//
WechatReadLike wLike = new WechatReadLike();
try
{
//
try {
String
urlcookie
=
Tools
.
getWechatCookieUrl
(
url
,
key
);
//
String urlcookie = Tools.getWechatCookieUrl(url, key);
// 请求头信息
//
// 请求头信息
Map
<
String
,
String
>
headerMap
=
Tools
.
getWechatHeader
();
//
Map<String,String> headerMap = Tools.getWechatHeader();
Map
<
String
,
String
>
cookieMap
=
HttpClientTemplateOK
.
getCookie
(
urlcookie
,
proxy
,
headerMap
);
//
Map<String,String> cookieMap = HttpClientTemplateOK.getCookie(urlcookie, proxy, headerMap);
//
headerMap
.
put
(
"Referer"
,
urlcookie
);
//
headerMap.put("Referer", urlcookie);
headerMap
.
put
(
"Cookie"
,
cookieMap
.
get
(
"cookie"
)+
""
);
//
headerMap.put("Cookie", cookieMap.get("cookie")+"");
String
appmsg_token
=
Tools
.
getAppMsgToken
(
cookieMap
.
get
(
"htmlBody"
));
//
String appmsg_token = Tools.getAppMsgToken(cookieMap.get("htmlBody"));
System
.
out
.
println
(
"appmsg_token==========="
+
appmsg_token
);
//
System.out.println("appmsg_token==========="+appmsg_token);
String
urlLike
=
Tools
.
getWechatLikeUrl
(
urlcookie
,
appmsg_token
);
//
String urlLike = Tools.getWechatLikeUrl(urlcookie,appmsg_token);
//设置post请求参数
//
//设置post请求参数
HashMap
<
String
,
Object
>
postMap
=
new
HashMap
<
String
,
Object
>();
//
HashMap<String,Object> postMap = new HashMap<String,Object>();
postMap
.
put
(
"is_only_read"
,
"1"
);
//
postMap.put("is_only_read", "1");
//
//获取数据
//
//获取数据
String
htsString
=
HttpClientTemplateOK
.
post
(
urlLike
,
proxy
,
headerMap
,
postMap
);
//
String htsString = HttpClientTemplateOK.post(urlLike, proxy, headerMap ,postMap);
System
.
out
.
println
(
htsString
);
//
System.out.println(htsString);
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
htsString
);
//
JSONObject jsonObject = JSONObject.parseObject(htsString);
String
like_num
=
jsonObject
.
getJSONObject
(
"appmsgstat"
)
//
String like_num = jsonObject.getJSONObject("appmsgstat")
.
get
(
"like_num"
).
toString
();
//
.get("like_num").toString();
//
String
real_read_num
=
""
;
//
String real_read_num = "";
try
{
//
try {
real_read_num
=
jsonObject
.
getJSONObject
(
"appmsgstat"
)
//
real_read_num = jsonObject.getJSONObject("appmsgstat")
.
get
(
"real_read_num"
).
toString
();
//
.get("real_read_num").toString();
if
(
real_read_num
.
equals
(
"0"
))
//
if(real_read_num.equals("0"))
{
//
{
real_read_num
=
jsonObject
.
getJSONObject
(
"appmsgstat"
)
//
real_read_num = jsonObject.getJSONObject("appmsgstat")
.
get
(
"read_num"
).
toString
();
//
.get("read_num").toString();
}
//
}
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
real_read_num
=
jsonObject
.
getJSONObject
(
"appmsgstat"
)
//
real_read_num = jsonObject.getJSONObject("appmsgstat")
.
get
(
"read_num"
).
toString
();
//
.get("read_num").toString();
}
//
}
wLike
.
setUrl
(
url
);
//
wLike.setUrl(url);
wLike
.
setRead
(
Integer
.
valueOf
(
real_read_num
));
//
wLike.setRead(Integer.valueOf(real_read_num));
wLike
.
setLike
(
Integer
.
valueOf
(
like_num
));
//
wLike.setLike(Integer.valueOf(like_num));
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
wLike
.
setUrl
(
url
);
//
wLike.setUrl(url);
wLike
.
setRead
(-
1
);
//
wLike.setRead(-1);
wLike
.
setLike
(-
1
);
//
wLike.setLike(-1);
}
//
}
return
wLike
;
//
return wLike;
}
//
}
//
//
//
/**
//
/**
* @Title: getReadAndLike
//
* @Title: getReadAndLike
* @Description: TODO(通过搜狗微信获取阅读数)
//
* @Description: TODO(通过搜狗微信获取阅读数)
* @param @param word
//
* @param @param word
* @param @param time
//
* @param @param time
* @param @param link
//
* @param @param link
* @param @param wxId
//
* @param @param wxId
* @param @return 设定文件
//
* @param @return 设定文件
* @return WeChatReadLike 返回类型
//
* @return WeChatReadLike 返回类型
*/
//
*/
public
static
WechatReadLike
getReadAndLike
(
String
word
,
//
public static WechatReadLike getReadAndLike(String word,
String
time
,
String
link
,
String
wxId
){
//
String time,String link,String wxId){
//
WechatReadLike
wLike
=
new
WechatReadLike
();
//
WechatReadLike wLike = new WechatReadLike();
//
Map
<
String
,
String
>
headerMap
=
new
HashMap
<
String
,
String
>();
//
Map<String,String> headerMap = new HashMap<String,String>();
headerMap
.
put
(
"Upgrade-Insecure-Requests"
,
"1"
);
//
headerMap.put("Upgrade-Insecure-Requests", "1");
headerMap
.
put
(
"User-Agent"
,
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
);
//
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36");
headerMap
.
put
(
"Host"
,
"weixin.sogou.com"
);
//
headerMap.put("Host","weixin.sogou.com");
//
if
(
time
.
contains
(
" "
))
//
if(time.contains(" "))
{
//
{
time
=
time
.
split
(
" "
)[
0
];
//
time = time.split(" ")[0];
}
//
}
//
String
openid
=
WechatAritcleSearch
.
getOpenId
(
wxId
,
null
);
//
String openid = WechatAritcleSearch.getOpenId(wxId,null);
logger
.
info
(
"openid is {}"
,
openid
);
//
logger.info("openid is {}", openid);
//
try
{
//
try {
String
url
=
"http://weixin.sogou.com/weixin?query="
+
URLEncoder
.
encode
(
word
,
"utf-8"
)
//
String url = "http://weixin.sogou.com/weixin?query=" + URLEncoder.encode(word,"utf-8")
+
"&type=2&ie=utf8&page=1&interation=&tsn=5&ft="
+
time
+
"&et="
+
time
//
+ "&type=2&ie=utf8&page=1&interation=&tsn=5&ft="+time + "&et="+ time
+
"&wxid="
+
openid
+
"&usip="
+
wxId
+
"&from=tool"
;
//
+ "&wxid="+openid+"&usip="+wxId+"&from=tool";
//
logger
.
info
(
"url is {}"
,
url
);
//
logger.info("url is {}",url);
//
String
htmlBody
=
HttpClientTemplateOK
.
get
(
url
,
null
,
headerMap
);
//
String htmlBody = HttpClientTemplateOK.get(url, null, headerMap);
if
(
htmlBody
!=
null
)
//
if(htmlBody!=null)
{
//
{
try
{
//
try {
// 解析数据
//
// 解析数据
Document
document
=
Jsoup
.
parse
(
htmlBody
);
//
Document document = Jsoup.parse(htmlBody);
Elements
elements
=
document
.
select
(
"div.news-box"
)
//
Elements elements = document.select("div.news-box")
.
select
(
"ul.news-list"
).
select
(
"li"
);
//
.select("ul.news-list").select("li");
for
(
Element
element
:
elements
)
//
for (Element element : elements)
{
//
{
try
{
//
try {
String
url_link
=
element
.
select
(
"div.txt-box"
).
select
(
"h3 >a"
).
attr
(
"href"
);
//
String url_link = element.select("div.txt-box").select("h3 >a").attr("href");
int
readNum
=
0
;
//
int readNum = 0;
try
{
//
try {
readNum
=
Integer
.
valueOf
(
element
.
select
(
"div.txt-box"
)
//
readNum = Integer.valueOf(element.select("div.txt-box")
.
select
(
"div.s-p"
).
select
(
"span.s1"
).
text
().
trim
());
//
.select("div.s-p").select("span.s1").text().trim());
logger
.
info
(
"readNum is {}"
,
readNum
);
//
logger.info("readNum is {}", readNum);
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
readNum
=
0
;
//
readNum = 0;
}
//
}
if
(
url_link
.
contains
(
"&chksm="
))
//
if(url_link.contains("&chksm="))
{
//
{
url_link
=
url_link
.
split
(
"&chksm="
)[
0
]
+
"&3rd"
+
url_link
.
split
(
"&3rd"
)[
1
];
//
url_link = url_link.split("&chksm=")[0] + "&3rd" + url_link.split("&3rd")[1];
}
//
}
//
if
(
link
.
equals
(
url_link
))
//
if(link.equals(url_link))
{
//
{
wLike
.
setUrl
(
link
);
//
wLike.setUrl(link);
wLike
.
setRead
(
readNum
);
//
wLike.setRead(readNum);
break
;
//
break;
}
//
}
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
continue
;
//
continue;
}
//
}
}
//
}
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
wLike
.
setUrl
(
link
);
//
wLike.setUrl(link);
wLike
.
setRead
(
0
);
//
wLike.setRead(0);
return
null
;
//
return null;
}
//
}
}
//
}
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
e
.
printStackTrace
();
//
e.printStackTrace();
wLike
.
setUrl
(
link
);
//
wLike.setUrl(link);
wLike
.
setRead
(
0
);
//
wLike.setRead(0);
return
null
;
//
return null;
}
//
}
return
wLike
;
//
return wLike;
}
//
}
//
}
//
}
src/main/java/com/zhiwei/wechat/search/WechatAritcleSearch.java
View file @
2c702467
...
@@ -35,8 +35,8 @@ import com.zhiwei.wechat.entity.WechatAricle;
...
@@ -35,8 +35,8 @@ import com.zhiwei.wechat.entity.WechatAricle;
public
class
WechatAritcleSearch
{
public
class
WechatAritcleSearch
{
private
static
Logger
logger
=
LogManager
.
getLogger
(
WechatAritcleSearch
.
class
);
private
static
Logger
logger
=
LogManager
.
getLogger
(
WechatAritcleSearch
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
/**
/**
*
*
* @Title: wechatKeywordSearch
* @Title: wechatKeywordSearch
...
...
src/main/java/com/zhiwei/wechat/search/WechatCount.java
View file @
2c702467
...
@@ -13,7 +13,7 @@ import com.zhiwei.crawler.utils.RequestUtils;
...
@@ -13,7 +13,7 @@ import com.zhiwei.crawler.utils.RequestUtils;
public
class
WechatCount
{
public
class
WechatCount
{
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
public
static
int
getWechatCountByWord
(
String
word
,
String
cookie
,
public
static
int
getWechatCountByWord
(
String
word
,
String
cookie
,
String
startTime
,
String
endTime
,
Proxy
proxy
)
{
String
startTime
,
String
endTime
,
Proxy
proxy
)
{
...
...
src/main/java/com/zhiwei/wechat/search/WechatIndex.java
View file @
2c702467
...
@@ -5,7 +5,8 @@ import java.util.HashMap;
...
@@ -5,7 +5,8 @@ import java.util.HashMap;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Map.Entry
;
import
java.util.Map.Entry
;
import
com.zhiwei.tools.httpclient.HttpClientTemplateOK
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.utils.RequestUtils
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.timeparse.TimeParse
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
import
com.zhiwei.tools.tools.URLCodeUtil
;
...
@@ -17,7 +18,7 @@ import com.zhiwei.tools.tools.URLCodeUtil;
...
@@ -17,7 +18,7 @@ import com.zhiwei.tools.tools.URLCodeUtil;
*/
*/
public
class
WechatIndex
{
public
class
WechatIndex
{
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
public
static
void
main
(
String
[]
args
)
throws
Exception
{
public
static
void
main
(
String
[]
args
)
throws
Exception
{
...
@@ -53,7 +54,7 @@ public class WechatIndex {
...
@@ -53,7 +54,7 @@ public class WechatIndex {
headerMap
.
put
(
"Accept"
,
"application/json, text/javascript, */*; q=0.01"
);
headerMap
.
put
(
"Accept"
,
"application/json, text/javascript, */*; q=0.01"
);
headerMap
.
put
(
"Cookie"
,
"mmsearch_user_key=AStrb5tD4ruSixIDu1cVpTA=; pass_ticket=bbP7ZT5xEUrYe+oOa6ACUw+mgR05TAGGA1P9xnC7fIyaaOnwkWyNQK8aYtva+Gxj; pgv_pvi=4102772736; pgv_si=s1607859200; pgv_pvid=153672700"
);
headerMap
.
put
(
"Cookie"
,
"mmsearch_user_key=AStrb5tD4ruSixIDu1cVpTA=; pass_ticket=bbP7ZT5xEUrYe+oOa6ACUw+mgR05TAGGA1P9xnC7fIyaaOnwkWyNQK8aYtva+Gxj; pgv_pvi=4102772736; pgv_si=s1607859200; pgv_pvid=153672700"
);
String
htmlBody
=
HttpClientTemplateOK
.
get
(
url
,
null
,
headerMap
);
String
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
,
headerMap
)).
body
().
string
(
);
System
.
out
.
println
(
htmlBody
);
System
.
out
.
println
(
htmlBody
);
Thread
.
sleep
(
3000
);
Thread
.
sleep
(
3000
);
...
...
src/test/java/com/zhiwei/wechat/example/WechatDataFromHistoryExample.java
View file @
2c702467
/**
/
//
**
* @Title: WechatDataFromHistoryExample.java
//
* @Title: WechatDataFromHistoryExample.java
* @Package com.zhiwei.wechat.example
//
* @Package com.zhiwei.wechat.example
* @Description:微信采集历史文章测试
//
* @Description:微信采集历史文章测试
* @author hero
//
* @author hero
* @date 2016年5月20日 下午5:47:56
//
* @date 2016年5月20日 下午5:47:56
* @version V1.0
//
* @version V1.0
*/
//
*/
/**
/
//
**
*
//
*
*/
//
*/
package
com
.
zhiwei
.
wechat
.
example
;
//
package com.zhiwei.wechat.example;
//
import
java.util.ArrayList
;
//
import java.util.ArrayList;
import
java.util.List
;
//
import java.util.List;
//
import
com.zhiwei.wechat.entity.WechatAricle
;
//
import com.zhiwei.wechat.entity.WechatAricle;
import
com.zhiwei.wechat.history.WechatDataFromHistory
;
//
import com.zhiwei.wechat.history.WechatDataFromHistory;
//
/**
/
//
**
* @Description:微信采集历史文章测试
//
* @Description:微信采集历史文章测试
* @author hero
//
* @author hero
* @date 2016年5月20日 下午5:47:56
//
* @date 2016年5月20日 下午5:47:56
*/
//
*/
public
class
WechatDataFromHistoryExample
{
//
public class WechatDataFromHistoryExample {
//
public
static
void
main
(
String
[]
args
)
{
//
public static void main(String[] args) {
boolean
updateLike
=
false
;
//
boolean updateLike = false;
boolean
follow
=
true
;
//
boolean follow = true;
String
endDate
=
"2017-01-27"
;
//
String endDate = "2017-01-27";
try
{
//
try {
List
<
String
>
urllist
=
new
ArrayList
<
String
>();
//
List<String> urllist = new ArrayList<String>();
urllist
.
add
(
"https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5NTU0MzI0MA==&scene=124&uin=MTE4OTQyMDc0MQ%3D%3D&key=df62f0a2a8b7732dca2d1f886b5bd15c398e1fe92940e352837738ea99e5ddc531fc24d5d57a5a43eab11df1e4db7db80aeeddfc06c8f410e159d80df4f822c07c555b4b536b52593f132f39c6868698&devicetype=Windows+8&version=6203005d&lang=zh_CN&a8scene=7&pass_ticket=nMJ5n97UE%2BxdJKqeKp3ovi8slnCMNSYF6Tu%2FgsQ4Phk%2Bc%2B%2BDM5AQy7LT6H%2BBQTc5&winzoom=1"
);
//
urllist.add("https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5NTU0MzI0MA==&scene=124&uin=MTE4OTQyMDc0MQ%3D%3D&key=df62f0a2a8b7732dca2d1f886b5bd15c398e1fe92940e352837738ea99e5ddc531fc24d5d57a5a43eab11df1e4db7db80aeeddfc06c8f410e159d80df4f822c07c555b4b536b52593f132f39c6868698&devicetype=Windows+8&version=6203005d&lang=zh_CN&a8scene=7&pass_ticket=nMJ5n97UE%2BxdJKqeKp3ovi8slnCMNSYF6Tu%2FgsQ4Phk%2Bc%2B%2BDM5AQy7LT6H%2BBQTc5&winzoom=1");
System
.
out
.
println
(
urllist
.
size
());
//
System.out.println(urllist.size());
int
i
=
0
;
//
int i = 0;
for
(
String
s
:
urllist
)
{
//
for (String s : urllist) {
System
.
out
.
println
(
"i==========="
+
i
);
//
System.out.println("i===========" + i);
String
url
=
s
.
split
(
","
)[
0
];
//
String url = s.split(",")[0];
// String source = s.split(",")[1];
//
//
String source = s.split(",")[1];
//
WechatDataFromHistory
wdfh
=
new
WechatDataFromHistory
(
updateLike
,
endDate
,
follow
);
//
WechatDataFromHistory wdfh = new WechatDataFromHistory(updateLike,endDate,follow);
System
.
out
.
println
(
url
);
//
System.out.println(url);
List
<
WechatAricle
>
list
=
wdfh
.
getWechatDataFromHistory
(
url
,
null
);
//
List<WechatAricle> list = wdfh.getWechatDataFromHistory(url,null);
System
.
out
.
println
(
"list size is :"
+
list
.
size
());
//
System.out.println("list size is :" + list.size());
//
}
//
}
}
catch
(
Exception
e
)
{
//
} catch (Exception e) {
e
.
printStackTrace
();
//
e.printStackTrace();
}
//
}
}
//
}
//
//
}
//
}
src/test/java/com/zhiwei/wechat/example/WechatSearchExample.java
View file @
2c702467
...
@@ -40,13 +40,11 @@ public class WechatSearchExample{
...
@@ -40,13 +40,11 @@ public class WechatSearchExample{
public
static
void
wechatSearchExample
()
throws
UnknownHostException
public
static
void
wechatSearchExample
()
throws
UnknownHostException
{
{
List
<
String
>
wordList
=
new
ArrayList
<
String
>();
List
<
String
>
wordList
=
new
ArrayList
<
String
>();
wordList
.
add
(
"工业互联网"
);
wordList
.
add
(
"京东"
);
String
idOrName
=
"吴晓波频道"
;
for
(
String
word
:
wordList
)
for
(
String
word
:
wordList
)
{
{
try
{
try
{
List
<
WechatAricle
>
list
=
WechatAritcleSearch
.
wechatKeywordSearch
ByAccount
(
word
,
idOrName
,
"2017-12-01"
,
"2018-12-01"
,
ProxyHolder
.
SOUGOU_INNER_PROXY
);
List
<
WechatAricle
>
list
=
WechatAritcleSearch
.
wechatKeywordSearch
(
word
,
5
,
null
,
"2019-04-08"
,
"2019-04-08"
,
ProxyHolder
.
SOUGOU_INNER_PROXY
.
getProxy
()
);
System
.
out
.
println
(
"======"
+
list
.
size
());
System
.
out
.
println
(
"======"
+
list
.
size
());
for
(
WechatAricle
wechat
:
list
){
for
(
WechatAricle
wechat
:
list
){
System
.
out
.
println
(
wechat
.
getTitle
());
System
.
out
.
println
(
wechat
.
getTitle
());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment