Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
W
wechat
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
wechat
Commits
718abf10
Commit
718abf10
authored
Mar 19, 2019
by
yangchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
微信 关键词采集增加cookie
parent
4287aea9
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
5 deletions
+15
-5
src/main/java/com/zhiwei/wechat/account/WechatAccountInfo.java
+1
-0
src/main/java/com/zhiwei/wechat/search/WechatAritcleSearch.java
+14
-5
No files found.
src/main/java/com/zhiwei/wechat/account/WechatAccountInfo.java
View file @
718abf10
...
@@ -22,6 +22,7 @@ public class WechatAccountInfo {
...
@@ -22,6 +22,7 @@ public class WechatAccountInfo {
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
();
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WechatAccountInfo
.
class
);
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WechatAccountInfo
.
class
);
/***
/***
* @Title: getWechatAccount
* @Title: getWechatAccount
* @Description: TODO(根据帐号id查询帐号信息)
* @Description: TODO(根据帐号id查询帐号信息)
...
...
src/main/java/com/zhiwei/wechat/search/WechatAritcleSearch.java
View file @
718abf10
...
@@ -8,6 +8,7 @@ import java.util.ArrayList;
...
@@ -8,6 +8,7 @@ import java.util.ArrayList;
import
java.util.Date
;
import
java.util.Date
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Objects
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
org.apache.logging.log4j.Logger
;
...
@@ -59,10 +60,13 @@ public class WechatAritcleSearch {
...
@@ -59,10 +60,13 @@ public class WechatAritcleSearch {
* @return List<Wechat> 返回类型
* @return List<Wechat> 返回类型
*/
*/
public
static
List
<
WechatAricle
>
wechatKeywordSearch
(
String
word
,
int
tsn
,
String
startTime
,
String
endTime
,
public
static
List
<
WechatAricle
>
wechatKeywordSearch
(
String
word
,
int
tsn
,
String
startTime
,
String
endTime
,
Proxy
proxy
)
throws
Exception
,
UnsupportedEncodingException
{
Proxy
proxy
,
String
cookie
)
throws
Exception
,
UnsupportedEncodingException
{
List
<
WechatAricle
>
result
=
new
ArrayList
<
WechatAricle
>();
List
<
WechatAricle
>
result
=
new
ArrayList
<
WechatAricle
>();
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
headerMap
.
put
(
"Host"
,
"weixin.sogou.com"
);
headerMap
.
put
(
"Host"
,
"weixin.sogou.com"
);
if
(
Objects
.
nonNull
(
cookie
))
{
headerMap
.
put
(
"cookie"
,
cookie
);
}
boolean
f
=
true
;
boolean
f
=
true
;
int
page
=
1
;
int
page
=
1
;
...
@@ -119,13 +123,13 @@ public class WechatAritcleSearch {
...
@@ -119,13 +123,13 @@ public class WechatAritcleSearch {
wechat
=
new
WechatAricle
(
link
,
title
,
source
,
content
,
date
,
readNum
,
0
,
openid
,
"unknow"
);
wechat
=
new
WechatAricle
(
link
,
title
,
source
,
content
,
date
,
readNum
,
0
,
openid
,
"unknow"
);
result
.
add
(
wechat
);
result
.
add
(
wechat
);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
debug
(
"解析数据出现错误:{}"
,
e
.
getMessage
());
logger
.
debug
(
"解析数据出现错误:{}"
,
e
);
continue
;
}
}
}
}
// 解析最大可寻页码
// 解析最大可寻页码
String
pageNext
=
document
.
select
(
"[id=pagebar_container]>a"
).
text
();
String
pageNext
=
document
.
select
(
"[id=pagebar_container]>a"
).
text
();
if
(
pageNext
.
contains
(
"下一页"
))
{
if
(
pageNext
.
contains
(
"下一页"
))
{
// logger.info("采集到 {} 页" , page);
page
++;
page
++;
}
else
{
}
else
{
f
=
false
;
f
=
false
;
...
@@ -133,7 +137,7 @@ public class WechatAritcleSearch {
...
@@ -133,7 +137,7 @@ public class WechatAritcleSearch {
// logger.info("数据总页数为:{}", page);
// logger.info("数据总页数为:{}", page);
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
logger
.
debug
(
"获取数据出现问题:{}"
,
e
.
getMessage
());
logger
.
debug
(
"获取数据出现问题:{}"
,
e
.
getMessage
());
return
null
;
return
result
;
}
}
}
else
{
}
else
{
logger
.
info
(
"根据关键词获取微信文章失败,返回的数据结果集: {}"
,
htmlBody
);
logger
.
info
(
"根据关键词获取微信文章失败,返回的数据结果集: {}"
,
htmlBody
);
...
@@ -424,16 +428,21 @@ public class WechatAritcleSearch {
...
@@ -424,16 +428,21 @@ public class WechatAritcleSearch {
String
openId
=
null
;
String
openId
=
null
;
String
url
=
"https://weixin.sogou.com/weixin?zhnss=1&type=1&ie=utf8&query="
+
URLCodeUtil
.
getURLEncode
(
idOrName
,
"utf-8"
);
String
url
=
"https://weixin.sogou.com/weixin?zhnss=1&type=1&ie=utf8&query="
+
URLCodeUtil
.
getURLEncode
(
idOrName
,
"utf-8"
);
String
htmlBody
;
String
htmlBody
;
for
(
int
i
=
1
;
i
<
3
;
i
++)
{
try
{
try
{
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxyHolder
,
true
).
body
().
string
();
htmlBody
=
httpBoot
.
syncCall
(
RequestUtils
.
wrapGet
(
url
),
proxyHolder
).
body
().
string
();
System
.
out
.
println
(
htmlBody
);
System
.
out
.
println
(
htmlBody
);
if
(
htmlBody
!=
null
)
{
if
(
htmlBody
!=
null
)
{
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
json
=
JSONObject
.
parseObject
(
htmlBody
);
openId
=
json
.
getString
(
"openid"
);
openId
=
json
.
getString
(
"openid"
);
return
openId
;
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
openId
=
null
;
openId
=
null
;
}
}
}
return
openId
;
return
openId
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment