Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
W
wechat
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
wechat
Commits
ab9c3fd4
Commit
ab9c3fd4
authored
Aug 25, 2020
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
处理微信链接出现重复拼接问题
parent
1cbcc794
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
51 additions
and
65 deletions
+51
-65
src/main/java/com/zhiwei/wechat/search/WechatAritcleSearch.java
+51
-65
No files found.
src/main/java/com/zhiwei/wechat/search/WechatAritcleSearch.java
View file @
ab9c3fd4
...
@@ -7,6 +7,7 @@ import java.net.URLEncoder;
...
@@ -7,6 +7,7 @@ import java.net.URLEncoder;
import
java.util.*
;;
import
java.util.*
;;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.JSONObject
;
import
com.zhiwei.crawler.core.proxy.ProxyHolder
;
import
com.zhiwei.crawler.core.proxy.ProxyHolder
;
import
com.zhiwei.crawler.core.utils.URIUtils
;
import
com.zhiwei.wechat.util.HtmlDownUtil
;
import
com.zhiwei.wechat.util.HtmlDownUtil
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.LogManager
;
...
@@ -20,9 +21,9 @@ import org.seimicrawler.xpath.JXDocument;
...
@@ -20,9 +21,9 @@ import org.seimicrawler.xpath.JXDocument;
import
org.seimicrawler.xpath.JXNode
;
import
org.seimicrawler.xpath.JXNode
;
/**
/**
* @author Bewilder Z
* @ClassName: WechatAritcleSearch
* @ClassName: WechatAritcleSearch
* @Description: TODO(在搜索接口根据关键词采集微信文章)
* @Description: TODO(在搜索接口根据关键词采集微信文章)
* @author Bewilder Z
* @date 2016年10月14日 上午9:40:18
* @date 2016年10月14日 上午9:40:18
*/
*/
public
class
WechatAritcleSearch
{
public
class
WechatAritcleSearch
{
...
@@ -31,18 +32,15 @@ public class WechatAritcleSearch {
...
@@ -31,18 +32,15 @@ public class WechatAritcleSearch {
/**
/**
* 根据关键词在搜狗微信搜索微信文章,不包含全文
* 根据关键词在搜狗微信搜索微信文章,不包含全文
* @Title: wechatKeywordSearch
*
* @param
* @param word 关键词
* word 关键词
* @param proxy 代理
* @param
* @param pages 需要限制返回页数的总页数(如返回前20页则传21),如没有限制页数则传null
* proxy 代理
* @param
* pages 需要限制返回页数的总页数(如返回前20页则传21),如没有限制页数则传null
* @throws
* Exception
* @return List<Wechat> 返回类型
* @return List<Wechat> 返回类型
* @throws Exception
* @Title: wechatKeywordSearch
*/
*/
public
static
List
<
WechatAricle
>
wechatKeywordSearch
(
String
word
,
Proxy
proxy
,
Integer
pages
)
throws
Exception
{
public
static
List
<
WechatAricle
>
wechatKeywordSearch
(
String
word
,
Proxy
proxy
,
Integer
pages
)
throws
Exception
{
List
<
WechatAricle
>
result
=
new
ArrayList
<>();
List
<
WechatAricle
>
result
=
new
ArrayList
<>();
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
headerMap
.
put
(
"Host"
,
"weixin.sogou.com"
);
headerMap
.
put
(
"Host"
,
"weixin.sogou.com"
);
...
@@ -51,10 +49,10 @@ public class WechatAritcleSearch {
...
@@ -51,10 +49,10 @@ public class WechatAritcleSearch {
int
page
=
1
;
int
page
=
1
;
while
(
f
)
{
while
(
f
)
{
String
searchUrl
=
"https://weixin.sogou.com/weixin?type=2&s_from=input&query="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&ie=utf8&_sug_=n&_sug_type_=&page="
+
page
;
String
searchUrl
=
"https://weixin.sogou.com/weixin?type=2&s_from=input&query="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)
+
"&ie=utf8&_sug_=n&_sug_type_=&page="
+
page
;
headerMap
.
put
(
"Referer"
,
searchUrl
);
headerMap
.
put
(
"Referer"
,
searchUrl
);
// 获取数据
// 获取数据
try
{
try
{
String
htmlBody
=
HtmlDownUtil
.
downloadHtml
(
searchUrl
,
headerMap
,
proxy
);
String
htmlBody
=
HtmlDownUtil
.
downloadHtml
(
searchUrl
,
headerMap
,
proxy
);
// 解析数据
// 解析数据
if
(
StringUtils
.
isNotBlank
(
htmlBody
))
{
if
(
StringUtils
.
isNotBlank
(
htmlBody
))
{
...
@@ -70,10 +68,10 @@ public class WechatAritcleSearch {
...
@@ -70,10 +68,10 @@ public class WechatAritcleSearch {
}
else
{
}
else
{
logger
.
info
(
"根据关键词获取微信文章失败,返回的数据结果集: {}"
,
htmlBody
);
logger
.
info
(
"根据关键词获取微信文章失败,返回的数据结果集: {}"
,
htmlBody
);
}
}
if
(
pages
!=
null
&&
pages
==
page
)
{
if
(
pages
!=
null
&&
pages
==
page
)
{
break
;
break
;
}
}
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
logger
.
error
(
"根据关键词获取微信文章失败,错误为: {}"
,
e
);
logger
.
error
(
"根据关键词获取微信文章失败,错误为: {}"
,
e
);
}
}
...
@@ -82,28 +80,18 @@ public class WechatAritcleSearch {
...
@@ -82,28 +80,18 @@ public class WechatAritcleSearch {
}
}
/**
/**
*
* @param @param word 关键词
* @Title: wechatKeywordSearch
* @param @param tsn 采集时间范围:1(1天内);2(一周内);3(一月内);4(一年内);
* @Description: 根据关键词在搜狗微信搜索微信文章,包含全文
* @param @param
* word 关键词
* @param @param
* tsn 采集时间范围:1(1天内);2(一周内);3(一月内);4(一年内);
* 5(某一时间段内与startTime和endTime配合使用)
* 5(某一时间段内与startTime和endTime配合使用)
* @param @param
* @param @param startTime 开始时间 格式为yyyy-MM-dd
* startTime 开始时间 格式为yyyy-MM-dd
* @param @param endTime 结束时间 格式为yyyy-MM-dd
* @param @param
* endTime 结束时间 格式为yyyy-MM-dd
* @param @return
* @param @return
* @param @throws
* @param @throws ZhiWeiException
* ZhiWeiException
* @param @throws UnsupportedEncodingException 设定文件
* @param @throws
* UnsupportedEncodingException 设定文件
* @return List<Wechat> 返回类型
* @return List<Wechat> 返回类型
* @Title: wechatKeywordSearch
* @Description: 根据关键词在搜狗微信搜索微信文章, 包含全文
*/
*/
public
static
List
<
WechatAricle
>
wechatKeywordSearch
(
String
word
,
public
static
List
<
WechatAricle
>
wechatKeywordSearch
(
String
word
,
Proxy
proxy
,
ProxyHolder
proxyHolder
)
throws
Exception
{
Proxy
proxy
,
ProxyHolder
proxyHolder
)
throws
Exception
{
...
@@ -114,7 +102,7 @@ public class WechatAritcleSearch {
...
@@ -114,7 +102,7 @@ public class WechatAritcleSearch {
boolean
f
=
true
;
boolean
f
=
true
;
int
page
=
1
;
int
page
=
1
;
while
(
f
)
{
while
(
f
)
{
String
searchUrl
=
"https://weixin.sogou.com/weixin?type=2&s_from=input&query="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)+
"&ie=utf8&_sug_=n&_sug_type_=&page="
+
page
;
String
searchUrl
=
"https://weixin.sogou.com/weixin?type=2&s_from=input&query="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)
+
"&ie=utf8&_sug_=n&_sug_type_=&page="
+
page
;
headerMap
.
put
(
"Referer"
,
searchUrl
);
headerMap
.
put
(
"Referer"
,
searchUrl
);
// 获取数据
// 获取数据
String
htmlBody
=
HtmlDownUtil
.
downloadHtml
(
searchUrl
,
headerMap
,
proxy
);
String
htmlBody
=
HtmlDownUtil
.
downloadHtml
(
searchUrl
,
headerMap
,
proxy
);
...
@@ -138,13 +126,14 @@ public class WechatAritcleSearch {
...
@@ -138,13 +126,14 @@ public class WechatAritcleSearch {
/**
/**
* 获取全文及来源
* 获取全文及来源
*
* @param url
* @param url
* @param proxy
* @param proxy
* @param wechatAricle
* @param wechatAricle
* @return
* @return
* @throws IOException
* @throws IOException
*/
*/
private
static
WechatAricle
getWechatAricleInfo
(
String
url
,
ProxyHolder
proxy
,
WechatAricle
wechatAricle
)
{
private
static
WechatAricle
getWechatAricleInfo
(
String
url
,
ProxyHolder
proxy
,
WechatAricle
wechatAricle
)
{
try
{
try
{
String
contentHtml
=
HtmlDownUtil
.
downloadHtml
(
url
,
HeaderTool
.
getCommonHead
(),
proxy
.
getProxy
());
String
contentHtml
=
HtmlDownUtil
.
downloadHtml
(
url
,
HeaderTool
.
getCommonHead
(),
proxy
.
getProxy
());
String
content
=
null
;
String
content
=
null
;
...
@@ -156,51 +145,51 @@ public class WechatAritcleSearch {
...
@@ -156,51 +145,51 @@ public class WechatAritcleSearch {
String
wxId
=
null
;
String
wxId
=
null
;
List
<
String
>
imgUrls
=
null
;
List
<
String
>
imgUrls
=
null
;
String
rootSource
=
null
;
String
rootSource
=
null
;
if
(
contentHtml
!=
null
)
{
if
(
contentHtml
!=
null
)
{
JXDocument
jxDocument
=
JXDocument
.
create
(
contentHtml
);
JXDocument
jxDocument
=
JXDocument
.
create
(
contentHtml
);
title
=
jxDocument
.
selNOne
(
"//h2[@id='activity-name']"
).
asElement
().
text
();
title
=
jxDocument
.
selNOne
(
"//h2[@id='activity-name']"
).
asElement
().
text
();
wxId
=
jxDocument
.
selNOne
(
"//p[@class='profile_meta'][1]/span[@class='profile_meta_value']"
).
asElement
().
text
();
wxId
=
jxDocument
.
selNOne
(
"//p[@class='profile_meta'][1]/span[@class='profile_meta_value']"
).
asElement
().
text
();
if
(
contentHtml
.
contains
(
"js_content"
))
{
if
(
contentHtml
.
contains
(
"js_content"
))
{
content
=
jxDocument
.
selNOne
(
"//div[@id='js_content']"
).
asElement
().
text
();
content
=
jxDocument
.
selNOne
(
"//div[@id='js_content']"
).
asElement
().
text
();
}
else
if
(
contentHtml
.
contains
(
"js_share_content"
))
{
}
else
if
(
contentHtml
.
contains
(
"js_share_content"
))
{
content
=
jxDocument
.
selNOne
(
"//div[@id='js_share_content']"
).
asElement
().
text
();
content
=
jxDocument
.
selNOne
(
"//div[@id='js_share_content']"
).
asElement
().
text
();
}
}
if
(
contentHtml
.
contains
(
"content_tpl"
))
{
if
(
contentHtml
.
contains
(
"content_tpl"
))
{
String
text
=
jxDocument
.
selNOne
(
"//script[@id='content_tpl']"
).
asElement
().
text
();
String
text
=
jxDocument
.
selNOne
(
"//script[@id='content_tpl']"
).
asElement
().
text
();
content
=
Jsoup
.
parse
(
text
).
text
();
content
=
Jsoup
.
parse
(
text
).
text
();
}
}
//解析文章图片地址
//解析文章图片地址
if
(
Objects
.
nonNull
(
jxDocument
.
selN
(
"//div[@id='js_content']//img"
)))
{
if
(
Objects
.
nonNull
(
jxDocument
.
selN
(
"//div[@id='js_content']//img"
)))
{
imgUrls
=
new
ArrayList
<>();
imgUrls
=
new
ArrayList
<>();
List
<
JXNode
>
imgNodeList
=
jxDocument
.
selN
(
"//div[@id='js_content']//img"
);
List
<
JXNode
>
imgNodeList
=
jxDocument
.
selN
(
"//div[@id='js_content']//img"
);
for
(
JXNode
imgNode
:
imgNodeList
)
{
for
(
JXNode
imgNode
:
imgNodeList
)
{
String
imgUrl
=
imgNode
.
selOne
(
"//img"
).
asElement
().
attr
(
"href"
);
String
imgUrl
=
imgNode
.
selOne
(
"//img"
).
asElement
().
attr
(
"href"
);
imgUrls
.
add
(
imgUrl
);
imgUrls
.
add
(
imgUrl
);
}
}
}
}
//解析来源
//解析来源
if
(
Objects
.
nonNull
(
jxDocument
.
selNOne
(
"//span[@id='copyright_logo']"
)))
{
if
(
Objects
.
nonNull
(
jxDocument
.
selNOne
(
"//span[@id='copyright_logo']"
)))
{
rootSource
=
jxDocument
.
selNOne
(
"//span[@id='profileBt']/a[@id='js_name']"
).
asElement
().
text
();
rootSource
=
jxDocument
.
selNOne
(
"//span[@id='profileBt']/a[@id='js_name']"
).
asElement
().
text
();
}
}
if
(
contentHtml
.
contains
(
"d.nick_name = "
))
{
if
(
contentHtml
.
contains
(
"d.nick_name = "
))
{
time
=
contentHtml
.
split
(
"d.ct = \""
)[
1
].
split
(
"\";"
)[
0
];
time
=
contentHtml
.
split
(
"d.ct = \""
)[
1
].
split
(
"\";"
)[
0
];
source
=
contentHtml
.
split
(
"d.nick_name = \""
)[
1
].
split
(
"\";"
)[
0
];
source
=
contentHtml
.
split
(
"d.nick_name = \""
)[
1
].
split
(
"\";"
)[
0
];
biz
=
contentHtml
.
split
(
"d.biz = \""
)[
1
].
split
(
"\""
)[
0
];
biz
=
contentHtml
.
split
(
"d.biz = \""
)[
1
].
split
(
"\""
)[
0
];
user_name
=
contentHtml
.
split
(
"d.user_name = \""
)[
1
].
split
(
"\""
)[
0
];
user_name
=
contentHtml
.
split
(
"d.user_name = \""
)[
1
].
split
(
"\""
)[
0
];
}
else
if
(
contentHtml
.
contains
(
"var nickname = "
))
{
}
else
if
(
contentHtml
.
contains
(
"var nickname = "
))
{
time
=
contentHtml
.
split
(
"var ct = \""
)[
1
].
split
(
"\";"
)[
0
];
time
=
contentHtml
.
split
(
"var ct = \""
)[
1
].
split
(
"\";"
)[
0
];
source
=
contentHtml
.
split
(
"var nickname = \""
)[
1
].
split
(
"\";"
)[
0
];
source
=
contentHtml
.
split
(
"var nickname = \""
)[
1
].
split
(
"\";"
)[
0
];
biz
=
contentHtml
.
split
(
"var appuin = \"\"||\""
)[
1
].
split
(
"\""
)[
0
];
biz
=
contentHtml
.
split
(
"var appuin = \"\"||\""
)[
1
].
split
(
"\""
)[
0
];
user_name
=
contentHtml
.
split
(
"var user_name = \""
)[
1
].
split
(
"\""
)[
0
];
user_name
=
contentHtml
.
split
(
"var user_name = \""
)[
1
].
split
(
"\""
)[
0
];
}
}
}
}
if
(
wechatAricle
==
null
)
{
if
(
wechatAricle
==
null
)
{
wechatAricle
=
new
WechatAricle
();
wechatAricle
=
new
WechatAricle
();
wechatAricle
.
setTitle
(
title
);
wechatAricle
.
setTitle
(
title
);
wechatAricle
.
setTime
(
new
Date
(
Long
.
valueOf
(
time
)*
1000
));
wechatAricle
.
setTime
(
new
Date
(
Long
.
valueOf
(
time
)
*
1000
));
wechatAricle
.
setSource
(
source
);
wechatAricle
.
setSource
(
source
);
}
}
...
@@ -219,9 +208,9 @@ public class WechatAritcleSearch {
...
@@ -219,9 +208,9 @@ public class WechatAritcleSearch {
}
}
/**
/**
* 根据关键词采集指定时间+账号的数据
* 根据关键词采集指定时间+账号的数据
*
* @param word
* @param word
* @param idOrName
* @param idOrName
* @param startTime
* @param startTime
...
@@ -236,20 +225,20 @@ public class WechatAritcleSearch {
...
@@ -236,20 +225,20 @@ public class WechatAritcleSearch {
List
<
WechatAricle
>
result
=
new
ArrayList
<
WechatAricle
>();
List
<
WechatAricle
>
result
=
new
ArrayList
<
WechatAricle
>();
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
Map
<
String
,
String
>
headerMap
=
HeaderTool
.
getCommonHead
();
headerMap
.
put
(
"Host"
,
"weixin.sogou.com"
);
headerMap
.
put
(
"Host"
,
"weixin.sogou.com"
);
if
(
idOrName
==
null
||
idOrName
.
equals
(
""
))
{
if
(
idOrName
==
null
||
idOrName
.
equals
(
""
))
{
throw
new
IllegalArgumentException
(
"要检索的昵称或id不能为空"
);
throw
new
IllegalArgumentException
(
"要检索的昵称或id不能为空"
);
}
}
String
openId
=
getOpenId
(
idOrName
,
proxyHolder
);
String
openId
=
getOpenId
(
idOrName
,
proxyHolder
);
boolean
f
=
false
;
boolean
f
=
false
;
if
(
openId
!=
null
)
{
if
(
openId
!=
null
)
{
f
=
true
;
f
=
true
;
}
}
int
page
=
1
;
int
page
=
1
;
while
(
f
)
{
while
(
f
)
{
String
searchUrl
=
"https://weixin.sogou.com/weixin?type=2&ie=utf8&query="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)
String
searchUrl
=
"https://weixin.sogou.com/weixin?type=2&ie=utf8&query="
+
URLEncoder
.
encode
(
word
,
"UTF-8"
)
+
"&tsn=5&ft="
+
startTime
+
"&et="
+
endTime
+
"&interation=&page="
+
page
+
"&wxid="
+
openId
+
"&tsn=5&ft="
+
startTime
+
"&et="
+
endTime
+
"&interation=&page="
+
page
+
"&wxid="
+
openId
+
"&usip="
+
URLEncoder
.
encode
(
idOrName
,
"UTF-8"
);
+
"&usip="
+
URLEncoder
.
encode
(
idOrName
,
"UTF-8"
);
headerMap
.
put
(
"Referer"
,
searchUrl
);
headerMap
.
put
(
"Referer"
,
searchUrl
);
// 获取数据
// 获取数据
...
@@ -274,10 +263,11 @@ public class WechatAritcleSearch {
...
@@ -274,10 +263,11 @@ public class WechatAritcleSearch {
/**
/**
* 解析数据
* 解析数据
*
* @param jxDocument
* @param jxDocument
* @return
* @return
*/
*/
private
static
List
<
WechatAricle
>
analysis
(
JXDocument
jxDocument
)
{
private
static
List
<
WechatAricle
>
analysis
(
JXDocument
jxDocument
)
{
List
<
WechatAricle
>
result
=
new
ArrayList
<
WechatAricle
>();
List
<
WechatAricle
>
result
=
new
ArrayList
<
WechatAricle
>();
// 解析数据
// 解析数据
try
{
try
{
...
@@ -291,14 +281,12 @@ public class WechatAritcleSearch {
...
@@ -291,14 +281,12 @@ public class WechatAritcleSearch {
String
putDate
=
null
;
String
putDate
=
null
;
Date
date
=
null
;
Date
date
=
null
;
WechatAricle
wechat
=
null
;
WechatAricle
wechat
=
null
;
if
(
Objects
.
nonNull
(
jxNodeList
)
&&
!
jxNodeList
.
isEmpty
())
{
if
(
Objects
.
nonNull
(
jxNodeList
)
&&
!
jxNodeList
.
isEmpty
())
{
for
(
JXNode
jxNode
:
jxNodeList
)
{
for
(
JXNode
jxNode
:
jxNodeList
)
{
try
{
try
{
title
=
jxNode
.
selOne
(
"//div[@class='txt-box']/h3/a"
).
asElement
().
text
();
title
=
jxNode
.
selOne
(
"//div[@class='txt-box']/h3/a"
).
asElement
().
text
();
link
=
jxNode
.
selOne
(
"//div[@class='txt-box']/h3/a"
).
asElement
().
attr
(
"href"
);
link
=
jxNode
.
selOne
(
"//div[@class='txt-box']/h3/a"
).
asElement
().
attr
(
"href"
);
if
(!
link
.
contains
(
"weixin.sogou.com"
)){
link
=
URIUtils
.
resolve
(
"https://weixin.sogou.com"
,
link
);
link
=
"https://weixin.sogou.com"
+
link
;
}
if
(
Objects
.
nonNull
(
jxNode
.
selOne
(
"//div[@class='txt-box']/p"
)))
{
if
(
Objects
.
nonNull
(
jxNode
.
selOne
(
"//div[@class='txt-box']/p"
)))
{
content
=
jxNode
.
selOne
(
"//div[@class='txt-box']/p"
).
asElement
().
text
();
content
=
jxNode
.
selOne
(
"//div[@class='txt-box']/p"
).
asElement
().
text
();
}
}
...
@@ -314,8 +302,8 @@ public class WechatAritcleSearch {
...
@@ -314,8 +302,8 @@ public class WechatAritcleSearch {
}
}
title
=
ZhiWeiTools
.
SBC2DBC
(
title
);
title
=
ZhiWeiTools
.
SBC2DBC
(
title
);
content
=
ZhiWeiTools
.
SBC2DBC
(
content
);
content
=
ZhiWeiTools
.
SBC2DBC
(
content
);
if
(
StringUtils
.
isNotBlank
(
title
))
{
if
(
StringUtils
.
isNotBlank
(
title
))
{
wechat
=
new
WechatAricle
(
link
,
title
,
source
,
content
,
date
,
null
,
null
,
readNum
,
0
,
openid
,
"unknow"
);
wechat
=
new
WechatAricle
(
link
,
title
,
source
,
content
,
date
,
null
,
null
,
readNum
,
0
,
openid
,
"unknow"
);
result
.
add
(
wechat
);
result
.
add
(
wechat
);
}
}
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
...
@@ -334,22 +322,20 @@ public class WechatAritcleSearch {
...
@@ -334,22 +322,20 @@ public class WechatAritcleSearch {
}
}
/**
/**
* @param @param wxId
* @param @return 设定文件
* @return String 返回类型
* @Title: getOpenId
* @Title: getOpenId
* @Description: 获取微信wxID
* @Description: 获取微信wxID
* @param @param
* wxId
* @param @return
* 设定文件
* @return String 返回类型
*/
*/
public
static
String
getOpenId
(
String
idOrName
,
ProxyHolder
proxyHolder
)
{
public
static
String
getOpenId
(
String
idOrName
,
ProxyHolder
proxyHolder
)
{
String
openId
=
null
;
String
openId
=
null
;
String
url
=
"https://weixin.sogou.com/weixin?zhnss=1&type=1&ie=utf8&query="
+
URLCodeUtil
.
getURLEncode
(
idOrName
,
"utf-8"
);
String
url
=
"https://weixin.sogou.com/weixin?zhnss=1&type=1&ie=utf8&query="
+
URLCodeUtil
.
getURLEncode
(
idOrName
,
"utf-8"
);
String
htmlBody
;
String
htmlBody
;
for
(
int
i
=
1
;
i
<
3
;
i
++)
{
for
(
int
i
=
1
;
i
<
3
;
i
++)
{
try
{
try
{
htmlBody
=
HtmlDownUtil
.
downloadHtml
(
url
,
null
,
proxyHolder
);
htmlBody
=
HtmlDownUtil
.
downloadHtml
(
url
,
null
,
proxyHolder
);
if
(
StringUtils
.
isNotBlank
(
htmlBody
))
{
if
(
StringUtils
.
isNotBlank
(
htmlBody
))
{
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
htmlBody
);
JSONObject
jsonObject
=
JSONObject
.
parseObject
(
htmlBody
);
openId
=
jsonObject
.
getString
(
"openid"
);
openId
=
jsonObject
.
getString
(
"openid"
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment