Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
W
wechat
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
chenweiyang
wechat
Commits
0348e2a1
Commit
0348e2a1
authored
Jul 29, 2020
by
shenjunjie
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
调整微信链接获取方式
parent
4bd06f63
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
76 additions
and
44 deletions
+76
-44
src/main/java/com/zhiwei/wechat/search/WechatReal.java
+76
-44
No files found.
src/main/java/com/zhiwei/wechat/search/WechatReal.java
View file @
0348e2a1
...
@@ -6,49 +6,109 @@ import java.util.HashMap;
...
@@ -6,49 +6,109 @@ import java.util.HashMap;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.regex.Matcher
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
java.util.regex.Pattern
;
import
java.util.stream.Collectors
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.proxy.ProxyHolder
;
import
com.zhiwei.crawler.core.proxy.ProxyHolder
;
import
com.zhiwei.crawler.core.utils.RequestUtils
;
import
com.zhiwei.crawler.core.utils.RequestUtils
;
import
okhttp3.Request
;
import
okhttp3.Response
;
public
class
WechatReal
{
public
class
WechatReal
{
private
static
Logger
logger
=
LogManager
.
getLogger
(
WechatReal
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
useCookieJar
(
true
).
build
();
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
useCookieJar
(
true
).
build
();
private
static
final
Pattern
PATTERN
=
Pattern
.
compile
(
"url \\+= \'(.*?)\';"
);
private
String
cookie
=
null
;
/**
/**
* 获取真实链接
* 获取真实链接
*
* @param originalUrl
* @param originalUrl
* @return
* @return
* @throws IOException
* @throws IOException
*/
*/
public
static
String
getRealLink
(
String
originalUrl
)
throws
Exception
{
public
String
getRealLink
(
String
originalUrl
)
throws
Exception
{
Proxy
proxy
=
ProxyHolder
.
NAT_HEAVY_PROXY
.
getProxy
();
Proxy
proxy
=
ProxyHolder
.
NAT_HEAVY_PROXY
.
getProxy
();
originalUrl
=
getOriginalUrl
(
originalUrl
);
return
getFinalUrl
(
originalUrl
,
proxy
);
String
realUrl
=
getFinalUrl
(
originalUrl
,
proxy
);
return
realUrl
;
}
}
/**
/**
* 通过白名单代理ip获取真实链接
* 通过白名单代理ip获取真实链接
*
* @param originalUrl
* @param originalUrl
* @param proxy
* @param proxy
* @return
* @return
* @throws Exception
* @throws Exception
*/
*/
public
static
String
getRealLink
(
String
originalUrl
,
Proxy
proxy
)
throws
Exception
{
public
String
getRealLink
(
String
originalUrl
,
Proxy
proxy
)
throws
Exception
{
originalUrl
=
getOriginalUrl
(
originalUrl
);
return
getFinalUrl
(
originalUrl
,
proxy
);
String
realUrl
=
getFinalUrl
(
originalUrl
,
proxy
);
return
realUrl
;
}
}
/**
 * Fetches {@code url} through {@code proxy} and assembles the real link from the response body.
 *
 * <p>The real link is built by concatenating, in order, the first capture group of every
 * match of the class-level {@code PATTERN} found in the body. The cookie is initialised
 * lazily on first use and refreshed whenever a response is blank or contains the captcha
 * marker. At most two attempts are made.
 *
 * @param url   the link to request
 * @param proxy the proxy the requests are sent through
 * @return the concatenated captures (empty when the body contains no match)
 * @throws IOException          if a request fails
 * @throws NullPointerException if both attempts return a blank body or the captcha marker
 */
private String getFinalUrl(String url, Proxy proxy) throws IOException {
    Map<String, Object> headers = new HashMap<>();
    headers.put("Referer", "https://weixin.sogou.com/weixin");
    headers.put("User-Agent",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36");

    // First use: no cookie has been obtained yet.
    if (null == cookie) {
        initCookie(url, headers, proxy);
    }

    for (int attempt = 0; attempt < 2; attempt++) {
        // Set the header on every attempt so that a cookie refreshed by the previous
        // iteration is actually sent; setting it once before the loop kept the stale value.
        headers.put("cookie", cookie);

        String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headers), proxy).body().string();
        if (StringUtils.isNotBlank(htmlBody) && !htmlBody.contains("验证码")) {
            StringBuilder realUrl = new StringBuilder();
            Matcher matcher = PATTERN.matcher(htmlBody);
            while (matcher.find()) {
                realUrl.append(matcher.group(1));
            }
            return realUrl.toString();
        }

        // Blank body or captcha page: treat the cookie as expired and fetch a new one.
        initCookie(url, headers, proxy);
    }

    throw new NullPointerException("ip:" + proxy.address() + "获取临时链接失败, 出现输入验证码");
}
/**
 * Requests the search page through {@code proxy} and stores the cookies it sets.
 *
 * <p>Each {@code set-cookie} response header is truncated at its first semicolon and the
 * results are joined with {@code "; "} into the {@code cookie} field. When the response
 * carries no such header the field becomes the empty string.
 *
 * @param url     forwarded to {@code getSearchUrl}
 * @param headers request headers sent with the search request
 * @param proxy   the proxy the request is sent through
 * @throws IOException if the request fails
 */
private void initCookie(String url, Map<String, Object> headers, Proxy proxy) throws IOException {
    Request request = RequestUtils.wrapGet(getSearchUrl(url), headers);
    // The body is never read here, so the response must be closed explicitly to
    // release the underlying connection.
    try (Response response = httpBoot.syncCall(request, proxy)) {
        cookie = response.headers("set-cookie").stream()
                .map(s -> s.replaceAll(";.*", ""))
                .collect(Collectors.joining("; "));
    }
    logger.info("ip:{},初始化cookie:{}", proxy.address(), cookie);
}
/**
 * Builds the search URL that is requested in order to obtain a cookie.
 *
 * <p>The query string is fixed; only the current time in milliseconds varies, and it is
 * inserted three times (once as {@code sst0} and twice inside {@code lkt}).
 *
 * @param url not used when building the result
 * @return the search URL
 */
private static String getSearchUrl(String url) {
    String now = Long.toString(System.currentTimeMillis());
    String prefix =
            "https://weixin.sogou.com/weixin?type=2&query=%E8%90%A5%E9%94%80&ie=utf8&s_from=input&_sug_=y&_sug_type_=&w=01019900&sut=1314&sst0=";
    return prefix + now + "&lkt=1%2C" + now + "%2C" + now;
}
/**
/**
* 获取转链接的中间跳转链接
* 获取转链接的中间跳转链接
*
* @param originalUrl
* @param originalUrl
* @return
* @return
*/
*/
private
static
String
getOriginalUrl
(
String
originalUrl
){
@Deprecated
private
static
String
getOriginalUrl
(
String
originalUrl
)
{
int
b
=
(
int
)
(
Math
.
floor
(
100
*
Math
.
random
())
+
1
);
int
b
=
(
int
)
(
Math
.
floor
(
100
*
Math
.
random
())
+
1
);
int
a
=
originalUrl
.
indexOf
(
"url="
);
int
a
=
originalUrl
.
indexOf
(
"url="
);
int
c
=
originalUrl
.
indexOf
(
"&k="
);
int
c
=
originalUrl
.
indexOf
(
"&k="
);
...
@@ -60,40 +120,12 @@ public class WechatReal {
...
@@ -60,40 +120,12 @@ public class WechatReal {
return
originalUrl
;
return
originalUrl
;
}
}
// public static void main(String[] args) throws Exception {
// WechatReal real = new WechatReal();
// Proxy proxy = new Proxy(Type.HTTP, new InetSocketAddress("119.3.86.205", 31128));
// String url = "https://weixin.sogou.com/link?url=dn9a_-gY295K0Rci_xozVXfdMkSQTLW6cwJThYulHEtVjXrGTiVgS8FzrTzGEPrGvM6hiNXA4ZFfuz5MvdMSLVqXa8Fplpd9gowHZ2-xDps585u2obuOVGC2ke8iAlwOUW5Vlcs1qv8YeB2DBj_2dTSVEmgoED-M4y9lx6Ykc9IjDA2sWjYtSyDfEXs2p-nZB6QB9v1FTm3sgVx8MYuQh6L7kx32DJ4fKy9a6PM182aN3M2SXrGSIqAH50L-W7WN8EgDyGxD5NruL0unUdKkuw..&type=2&query=%E8%90%A5%E9%94%80&token=3ABD0306D5E9D84C3F3A954539751A493F10FC545F1FCD9F&k=61&h=M";
// String url1 = real.getRealLink(url, proxy);
// System.out.println(url1);
// }
/**
 * Obtains the temporary link through an ordinary proxy.
 *
 * <p>The search page for the link's {@code query=} value is requested first and its body
 * discarded, then {@code originalUrl} is requested with that search page as referer. The
 * result is the concatenation of every capture of the {@code url += '...';} pattern found
 * in the second response body.
 *
 * @param originalUrl the link to resolve; everything after its first {@code query=} is
 *                    reused as the search query
 * @param proxy       the proxy both requests are sent through
 * @return the concatenated captures (empty when the body contains no match)
 * @throws Exception if a request fails; a {@link NullPointerException} is thrown when the
 *                   body is blank or contains the captcha marker
 */
private static String getFinalUrl(String originalUrl, Proxy proxy) throws Exception {
    Map<String, String> headerMap = new HashMap<>();
    headerMap.put("Sec-Fetch-Mode", "navigate");
    headerMap.put("Sec-Fetch-User", "?1");
    headerMap.put("Sec-Fetch-Site", "same-origin");
    headerMap.put("Host", "weixin.sogou.com");
    headerMap.put("User-Agent",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36");

    String word = originalUrl.split("query=")[1];
    String searchUrl = "https://weixin.sogou.com/weixin?query=" + word;
    headerMap.put("referer", searchUrl);

    // Request the search page first; its body is read and discarded.
    httpBoot.syncCall(RequestUtils.wrapGet(searchUrl, headerMap), proxy).body().string();
    String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(originalUrl, headerMap), proxy).body().string();
    // The second put of "referer" that used to follow was a dead store: same key and
    // value as above, written after the last request had already been sent.

    if (StringUtils.isBlank(htmlBody) || htmlBody.contains("验证码")) {
        throw new NullPointerException("获取临时链接失败, 出现输入验证码");
    }

    StringBuilder realUrl = new StringBuilder();
    Matcher matcher = Pattern.compile("url \\+= \'(.*?)\';").matcher(htmlBody);
    while (matcher.find()) {
        realUrl.append(matcher.group(1));
    }
    return realUrl.toString();
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment