Commit 0348e2a1 by shenjunjie

调整微信链接获取方式

parent 4bd06f63
......@@ -6,49 +6,109 @@ import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.proxy.ProxyHolder;
import com.zhiwei.crawler.core.utils.RequestUtils;
import okhttp3.Request;
import okhttp3.Response;
public class WechatReal {
/** Class logger; used when a cookie is (re)initialised to record the proxy address and cookie. */
private static final Logger logger = LogManager.getLogger(WechatReal.class);
/** HTTP client shared by all instances, built with its cookie jar switched on. */
private static final HttpBoot httpBoot = new HttpBoot.Builder().useCookieJar(true).build();
/** Matches every {@code url += '...';} statement in a fetched page; group 1 is one fragment of the real link. */
private static final Pattern PATTERN = Pattern.compile("url \\+= \'(.*?)\';");
/**
 * Cookie header value cached on this instance. It is {@code null} until the first lookup
 * initialises it and is overwritten whenever a lookup decides the cookie has expired.
 * NOTE(review): the field is read and written without synchronisation - confirm that an
 * instance is not shared between threads.
 */
private String cookie = null;
/**
* Resolves the real link behind the given link, using a proxy obtained from
* {@code ProxyHolder.NAT_HEAVY_PROXY}.
*
* NOTE(review): this text comes from a commit diff view, so the removed and the added
* versions of the signature and of the body lines appear back to back below; only one
* line of each pair belongs to a given revision.
*
* @param originalUrl the link to resolve
* @return the string produced by {@code getFinalUrl} for that link
* @throws Exception any failure raised while resolving the link (the signature declares
* {@code Exception}, not just {@code IOException})
*/
public static String getRealLink(String originalUrl) throws Exception{
public String getRealLink(String originalUrl) throws Exception {
Proxy proxy = ProxyHolder.NAT_HEAVY_PROXY.getProxy();
// Removed revision: rewrite the link with getOriginalUrl, then return the resolved value.
originalUrl = getOriginalUrl(originalUrl);
String realUrl = getFinalUrl(originalUrl, proxy);
return realUrl;
// Added revision: hand the link to getFinalUrl as-is.
return getFinalUrl(originalUrl, proxy);
}
/**
* Resolves the real link behind the given link through a caller-supplied proxy
* (described by the original author as a whitelisted proxy IP).
*
* NOTE(review): this text comes from a commit diff view, so the removed and the added
* versions of the signature and body appear back to back below; only one of them
* belongs to a given revision.
*
* @param originalUrl the link to resolve
* @param proxy the proxy every request is sent through
* @return the string produced by {@code getFinalUrl} for that link
* @throws Exception any failure raised while resolving the link
*/
public static String getRealLink(String originalUrl,Proxy proxy) throws Exception{
// Removed revision: rewrite the link with getOriginalUrl, then return the resolved value.
originalUrl = getOriginalUrl(originalUrl);
String realUrl = getFinalUrl(originalUrl, proxy);
return realUrl;
// Added revision: hand the link to getFinalUrl as-is.
public String getRealLink(String originalUrl, Proxy proxy) throws Exception {
return getFinalUrl(originalUrl, proxy);
}
/**
 * Requests {@code url} through {@code proxy} and assembles the real link from the
 * {@code url += '...';} fragments found in the returned page.
 *
 * <p>A cookie is obtained first if this instance has none. The page is requested at most
 * twice: when the body is blank or contains the captcha marker, the cookie is treated as
 * expired, refreshed, and the request repeated with the refreshed cookie.
 *
 * @param url the link to request
 * @param proxy the proxy every request is sent through
 * @return the concatenation of all matched fragments (an empty string if the page contains none)
 * @throws IOException if a request or reading a response body fails
 * @throws NullPointerException if both attempts return a blank page or a captcha prompt
 */
private String getFinalUrl(String url, Proxy proxy) throws IOException {
    Map<String, Object> headers = new HashMap<>();
    headers.put("Referer", "https://weixin.sogou.com/weixin");
    headers.put("User-Agent",
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36");
    // First use of this instance: fetch a cookie before asking for the link.
    if (null == cookie) {
        initCookie(url, headers, proxy);
    }
    for (int i = 0; i < 2; i++) {
        // Set the header on every attempt: initCookie only updates the field, so without
        // this the retry would still carry the cookie that was just judged expired.
        headers.put("cookie", cookie);
        String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headers), proxy).body().string();
        if (StringUtils.isNotBlank(htmlBody) && !htmlBody.contains("验证码")) {
            StringBuilder furl = new StringBuilder();
            Matcher ma1 = PATTERN.matcher(htmlBody);
            while (ma1.find()) {
                furl.append(ma1.group(1));
            }
            return furl.toString();
        }
        // Blank page or captcha prompt: the cookie is considered expired, so refresh it.
        initCookie(url, headers, proxy);
    }
    throw new NullPointerException("ip:" + proxy.address() + "获取临时链接失败, 出现输入验证码");
}
/**
 * Requests the search page and stores the cookies it sets in {@link #cookie}.
 *
 * <p>Each {@code set-cookie} response header is cut at its first {@code ;} (dropping the
 * attributes) and the remaining {@code name=value} pairs are joined with {@code "; "}.
 *
 * @param url passed on to {@code getSearchUrl} to build the address that is requested
 * @param headers request headers sent with the search request
 * @param proxy the proxy the request is sent through; its address is written to the log
 * @throws IOException if the request fails
 */
private void initCookie(String url, Map<String, Object> headers, Proxy proxy) throws IOException {
    Request request = RequestUtils.wrapGet(getSearchUrl(url), headers);
    // Only the response headers are read, so close the response explicitly; otherwise
    // its unread body keeps the underlying connection from being released.
    try (Response response = httpBoot.syncCall(request, proxy)) {
        cookie = response.headers("set-cookie").stream()
                .map(s -> s.replaceAll(";.*", ""))
                .collect(Collectors.joining("; "));
    }
    logger.info("ip:{},初始化cookie:{}", proxy.address(), cookie);
}
/**
 * Builds the search-page address that is requested in order to obtain a cookie.
 *
 * <p>The address uses a fixed, pre-encoded query; the current time in milliseconds is
 * inserted three times (as {@code sst0} and as the last two comma-separated {@code lkt}
 * fields).
 *
 * @param url not used by the current implementation
 * @return the search URL
 */
private static String getSearchUrl(String url) {
    final String now = String.valueOf(System.currentTimeMillis());
    final String prefix =
            "https://weixin.sogou.com/weixin?type=2&query=%E8%90%A5%E9%94%80&ie=utf8&s_from=input&_sug_=y&_sug_type_=&w=01019900&sut=1314&sst0=";
    return prefix + now + "&lkt=1%2C" + now + "%2C" + now;
}
/**
* 获取转链接的中间跳转链接
*
* @param originalUrl
* @return
*/
private static String getOriginalUrl(String originalUrl){
@Deprecated
private static String getOriginalUrl(String originalUrl) {
int b = (int) (Math.floor(100 * Math.random()) + 1);
int a = originalUrl.indexOf("url=");
int c = originalUrl.indexOf("&k=");
......@@ -60,40 +120,12 @@ public class WechatReal {
return originalUrl;
}
// public static void main(String[] args) throws Exception {
// WechatReal real = new WechatReal();
// Proxy proxy = new Proxy(Type.HTTP, new InetSocketAddress("119.3.86.205", 31128));
// String url = "https://weixin.sogou.com/link?url=dn9a_-gY295K0Rci_xozVXfdMkSQTLW6cwJThYulHEtVjXrGTiVgS8FzrTzGEPrGvM6hiNXA4ZFfuz5MvdMSLVqXa8Fplpd9gowHZ2-xDps585u2obuOVGC2ke8iAlwOUW5Vlcs1qv8YeB2DBj_2dTSVEmgoED-M4y9lx6Ykc9IjDA2sWjYtSyDfEXs2p-nZB6QB9v1FTm3sgVx8MYuQh6L7kx32DJ4fKy9a6PM182aN3M2SXrGSIqAH50L-W7WN8EgDyGxD5NruL0unUdKkuw..&type=2&query=%E8%90%A5%E9%94%80&token=3ABD0306D5E9D84C3F3A954539751A493F10FC545F1FCD9F&k=61&h=M";
// String url1 = real.getRealLink(url, proxy);
// System.out.println(url1);
// }
/**
 * Obtains the temporary link through an ordinary proxy.
 *
 * <p>The search page for the link's {@code query=} value is requested first and its body
 * discarded; the link itself is then requested with that search page as referer, and the
 * result is assembled from the {@code url += '...';} fragments of the returned page.
 *
 * @param originalUrl the link to request; must contain a {@code query=} value
 * @param proxy the proxy both requests are sent through
 * @return the concatenation of all matched fragments (an empty string if the page contains none)
 * @throws IllegalArgumentException if {@code originalUrl} carries no {@code query=} value
 * @throws NullPointerException if the returned page is blank or asks for a captcha
 * @throws Exception if a request or reading a response body fails
 */
private static String getFinalUrl(String originalUrl, Proxy proxy) throws Exception {
    // Fail with a descriptive error instead of a bare ArrayIndexOutOfBoundsException
    // when the link has nothing after "query=".
    String[] queryParts = originalUrl.split("query=");
    if (queryParts.length < 2) {
        throw new IllegalArgumentException("link has no query= value: " + originalUrl);
    }
    String searchUrl = "https://weixin.sogou.com/weixin?query=" + queryParts[1];

    Map<String, String> headerMap = new HashMap<>();
    headerMap.put("Sec-Fetch-Mode", "navigate");
    headerMap.put("Sec-Fetch-User", "?1");
    headerMap.put("Sec-Fetch-Site", "same-origin");
    headerMap.put("Host", "weixin.sogou.com");
    headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36");
    headerMap.put("referer", searchUrl);

    // Request the search page before the link itself; its body is read and thrown away.
    httpBoot.syncCall(RequestUtils.wrapGet(searchUrl, headerMap), proxy).body().string();
    String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(originalUrl, headerMap), proxy).body().string();
    if (StringUtils.isNotBlank(htmlBody) && !htmlBody.contains("验证码")) {
        StringBuilder furl = new StringBuilder();
        Pattern pa1 = Pattern.compile("url \\+= \'(.*?)\';");
        Matcher ma1 = pa1.matcher(htmlBody);
        while (ma1.find()) {
            furl.append(ma1.group(1));
        }
        return furl.toString();
    } else {
        throw new NullPointerException("获取临时链接失败, 出现输入验证码");
    }
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment