Commit 03e0f9fc by shenjunjie

关闭cookie-jar

parent 22ff0845
......@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>wechat</artifactId>
<version>1.3.6-SNAPSHOT</version>
<version>1.3.7-SNAPSHOT</version>
<description>
知微微信采集程序,包含
1.微信历史文章采集
......
package com.zhiwei.wechat.search;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.Proxy.Type;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
......@@ -22,7 +24,7 @@ import okhttp3.Response;
public class WechatReal {
private static Logger logger = LogManager.getLogger(WechatReal.class);
private static HttpBoot httpBoot = new HttpBoot.Builder().useCookieJar(true).build();
private static HttpBoot httpBoot = new HttpBoot.Builder().useCookieJar(false).build();
private static final Pattern PATTERN = Pattern.compile("url \\+= \'(.*?)\';");
private String cookie = null;
......@@ -57,7 +59,7 @@ public class WechatReal {
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36");
// 初次获取cookie
if (null == cookie) {
initCookie(url, headers, proxy);
initCookie(headers, proxy);
}
headers.put("cookie", cookie);
for (int i = 0; i < 2; i++) {
......@@ -67,7 +69,7 @@ public class WechatReal {
String htmlBody = response.body().string();
// cookie过期
if (StringUtils.isBlank(htmlBody) || htmlBody.contains("验证码")) {
initCookie(url, headers, proxy);
initCookie(headers, proxy);
continue;
}
StringBuilder furl = new StringBuilder();
......@@ -87,22 +89,32 @@ public class WechatReal {
throw new NullPointerException("ip:" + proxy.address() + "获取临时链接失败, 出现输入验证码");
}
/**
 * Requests the Sogou search page through the given proxy and rebuilds the {@code cookie} field
 * from the {@code set-cookie} headers of the response.
 * <p>
 * Every header value has everything from its first ';' onward removed, and the remaining
 * values are joined with "; ". Any exception is logged and swallowed; in that case the field
 * keeps the value it had before the call (possibly null).
 * <p>
 * NOTE(review): this listing carries two signature lines, two request-building lines and two
 * catch-log lines. It looks like the removed and added sides of a diff shown together - confirm
 * which side is current before editing.
 *
 * @param headers request headers handed to RequestUtils.wrapGet
 * @param proxy proxy handed to httpBoot.syncCall; its address is also written to the log
 * @return the value of the cookie field after the attempt
 */
private void initCookie(String url, Map<String, Object> headers, Proxy proxy) {
private String initCookie(Map<String, Object> headers, Proxy proxy) {
Response response = null;
// Remember whether a cookie existed before this call; it only selects which log message is used.
boolean init = false;
if (null == cookie) {
init = true;
}
try {
Request request = RequestUtils.wrapGet(getSearchUrl(url), headers);
Request request = RequestUtils.wrapGet(getSearchUrl(), headers);
response = httpBoot.syncCall(request, proxy);
// System.out.println("htmlBody:" + response.body().string());
// Debug output: prints the raw set-cookie header list to stdout.
System.out.println(response.headers("set-cookie"));
// Keep only the part before the first ';' of each header, then join the parts with "; ".
cookie = response.headers("set-cookie").stream().map(s -> s.replaceAll(";.*", ""))
.collect(Collectors.joining("; "));
if (init) {
logger.info("ip:{},初始化cookie:{}", proxy.address(), cookie);
} else {
logger.info("ip:{},重置cookie:{}", proxy.address(), cookie);
}
} catch (Exception e) {
// Failure is logged at info level and not rethrown; the cookie field is left untouched.
logger.info("ip:{},初始化cookie异常:", proxy.address(), e);
logger.info("初始化cookie异常:", e);
} finally {
// Always release the response, if one was obtained.
if (null != response) {
response.close();
}
}
return cookie;
}
/**
......@@ -112,41 +124,63 @@ public class WechatReal {
* @param url
* @return String
*/
private static String getSearchUrl(String url) {
// return "https://weixin.sogou.com/weixin?query=" + getOriginalUrl(url).split("query=")[1];
private static String getSearchUrl() {
// NOTE(review): two signature lines appear above; they look like the removed and added sides
// of a diff shown together - confirm which one is current.
// Builds a Sogou WeChat search URL with a fixed, URL-encoded query term. The only variable
// part is the current time in milliseconds, which fills the sst0 parameter and is repeated
// twice more inside the lkt parameter.
// NOTE(review): presumably StringUtils.join concatenates its arguments without a separator -
// confirm against the StringUtils class this file imports.
String timestamp = Long.toString(System.currentTimeMillis());
return StringUtils.join(
"https://weixin.sogou.com/weixin?type=2&query=%E8%90%A5%E9%94%80&ie=utf8&s_from=input&_sug_=y&_sug_type_=&w=01019900&sut=1314&sst0=",
timestamp, "&lkt=1%2C", timestamp, "%2C", timestamp);
}
// /**
// * 获取转链接的中间跳转链接
// *
// * @param originalUrl
// * @return
// */
// @Deprecated
// private static String getOriginalUrl(String originalUrl) {
// int b = (int) (Math.floor(100 * Math.random()) + 1);
// int a = originalUrl.indexOf("url=");
// int c = originalUrl.indexOf("&k=");
// String d = null;
// if (a != -1 && -1 == c) {
// d = originalUrl.substring(a + 25 + b, a + 26 + b);
// }
// originalUrl += "&k=" + b + "&h=" + d;
// return originalUrl;
// }
// public static void main(String[] args) throws Exception {
// WechatReal real = new WechatReal();
//// 119.3.86.205
//// 122.112.137.194
// Proxy proxy = new Proxy(Type.HTTP, new InetSocketAddress("119.3.86.205", 31128));
// String url = "https://weixin.sogou.com/link?url=dn9a_-gY295K0Rci_xozVXfdMkSQTLW6cwJThYulHEtVjXrGTiVgS8FzrTzGEPrGvM6hiNXA4ZFfuz5MvdMSLVqXa8Fplpd9gowHZ2-xDps585u2obuOVGC2ke8iAlwOUW5Vlcs1qv8YeB2DBj_2dTSVEmgoED-M4y9lx6Ykc9IjDA2sWjYtSyDfEXs2p-nZB6QB9v1FTm3sgVx8MYuQh6L7kx32DJ4fKy9a6PM182aN3M2SXrGSIqAH50L-W7WN8EgDyGxD5NruL0unUdKkuw..&type=2&query=%E8%90%A5%E9%94%80&token=3ABD0306D5E9D84C3F3A954539751A493F10FC545F1FCD9F&k=61&h=M";
/**
 * Manual smoke test: builds a proxy, fetches a cookie with initCookie, puts it into the request
 * headers, then requests the same link twice through testGet and prints each result.
 *
 * @param args not used
 * @throws Exception declared by the signature; nothing in the body is caught here
 */
public static void main(String[] args) throws Exception {
WechatReal real = new WechatReal();
// 122.112.137.194 X
// 122.112.163.207 X
// 119.3.86.205
// 119.3.38.9
// 121.36.135.139 X
// The HTTP proxy created on the next line is replaced immediately by the SOCKS proxy below,
// so only the SOCKS proxy is used for the requests.
Proxy proxy = new Proxy(Type.HTTP, new InetSocketAddress("122.112.137.194", 31128));
proxy = new Proxy(Type.SOCKS, new InetSocketAddress("124.229.150.121", 21212));
String url = "https://weixin.sogou.com/link?url=dn9a_-gY295K0Rci_xozVXfdMkSQTLW6cwJThYulHEtVjXrGTiVgS8FzrTzGEPrGvM6hiNXA4ZFfuz5MvdMSLVqXa8Fplpd9gowHZ2-xDps585u2obuOVGC2ke8iAlwOUW5Vlcs1qv8YeB2DBj_2dTSVEmgoED-M4y9lx6Ykc9IjDA2sWjYtSyDfEXs2p-nZB6QB9v1FTm3sgVx8MYuQh6L7kx32DJ4fKy9a6PM182aN3M2SXrGSIqAH50L-W7WN8EgDyGxD5NruL0unUdKkuw..&type=2&query=%E8%90%A5%E9%94%80&token=3ABD0306D5E9D84C3F3A954539751A493F10FC545F1FCD9F&k=61&h=M";
// String url1 = real.getRealLink(url, proxy);
// System.out.println(url1);
// }
// url2 is declared but not referenced anywhere below.
String url2 = "https://weixin.sogou.com/link?url=dn9a_-gY295K0Rci_xozVXfdMkSQTLW6cwJThYulHEtVjXrGTiVgS5ByhFAvN3jwIiW7b_R2gv7D_OCfBbIfVFqXa8Fplpd9WL3bgJTbHGo38w1lva_JHII692P3H8G7-mt4s86xANAGf5uOiWvacZcp7SBqpdhO544SEmniDVMFz9WJ1FooC47fFANaOotNkbMyJmuoTQOy3lZQRgGiSSW-vt7YDjMV3AdU8zszpKc4wKJ75UzPcBdGJ95RtHiV4XP9dLgLIUQY-UZGbO56rA..";
Map<String, Object> headers = new HashMap<>();
headers.put("Referer", "https://weixin.sogou.com/weixin");
headers.put("User-Agent",
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36");
// Fetch the cookie for the first time
String cookie = real.initCookie(headers, proxy);
headers.put("cookie", cookie);
// The same link is requested twice with the same cookie and both results are printed.
System.out.println(testGet(url, headers, proxy));
System.out.println(testGet(url, headers, proxy));
}
/**
 * Manual-test helper: requests {@code url} through {@code proxy} and assembles the target link
 * from the fragments that PATTERN finds in the response body.
 * <p>
 * When the body contains the CAPTCHA marker text a warning is printed to stderr, but extraction
 * is still attempted.
 *
 * @param url     link to request
 * @param headers request headers handed to RequestUtils.wrapGet
 * @param proxy   proxy the request is sent through
 * @return every fragment captured by PATTERN, concatenated in match order (empty when nothing
 *         matches), or "错误" when the request or the body read throws
 */
private static String testGet(String url, Map<String, Object> headers, Proxy proxy) {
    // try-with-resources closes the response on every path; a null resource is simply skipped.
    try (Response response = httpBoot.syncCall(RequestUtils.wrapGet(url, headers), proxy)) {
        String htmlBody = response.body().string();
        if (htmlBody.contains("验证码")) {
            System.err.println("出现验证码");
        }
        StringBuilder furl = new StringBuilder();
        Matcher ma1 = PATTERN.matcher(htmlBody);
        while (ma1.find()) {
            furl.append(ma1.group(1));
        }
        return furl.toString();
    } catch (Exception e) {
        // Report the failure once, with the url for context and the throwable as the last
        // argument so the logger records the stack trace (replaces printStackTrace + error(e)).
        logger.error("testGet failed, url:{}", url, e);
    }
    return "错误";
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment