Commit 9159a942 by zhiwei

将转临时链接添加白名单代理访问使用

parent eec30e53
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId> <groupId>com.zhiwei</groupId>
<artifactId>wechat</artifactId> <artifactId>wechat</artifactId>
<version>1.2.9-SNAPSHOT</version> <version>1.3.0-SNAPSHOT</version>
<description> <description>
知微微信采集程序,包含 知微微信采集程序,包含
1.微信历史文章采集 1.微信历史文章采集
......
...@@ -26,6 +26,31 @@ public class WechatReal { ...@@ -26,6 +26,31 @@ public class WechatReal {
*/ */
public static String getRealLink(String originalUrl) throws Exception{ public static String getRealLink(String originalUrl) throws Exception{
Proxy proxy = ProxyHolder.NAT_HEAVY_PROXY.getProxy(); Proxy proxy = ProxyHolder.NAT_HEAVY_PROXY.getProxy();
originalUrl = getOriginalUrl(originalUrl);
String realUrl = getFinalUrl(originalUrl, proxy);
return realUrl;
}
/**
 * Resolves the real link through a caller-supplied proxy (intended for a
 * whitelisted proxy IP).
 *
 * @param originalUrl link to resolve; it is first rewritten by {@code getOriginalUrl}
 * @param proxy proxy passed on to {@code getFinalUrl} for the request
 * @return whatever {@code getFinalUrl} returns for the rewritten link
 * @throws Exception propagated from {@code getFinalUrl}
 */
public static String getRealLink(String originalUrl,Proxy proxy) throws Exception{
    final String jumpUrl = getOriginalUrl(originalUrl);
    return getFinalUrl(jumpUrl, proxy);
}
/**
* 获取转链接的中间跳转链接
* @param originalUrl
* @return
*/
private static String getOriginalUrl(String originalUrl){
int b = (int) (Math.floor(100 * Math.random()) + 1); int b = (int) (Math.floor(100 * Math.random()) + 1);
int a = originalUrl.indexOf("url="); int a = originalUrl.indexOf("url=");
int c = originalUrl.indexOf("&k="); int c = originalUrl.indexOf("&k=");
...@@ -34,13 +59,17 @@ public class WechatReal { ...@@ -34,13 +59,17 @@ public class WechatReal {
d = originalUrl.substring(a + 25 + b, a + 26 + b); d = originalUrl.substring(a + 25 + b, a + 26 + b);
} }
originalUrl += "&k=" + b + "&h=" + d; originalUrl += "&k=" + b + "&h=" + d;
String realUrl = getFinalUrl(originalUrl, proxy); return originalUrl;
return realUrl;
} }
/**
* 通过普通代理获取临时链接
* @param originalUrl
* @param proxy
* @return
* @throws Exception
*/
private static String getFinalUrl(String originalUrl, Proxy proxy) throws Exception{ private static String getFinalUrl(String originalUrl, Proxy proxy) throws Exception{
Map<String,String> headerMap = new HashMap<>(); Map<String,String> headerMap = new HashMap<>();
headerMap.put("Sec-Fetch-Mode", "navigate"); headerMap.put("Sec-Fetch-Mode", "navigate");
...@@ -66,8 +95,7 @@ public class WechatReal { ...@@ -66,8 +95,7 @@ public class WechatReal {
} }
return furl.toString(); return furl.toString();
}else{ }else{
throw new NullPointerException("获取临时链接失败"); throw new NullPointerException("获取临时链接失败, 出现输入验证码");
} }
} }
} }
...@@ -42,26 +42,25 @@ public class WechatSearchExample{ ...@@ -42,26 +42,25 @@ public class WechatSearchExample{
public static void main(String[] args) { public static void main(String[] args) {
ProxyFactory.init(registry, group, GroupType.PROVIDER,10000018); ProxyFactory.init(registry, group, GroupType.PROVIDER,10000018);
Proxy proxy = ProxyHolder.SOUGOU_INNER_PROXY.getProxy(); Proxy proxy = ProxyHolder.NAT_HEAVY_PROXY.getProxy();
// String url = "https://weixin.sogou.com/link?url=dn9a_-gY295K0Rci_xozVXfdMkSQTLW6cwJThYulHEtVjXrGTiVgSzcttWBfUfRyBteZJZKwOQZcZaXkLh7iD1qXa8Fplpd9OIASmEBDDgpc-DopMAxHDRa5rMUETB5W4jcmy1RslCj6dRdWlI71gTiuwjp2qvcTJ8ryfwJWyrd9awnq8kg4J-jH9rgNij43NIxLSEyMEC0OFckdi_fmA1TpUaYEJzIlQ9H-i95UM3h5UwmbSJx95X6FkyXmgknK9g_68U3LLV9hlgeRt7bSzA..&type=2&query=%E4%BA%BA%E6%B0%91%E6%97%A5%E6%8A%A5"; String url = "https://weixin.sogou.com/link?url=dn9a_-gY295K0Rci_xozVXfdMkSQTLW6cwJThYulHEtVjXrGTiVgSzcttWBfUfRyBteZJZKwOQZcZaXkLh7iD1qXa8Fplpd9OIASmEBDDgpc-DopMAxHDRa5rMUETB5W4jcmy1RslCj6dRdWlI71gTiuwjp2qvcTJ8ryfwJWyrd9awnq8kg4J-jH9rgNij43NIxLSEyMEC0OFckdi_fmA1TpUaYEJzIlQ9H-i95UM3h5UwmbSJx95X6FkyXmgknK9g_68U3LLV9hlgeRt7bSzA..&type=2&query=%E4%BA%BA%E6%B0%91%E6%97%A5%E6%8A%A5";
// try { try {
//// String cookie = getRealUrlCookie(url, proxy); // String cookie = getRealUrlCookie(url, proxy);
//// System.out.println("cookie==================="+cookie); // System.out.println("cookie==================="+cookie);
//
// String realUrl = getRealLink(url, proxy);
//
// System.out.println("realUrl==================="+realUrl);
// } catch (Exception e) {
// e.printStackTrace();
// }
try{
WechatAritcleSearch.wechatKeywordSearch("京东", 5, null, "2019-10-01", "2019-10-01", proxy, 10);
}catch (Exception e){ String realUrl = getRealLink(url, proxy);
System.out.println("realUrl==================="+realUrl);
} catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
// try{
// WechatAritcleSearch.wechatKeywordSearch("京东", 5, null, "2019-10-01", "2019-10-01", proxy, 10);
//
// }catch (Exception e){
// e.printStackTrace();
// }
} }
...@@ -130,80 +129,6 @@ public class WechatSearchExample{ ...@@ -130,80 +129,6 @@ public class WechatSearchExample{
} }
/**
 * Builds a cookie string for resolving a Sogou WeChat link.
 *
 * <p>Steps, as implemented below:
 * <ol>
 *   <li>take the text after {@code query=} in {@code originalUrl} and request the
 *       weixin.sogou.com search page for it;</li>
 *   <li>extract the {@code var uigs_para = ...;} object from the page and parse it as JSON;</li>
 *   <li>request {@code pb.sogou.com/cl.gif} with the flattened, URL-encoded parameters;</li>
 *   <li>combine {@code uuid}/{@code snuid} from the parsed object with the
 *       {@code name=value} part of every {@code Set-Cookie} header of that response.</li>
 * </ol>
 *
 * @param originalUrl link that must contain a non-empty {@code query=} parameter
 * @param proxy proxy used for both requests
 * @return the assembled cookie string, or {@code null} when a response is {@code null},
 *         the search page does not contain {@code snuid}, or the second phase fails
 * @throws IllegalArgumentException if {@code originalUrl} has no {@code query=} value
 * @throws IllegalStateException if the search page has no {@code uigs_para} assignment
 * @throws Exception propagated from the first HTTP call
 */
public static String getRealUrlCookie(String originalUrl, Proxy proxy) throws Exception{
    final String userAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36";

    // split() drops trailing empty strings, so a missing or empty keyword yields length < 2.
    String[] urlParts = originalUrl.split("query=");
    if (urlParts.length < 2) {
        throw new IllegalArgumentException("originalUrl has no query= value: " + originalUrl);
    }
    String word = urlParts[1];
    String searchUrl = "https://weixin.sogou.com/weixin?type=2&s_from=input&query="+ word +"&ie=utf8&_sug_=n&_sug_type_=&page=1";

    Map<String,String> headerMap = HeaderTool.getCommonHead();
    headerMap.put("Host", "weixin.sogou.com");
    headerMap.put("Referer", searchUrl);
    headerMap.put("User-Agent", userAgent);

    // The second call below is null-checked, so treat a null response here the same way.
    Response searchResponse = httpBoot.syncCall(RequestUtils.wrapGet(searchUrl, headerMap), proxy,false);
    if (Objects.isNull(searchResponse)) {
        return null;
    }
    String htmlBody = searchResponse.body().string();

    if (!StringUtils.isNotBlank(htmlBody) || !htmlBody.contains("snuid")) {
        System.out.println("-----------------"+ htmlBody);
        return null;
    }

    Matcher matcher = Pattern.compile("var uigs_para = [\\s\\S]+?;").matcher(htmlBody);
    if (!matcher.find()) {
        // Same exception type Matcher.group() would raise, but with a usable message.
        throw new IllegalStateException("uigs_para assignment not found in search page");
    }
    String str = matcher.group().replaceAll("var uigs_para = |;", "");

    try {
        // Strip the inline ternary so the remaining text can be parsed as JSON.
        str = str.replaceAll(" passportUserId \\? \"1\" \\:", "");
        JSONObject data = JSONObject.parseObject(str);

        // Flatten {"a":"b","c":"d"} into a=b&c=d before URL-encoding it.
        str = str.replaceAll("\\s","").replaceAll("\":\"","=").replaceAll("\",\"","&").replaceAll("\\{\"|\"\\}","");
        String ac = URLCodeUtil.getURLEncode(str, "utf-8");
        String cookieUrl = "https://pb.sogou.com/cl.gif?uigs_cl=article_title_6&href="+originalUrl+"&uigs_refer=https://weixin.sogou.com/&uigs_t="+ ac +"&right=right0_0&exp_id=null_0-null_1-null_2-null_3-null_4-null_5-null_6-null_7-null_8-null_9";

        // Referer and User-Agent are unchanged from the first request; only Host differs.
        headerMap.put("Host", "pb.sogou.com");

        // NOTE(review): if Response is closeable (e.g. okhttp3.Response), it should be closed — confirm the type.
        Response response = httpBoot.syncCall(RequestUtils.wrapGet(cookieUrl, headerMap), proxy,false);
        if(Objects.nonNull(response)){
            StringBuilder cookie = new StringBuilder();
            List<String> headersResponse = response.headers("Set-Cookie");
            for (String header : headersResponse) {
                System.out.println("head======"+header);
                // Keep only name=value and terminate it with ';' so consecutive pairs
                // (and the fixed IPLOC suffix below) do not run into each other.
                cookie.append(header.split(";")[0]).append(';');
            }
            return "uuid=" + data.getString("uuid")+ ";snuid="+ data.getString("snuid")+ ";" + cookie + "IPLOC=CN3302; sct=1;weixinIndexVisited=1";
        }
    }catch (Exception e){
        // Best effort: report the failure and fall through to the null result.
        e.printStackTrace();
    }
    return null;
}
/**
 * Extracts the {@code snuid} value(s) from a page body and formats them as a
 * {@code SNUID=...} cookie pair.
 *
 * <p>Every occurrence of {@code "snuid" : "<value>",} is matched; the captured
 * values are concatenated in order of appearance. If the body contains the text
 * {@code snuid} but nothing matches that exact pattern, the result is {@code "SNUID="}.
 *
 * @param htmlBody page body to scan; may be {@code null}
 * @return {@code "SNUID=" + values}, or {@code null} when {@code htmlBody} is
 *         {@code null} or does not contain {@code snuid}
 * @throws Exception declared for interface compatibility; nothing in the body throws a checked exception
 */
public static String getCookie(String htmlBody) throws Exception{
    // A string that contains "snuid" is never blank, so a null check is all that is needed.
    if (htmlBody == null || !htmlBody.contains("snuid")) {
        return null;
    }
    StringBuilder snuid = new StringBuilder();
    Matcher matcher = Pattern.compile("\"snuid\" : \"(.*?)\",").matcher(htmlBody);
    while (matcher.find()) {
        snuid.append(matcher.group(1));
    }
    return "SNUID=" + snuid;
}
public static void wechatSearchExample() throws UnknownHostException public static void wechatSearchExample() throws UnknownHostException
{ {
List<String> wordList = new ArrayList<String>(); List<String> wordList = new ArrayList<String>();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment