Commit 4c2e31e6 by zhiwei

微信真实链接相关测试及获取snuid

parent 00eb5148
...@@ -473,16 +473,11 @@ public class WechatAritcleSearch { ...@@ -473,16 +473,11 @@ public class WechatAritcleSearch {
headerMap.put("Sec-Fetch-User", "?1"); headerMap.put("Sec-Fetch-User", "?1");
headerMap.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3"); headerMap.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3");
headerMap.put("Sec-Fetch-Site", "same-origin"); headerMap.put("Sec-Fetch-Site", "same-origin");
headerMap.put("Upgrade-Insecure-Requests", "1");
headerMap.put("Connection", "keep-alive");
headerMap.put("Host", "weixin.sogou.com"); headerMap.put("Host", "weixin.sogou.com");
headerMap.put("Accept-Encoding", "gzip, deflate, br");
headerMap.put("Accept-Language", "zh-CN,zh;q=0.9");
headerMap.put("Cache-Control", "no-cache");
headerMap.put("Referer", searchUrl); headerMap.put("Referer", searchUrl);
headerMap.put("Cookie", cookie); headerMap.put("Cookie", cookie);
String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(originalUrl, headerMap)).body().string(); String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(originalUrl, headerMap), ProxyHolder.NAT_HEAVY_PROXY).body().string();
if(StringUtils.isNotBlank(htmlBody) ){ if(StringUtils.isNotBlank(htmlBody) ){
StringBuilder furl = new StringBuilder(); StringBuilder furl = new StringBuilder();
Pattern pa1 = Pattern.compile("url \\+= \'(.*?)\';"); Pattern pa1 = Pattern.compile("url \\+= \'(.*?)\';");
...@@ -496,6 +491,31 @@ public class WechatAritcleSearch { ...@@ -496,6 +491,31 @@ public class WechatAritcleSearch {
return null; return null;
} }
/** Matches the {@code "snuid" : "<value>",} entry embedded in the search result page. */
private static final Pattern SNUID_PATTERN = Pattern.compile("\"snuid\" : \"(.*?)\",");

/**
 * Requests the Sogou WeChat search page for the keyword carried by {@code originalUrl}
 * and builds a {@code SNUID=<value>} cookie string from the snuid found in the page body.
 *
 * @param originalUrl a link that carries a {@code query=<keyword>} parameter
 * @return {@code "SNUID=<value>"}, or {@code null} when the URL carries no keyword,
 *         the response body is blank, or no non-empty snuid is present in the page
 * @throws Exception whatever the underlying HTTP call or body read propagates
 */
public static String getRealUrlCookie(String originalUrl) throws Exception {
    String word = extractQueryWord(originalUrl);
    if (word == null) {
        // Nothing to search for; report "no cookie" instead of failing on an array index.
        return null;
    }

    String searchUrl = "https://weixin.sogou.com/weixin?type=2&s_from=input&query=" + word + "&ie=utf8&_sug_=n&_sug_type_=&page=1";
    Map<String, String> headerMap = HeaderTool.getCommonHead();
    headerMap.put("Host", "weixin.sogou.com");
    headerMap.put("Referer", searchUrl);

    String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(searchUrl, headerMap), ProxyHolder.NAT_HEAVY_PROXY, false).body().string();
    if (StringUtils.isBlank(htmlBody)) {
        return null;
    }

    // Only the first match is used: concatenating several matches would yield an invalid cookie value.
    Matcher matcher = SNUID_PATTERN.matcher(htmlBody);
    if (!matcher.find() || matcher.group(1).isEmpty()) {
        // Never hand back a bare "SNUID=", which callers would mistake for a usable cookie.
        return null;
    }
    return "SNUID=" + matcher.group(1);
}

/**
 * Extracts the value that follows the first {@code query=} in the URL, stopping at the next
 * {@code &} so that trailing parameters are not carried into the rebuilt search URL.
 *
 * @param originalUrl the URL to inspect; may be {@code null}
 * @return the keyword exactly as it appears in the URL, or {@code null} if absent or empty
 */
private static String extractQueryWord(String originalUrl) {
    if (originalUrl == null) {
        return null;
    }
    final String marker = "query=";
    int start = originalUrl.indexOf(marker);
    if (start < 0) {
        return null;
    }
    start += marker.length();
    int end = originalUrl.indexOf('&', start);
    String word = end < 0 ? originalUrl.substring(start) : originalUrl.substring(start, end);
    return word.isEmpty() ? null : word;
}
/** /**
* @Title: getOpenId * @Title: getOpenId
* @Description: 获取微信wxID * @Description: 获取微信wxID
......
...@@ -14,6 +14,7 @@ import java.util.regex.Pattern; ...@@ -14,6 +14,7 @@ import java.util.regex.Pattern;
import com.zhiwei.crawler.core.HttpBoot; import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.utils.RequestUtils; import com.zhiwei.crawler.utils.RequestUtils;
import com.zhiwei.tools.tools.ZhiWeiTools;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -41,12 +42,41 @@ public class WechatSearchExample{ ...@@ -41,12 +42,41 @@ public class WechatSearchExample{
public static void main(String[] args) { public static void main(String[] args) {
ProxyFactory.init(registry, group, GroupType.PROVIDER,10000018); ProxyFactory.init(registry, group, GroupType.PROVIDER,10000018);
proxy = ProxyHolder.SOUGOU_INNER_PROXY.getProxy(); proxy = ProxyHolder.SOUGOU_INNER_PROXY.getProxy();
String url = "https://weixin.sogou.com/link?url=dn9a_-gY295K0Rci_xozVXfdMkSQTLW6cwJThYulHEtVjXrGTiVgS8CMDfv9wh9qo5s-_tRRSYjdmlThuLl1UVqXa8Fplpd9fM3bn57YTm93DADHLmE53r3LNleAx90O6EdWFlMiLgABVb5FRuhnTbO_GzJrBhvROBdUYdPJ-HwpjtEvi_VZoCFXsP2Y8IMUHXuJCM5s6KSDiXUXG84daahQ5c0PemEIN_1vJiPn8w7tlTjPfiB-Z3QxFXEDiCN7KNRVfuxuX7N535pjGOOjYg..&type=2&query=%E8%85%BE%E8%AE%AF";
try { try{
WechatSearchExample.wechatSearchExample(); String cookie = WechatAritcleSearch.getRealUrlCookie(url);
} catch (UnknownHostException e) { if(StringUtils.isNotBlank(cookie)){
System.out.println("cookie============="+cookie);
boolean f = true;
int i = 0;
while(f){
try{
String link = WechatAritcleSearch.getRealLink(url, cookie);
if(StringUtils.isNotBlank(link) && link.contains("s?src=")){
System.out.println(i+++"=========="+link);
}else{
System.out.println(i+++"=========="+link);
}
ZhiWeiTools.sleep(3000);
}catch (Exception e){
ZhiWeiTools.sleep(50);
e.printStackTrace();
}
}
}else {
System.out.println("cookie============="+cookie);
}
}catch (Exception e){
e.printStackTrace(); e.printStackTrace();
} }
// try {
// WechatSearchExample.wechatSearchExample();
// } catch (UnknownHostException e) {
// e.printStackTrace();
// }
} }
...@@ -57,7 +87,7 @@ public class WechatSearchExample{ ...@@ -57,7 +87,7 @@ public class WechatSearchExample{
for(String word : wordList) for(String word : wordList)
{ {
try { try {
List<WechatAricle> list = WechatAritcleSearch.wechatKeywordSearch(word, 5, null,"2019-10-28", "2019-10-28",proxy, 2); List<WechatAricle> list = WechatAritcleSearch.wechatKeywordSearch(word, 5, null,"2019-10-28", "2019-10-28",proxy, 3);
System.out.println("======"+list.size()); System.out.println("======"+list.size());
for(WechatAricle wechat : list){ for(WechatAricle wechat : list){
System.out.println(wechat.getId()); System.out.println(wechat.getId());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment