Commit 1d4533e0 by [zhangzhiwei]

添加sign加密参数解密,并实现pc端网页头条号历史文章及关注列表采集

parent c2e5c825
......@@ -3,13 +3,18 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>toutiao</artifactId>
<version>0.2.4-SNAPSHOT</version>
<version>0.2.6-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>com.zhiwei.tools</groupId>
<artifactId>zhiwei-tools</artifactId>
<version>0.0.9-SNAPSHOT</version>
<version>0.1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.zhiwei.crawler</groupId>
<artifactId>crawler-core</artifactId>
<version>0.0.6-RELEASE</version>
</dependency>
</dependencies>
......
package com.zhiwei.toutiao.bean;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Date;
import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
/**
 * Request-signing parameters for the Toutiao web endpoints.
 * <p>
 * Every instance carries the time-derived {@code as}/{@code cp} pair. Instances built with
 * {@link #Signature(String, String)} additionally carry the value returned by the
 * {@code merge} function of the bundled {@code signature.js} script.
 *
 * @author bewiler hk
 */
public class Signature {

    /** Classpath resource containing the JavaScript that defines {@code merge}. */
    private static final String SCRIPT_RESOURCE = "signature.js";

    /** Fixed charset so digests and script text do not depend on the platform default. */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    private static final java.util.logging.Logger LOG =
            java.util.logging.Logger.getLogger(Signature.class.getName());

    private String cp;
    private String as;
    private String signature;

    /**
     * Computes {@code _signature} for the given user/cursor pair together with {@code as}/{@code cp}.
     *
     * @param userId         user id fed to the script; when {@code null} the script receives {@code "0"}
     * @param max_behot_time paging value appended to {@code userId} before signing
     */
    public Signature(String userId, String max_behot_time) {
        this.signature = this.getSign(userId, max_behot_time);
        getASCP();
    }

    /** Computes only {@code as}/{@code cp}; {@link #getSignature()} stays {@code null}. */
    public Signature() {
        getASCP();
    }

    public String getCp() {
        return cp;
    }

    public String getAs() {
        return as;
    }

    /** @return the script-generated signature, or {@code null} if it was not or could not be computed */
    public String getSignature() {
        return signature;
    }

    /**
     * Evaluates {@code signature.js} and calls its {@code merge} function.
     *
     * @return the string form of the script result, or {@code null} when the script resource,
     *         a usable JavaScript engine, or the call itself is unavailable
     */
    private String getSign(String userId, String max_behot_time) {
        String jsText = getJSText();
        if (jsText == null) {
            return null;
        }
        ScriptEngine engine = new ScriptEngineManager().getEngineByName("javascript");
        // instanceof is false for null too, so a JDK without a JavaScript engine ends up here.
        if (!(engine instanceof Invocable)) {
            LOG.warning("No invocable JavaScript engine available; signature not computed");
            return null;
        }
        // NOTE(review): a null max_behot_time is signed as userId + "null", as before - confirm intent.
        String input = (userId != null) ? userId + max_behot_time : "0";
        try {
            engine.eval(jsText);
            Object result = ((Invocable) engine).invokeFunction("merge", input);
            return (result == null) ? null : result.toString();
        } catch (Exception e) {
            LOG.log(java.util.logging.Level.WARNING, "Evaluating " + SCRIPT_RESOURCE + " failed", e);
            return null;
        }
    }

    /**
     * Derives {@code as} and {@code cp} from the current Unix time in seconds by interleaving
     * the upper-case hex digits of the timestamp with the first and last five characters of
     * the upper-cased MD5 of its decimal form.
     */
    private void getASCP() {
        long seconds = System.currentTimeMillis() / 1000L;
        String hexTime = Long.toHexString(seconds).toUpperCase();
        String digest = parseStrToMd5L32(Long.toString(seconds)).toUpperCase();
        String head = digest.substring(0, 5);
        String tail = digest.substring(digest.length() - 5);

        StringBuilder asBody = new StringBuilder(10);
        StringBuilder cpBody = new StringBuilder(10);
        for (int k = 0; k < 5; k++) {
            asBody.append(head.charAt(k)).append(hexTime.charAt(k));
            cpBody.append(hexTime.charAt(k + 3)).append(tail.charAt(k));
        }
        this.as = "A1" + asBody + hexTime.substring(hexTime.length() - 3);
        this.cp = hexTime.substring(0, 3) + cpBody + "E1";
    }

    /**
     * Computes the MD5 of a string's UTF-8 bytes.
     *
     * @param str text to hash
     * @return 32 lower-case hex characters, or {@code null} when {@code str} is {@code null}
     */
    public static String md5(String str) {
        if (str == null) {
            return null;
        }
        return bytesToHexString(digest(str));
    }

    /**
     * Renders bytes as lower-case, zero-padded hexadecimal.
     *
     * @param bytes bytes to render
     * @return the hex string, or {@code null} for a {@code null} or empty array
     */
    private static String bytesToHexString(byte bytes[]) {
        if (bytes == null || bytes.length == 0) {
            return null;
        }
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            int unsigned = b & 0xFF;
            if (unsigned < 0x10) {
                hex.append('0');
            }
            hex.append(Integer.toHexString(unsigned));
        }
        return hex.toString();
    }

    /**
     * Computes the 32-character lower-case MD5 of a string's UTF-8 bytes.
     *
     * @param str text to hash; must not be {@code null}
     * @return 32 lower-case hex characters
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String parseStrToMd5L32(String str) {
        return bytesToHexString(digest(str));
    }

    /** MD5 of the UTF-8 encoding of {@code text}. */
    private static byte[] digest(String text) {
        try {
            return MessageDigest.getInstance("MD5").digest(text.getBytes(UTF_8));
        } catch (NoSuchAlgorithmException e) {
            // MD5 is mandatory on every Java platform, so this cannot happen in practice;
            // failing loudly beats handing callers a null they immediately dereference.
            throw new IllegalStateException("MD5 MessageDigest is not available", e);
        }
    }

    /**
     * Reads the signature script from the classpath.
     *
     * @return the script text with its line breaks preserved, or {@code null} when the
     *         resource is missing or cannot be read
     */
    private String getJSText() {
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        if (loader == null) {
            loader = Signature.class.getClassLoader();
        }
        InputStream is = loader.getResourceAsStream(SCRIPT_RESOURCE);
        if (is == null) {
            LOG.warning("Classpath resource " + SCRIPT_RESOURCE + " not found; signature not computed");
            return null;
        }
        try (BufferedReader br = new BufferedReader(new InputStreamReader(is, UTF_8))) {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                // Keep the newline: gluing lines together would let a "//" comment
                // swallow everything that follows it.
                sb.append(line).append('\n');
            }
            return sb.toString();
        } catch (IOException e) {
            LOG.log(java.util.logging.Level.WARNING, "Reading " + SCRIPT_RESOURCE + " failed", e);
            return null;
        }
    }
}
......@@ -13,8 +13,10 @@ import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.tools.tools.URLCodeUtil;
import com.zhiwei.tools.tools.ZhiWeiTools;
import com.zhiwei.toutiao.bean.Signature;
import com.zhiwei.toutiao.bean.TouTiaoAccount;
import com.zhiwei.toutiao.util.Tools;
......@@ -142,16 +144,18 @@ public class TouTiaoAccountParse {
* @param @return 设定文件
* @return List<TouTiaoAccount> 返回类型
*/
@Deprecated
public static List<TouTiaoAccount> getFriendsList(String userid,Proxy proxy,long sleep){
List<TouTiaoAccount> ttaList = new ArrayList<>();
boolean more = true;
int page = 0;
while(more){
String url = "http://is.snssdk.com/user/following/?offset="+page*50+"&device_id=35330393347&count=50&user_id="+userid+"&ts="+System.currentTimeMillis()/1000;
String url = "http://it-hl.snssdk.com/user/relation/following/v2/?user_id="+userid+"&device_id=54560738994&cursor=&iid=53238029655&offset="+page*50+"&count=50&ts="+System.currentTimeMillis()/1000;
System.out.println(url);
page++;
headerMap = Tools.getTouTiaoHeader();
headerMap.put("Host", "is.snssdk.com");
headerMap.put("User-Agent", "Dalvik/2.1.0 (Linux; U; Android 8.1.0; MI 8 MIUI/V10.0.11.0.OEACNFH) NewsArticle/7.0.1 cronet/TTNetVersion:pre_blink_merge-277498-gd2bb364e 2018-08-24");
headerMap.put("Host", "it-hl.snssdk.com");
try {
String htmlBody = null;
htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
......@@ -178,6 +182,40 @@ public class TouTiaoAccountParse {
}
/**
 * Fetches the accounts that {@code userid} follows from the Toutiao PC web endpoint.
 * <p>
 * Pages are requested until the response reports {@code has_more = false}, yields no
 * parsable accounts, does not contain the substring {@code "name"}, or the paging
 * cursor can no longer be advanced.
 *
 * @param userid Toutiao user id whose following list is collected
 * @param proxy  proxy holder handed to the HTTP client for every request
 * @return the collected accounts (possibly empty), or {@code null} if a request or the
 *         handling of its response threw
 */
public static List<TouTiaoAccount> getFriendsList(String userid, ProxyHolder proxy){
    List<TouTiaoAccount> ttaList = new ArrayList<>();
    long cursor = 0L;
    boolean more = true;
    while(more){
        // The first page signs userid + "0" exactly as before.
        // NOTE(review): later pages sign userid + cursor on the assumption that the signature
        // covers the cursor value sent in the URL - confirm against the live endpoint.
        String _signature = new Signature(userid, String.valueOf(cursor)).getSignature();
        String url = "https://www.toutiao.com/c/user/following/?user_id="+userid+"&cursor="+cursor+"&count=100&_signature="+_signature;
        logger.debug(url);
        headerMap = Tools.getTouTiaoHeader();
        // The referer previously started with "ihttps://", which is not a valid URL.
        headerMap.put("referer", "https://www.toutiao.com/c/user/relation/"+ userid +"/?tab=following");
        headerMap.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36");
        try {
            String htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url,headerMap), proxy).body().string();
            if(htmlBody == null || !htmlBody.contains("name")){
                break;
            }
            JSONObject json = JSONObject.parseObject(htmlBody);
            List<TouTiaoAccount> dataList = parseFans(json);
            if(dataList == null || dataList.isEmpty()){
                break;
            }
            ttaList.addAll(dataList);
            more = json.getBooleanValue("has_more");
            if(more){
                // NOTE(review): assumes the response exposes the next offset as "cursor" - TODO confirm.
                long nextCursor = json.containsKey("cursor") ? json.getLongValue("cursor") : cursor;
                if(nextCursor == cursor){
                    // Re-requesting the same page would loop forever and duplicate entries.
                    more = false;
                }
                cursor = nextCursor;
            }
        } catch (Exception e) {
            // Pass the exception itself: fillInStackTrace() would overwrite the original trace.
            logger.error("获取今日头条帐号数据连接超时", e);
            return null;
        }
    }
    return ttaList;
}
/**
* @Title: parseHtmlByAccount
......@@ -356,6 +394,60 @@ public class TouTiaoAccountParse {
}
/**
 * Converts the {@code data} array of a relation-list response into accounts.
 * <p>
 * Only user id, media id, name, avatar URL and verification fields are read from each
 * entry; the remaining constructor arguments are passed as {@code null} / {@code 0}.
 * Entries that fail to convert are logged and skipped.
 *
 * @param json parsed response body
 * @return the accounts that could be converted, or {@code null} if the array itself
 *         could not be read
 */
private static List<TouTiaoAccount> parseFans(JSONObject json) {
    // Typed placeholders (rather than bare literals) for the constructor arguments
    // this response does not populate.
    String description = null;
    int follow_count = 0;
    Date create_time = null;
    String gender = null;
    String user_type = null;

    List<TouTiaoAccount> accounts = new ArrayList<>();
    try {
        JSONArray entries = json.getJSONArray("data");
        int total = entries.size();
        int idx = 0;
        while (idx < total) {
            try {
                JSONObject entry = entries.getJSONObject(idx);
                Long user_id = entry.getLong("user_id");
                Long media_id = entry.getLong("media_id");
                String name = entry.getString("name");
                String img_url = "https:" + entry.getString("avatar_url");
                Integer user_verified = entry.getInteger("user_verified");
                String verify_content = entry.getString("verified_content");
                accounts.add(new TouTiaoAccount(user_id, name, media_id, description, user_verified,
                        verify_content, follow_count, img_url, create_time, gender, user_type));
            } catch (Exception e) {
                logger.error("数据解析出现问题,{}", e);
            }
            idx++;
        }
    } catch (Exception e) {
        logger.error("数据解析出现问题,{}", e);
        return null;
    }
    return accounts;
}
/***
* @Title: parseHtmlByFans
* @author hero
......
......@@ -28,6 +28,7 @@ import com.zhiwei.crawler.core.HttpBoot;
import com.zhiwei.crawler.core.RequestUtils;
import com.zhiwei.crawler.proxy.ProxyHolder;
import com.zhiwei.tools.timeparse.TimeParse;
import com.zhiwei.toutiao.bean.Signature;
import com.zhiwei.toutiao.bean.TouTiaoArticle;
import com.zhiwei.toutiao.util.Tools;
......@@ -51,10 +52,10 @@ public class TouTiaoArticleParse {
* @return List<TouTiao> 返回类型
* @throws Exception
*/
@Deprecated
public static Map<String, Object> getTouTiaoList(String media_id, String max_behot_time,Date endData, Proxy proxy ) throws Exception{
String as=Tools.getAS().split("_")[0];
String cp=Tools.getAS().split("_")[1];
String url = "https://www.toutiao.com/pgc/ma/?page_type=1&media_id="+media_id+"&count=20&as="+as+"&cp="+cp;
Signature signature = new Signature();
String url = "https://www.toutiao.com/pgc/ma/?page_type=1&media_id="+media_id+"&count=20&as="+signature.getAs()+"&cp="+signature.getCp();
if(max_behot_time!=null){
url = url + "&max_behot_time="+max_behot_time;
}
......@@ -79,10 +80,11 @@ public class TouTiaoArticleParse {
return null;
}
@Deprecated
public static Map<String, Object> getTouTiaoList(String media_id, Long max_behot_time,Date endData, ProxyHolder proxy ) throws Exception{
String as=Tools.getAS().split("_")[0];
String cp=Tools.getAS().split("_")[1];
Signature signature = new Signature();
String as=signature.getAs();
String cp=signature.getCp();
String url = "https://www.toutiao.com/pgc/ma/?page_type=1&media_id="+media_id+"&count=20&as="+as+"&cp="+cp;
if(max_behot_time!=null){
url = url + "&max_behot_time="+max_behot_time;
......@@ -102,12 +104,81 @@ public class TouTiaoArticleParse {
logger.info("数据为null");
}
} catch (Exception e) {
logger.error("获取今日头条帐号数据连接超时", e);
throw e;
}
return null;
}
/**
 * Fetches one page of a user's historical articles from the Toutiao PC web endpoint.
 *
 * @param user_id        Toutiao user id whose articles are requested
 * @param max_behot_time paging value for the request; {@code null} is sent as {@code "0"}
 * @param endData        cut-off date forwarded to the response parser
 * @param proxy          proxy used for the HTTP call
 * @return the parser's result map when it is non-empty, otherwise {@code null}
 * @throws Exception if the HTTP call or reading its body fails
 */
public static Map<String, Object> getTouTiaoHistory(String user_id, String max_behot_time,Date endData, Proxy proxy ) throws Exception{
    // NOTE(review): treating a missing paging value as "0" assumes 0 means "first page" - confirm.
    String behotTime = (max_behot_time != null) ? max_behot_time : "0";
    Signature signature = new Signature(user_id, behotTime);
    String as=signature.getAs();
    String cp=signature.getCp();
    // Use the computed signature string; concatenating the Signature object itself
    // would send its default toString() instead. max_behot_time is sent exactly once.
    String url = "https://www.toutiao.com/c/user/article/?page_type=1&user_id="+user_id+"&max_behot_time="+behotTime+"&count=20&as="+as+"&cp="+cp+"&_signature="+signature.getSignature();
    logger.debug("url=========="+url);
    Map<String,String> headerMap = new HashMap<String,String>();
    headerMap.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36");
    headerMap.put("referer","https://www.toutiao.com/c/user/"+user_id+"/");
    String htmlBody = null;
    try {
        htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
        if(htmlBody != null && htmlBody.contains("behot_time")){
            Map<String, Object> ttList = parseHtmlByAccount(user_id, htmlBody, endData);
            if(ttList!=null && ttList.size()>0){
                return ttList;
            }
        }else{
            logger.info("数据为null");
        }
    } catch (Exception e) {
        // Log the exception as thrown; fillInStackTrace() would replace its original trace.
        logger.error("获取今日头条帐号数据连接超时", e);
        throw e;
    }
    return null;
}
/**
 * Fetches one page of a user's historical articles from the Toutiao PC web endpoint,
 * routing the request through a {@code ProxyHolder}.
 *
 * @param user_id        Toutiao user id whose articles are requested
 * @param max_behot_time paging value for the request; {@code null} is sent as {@code "0"}
 * @param endData        cut-off date forwarded to the response parser
 * @param proxy          proxy holder handed to the HTTP client
 * @return the parser's result map when it is non-empty, otherwise {@code null}
 * @throws Exception if the HTTP call or reading its body fails
 */
public static Map<String, Object> getTouTiaoHistory(String user_id, String max_behot_time,Date endData, ProxyHolder proxy ) throws Exception{
    // NOTE(review): treating a missing paging value as "0" assumes 0 means "first page" - confirm.
    String behotTime = (max_behot_time != null) ? max_behot_time : "0";
    Signature signature = new Signature(user_id, behotTime);
    String as=signature.getAs();
    String cp=signature.getCp();
    // Use the computed signature string; concatenating the Signature object itself
    // would send its default toString() instead. max_behot_time is sent exactly once.
    String url = "https://www.toutiao.com/c/user/article/?page_type=1&user_id="+user_id+"&max_behot_time="+behotTime+"&count=20&as="+as+"&cp="+cp+"&_signature="+signature.getSignature();
    logger.debug("url=========="+url);
    Map<String,String> headerMap = new HashMap<String,String>();
    headerMap.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36");
    headerMap.put("referer","https://www.toutiao.com/c/user/"+user_id+"/");
    String htmlBody = null;
    try {
        htmlBody = httpBoot.syncCall(RequestUtils.wrapGet(url, headerMap), proxy).body().string();
        if(htmlBody != null && htmlBody.contains("behot_time")){
            Map<String, Object> ttList = parseHtmlByAccount(user_id, htmlBody, endData);
            if(ttList!=null && ttList.size()>0){
                return ttList;
            }
        }else{
            logger.info("数据为null");
        }
    } catch (Exception e) {
        logger.error("获取今日头条帐号数据连接超时", e);
        throw e;
    }
    return null;
}
/***
* 根据帐号解析历史文章地址
......@@ -119,14 +190,13 @@ public class TouTiaoArticleParse {
* @return List<String> 返回类型
*/
private static Map<String, Object> parseHtmlByAccount(String htmlBody, Date endDate) {
Map<String, Object> map = new HashMap<String, Object>();
String max_behot_time = null;
Long max_behot_time = null;
List<TouTiaoArticle> dataList = new ArrayList<TouTiaoArticle>();
try {
JSONObject json = JSONObject.parseObject(htmlBody);
JSONArray jsonArray = json.getJSONArray("data");
max_behot_time = json.getJSONObject("next").getString("max_behot_time");
max_behot_time = Long.valueOf(json.getJSONObject("next").getString("max_behot_time"));
String title = null;
String content = null;
String time = null;
......@@ -148,10 +218,7 @@ public class TouTiaoArticleParse {
content = data.getString("abstract");
time = data.getLongValue("behot_time")*1000+"";
date = TimeParse.stringFormartDate(time);
readNum = data.getString("total_read_count");
if(readNum == null) {
readNum = data.getInteger("internal_visit_count")+"";
}
readNum = data.getString("go_detail_count");
commentNum = data.getString("comments_count");
playNum = data.getString("detail_play_effective_count");
shareNum = data.getString("share_count");
......@@ -174,20 +241,85 @@ public class TouTiaoArticleParse {
return null;
}
if(endDate!=null){
if(max_behot_time!=null && !"0".equals(max_behot_time)){
Date nextDate = new Date(Long.valueOf(max_behot_time+"000"));
if(endDate.after(nextDate)){
max_behot_time = null;
}
}
}
map.put("max_behot_time", max_behot_time);
map.put("data", dataList);
return map;
}
/**
 * Parses one page of the user-article response into articles plus the next paging value.
 * <p>
 * Entries without a {@code group_id} are ignored; entries that fail to convert are
 * logged and skipped.
 *
 * @param user_id  id stored on every produced article
 * @param htmlBody raw JSON response body
 * @param endDate  optional cut-off; when the next page's timestamp lies before it, the
 *                 returned {@code max_behot_time} is {@code null}
 * @return a map with {@code "max_behot_time"} (a {@code Long}, possibly {@code null}) and
 *         {@code "data"} (the article list), or {@code null} if the body could not be parsed
 */
private static Map<String, Object> parseHtmlByAccount(String user_id, String htmlBody, Date endDate) {
    Map<String, Object> map = new HashMap<String, Object>();
    Long max_behot_time = null;
    List<TouTiaoArticle> dataList = new ArrayList<TouTiaoArticle>();
    try {
        JSONObject json = JSONObject.parseObject(htmlBody);
        JSONArray jsonArray = json.getJSONArray("data");
        max_behot_time = Long.valueOf(json.getJSONObject("next").getString("max_behot_time"));
        for (int i = 0; i < jsonArray.size(); i++) {
            try {
                JSONObject data = jsonArray.getJSONObject(i);
                if(!data.containsKey("group_id")){
                    continue;
                }
                String href = "https://www.toutiao.com/"+"a"+data.getLongValue("group_id");
                String title = data.getString("title");
                String content = data.getString("abstract");
                // behot_time is multiplied by 1000 before being handed to the time parser.
                String time = data.getLongValue("behot_time")*1000+"";
                Date date = TimeParse.stringFormartDate(time);
                String readNum = data.getString("go_detail_count");
                String commentNum = data.getString("comments_count");
                String playNum = data.getString("detail_play_effective_count");
                String shareNum = data.getString("share_count");
                String source = data.getString("source");
                TouTiaoArticle tt = new TouTiaoArticle(href, title, user_id, source, date, content, commentNum, playNum, readNum, shareNum,"今日头条");
                if(data.containsKey("label")){
                    List<String> labelList = data.getJSONArray("label").toJavaList(String.class);
                    tt.setLabelList(labelList);
                }
                dataList.add(tt);
            } catch (Exception e) {
                // Trailing throwable keeps the stack trace in the log.
                logger.error("数据解析出现问题,{}", e.getMessage(), e);
            }
        }
    } catch (Exception e) {
        logger.error("数据解析出现问题,{}", e.getMessage(), e);
        return null;
    }
    // max_behot_time is a Long, so it must be compared numerically: the previous
    // "0".equals(max_behot_time) could never match and silently turned a 0 cursor into null.
    if(endDate != null && max_behot_time != null && max_behot_time.longValue() != 0L){
        Date nextDate = new Date(max_behot_time.longValue() * 1000L);
        if(endDate.after(nextDate)){
            max_behot_time = null;
        }
    }
    map.put("max_behot_time", max_behot_time);
    map.put("data", dataList);
    return map;
}
/**
* @Title: getMicroTouTiaoCrawler
* @author hero
......
package com.zhiwei.toutiao.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
......@@ -14,132 +7,10 @@ public class Tools {
/**
 * Reads the {@code tac_sign.txt} classpath resource into a single string, with line
 * terminators dropped.
 *
 * @param textFileName NOTE(review): currently ignored - the resource name is hard-coded;
 *                     confirm whether callers expect this name to be honoured
 * @return the concatenated lines, or {@code null} if the resource is missing or unreadable
 */
public static String getText(String textFileName) {
    InputStream is = Thread.currentThread().getContextClassLoader()
            .getResourceAsStream("tac_sign.txt");
    if (is == null) {
        // A missing resource used to surface as a NullPointerException from the reader.
        return null;
    }
    // try-with-resources closes the stream even when a read fails part-way through.
    try (BufferedReader br = new BufferedReader(
            new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
        StringBuilder sb = new StringBuilder();
        String line;
        while ((line = br.readLine()) != null) {
            sb.append(line);
        }
        return sb.toString();
    } catch (IOException e) {
        return null;
    }
}
/**
 * Builds the Toutiao {@code as} and {@code cp} request parameters from the current time.
 * <p>
 * The upper-case hex digits of the Unix timestamp (seconds) are interleaved with the
 * first and last five characters of the upper-cased MD5 of its decimal form.
 *
 * @return {@code as} and {@code cp} joined by an underscore, i.e. {@code as + "_" + cp}
 */
public static String getAS()
{
    final long epochSeconds = System.currentTimeMillis() / 1000L;
    final String hexTime = Long.toHexString(epochSeconds).toUpperCase();
    final String digest = parseStrToMd5L32(String.valueOf(epochSeconds)).toUpperCase();
    final String digestHead = digest.substring(0, 5);
    final String digestTail = digest.substring(digest.length() - 5);

    final StringBuilder asBody = new StringBuilder();
    final StringBuilder cpBody = new StringBuilder();
    for (int k = 0; k < 5; k++)
    {
        // as: digest head char followed by timestamp digit k
        asBody.append(digestHead.charAt(k)).append(hexTime.charAt(k));
        // cp: timestamp digit k+3 followed by digest tail char
        cpBody.append(hexTime.charAt(k + 3)).append(digestTail.charAt(k));
    }

    final String asValue = "A1" + asBody + hexTime.substring(hexTime.length() - 3);
    final String cpValue = hexTime.substring(0, 3) + cpBody + "E1";
    return asValue + "_" + cpValue;
}
// public static void main(String[] args) {
// Tools.getAS();
// }
/**
 * Computes the MD5 of a string's UTF-8 bytes as a hex string.
 *
 * @param str text to hash
 * @return the hex digest produced by {@code bytesToHexString}, or {@code null} if anything
 *         fails (including a {@code null} input)
 */
public static String md5(String str) {
    try {
        MessageDigest digester = MessageDigest.getInstance("MD5");
        byte[] hashed = digester.digest(str.getBytes("utf-8"));
        return bytesToHexString(hashed);
    } catch(Exception ignored) {
        // Best-effort contract: every failure is reported to the caller as null.
        return null;
    }
}
/**
 * Renders a byte array as lower-case hexadecimal, two characters per byte.
 *
 * @param bytes bytes to render
 * @return the hex string, or {@code null} when {@code bytes} is {@code null} or empty
 */
private static String bytesToHexString(byte bytes[]) {
    if (bytes == null || bytes.length == 0) {
        return null;
    }
    StringBuilder hex = new StringBuilder(bytes.length * 2);
    for (byte current : bytes) {
        int unsigned = current & 0xFF;
        // Values below 0x10 print as a single digit and need a leading zero.
        if (unsigned < 0x10) {
            hex.append('0');
        }
        hex.append(Integer.toHexString(unsigned));
    }
    return hex.toString().toLowerCase();
}
/**
 * Computes the 32-character lower-case MD5 of a string.
 * <p>
 * The text is always encoded as UTF-8, so the result does not vary with the JVM's
 * default charset.
 *
 * @param str text to hash; must not be {@code null}
 * @return 32 lower-case hexadecimal characters
 * @throws NullPointerException if {@code str} is {@code null}
 */
public static String parseStrToMd5L32(String str){
    try {
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        byte[] bytes = md5.digest(str.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes){
            int bt = b & 0xff;
            if (bt < 16){
                hex.append('0');
            }
            hex.append(Integer.toHexString(bt));
        }
        return hex.toString();
    } catch (NoSuchAlgorithmException e) {
        // MD5 is mandatory on every Java platform, so this cannot happen in practice;
        // failing loudly beats printing a trace and returning a null that callers dereference.
        throw new IllegalStateException("MD5 MessageDigest is not available", e);
    }
}
/**
* @Title: getTouTiaoHeader
......@@ -266,4 +137,8 @@ public class Tools {
return headerMap;
}
}
......@@ -3,6 +3,7 @@ package com.zhiwei.toutiao.test;
import java.util.List;
import java.util.Map;
import com.zhiwei.toutiao.bean.Signature;
import com.zhiwei.toutiao.bean.TouTiaoArticle;
import com.zhiwei.toutiao.parse.TouTiaoChannelParse;
import com.zhiwei.toutiao.util.Tools;
......@@ -23,8 +24,9 @@ public class TouTiaoChannelExample {
if( i==0 ){
max_behot_time = 0;
}
String as = Tools.getAS().split("_")[0];
String cp = Tools.getAS().split("_")[1];
Signature signature = new Signature();
String as = signature.getAs();
String cp = signature.getCp();
String url = "http://www.toutiao.com/api/pc/feed/?category=news_tech&utm_source=toutiao"
+ "&widen=1&max_behot_time="+max_behot_time+"&max_behot_time_tmp="+max_behot_time
+"&tadrequire=true&as=" +as +"&cp=" + cp;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment