Commit d431fc6a by chenweitao

Merge branch 'working' into 'master'

Working

See merge request !130
parents 1e591b17 8ae432b8
......@@ -50,7 +50,7 @@ redis.host = 192.168.0.39
redis.port = 6379
redis.database = 1
重启2
重启3
......
......@@ -6,6 +6,8 @@ import java.net.URLEncoder;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import com.alibaba.fastjson.JSON;
......@@ -130,7 +132,8 @@ public class WeiboHotSearchCrawler {
* @Description: TODO(手机端Iphone 微博热搜采集)
*/
public static List<HotSearchList> weiboHotSearchByPhone(Date date) {
String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
//String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
Map<String, String> headerMap = new HashMap<>();
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
String htmlBody = null;
......@@ -144,12 +147,11 @@ public class WeiboHotSearchCrawler {
List<HotSearchList> result = new ArrayList<HotSearchList>();
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
try {
JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data");
JSONArray cards = json.getJSONArray("cards");
JSONArray json = JSONObject.parseObject(htmlBody).getJSONArray("cards");
int rank = 0;
// for (int i = 0; i < cards.size(); i++) {
try {
JSONObject card = cards.getJSONObject(0);
JSONObject card = json.getJSONObject(0);
JSONArray cardGroup = card.getJSONArray("card_group");
JSONObject topCard = cardGroup.getJSONObject(0);
if (!topCard.containsKey("pic")) {
......@@ -168,15 +170,13 @@ public class WeiboHotSearchCrawler {
String desc_extr = cardInfo.getString("desc_extr");
String heatLabel=null;
Long hotCount =null;
if (Objects.nonNull(desc_extr)){
String[] split = desc_extr.split(" ");
if (split.length>1){
heatLabel= split[0].trim();
hotCount= Long.valueOf(split[1].trim());
}else {
hotCount = cardInfo.getLongValue("desc_extr");
}
if (!StringUtils.isEmpty(desc_extr)&&Objects.nonNull(desc_extr)){
String regEx="[^0-9]";
Pattern p = Pattern.compile(regEx);
Matcher m = p.matcher(desc_extr);
String num = m.replaceAll("").trim();
hotCount = Long.valueOf(num);
heatLabel= desc_extr.split(" ")[0];
}
String iconUrl = cardInfo.getString("icon");
String icon=null;
......@@ -212,6 +212,96 @@ public class WeiboHotSearchCrawler {
return Collections.emptyList();
}
// /**
// * @return void 返回类型
// * @Title: weiboHotSearchByPhoneTest
// * @author hero
// * @Description: TODO(手机端Iphone 微博热搜采集)
// */
// public static List<HotSearchList> weiboHotSearchByPhone(Date date) {
// //String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
// String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
// Map<String, String> headerMap = new HashMap<>();
// headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
// String htmlBody = null;
// Request request = RequestUtils.wrapGet(url, headerMap);
// for (int count = 0; count <= 5; count++) {
// try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
// htmlBody = response.body().string();
// } catch (IOException e) {
// log.error("解析微博时热搜时出现连接失败", e);
// }
// List<HotSearchList> result = new ArrayList<HotSearchList>();
// if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
// try {
// JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data");
// JSONArray cards = json.getJSONArray("cards");
// int rank = 0;
//// for (int i = 0; i < cards.size(); i++) {
// try {
// JSONObject card = cards.getJSONObject(0);
// JSONArray cardGroup = card.getJSONArray("card_group");
// JSONObject topCard = cardGroup.getJSONObject(0);
// if (!topCard.containsKey("pic")) {
// rank = 1;
// }
// if (Objects.nonNull(cardGroup) && !cardGroup.isEmpty()) {
//// String title = card.getString("title");
// boolean hot = true;
//// if (Objects.nonNull(title) && title.contains("实时上升热点")) {
//// hot = false;
//// rank = 51;
//// }
// for (int j = 0; j < cardGroup.size(); j++) {
// JSONObject cardInfo = cardGroup.getJSONObject(j);
// String name = cardInfo.getString("desc");
// String desc_extr = cardInfo.getString("desc_extr");
// String heatLabel=null;
// Long hotCount =null;
// if (Objects.nonNull(desc_extr)){
// String[] split = desc_extr.split(" ");
// if (split.length>1){
// heatLabel= split[0].trim();
// hotCount= Long.valueOf(split[1].trim());
//
// }else {
// hotCount = cardInfo.getLongValue("desc_extr");
// }
// }
// String iconUrl = cardInfo.getString("icon");
// String icon=null;
// if (StringUtils.isNotBlank(iconUrl)) {
// icon = iconUrl.split("_")[1].split(".png")[0];
// }
//// String id = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
// String id = cardInfo.getString("scheme");
// HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name(), icon, date);
// hotSearch.setHeatLabel(heatLabel);
// if (Objects.nonNull(iconUrl)){hotSearch.setIconUrl(iconUrl);}
// result.add(hotSearch);
// rank++;
// redisDao.addDataToSet(RedisConfig.WEIBO_HOTSEARCHIDS, name + "_微博热搜");
// }
// } else {
// log.info("card 数据结构为:{}", card);
// }
// } catch (Exception e) {
// log.error("解析微博时热搜时出现解析错误", e);
// continue;
// }
//// }
//
// return result;
// } catch (Exception e) {
// log.error("解析微博时热搜时出现解析错误,数据不是json结构", e);
// }
// } else {
// log.info("解析微博时热搜时出现解析错误,页面结构有问题");
// }
// }
// return Collections.emptyList();
// }
/**
* 微博预热榜(实时上升热点采集)
*
......
......@@ -218,7 +218,7 @@ public class HotSearchCacheDAO {
nowDoc.put("pictureUrl",pictureUrl);
}
if("微博热搜".equals(type)){
nowDoc = WeiboHotSearchCrawler.weiboUpdate(nowDoc);
//nowDoc = WeiboHotSearchCrawler.weiboUpdate(nowDoc);
//更新微博话题贡献者,关于功能
Document documentPC = WeiboHotSearchCrawler.weiboUpdatePC(nowDoc);
if (documentPC.containsKey("分类")) {
......
package com.zhiwei.searchhotcrawler.util;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import sun.misc.BASE64Decoder;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.security.SecureRandom;
import java.util.Arrays;
import java.util.Objects;
import javax.crypto.Cipher;
import javax.crypto.KeyGenerator;
import javax.crypto.spec.IvParameterSpec;
import javax.crypto.spec.SecretKeySpec;
import static java.util.Objects.isNull;
/**
* @author
* @version V1.0
......@@ -14,6 +24,9 @@ import javax.crypto.spec.SecretKeySpec;
* @date 2017-12-28 14:26
**/
public class AESUtils {
//默认偏移
public static final String VI_STR = "0102030405060708";
private static final String ALGORITHMSTR = "AES/ECB/PKCS5Padding";
private AESUtils() {
}
......@@ -25,11 +38,19 @@ public class AESUtils {
* @return 加密后的字符串
*/
public static String encrypt(String secret, String value) {
    // Convenience overload: forwards to the three-argument encrypt with the
    // class-level default IV string (VI_STR).
    final String defaultIv = VI_STR;
    return encrypt(secret, value, defaultIv);
}
public static String encrypt(String secret, String value,String ivStr) {
SecretKeySpec keySpec = getKey(secret);
IvParameterSpec iv = new IvParameterSpec("0102030405060708".getBytes());
try {
Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
cipher.init(Cipher.ENCRYPT_MODE, keySpec, iv);
Cipher cipher = Cipher.getInstance(ALGORITHMSTR);
if (isNull(ivStr)){
cipher.init(Cipher.ENCRYPT_MODE, keySpec);
}else {
IvParameterSpec iv = new IvParameterSpec(ivStr.getBytes());
cipher.init(Cipher.ENCRYPT_MODE, keySpec, iv);
}
byte[] encrypted = cipher.doFinal(value.getBytes("UTF-8"));
return parseByte2HexStr(encrypted);
} catch (Exception e) {
......@@ -37,13 +58,31 @@ public class AESUtils {
}
}
/**
 * Encrypts raw bytes with AES and returns the raw cipher bytes (no hex encoding).
 *
 * <p>The key is {@code secretBytes} truncated or zero-padded to 16 bytes.
 * The cipher mode follows the IV argument: when {@code ivStr} is {@code null}
 * the data is encrypted with AES/ECB/PKCS5Padding; otherwise AES/CBC/PKCS5Padding
 * is used with the UTF-8 bytes of {@code ivStr} as the IV. ECB cannot be
 * initialised with an IV (JCE rejects it), so a supplied IV has to select a
 * mode that actually consumes one.
 *
 * @param secretBytes key material; only the first 16 bytes are used
 * @param valueBytes  plaintext bytes to encrypt
 * @param ivStr       IV string (must encode to 16 bytes), or {@code null} for ECB
 * @return the encrypted bytes
 * @throws RuntimeException wrapping any cipher failure (bad IV length, etc.)
 */
public static byte[] encrypt(byte[] secretBytes, byte[] valueBytes,String ivStr) {
    SecretKeySpec keySpec = new SecretKeySpec(Arrays.copyOf(secretBytes, 16), "AES");
    try {
        Cipher cipher;
        if (ivStr == null) {
            // No IV: plain ECB, which takes no algorithm parameters.
            cipher = Cipher.getInstance("AES/ECB/PKCS5Padding");
            cipher.init(Cipher.ENCRYPT_MODE, keySpec);
        } else {
            // IV supplied: CBC is required, ECB would throw
            // InvalidAlgorithmParameterException on init.
            cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
            // Explicit charset so the IV bytes do not depend on the platform default.
            cipher.init(Cipher.ENCRYPT_MODE, keySpec, new IvParameterSpec(ivStr.getBytes("UTF-8")));
        }
        return cipher.doFinal(valueBytes);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
public static void main(String[] args) {
    // Manual round-trip check: encrypt a sample value, print the hex result,
    // then decrypt it with the same secret and print the recovered text.
    final String secret = "wechat";
    String cipherHex = AESUtils.encrypt(secret, "shenjinzhu");
    System.out.println(cipherHex);
    System.out.println(AESUtils.decrypt(secret, cipherHex));
}
/**
* 解密
*
......@@ -52,11 +91,27 @@ public class AESUtils {
* @return 解密后的字符串
*/
public static String decrypt(String secret, String value) {
    // Convenience overload: forwards to the three-argument decrypt with the
    // class-level default IV string (VI_STR).
    final String defaultIv = VI_STR;
    return decrypt(secret, value, defaultIv);
}
/**
* 解密
*
* @param secret 密钥
* @param value 待解密字符串
* @param ivStr 偏移字符
* @return 解密后的字符串
*/
public static String decrypt(String secret, String value,String ivStr) {
SecretKeySpec keySpec = getKey(secret);
IvParameterSpec iv = new IvParameterSpec("0102030405060708".getBytes());
try {
Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
cipher.init(Cipher.DECRYPT_MODE, keySpec, iv);
Cipher cipher = Cipher.getInstance(ALGORITHMSTR);
if (isNull(ivStr)){
cipher.init(Cipher.DECRYPT_MODE, keySpec);
}else {
IvParameterSpec iv = new IvParameterSpec(ivStr.getBytes());
cipher.init(Cipher.DECRYPT_MODE, keySpec, iv);
}
byte[] encrypted1 = parseHexStr2Byte(value);
byte[] original = cipher.doFinal(encrypted1);
return new String(original, "UTF-8");
......@@ -66,6 +121,32 @@ public class AESUtils {
}
/**
 * Decrypts raw AES cipher bytes and returns the plaintext decoded as UTF-8.
 *
 * <p>The key is {@code secretBytes} truncated or zero-padded to 16 bytes.
 * The cipher mode follows the IV argument: when {@code ivStr} is {@code null}
 * the data is decrypted with AES/ECB/PKCS5Padding; otherwise AES/CBC/PKCS5Padding
 * is used with the UTF-8 bytes of {@code ivStr} as the IV. ECB cannot be
 * initialised with an IV (JCE rejects it), so a supplied IV has to select a
 * mode that actually consumes one.
 *
 * @param secretBytes key bytes; only the first 16 bytes are used
 * @param valueBytes  cipher bytes to decrypt
 * @param ivStr       IV string (must encode to 16 bytes), or {@code null} for ECB
 * @return the decrypted string
 * @throws RuntimeException wrapping any cipher failure (bad padding, bad IV length, etc.)
 */
public static String decrypt(byte[] secretBytes, byte[] valueBytes,String ivStr) {
    SecretKeySpec keySpec = new SecretKeySpec(Arrays.copyOf(secretBytes, 16), "AES");
    try {
        Cipher cipher;
        if (ivStr == null) {
            // No IV: plain ECB, which takes no algorithm parameters.
            cipher = Cipher.getInstance("AES/ECB/PKCS5Padding");
            cipher.init(Cipher.DECRYPT_MODE, keySpec);
        } else {
            // IV supplied: CBC is required, ECB would throw
            // InvalidAlgorithmParameterException on init.
            cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
            // Explicit charset so the IV bytes do not depend on the platform default.
            cipher.init(Cipher.DECRYPT_MODE, keySpec, new IvParameterSpec(ivStr.getBytes("UTF-8")));
        }
        byte[] original = cipher.doFinal(valueBytes);
        return new String(original, "UTF-8");
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
/**
* 生成加密的密钥,保证长度为16位
*
* @param secret 用户的密钥
......@@ -75,6 +156,7 @@ public class AESUtils {
byte[] bytes;
try {
bytes = secret.getBytes("UTF-8");
// return new SecretKeySpec(Arrays.copyOf(bytes, 32), "AES");
return new SecretKeySpec(Arrays.copyOf(bytes, 16), "AES");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
......@@ -107,8 +189,9 @@ public class AESUtils {
* @return
*/
public static byte[] parseHexStr2Byte(String hexStr) {
if (hexStr.length() < 1)
if (hexStr.length() < 1){
return null;
}
byte[] result = new byte[hexStr.length() / 2];
for (int i = 0; i < hexStr.length() / 2; i++) {
int high = Integer.parseInt(hexStr.substring(i * 2, i * 2 + 1), 16);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment