Commit d431fc6a by chenweitao

Merge branch 'working' into 'master'

Working

See merge request !130
parents 1e591b17 8ae432b8
...@@ -50,7 +50,7 @@ redis.host = 192.168.0.39 ...@@ -50,7 +50,7 @@ redis.host = 192.168.0.39
redis.port = 6379 redis.port = 6379
redis.database = 1 redis.database = 1
重启2 重启3
......
...@@ -6,6 +6,8 @@ import java.net.URLEncoder; ...@@ -6,6 +6,8 @@ import java.net.URLEncoder;
import java.text.ParseException; import java.text.ParseException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.*; import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
...@@ -130,7 +132,8 @@ public class WeiboHotSearchCrawler { ...@@ -130,7 +132,8 @@ public class WeiboHotSearchCrawler {
* @Description: TODO(手机端Iphone 微博热搜采集) * @Description: TODO(手机端Iphone 微博热搜采集)
*/ */
public static List<HotSearchList> weiboHotSearchByPhone(Date date) { public static List<HotSearchList> weiboHotSearchByPhone(Date date) {
String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583"; //String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
Map<String, String> headerMap = new HashMap<>(); Map<String, String> headerMap = new HashMap<>();
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"); headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
String htmlBody = null; String htmlBody = null;
...@@ -144,12 +147,11 @@ public class WeiboHotSearchCrawler { ...@@ -144,12 +147,11 @@ public class WeiboHotSearchCrawler {
List<HotSearchList> result = new ArrayList<HotSearchList>(); List<HotSearchList> result = new ArrayList<HotSearchList>();
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) { if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
try { try {
JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data"); JSONArray json = JSONObject.parseObject(htmlBody).getJSONArray("cards");
JSONArray cards = json.getJSONArray("cards");
int rank = 0; int rank = 0;
// for (int i = 0; i < cards.size(); i++) { // for (int i = 0; i < cards.size(); i++) {
try { try {
JSONObject card = cards.getJSONObject(0); JSONObject card = json.getJSONObject(0);
JSONArray cardGroup = card.getJSONArray("card_group"); JSONArray cardGroup = card.getJSONArray("card_group");
JSONObject topCard = cardGroup.getJSONObject(0); JSONObject topCard = cardGroup.getJSONObject(0);
if (!topCard.containsKey("pic")) { if (!topCard.containsKey("pic")) {
...@@ -168,15 +170,13 @@ public class WeiboHotSearchCrawler { ...@@ -168,15 +170,13 @@ public class WeiboHotSearchCrawler {
String desc_extr = cardInfo.getString("desc_extr"); String desc_extr = cardInfo.getString("desc_extr");
String heatLabel=null; String heatLabel=null;
Long hotCount =null; Long hotCount =null;
if (Objects.nonNull(desc_extr)){ if (!StringUtils.isEmpty(desc_extr)&&Objects.nonNull(desc_extr)){
String[] split = desc_extr.split(" "); String regEx="[^0-9]";
if (split.length>1){ Pattern p = Pattern.compile(regEx);
heatLabel= split[0].trim(); Matcher m = p.matcher(desc_extr);
hotCount= Long.valueOf(split[1].trim()); String num = m.replaceAll("").trim();
hotCount = Long.valueOf(num);
}else { heatLabel= desc_extr.split(" ")[0];
hotCount = cardInfo.getLongValue("desc_extr");
}
} }
String iconUrl = cardInfo.getString("icon"); String iconUrl = cardInfo.getString("icon");
String icon=null; String icon=null;
...@@ -212,6 +212,96 @@ public class WeiboHotSearchCrawler { ...@@ -212,6 +212,96 @@ public class WeiboHotSearchCrawler {
return Collections.emptyList(); return Collections.emptyList();
} }
// /**
// * @return void 返回类型
// * @Title: weiboHotSearchByPhoneTest
// * @author hero
// * @Description: TODO(手机端Iphone 微博热搜采集)
// */
// public static List<HotSearchList> weiboHotSearchByPhone(Date date) {
// //String url = "https://m.weibo.cn/api/container/getIndex?containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot&title=%E5%BE%AE%E5%8D%9A%E7%83%AD%E6%90%9C&extparam=pos%3D0_0%26mi_cid%3D100103%26cate%3D10103%26filter_type%3Drealtimehot%26c_type%3D30&luicode=10000011&lfid=231583";
// String url = "https://api.weibo.cn/2/guest/page?c=android&s=3d477777&from=10A8395010&gsid=_2AkMoFNQvf8NhqwJRm_gWy2rkbo1_yA7EieKeSCX0JRM3HRl-wT9kqkIltRV6A-gElEGNj31RgrfclQ31YPAf7UBZPBx2&containerid=106003type%3D25%26t%3D3%26disable_hot%3D1%26filter_type%3Drealtimehot";
// Map<String, String> headerMap = new HashMap<>();
// headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36");
// String htmlBody = null;
// Request request = RequestUtils.wrapGet(url, headerMap);
// for (int count = 0; count <= 5; count++) {
// try (Response response = httpBoot.syncCall(request, ProxyHolder.NAT_HEAVY_PROXY)) {
// htmlBody = response.body().string();
// } catch (IOException e) {
// log.error("解析微博时热搜时出现连接失败", e);
// }
// List<HotSearchList> result = new ArrayList<HotSearchList>();
// if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("cards")) {
// try {
// JSONObject json = JSONObject.parseObject(htmlBody).getJSONObject("data");
// JSONArray cards = json.getJSONArray("cards");
// int rank = 0;
//// for (int i = 0; i < cards.size(); i++) {
// try {
// JSONObject card = cards.getJSONObject(0);
// JSONArray cardGroup = card.getJSONArray("card_group");
// JSONObject topCard = cardGroup.getJSONObject(0);
// if (!topCard.containsKey("pic")) {
// rank = 1;
// }
// if (Objects.nonNull(cardGroup) && !cardGroup.isEmpty()) {
//// String title = card.getString("title");
// boolean hot = true;
//// if (Objects.nonNull(title) && title.contains("实时上升热点")) {
//// hot = false;
//// rank = 51;
//// }
// for (int j = 0; j < cardGroup.size(); j++) {
// JSONObject cardInfo = cardGroup.getJSONObject(j);
// String name = cardInfo.getString("desc");
// String desc_extr = cardInfo.getString("desc_extr");
// String heatLabel=null;
// Long hotCount =null;
// if (Objects.nonNull(desc_extr)){
// String[] split = desc_extr.split(" ");
// if (split.length>1){
// heatLabel= split[0].trim();
// hotCount= Long.valueOf(split[1].trim());
//
// }else {
// hotCount = cardInfo.getLongValue("desc_extr");
// }
// }
// String iconUrl = cardInfo.getString("icon");
// String icon=null;
// if (StringUtils.isNotBlank(iconUrl)) {
// icon = iconUrl.split("_")[1].split(".png")[0];
// }
//// String id = "http://s.weibo.com/weibo/" + URLCodeUtil.getURLEncode(name, "utf-8") + "&Refer=top";
// String id = cardInfo.getString("scheme");
// HotSearchList hotSearch = new HotSearchList(id, name, hotCount, hot, rank, HotSearchType.微博热搜.name(), icon, date);
// hotSearch.setHeatLabel(heatLabel);
// if (Objects.nonNull(iconUrl)){hotSearch.setIconUrl(iconUrl);}
// result.add(hotSearch);
// rank++;
// redisDao.addDataToSet(RedisConfig.WEIBO_HOTSEARCHIDS, name + "_微博热搜");
// }
// } else {
// log.info("card 数据结构为:{}", card);
// }
// } catch (Exception e) {
// log.error("解析微博时热搜时出现解析错误", e);
// continue;
// }
//// }
//
// return result;
// } catch (Exception e) {
// log.error("解析微博时热搜时出现解析错误,数据不是json结构", e);
// }
// } else {
// log.info("解析微博时热搜时出现解析错误,页面结构有问题");
// }
// }
// return Collections.emptyList();
// }
/** /**
* 微博预热榜(实时上升热点采集) * 微博预热榜(实时上升热点采集)
* *
......
...@@ -218,7 +218,7 @@ public class HotSearchCacheDAO { ...@@ -218,7 +218,7 @@ public class HotSearchCacheDAO {
nowDoc.put("pictureUrl",pictureUrl); nowDoc.put("pictureUrl",pictureUrl);
} }
if("微博热搜".equals(type)){ if("微博热搜".equals(type)){
nowDoc = WeiboHotSearchCrawler.weiboUpdate(nowDoc); //nowDoc = WeiboHotSearchCrawler.weiboUpdate(nowDoc);
//更新微博话题贡献者,关于功能 //更新微博话题贡献者,关于功能
Document documentPC = WeiboHotSearchCrawler.weiboUpdatePC(nowDoc); Document documentPC = WeiboHotSearchCrawler.weiboUpdatePC(nowDoc);
if (documentPC.containsKey("分类")) { if (documentPC.containsKey("分类")) {
......
package com.zhiwei.searchhotcrawler.util; package com.zhiwei.searchhotcrawler.util;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import sun.misc.BASE64Decoder;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.security.SecureRandom;
import java.util.Arrays; import java.util.Arrays;
import java.util.Objects;
import javax.crypto.Cipher; import javax.crypto.Cipher;
import javax.crypto.KeyGenerator;
import javax.crypto.spec.IvParameterSpec; import javax.crypto.spec.IvParameterSpec;
import javax.crypto.spec.SecretKeySpec; import javax.crypto.spec.SecretKeySpec;
import static java.util.Objects.isNull;
/** /**
* @author * @author
* @version V1.0 * @version V1.0
...@@ -14,6 +24,9 @@ import javax.crypto.spec.SecretKeySpec; ...@@ -14,6 +24,9 @@ import javax.crypto.spec.SecretKeySpec;
* @date 2017-12-28 14:26 * @date 2017-12-28 14:26
**/ **/
public class AESUtils { public class AESUtils {
//默认偏移
public static final String VI_STR = "0102030405060708";
private static final String ALGORITHMSTR = "AES/ECB/PKCS5Padding";
private AESUtils() { private AESUtils() {
} }
...@@ -25,11 +38,19 @@ public class AESUtils { ...@@ -25,11 +38,19 @@ public class AESUtils {
* @return 加密后的字符串 * @return 加密后的字符串
*/ */
public static String encrypt(String secret, String value) { public static String encrypt(String secret, String value) {
return encrypt(secret, value,VI_STR);
}
public static String encrypt(String secret, String value,String ivStr) {
SecretKeySpec keySpec = getKey(secret); SecretKeySpec keySpec = getKey(secret);
IvParameterSpec iv = new IvParameterSpec("0102030405060708".getBytes());
try { try {
Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding"); Cipher cipher = Cipher.getInstance(ALGORITHMSTR);
cipher.init(Cipher.ENCRYPT_MODE, keySpec, iv); if (isNull(ivStr)){
cipher.init(Cipher.ENCRYPT_MODE, keySpec);
}else {
IvParameterSpec iv = new IvParameterSpec(ivStr.getBytes());
cipher.init(Cipher.ENCRYPT_MODE, keySpec, iv);
}
byte[] encrypted = cipher.doFinal(value.getBytes("UTF-8")); byte[] encrypted = cipher.doFinal(value.getBytes("UTF-8"));
return parseByte2HexStr(encrypted); return parseByte2HexStr(encrypted);
} catch (Exception e) { } catch (Exception e) {
...@@ -37,13 +58,31 @@ public class AESUtils { ...@@ -37,13 +58,31 @@ public class AESUtils {
} }
} }
/**
 * Encrypts raw bytes with AES and returns the raw ciphertext bytes.
 *
 * <p>The AES key is the first 16 bytes of {@code secretBytes}; a shorter secret is
 * right-padded with zero bytes by {@link Arrays#copyOf(byte[], int)}.
 *
 * <p>The cipher mode depends on {@code ivStr}: with a {@code null} IV the data is
 * encrypted as {@code AES/ECB/PKCS5Padding}; with a non-null IV it is encrypted as
 * {@code AES/CBC/PKCS5Padding}. ECB cannot accept an IV (the JCE provider rejects it
 * with {@code InvalidAlgorithmParameterException}), so a supplied IV must select CBC.
 *
 * @param secretBytes key material; only the first 16 bytes are used
 * @param valueBytes  plaintext bytes to encrypt
 * @param ivStr       initialisation vector text (must encode to 16 bytes in UTF-8),
 *                    or {@code null} to encrypt without an IV (ECB)
 * @return the ciphertext bytes (not hex-encoded)
 * @throws RuntimeException wrapping the underlying security exception when the cipher
 *                          cannot be created or initialised, or encryption fails
 */
public static byte[] encrypt(byte[] secretBytes, byte[] valueBytes, String ivStr) {
    SecretKeySpec keySpec = new SecretKeySpec(Arrays.copyOf(secretBytes, 16), "AES");
    try {
        Cipher cipher;
        if (ivStr == null) {
            cipher = Cipher.getInstance("AES/ECB/PKCS5Padding");
            cipher.init(Cipher.ENCRYPT_MODE, keySpec);
        } else {
            // An IV is only meaningful for a chaining mode; ECB would throw here.
            cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
            // Fixed charset so the IV bytes do not depend on the platform default.
            IvParameterSpec iv =
                    new IvParameterSpec(ivStr.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            cipher.init(Cipher.ENCRYPT_MODE, keySpec, iv);
        }
        return cipher.doFinal(valueBytes);
    } catch (java.security.GeneralSecurityException e) {
        throw new RuntimeException(e);
    }
}
public static void main(String[] args) { public static void main(String[] args) {
String jsm=AESUtils.encrypt("wechat", "shenjinzhu"); String jsm=AESUtils.encrypt("wechat", "shenjinzhu");
System.out.println(jsm); System.out.println(jsm);
String jm=AESUtils.decrypt("wechat", jsm); String jm=AESUtils.decrypt("wechat", jsm);
System.out.println(jm); System.out.println(jm);
} }
/** /**
* 解密 * 解密
* *
...@@ -52,11 +91,27 @@ public class AESUtils { ...@@ -52,11 +91,27 @@ public class AESUtils {
* @return 解密后的字符串 * @return 解密后的字符串
*/ */
public static String decrypt(String secret, String value) { public static String decrypt(String secret, String value) {
return decrypt(secret,value,VI_STR);
}
/**
* 解密
*
* @param secret 密钥
* @param value 待解密字符串
* @param ivStr 偏移字符
* @return 解密后的字符串
*/
public static String decrypt(String secret, String value,String ivStr) {
SecretKeySpec keySpec = getKey(secret); SecretKeySpec keySpec = getKey(secret);
IvParameterSpec iv = new IvParameterSpec("0102030405060708".getBytes());
try { try {
Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding"); Cipher cipher = Cipher.getInstance(ALGORITHMSTR);
cipher.init(Cipher.DECRYPT_MODE, keySpec, iv); if (isNull(ivStr)){
cipher.init(Cipher.DECRYPT_MODE, keySpec);
}else {
IvParameterSpec iv = new IvParameterSpec(ivStr.getBytes());
cipher.init(Cipher.DECRYPT_MODE, keySpec, iv);
}
byte[] encrypted1 = parseHexStr2Byte(value); byte[] encrypted1 = parseHexStr2Byte(value);
byte[] original = cipher.doFinal(encrypted1); byte[] original = cipher.doFinal(encrypted1);
return new String(original, "UTF-8"); return new String(original, "UTF-8");
...@@ -66,6 +121,32 @@ public class AESUtils { ...@@ -66,6 +121,32 @@ public class AESUtils {
} }
/** /**
* 解密
*
* @param secretBytes 密钥组
* @param valueBytes 待解密字符组
* @param ivStr 偏移字符
* @return 解密后的字符串
*/
/**
 * Decrypts raw AES ciphertext bytes and returns the plaintext decoded as UTF-8.
 *
 * <p>The AES key is the first 16 bytes of {@code secretBytes}; a shorter secret is
 * right-padded with zero bytes by {@link Arrays#copyOf(byte[], int)}.
 *
 * <p>The cipher mode depends on {@code ivStr}: with a {@code null} IV the data is
 * decrypted as {@code AES/ECB/PKCS5Padding}; with a non-null IV it is decrypted as
 * {@code AES/CBC/PKCS5Padding}. ECB cannot accept an IV (the JCE provider rejects it
 * with {@code InvalidAlgorithmParameterException}), so a supplied IV must select CBC.
 *
 * @param secretBytes key material; only the first 16 bytes are used
 * @param valueBytes  ciphertext bytes to decrypt (raw, not hex-encoded)
 * @param ivStr       initialisation vector text (must encode to 16 bytes in UTF-8),
 *                    or {@code null} to decrypt without an IV (ECB)
 * @return the decrypted text, decoded as UTF-8
 * @throws RuntimeException wrapping the underlying security exception when the cipher
 *                          cannot be created or initialised, or decryption fails
 */
public static String decrypt(byte[] secretBytes, byte[] valueBytes, String ivStr) {
    SecretKeySpec keySpec = new SecretKeySpec(Arrays.copyOf(secretBytes, 16), "AES");
    try {
        Cipher cipher;
        if (ivStr == null) {
            cipher = Cipher.getInstance("AES/ECB/PKCS5Padding");
            cipher.init(Cipher.DECRYPT_MODE, keySpec);
        } else {
            // An IV is only meaningful for a chaining mode; ECB would throw here.
            cipher = Cipher.getInstance("AES/CBC/PKCS5Padding");
            // Fixed charset so the IV bytes do not depend on the platform default.
            IvParameterSpec iv =
                    new IvParameterSpec(ivStr.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            cipher.init(Cipher.DECRYPT_MODE, keySpec, iv);
        }
        byte[] original = cipher.doFinal(valueBytes);
        return new String(original, java.nio.charset.StandardCharsets.UTF_8);
    } catch (java.security.GeneralSecurityException e) {
        throw new RuntimeException(e);
    }
}
/**
* 生成加密的密钥,保证长度为16位 * 生成加密的密钥,保证长度为16位
* *
* @param secret 用户的密钥 * @param secret 用户的密钥
...@@ -75,6 +156,7 @@ public class AESUtils { ...@@ -75,6 +156,7 @@ public class AESUtils {
byte[] bytes; byte[] bytes;
try { try {
bytes = secret.getBytes("UTF-8"); bytes = secret.getBytes("UTF-8");
// return new SecretKeySpec(Arrays.copyOf(bytes, 32), "AES");
return new SecretKeySpec(Arrays.copyOf(bytes, 16), "AES"); return new SecretKeySpec(Arrays.copyOf(bytes, 16), "AES");
} catch (UnsupportedEncodingException e) { } catch (UnsupportedEncodingException e) {
e.printStackTrace(); e.printStackTrace();
...@@ -107,8 +189,9 @@ public class AESUtils { ...@@ -107,8 +189,9 @@ public class AESUtils {
* @return * @return
*/ */
public static byte[] parseHexStr2Byte(String hexStr) { public static byte[] parseHexStr2Byte(String hexStr) {
if (hexStr.length() < 1) if (hexStr.length() < 1){
return null; return null;
}
byte[] result = new byte[hexStr.length() / 2]; byte[] result = new byte[hexStr.length() / 2];
for (int i = 0; i < hexStr.length() / 2; i++) { for (int i = 0; i < hexStr.length() / 2; i++) {
int high = Integer.parseInt(hexStr.substring(i * 2, i * 2 + 1), 16); int high = Integer.parseInt(hexStr.substring(i * 2, i * 2 + 1), 16);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment