Commit 53e3fcd8 by chenweitao

修改脉脉采集id上限

parent fcee33bb
...@@ -14,6 +14,7 @@ import okhttp3.Response; ...@@ -14,6 +14,7 @@ import okhttp3.Response;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
...@@ -37,6 +38,8 @@ public class MaiMaiHotSearchCrawler { ...@@ -37,6 +38,8 @@ public class MaiMaiHotSearchCrawler {
} catch (IOException e) { } catch (IOException e) {
log.error("脉脉热榜页面连接异常...", e); log.error("脉脉热榜页面连接异常...", e);
} }
// Byte budget: 1024 - 26 (timestamp + type) = 998, so name.getBytes(StandardCharsets.UTF_8).length must stay < 998; at up to 3 UTF-8 bytes per char, 998 / 3 = 332 chars max.
int nameLengthMax = 332;
if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("feeds")) { if (StringUtils.isNotBlank(htmlBody) && htmlBody.contains("feeds")) {
JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("feeds"); JSONArray jsonArray = JSONObject.parseObject(htmlBody).getJSONArray("feeds");
if (jsonArray != null) { if (jsonArray != null) {
...@@ -45,8 +48,10 @@ public class MaiMaiHotSearchCrawler { ...@@ -45,8 +48,10 @@ public class MaiMaiHotSearchCrawler {
JSONObject jsonObject = jsonArray.getJSONObject(i).getJSONObject("style35"); JSONObject jsonObject = jsonArray.getJSONObject(i).getJSONObject("style35");
if(jsonObject != null) { if(jsonObject != null) {
String name = jsonObject.getString("text"); String name = jsonObject.getString("text");
if (name.length()>750){ // 1024 - 26(时间戳+type)
name = name.substring(0,750); // name.getBytes(StandardCharsets.UTF_8).length<998)
if (name.length()>nameLengthMax){
name = name.substring(0,nameLengthMax);
} }
String maimaiUrl = jsonObject.getString("share_url"); String maimaiUrl = jsonObject.getString("share_url");
String icon = null; String icon = null;
...@@ -56,9 +61,6 @@ public class MaiMaiHotSearchCrawler { ...@@ -56,9 +61,6 @@ public class MaiMaiHotSearchCrawler {
String hotValue = jsonArray.getJSONObject(i).getJSONObject("common").getString("hot_info"); String hotValue = jsonArray.getJSONObject(i).getJSONObject("common").getString("hot_info");
Long count = hotValue.length() > 0 ? TipsUtils.getHotCount(hotValue) : 0; Long count = hotValue.length() > 0 ? TipsUtils.getHotCount(hotValue) : 0;
HotSearchList hotSearchList = new HotSearchList(maimaiUrl, name, count, null, rank, HotSearchType.脉脉热榜.name(), icon, date); HotSearchList hotSearchList = new HotSearchList(maimaiUrl, name, count, null, rank, HotSearchType.脉脉热榜.name(), icon, date);
if (hotSearchList.getName().length()>800){
log.info("脉脉数据名称过长,长度为:{},内容:{}",hotSearchList.getName().length(),hotSearchList.getName());
}
list.add(hotSearchList); list.add(hotSearchList);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment