Commit 7b625977 by chenweitao

Merge branch 'working' into 'master'

Working

See merge request !93
parents df9dbc99 b8f83e22
......@@ -18,6 +18,7 @@ import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.StringUtils;
import org.bson.Document;
import org.checkerframework.checker.units.qual.C;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
......@@ -47,6 +48,9 @@ public class WeiboHotSearchCrawler {
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
private static RedisDao redisDao = new RedisDao();
static WeiBoUserDao weiBoUserDao = new WeiBoUserDao();
static WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao();
/**
* @Title: weiboHotSearchTest
* @author hero
......@@ -274,9 +278,13 @@ public class WeiboHotSearchCrawler {
}
try {
//调用weiBoMassageDao添加数据
WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao();
//解析cards,获取热门微博、人物
if (Objects.isNull(weiBoMassageDao)){
weiBoMassageDao = new WeiBoMassageDao();
}
if (Objects.isNull(weiBoUserDao)){
weiBoUserDao = new WeiBoUserDao();
}
for (JSONObject jsonObject : cardsJsons) {
if (nonNull(jsonObject) && !jsonObject.isEmpty()) {
if (jsonObject.containsKey("mblog")) {
......@@ -292,14 +300,18 @@ public class WeiboHotSearchCrawler {
if (Objects.nonNull(weiBoMassage)) {
weiBoMassageDao.addWeiBoMassage(weiBoMassage);
}
analysisWeiBoUsers(cardGroup, document.getString("name"));
List<WeiBoUser> weiBoUserList = analysisWeiBoUsers(cardGroup, document.getString("name"));
if (!weiBoUserList.isEmpty()){
for (int i = 0; i < weiBoUserList.size(); i++) {
weiBoUserDao.addWeiBoUser(weiBoUserList.get(i));
}
}
}
}
}
} catch (Exception e) {
log.error("解析cards失败,未获得热门微博,人物信息",e);
}
return document;
}
}
......@@ -333,10 +345,9 @@ public class WeiboHotSearchCrawler {
* @param topic
* @return
*/
public static void analysisWeiBoUsers(JSONArray cardGroup, String topic) {
public static List<WeiBoUser> analysisWeiBoUsers(JSONArray cardGroup, String topic) {
List<WeiBoUser> weiBoUserList = new ArrayList();
//解析weibo人物信息
//创建weiBoUserDao
WeiBoUserDao weiBoUserDao = new WeiBoUserDao();
Date date = new Date();
for (int i = 0; i < cardGroup.size(); i++) {
if (3==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
......@@ -361,15 +372,10 @@ public class WeiboHotSearchCrawler {
}
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount);
//判断weiBoUser是否为空添加数据
if (weiBoUser!= null) {
//调用weiBoUserDao中的方法添加数据
weiBoUserDao.addWeiBoUser(weiBoUser);
} else {
log.info("未采集到用户信息");
}
weiBoUserList.add(weiBoUser);
}
}
return weiBoUserList;
} else if (10==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
if (cardGroup.getJSONObject(i).containsKey("user")){
JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user");
......@@ -379,7 +385,6 @@ public class WeiboHotSearchCrawler {
String userName = user.getString("screen_name");
//获取认证信息
String attestationMassage = user.getString("verified_reason");
//获取粉丝数
String followers_count = user.getString("followers_count");
Long followerCount =null;
......@@ -391,16 +396,12 @@ public class WeiboHotSearchCrawler {
}
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount);
//判断weiBoUser是否为空添加数据
if (weiBoUser!= null) {
//调用weiBoUserDao中的方法添加数据
weiBoUserDao.addWeiBoUser(weiBoUser);
} else {
log.info("未采集到用户信息");
}
weiBoUserList.add(weiBoUser);
}
return weiBoUserList;
}
}
return Collections.emptyList();
}
......
......@@ -33,8 +33,7 @@ public class WeiBoMassageDao {
* @param weiBoMassage
*/
public void addWeiBoMassage(WeiBoMassage weiBoMassage){
log.info("weiBoMassage对象开始转document对象");
try {
Document document = new Document();
document.put("_id",weiBoMassage.getId());
document.put("userId",weiBoMassage.getUserId());
......@@ -67,14 +66,14 @@ public class WeiBoMassageDao {
document.put("root_text",weiBoMassage.getRoot_text());
document.put("root_source",weiBoMassage.getRoot_source());
}
log.info("weiBoMassage对象转document对象完成");
try {
mongoCollection.insertOne(document);
log.info("数据插入成功");
} catch (Exception e) {
log.error("存储数据时出错,错误为:{}",e);
}
} catch (Exception e) {
log.error("WeiBoMassage对象转Document对象异常",e);
}
}
......
......@@ -23,7 +23,6 @@ public class WeiBoUserDao {
/**
 * Creates the DAO and binds it to the MongoDB collection whose name is configured in
 * {@code DBConfig.weiBoUserCollName}.
 *
 * <p>After obtaining the collection from {@code mongoDatabase}, the constructor calls
 * {@code MongoDBTemplate.createIndex} with the configured database name and that collection
 * name, so this call is made every time a new instance is constructed.
 */
public WeiBoUserDao() {
String collName = DBConfig.weiBoUserCollName;
mongoCollection = mongoDatabase.getCollection(collName);
// Create the index for this collection.
// NOTE(review): which fields are indexed is decided inside MongoDBTemplate.createIndex,
// which is not visible here — confirm before relying on a specific index.
MongoDBTemplate.createIndex(DBConfig.dbName, collName);
}
......@@ -33,9 +32,7 @@ public class WeiBoUserDao {
* @param weiBoUser
*/
public void addWeiBoUser(WeiBoUser weiBoUser){
log.info("WeiBoUser对象开始转document对象");
try {
Document document = new Document();
document.put("_id",weiBoUser.getId());
document.put("userId",weiBoUser.getUserId());
......@@ -46,14 +43,14 @@ public class WeiBoUserDao {
document.put("topic",weiBoUser.getTopic());
document.put("time",weiBoUser.getTime());
document.put("followerCount",weiBoUser.getFollowerCount());
log.info("WeiBoUser对象转document对象完成");
try {
mongoCollection.insertOne(document);
log.info("数据插入成功");
} catch (Exception e) {
log.error("存储数据时出错,错误为:{}",e);
}
} catch (Exception e) {
log.error("WeiBoUser对象转Document对象异常",e);
}
}
}
#local service
#
#线上
#mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@115.236.59.88:30000/hot_search_list?authSource=admin&authMechanism=SCRAM-SHA-1
#old
#线上old
#mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@192.168.0.101:30000,192.168.0.106:30000,192.168.0.108:30000/hot_search_list?authSource=admin&authMechanism=SCRAM-SHA-1
#new
#线上new
mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@192.168.0.150:27017,192.168.0.151:27017,192.168.0.152:27017/hot_search_list?authSource=admin&authMechanism=SCRAM-SHA-1
#local
#mongoLocalUri=mongodb://192.168.0.81:27017/istarshine_data
#service
#mongoUri=mongodb://127.0.0.1:27017/
#ÿ
#备用库
#mongoUri=mongodb://202.107.192.94:37017/hot_search_list
#mongoUri=mongodb://192.168.0.66:27017/
#mongoUri=mongodb://searchhotcrawleruser:searchhotcrawler1q2w3e4r@192.168.0.101:30000/hot_search_list?authSource=admin&authMechanism=SCRAM-SHA-1
......@@ -20,3 +20,4 @@ topicCollName=topic_list
collWechatUserName=wechat_user
weiBoMassageCollName=weibo_massage
weiBoUserCollName=weibo_user
#
......@@ -46,7 +46,10 @@ import static java.util.Objects.nonNull;
{"classpath:applicationContext.xml"})
public class WeiboHotSearchTest {
//调用weiBoMassageDao添加数据
static WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao();
//调用weiBoUserDao添加数据
static WeiBoUserDao weiBoUserDao = new WeiBoUserDao();
private static HttpBoot httpBoot = new HttpBoot.Builder().retryTimes(3).build();
@Test
......@@ -154,8 +157,14 @@ public class WeiboHotSearchTest {
}
}
//调用weiBoMassageDao添加数据
WeiBoMassageDao weiBoMassageDao = new WeiBoMassageDao();
try {
if (Objects.isNull(weiBoMassageDao)){
weiBoMassageDao = new WeiBoMassageDao();
}
if (Objects.isNull(weiBoUserDao)){
weiBoUserDao = new WeiBoUserDao();
}
//解析cards,获取热门微博、人物
for (JSONObject jsonObject : cardsJsons) {
if (nonNull(jsonObject) && !jsonObject.isEmpty()) {
......@@ -172,12 +181,18 @@ public class WeiboHotSearchTest {
if (Objects.nonNull(weiBoMassage)) {
weiBoMassageDao.addWeiBoMassage(weiBoMassage);
}
analysisWeiBoUsers(cardGroup, document.getString("name"));
List<WeiBoUser> weiBoUserList = analysisWeiBoUsers(cardGroup, document.getString("name"));
if (!weiBoUserList.isEmpty()){
for (int i = 0; i < weiBoUserList.size(); i++) {
weiBoUserDao.addWeiBoUser(weiBoUserList.get(i));
}
} else {
log.info("获取数据失败");
}
}
}
}
} catch (Exception e) {
log.error("解析cards失败,未获取热门微博、人物信息",e);
}
break;
}
......@@ -294,10 +309,9 @@ public class WeiboHotSearchTest {
* @param topic
* @return
*/
public static void analysisWeiBoUsers(JSONArray cardGroup, String topic) {
public static List<WeiBoUser> analysisWeiBoUsers(JSONArray cardGroup, String topic) {
List<WeiBoUser> weiBoUserList = new ArrayList<>();
//解析weibo人物信息
//创建weiBoUserDao
WeiBoUserDao weiBoUserDao = new WeiBoUserDao();
Date date = new Date();
for (int i = 0; i < cardGroup.size(); i++) {
if (3==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
......@@ -320,15 +334,10 @@ public class WeiboHotSearchTest {
followerCount = Long.valueOf(split[0])*10000;
}
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount);
//判断weiBoUser是否为空添加数据
if (weiBoUser!= null) {
//调用weiBoUserDao中的方法添加数据
weiBoUserDao.addWeiBoUser(weiBoUser);
} else {
log.info("未采集到用户信息");
}
weiBoUserList.add(weiBoUser);
}
}
return weiBoUserList;
} else if (10==Integer.valueOf(cardGroup.getJSONObject(i).getString("card_type"))) {
if (cardGroup.getJSONObject(i).containsKey("user")){
JSONObject user = cardGroup.getJSONObject(i).getJSONObject("user");
......@@ -348,18 +357,14 @@ public class WeiboHotSearchTest {
followerCount = Long.valueOf(followers_count);
}
WeiBoUser weiBoUser = new WeiBoUser(userId, attestationMassage, userName, topic,date,followerCount);
//判断weiBoUser是否为空添加数据
if (weiBoUser!= null) {
//调用weiBoUserDao中的方法添加数据
weiBoUserDao.addWeiBoUser(weiBoUser);
} else {
log.info("未采集到用户信息");
}
weiBoUserList.add( weiBoUser);
}
return weiBoUserList;
}
}
return Collections.emptyList();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment