Commit 2c471a78 by [zhangzhiwei]

修改微博热搜解析规则

parent c071c8fd
...@@ -38,10 +38,10 @@ public class WeiboHotSearchCrawler { ...@@ -38,10 +38,10 @@ public class WeiboHotSearchCrawler {
* @return void 返回类型 * @return void 返回类型
*/ */
public static List<WeiboHotSearch> weiboHotSearch(){ public static List<WeiboHotSearch> weiboHotSearch(){
String url = "http://s.weibo.com/top/summary?cate=realtimehot"; String url = "https://s.weibo.com/top/summary?cate=realtimehot";
Map<String,String> headerMap = new HashMap<String,String>(); Map<String,String> headerMap = new HashMap<String,String>();
headerMap.put("Referer", "http://s.weibo.com/top/summary?cate=realtimehot"); // headerMap.put("Referer", "https://s.weibo.com/top/summary?cate=realtimehot");
headerMap.put("Host", "s.weibo.com"); headerMap.put("Host", "s.weibo.com");
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36"); headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36");
headerMap.put("Upgrade-Insecure-Requests", "1"); headerMap.put("Upgrade-Insecure-Requests", "1");
...@@ -54,12 +54,12 @@ public class WeiboHotSearchCrawler { ...@@ -54,12 +54,12 @@ public class WeiboHotSearchCrawler {
htmlBody = HttpClientTemplateOK.get(url, null, headerMap); htmlBody = HttpClientTemplateOK.get(url, null, headerMap);
if(htmlBody!=null && htmlBody.contains("pl_top_realtimehot")){ if(htmlBody!=null && htmlBody.contains("pl_top_realtimehot")){
try { try {
String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0]; // String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0];
script = script.replace("(", "").replace(")", ""); // script = script.replace("(", "").replace(")", "");
JSONObject json = JSONObject.parseObject(script); // JSONObject json = JSONObject.parseObject(script);
String html = json.getString("html"); // String html = json.getString("html");
Document document = Jsoup.parse(html); Document document = Jsoup.parse(htmlBody);
Elements elements = document.select("tbody").select("tr"); Elements elements = document.select("div#pl_top_realtimehot").select("tbody").select("tr");
for(Element element : elements){ for(Element element : elements){
try { try {
String id = "http://s.weibo.com"+element.select("p.star_name").select("a").attr("href"); String id = "http://s.weibo.com"+element.select("p.star_name").select("a").attr("href");
...@@ -97,7 +97,7 @@ public class WeiboHotSearchCrawler { ...@@ -97,7 +97,7 @@ public class WeiboHotSearchCrawler {
return list; return list;
} }
/** /**
......
...@@ -16,18 +16,18 @@ public class HotSearchRun { ...@@ -16,18 +16,18 @@ public class HotSearchRun {
private ScheduledExecutorService scheduExec; private ScheduledExecutorService scheduExec;
public HotSearchRun() { public HotSearchRun() {
this.scheduExec = Executors.newScheduledThreadPool(3); this.scheduExec = Executors.newScheduledThreadPool(2);
} }
public void showTimer() { public void showTimer() {
scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES); scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 1 , TimeUnit.MINUTES); scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 1 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new SendZhihuHotSearchRun(), 0, 60 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new SendWeiboHotSearchRun(), 0, 60 , TimeUnit.MINUTES);
} }
public static void main(String[] args) { public static void main(String[] args) {
new HotSearchRun().showTimer(); new HotSearchRun().showTimer();
new CacheListener().startListen(); new CacheListener().startListen();
new SendWeiboHotSearchRun().start();
new SendZhihuHotSearchRun().start();
} }
} }
...@@ -14,6 +14,7 @@ import com.zhiwei.searchhotcrawler.util.Template; ...@@ -14,6 +14,7 @@ import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil; import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.util.WechatConstant; import com.zhiwei.searchhotcrawler.util.WechatConstant;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class SendWeiboHotSearchRun extends Thread{ public class SendWeiboHotSearchRun extends Thread{
private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO(); private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO();
...@@ -21,20 +22,29 @@ public class SendWeiboHotSearchRun extends Thread{ ...@@ -21,20 +22,29 @@ public class SendWeiboHotSearchRun extends Thread{
@Override @Override
public void run() { public void run() {
Calendar calendar = Calendar.getInstance(); while(true) {
int hour = calendar.get(Calendar.HOUR_OF_DAY); try {
logger.info("微博推送,当前系统时间为:" + hour); Calendar calendar = Calendar.getInstance();
if(hour > 6 && hour < 23){ int hour = calendar.get(Calendar.HOUR_OF_DAY);
List<DBObject> list = weiboHotSearchDAO.getWeiboHotOneHour(); logger.info("微博推送,当前系统时间为:" + hour);
if(list!=null && list.size()>0){ if(hour > 6 && hour < 23){
for(DBObject weibo : list){ List<DBObject> list = weiboHotSearchDAO.getWeiboHotOneHour();
String title = weibo.get("name").toString(); if(list!=null && list.size()>0){
String time = TimeParse.dateFormartString((Date)weibo.get("time"), "yyyy-MM-dd HH:mm:ss"); for(DBObject weibo : list){
String url = weibo.get("url").toString(); String title = weibo.get("name").toString();
sendTemplateByUserIds(title, time, url); String time = TimeParse.dateFormartString((Date)weibo.get("time"), "yyyy-MM-dd HH:mm:ss");
String url = weibo.get("url").toString();
sendTemplateByUserIds(title, time, url);
}
}else{
sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
}
} }
}else{ ZhiWeiTools.sleep(1*60*60*1000);
sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null); } catch (Exception e) {
logger.debug("微博热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools.sleep(1*60*60*1000);
continue;
} }
} }
} }
......
...@@ -17,28 +17,39 @@ import com.zhiwei.searchhotcrawler.util.Template; ...@@ -17,28 +17,39 @@ import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil; import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.util.WechatConstant; import com.zhiwei.searchhotcrawler.util.WechatConstant;
import com.zhiwei.zhiweiTools.timeParse.TimeParse; import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class SendZhihuHotSearchRun extends Thread{ public class SendZhihuHotSearchRun extends Thread{
private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO(); private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO();
private static Logger logger = LoggerFactory.getLogger(SendZhihuHotSearchRun.class); private static Logger logger = LoggerFactory.getLogger(SendZhihuHotSearchRun.class);
@Override @Override
public void run() { public void run() {
Calendar calendar = Calendar.getInstance();
int hour = calendar.get(Calendar.HOUR_OF_DAY); while(true) {
logger.info("知乎推送,当前系统时间为:"+hour); try {
if(hour > 6 && hour <23){ Calendar calendar = Calendar.getInstance();
List<DBObject> list = zhihuHotSearchDAO.getZhiHuHotSearch(); int hour = calendar.get(Calendar.HOUR_OF_DAY);
if(list!=null && list.size()>0){ logger.info("知乎推送,当前系统时间为:"+hour);
for(DBObject zhihu : list){ if(hour > 6 && hour <23){
String title = zhihu.get("display_query").toString(); List<DBObject> list = zhihuHotSearchDAO.getZhiHuHotSearch();
String time = TimeParse.dateFormartString((Date)zhihu.get("time"), "yyyy-MM-dd HH:mm:ss"); if(list!=null && list.size()>0){
String url = zhihu.get("_id").toString(); for(DBObject zhihu : list){
if(calendar.get(Calendar.HOUR_OF_DAY) > 6 && calendar.get(Calendar.HOUR_OF_DAY) < 23){ String title = zhihu.get("display_query").toString();
sendTemplateByUserIds(title, time, url); String time = TimeParse.dateFormartString((Date)zhihu.get("time"), "yyyy-MM-dd HH:mm:ss");
String url = zhihu.get("_id").toString();
if(calendar.get(Calendar.HOUR_OF_DAY) > 6 && calendar.get(Calendar.HOUR_OF_DAY) < 23){
sendTemplateByUserIds(title, time, url);
}
}
}else{
sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
} }
} }
}else{ ZhiWeiTools.sleep(1*60*60*1000);
sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null); } catch (Exception e) {
logger.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools.sleep(1*60*60*1000);
continue;
} }
} }
} }
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment