Commit 2c471a78 by [zhangzhiwei]

修改微博热搜解析规则

parent c071c8fd
......@@ -38,10 +38,10 @@ public class WeiboHotSearchCrawler {
* @return void 返回类型
*/
public static List<WeiboHotSearch> weiboHotSearch(){
String url = "http://s.weibo.com/top/summary?cate=realtimehot";
String url = "https://s.weibo.com/top/summary?cate=realtimehot";
Map<String,String> headerMap = new HashMap<String,String>();
headerMap.put("Referer", "http://s.weibo.com/top/summary?cate=realtimehot");
// headerMap.put("Referer", "https://s.weibo.com/top/summary?cate=realtimehot");
headerMap.put("Host", "s.weibo.com");
headerMap.put("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36");
headerMap.put("Upgrade-Insecure-Requests", "1");
......@@ -54,12 +54,12 @@ public class WeiboHotSearchCrawler {
htmlBody = HttpClientTemplateOK.get(url, null, headerMap);
if(htmlBody!=null && htmlBody.contains("pl_top_realtimehot")){
try {
String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0];
script = script.replace("(", "").replace(")", "");
JSONObject json = JSONObject.parseObject(script);
String html = json.getString("html");
Document document = Jsoup.parse(html);
Elements elements = document.select("tbody").select("tr");
// String script = htmlBody.split("<script>STK && STK.pageletM && STK.pageletM.view")[5].split("<\\/script>")[0];
// script = script.replace("(", "").replace(")", "");
// JSONObject json = JSONObject.parseObject(script);
// String html = json.getString("html");
Document document = Jsoup.parse(htmlBody);
Elements elements = document.select("div#pl_top_realtimehot").select("tbody").select("tr");
for(Element element : elements){
try {
String id = "http://s.weibo.com"+element.select("p.star_name").select("a").attr("href");
......
......@@ -16,18 +16,18 @@ public class HotSearchRun {
private ScheduledExecutorService scheduExec;
/**
 * Creates the scheduled thread pool that {@link #showTimer()} submits its
 * periodic tasks to.
 */
public HotSearchRun() {
// NOTE(review): this text is a commit diff rendered without +/- markers. The
// size-3 line is presumably the removed version and the size-2 line its
// replacement; confirm against the repository. Read literally, the second
// assignment is the one that remains in effect.
this.scheduExec = Executors.newScheduledThreadPool(3);
this.scheduExec = Executors.newScheduledThreadPool(2);
}
/**
 * Registers the periodic tasks on the scheduled executor. Every task starts
 * immediately (initial delay 0) and is then re-run at a fixed rate.
 */
public void showTimer() {
// WeiboHotSearchRun and ZhihuHotSearchRun are each scheduled once per minute.
scheduExec.scheduleAtFixedRate(new WeiboHotSearchRun(), 0, 1, TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new ZhihuHotSearchRun(), 0, 1 , TimeUnit.MINUTES);
// The two Send* tasks are scheduled once every 60 minutes.
// NOTE(review): these two lines are presumably the ones removed by this commit,
// since main() now starts both Send* classes as threads and their run() methods
// loop with an hourly sleep; confirm against the repository.
scheduExec.scheduleAtFixedRate(new SendZhihuHotSearchRun(), 0, 60 , TimeUnit.MINUTES);
scheduExec.scheduleAtFixedRate(new SendWeiboHotSearchRun(), 0, 60 , TimeUnit.MINUTES);
}
/**
 * Program entry point: registers the scheduled tasks, starts the cache
 * listener, and starts the two Send* threads.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
new HotSearchRun().showTimer();
new CacheListener().startListen();
// Both Send* classes extend Thread, so start() runs their run() methods on
// their own threads rather than on the scheduled pool.
new SendWeiboHotSearchRun().start();
new SendZhihuHotSearchRun().start();
}
}
......@@ -14,6 +14,7 @@ import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.util.WechatConstant;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class SendWeiboHotSearchRun extends Thread{
private WeiboHotSearchDAO weiboHotSearchDAO = new WeiboHotSearchDAO();
......@@ -21,6 +22,8 @@ public class SendWeiboHotSearchRun extends Thread{
@Override
public void run() {
while(true) {
try {
Calendar calendar = Calendar.getInstance();
int hour = calendar.get(Calendar.HOUR_OF_DAY);
logger.info("微博推送,当前系统时间为:" + hour);
......@@ -37,6 +40,13 @@ public class SendWeiboHotSearchRun extends Thread{
sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
}
}
ZhiWeiTools.sleep(1*60*60*1000);
} catch (Exception e) {
logger.debug("微博热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools.sleep(1*60*60*1000);
continue;
}
}
}
/**
......
......@@ -17,12 +17,16 @@ import com.zhiwei.searchhotcrawler.util.Template;
import com.zhiwei.searchhotcrawler.util.WechatCodeUtil;
import com.zhiwei.searchhotcrawler.util.WechatConstant;
import com.zhiwei.zhiweiTools.timeParse.TimeParse;
import com.zhiwei.zhiweiTools.tools.ZhiWeiTools;
public class SendZhihuHotSearchRun extends Thread{
private ZhihuHotSearchDAO zhihuHotSearchDAO = new ZhihuHotSearchDAO();
private static Logger logger = LoggerFactory.getLogger(SendZhihuHotSearchRun.class);
@Override
public void run() {
while(true) {
try {
Calendar calendar = Calendar.getInstance();
int hour = calendar.get(Calendar.HOUR_OF_DAY);
logger.info("知乎推送,当前系统时间为:"+hour);
......@@ -41,6 +45,13 @@ public class SendZhihuHotSearchRun extends Thread{
sendTemplateByUserIds("最近一小时无数据", TimeParse.dateFormartString(new Date(), "yyyy-MM-dd HH:mm:ss"), null);
}
}
ZhiWeiTools.sleep(1*60*60*1000);
} catch (Exception e) {
logger.debug("知乎热搜推送出现问题,问题为:::{}",e.fillInStackTrace());
ZhiWeiTools.sleep(1*60*60*1000);
continue;
}
}
}
/**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment