Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
searchhotcrawler
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhiwei
searchhotcrawler
Commits
04c6bdac
Commit
04c6bdac
authored
Dec 13, 2021
by
chenweitao
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'working' into 'master'
修复日志漏洞 See merge request
!161
parents
2290d4e2
c51af150
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
43 additions
and
55 deletions
+43
-55
src/main/java/com/zhiwei/searchhotcrawler/cache/CacheListener.java
+1
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
+2
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/HotSearch36KrCrawler.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
+2
-3
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
+1
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
+1
-2
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
+1
-2
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearch36KrCrawlerTest.java
+1
-1
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
+3
-6
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
+3
-4
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
+3
-4
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
+3
-4
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
+0
-0
src/main/java/com/zhiwei/searchhotcrawler/util/TipsUtils.java
+4
-4
src/main/java/com/zhiwei/searchhotcrawler/util/WechatCodeUtil.java
+16
-15
No files found.
src/main/java/com/zhiwei/searchhotcrawler/cache/CacheListener.java
View file @
04c6bdac
package
com
.
zhiwei
.
searchhotcrawler
.
cache
;
import
lombok.extern.log4j.Log4j2
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.tools.tools.ZhiWeiTools
;
...
...
@@ -26,7 +24,7 @@ public class CacheListener {
}
ZhiWeiTools
.
sleep
(
500
);
}
}
}
}.
start
();
}
}
src/main/java/com/zhiwei/searchhotcrawler/crawler/BaiDuHotSearchCrawler.java
View file @
04c6bdac
...
...
@@ -129,7 +129,7 @@ public class BaiDuHotSearchCrawler {
String
everurl
=
element
.
select
(
"td.keyword"
).
select
(
"a.list-title"
).
attr
(
"href"
);
// 获取关键词(String)
String
kw
=
element
.
select
(
"td.keyword"
).
select
(
"a.list-title"
).
text
();
// logger.info("关键词:{}", kw);
// log.info("关键词:{}", kw);
//从连接中获取正确编码关键词
try
{
if
(!
everurl
.
isEmpty
())
{
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/DouyinHotSearchCrawler.java
View file @
04c6bdac
...
...
@@ -9,8 +9,7 @@ import lombok.extern.log4j.Log4j2;
import
okhttp3.Request
;
import
okhttp3.Response
;
import
org.apache.commons.lang3.StringUtils
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
...
...
@@ -71,7 +70,7 @@ public class DouyinHotSearchCrawler {
hotValueStr
=
wl
.
getString
(
"hot_value"
);
Long
hotValue
=
null
;
hotValue
=
Long
.
valueOf
(
hotValueStr
);
// logger.info("热度为:::{}", hot_value);
// log.info("热度为:::{}", hot_value);
HotSearchList
douyin
=
new
HotSearchList
(
null
,
word
,
hotValue
,
position
,
HotSearchType
.
抖音热搜
.
name
(),
date
);
list
.
add
(
douyin
);
}
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/HotSearch36KrCrawler.java
View file @
04c6bdac
...
...
@@ -99,7 +99,7 @@ public class HotSearch36KrCrawler {
rank
++;
// 获取关键词(String)
String
keyWord
=
element
.
select
(
"p.title-wrapper"
).
select
(
"a.article-item-title"
).
text
();
// logger.info("关键词:{}", kw);
// log.info("关键词:{}", kw);
// 获取关键词相关链接everurl(String)
String
everurl
=
element
.
select
(
"p.title-wrapper"
).
select
(
"a.article-item-title"
).
attr
(
"href"
);
// 获取搜索指数count(int)
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/SougoHotSearchCrawler.java
View file @
04c6bdac
...
...
@@ -13,8 +13,7 @@ import org.jsoup.Jsoup;
import
org.jsoup.nodes.Document
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.zhiwei.crawler.core.HttpBoot
;
import
com.zhiwei.crawler.core.proxy.ProxyHolder
;
...
...
@@ -70,7 +69,7 @@ public class SougoHotSearchCrawler {
// 获取关键词(String)
String
kw
=
element
.
select
(
"li"
).
select
(
"a"
).
attr
(
"title"
);
// logger.info("关键词:{}", kw);
// log.info("关键词:{}", kw);
String
everurl
=
element
.
select
(
"li"
).
select
(
"a"
).
attr
(
"href"
);
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboSuperTopicCrawler.java
View file @
04c6bdac
...
...
@@ -13,8 +13,7 @@ import lombok.extern.log4j.Log4j2;
import
okhttp3.Request
;
import
okhttp3.Response
;
import
org.apache.commons.lang3.StringUtils
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/WeiboTopicCrawler.java
View file @
04c6bdac
...
...
@@ -18,8 +18,7 @@ import org.apache.commons.lang3.StringUtils;
import
org.jsoup.Jsoup
;
import
org.jsoup.nodes.Element
;
import
org.jsoup.select.Elements
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
java.util.*
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
...
...
src/main/java/com/zhiwei/searchhotcrawler/crawler/ZhihuHotSearchCrawler.java
View file @
04c6bdac
...
...
@@ -9,8 +9,7 @@ import java.util.Map;
import
lombok.extern.log4j.Log4j2
;
import
okhttp3.Request
;
import
okhttp3.Response
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
...
...
src/main/java/com/zhiwei/searchhotcrawler/test/HotSearch36KrCrawlerTest.java
View file @
04c6bdac
...
...
@@ -99,7 +99,7 @@ public class HotSearch36KrCrawlerTest {
rank
++;
// 获取关键词(String)
String
keyWord
=
element
.
select
(
"p.title-wrapper"
).
select
(
"a.article-item-title"
).
text
();
// logger.info("关键词:{}", kw);
// log.info("关键词:{}", kw);
// 获取关键词相关链接everurl(String)
String
everurl
=
element
.
select
(
"p.title-wrapper"
).
select
(
"a.article-item-title"
).
attr
(
"href"
);
// 获取搜索指数count(int)
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/BaiduHotSearchRun.java
View file @
04c6bdac
...
...
@@ -12,8 +12,6 @@ import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
...
...
@@ -39,8 +37,8 @@ public class BaiduHotSearchRun extends Thread{
ZhiWeiTools
.
sleep
(
50
);
}
}
private
void
getHotList
()
{
log
.
info
(
"百度风云榜采集开始........"
);
// HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
...
...
@@ -70,4 +68,4 @@ public class BaiduHotSearchRun extends Thread{
// log.info("知乎话题采集结束........");
}
}
\ No newline at end of file
}
src/main/java/com/zhiwei/searchhotcrawler/timer/DouyinHotSearchRun.java
View file @
04c6bdac
...
...
@@ -12,8 +12,7 @@ import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
...
...
@@ -26,7 +25,7 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
public
class
DouyinHotSearchRun
extends
Thread
{
public
static
List
<
HotSearchList
>
list
=
new
ArrayList
<>();
@Override
public
void
run
()
{
boolean
f
=
true
;
...
...
@@ -41,7 +40,7 @@ public class DouyinHotSearchRun extends Thread{
ZhiWeiTools
.
sleep
(
50
);
}
}
/**
* 获取热搜列表
* TODO
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/SougoHotSearchRun.java
View file @
04c6bdac
...
...
@@ -10,8 +10,7 @@ import com.zhiwei.searchhotcrawler.dao.HotSearchListDAO;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
...
...
@@ -36,8 +35,8 @@ public class SougoHotSearchRun extends Thread {
ZhiWeiTools
.
sleep
(
50
);
}
}
private
void
getHotList
()
{
// HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/ZhihuHotSearchRun.java
View file @
04c6bdac
...
...
@@ -9,8 +9,7 @@ import com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO;
import
com.zhiwei.searchhotcrawler.util.TipsUtils
;
import
lombok.extern.log4j.Log4j2
;
import
org.bson.Document
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.mongodb.BasicDBObject
;
import
com.mongodb.DBObject
;
...
...
@@ -21,7 +20,7 @@ import com.zhiwei.tools.tools.ZhiWeiTools;
@Log4j2
public
class
ZhihuHotSearchRun
extends
Thread
{
@Override
public
void
run
()
{
boolean
f
=
true
;
...
...
@@ -37,7 +36,7 @@ public class ZhihuHotSearchRun extends Thread{
}
}
private
void
getHotList
()
{
// HotSearchListDAO hotSearchDAO = new HotSearchListDAO();
// HotSearchCacheDAO hotSearchCacheDAO = new HotSearchCacheDAO();
...
...
src/main/java/com/zhiwei/searchhotcrawler/timer/quartz/GatherTimer.java
View file @
04c6bdac
This diff is collapsed.
Click to expand it.
src/main/java/com/zhiwei/searchhotcrawler/util/TipsUtils.java
View file @
04c6bdac
...
...
@@ -4,9 +4,9 @@ import com.zhiwei.searchhotcrawler.bean.HotSearchList;
import
com.zhiwei.searchhotcrawler.dao.HotSearchCacheDAO
;
import
com.zhiwei.searchhotcrawler.dao.HotSearchListDAO
;
import
org.bson.Document
;
import
org.checkerframework.checker.units.qual.A
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
java.util.*
;
import
java.util.concurrent.ExecutorService
;
...
...
@@ -21,7 +21,7 @@ public class TipsUtils {
private
static
String
key
=
"a8e26ce3-8aaa-4d3e-bcf6-30b81526050b"
;
private
Logger
logger
=
LoggerFactory
.
getLogger
(
TipsUtils
.
class
);
private
static
final
Logger
logger
=
LogManager
.
getLogger
(
TipsUtils
.
class
);
private
static
Map
<
String
,
Date
>
typeTips
=
new
HashMap
<>();
...
...
src/main/java/com/zhiwei/searchhotcrawler/util/WechatCodeUtil.java
View file @
04c6bdac
...
...
@@ -8,8 +8,9 @@ import java.util.Map;
import
okhttp3.Request
;
import
okhttp3.Response
;
import
org.apache.commons.lang3.StringUtils
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.apache.logging.log4j.LogManager
;
import
org.apache.logging.log4j.Logger
;
import
com.alibaba.fastjson.JSONArray
;
import
com.alibaba.fastjson.JSONObject
;
...
...
@@ -21,7 +22,7 @@ import okhttp3.MediaType;
import
okhttp3.RequestBody
;
public
class
WechatCodeUtil
{
private
static
Logger
logger
=
LoggerFactory
.
getLogger
(
WechatCodeUtil
.
class
);
private
static
Logger
log
=
LogManager
.
getLogger
(
WechatCodeUtil
.
class
);
private
static
HttpBoot
httpBoot
=
new
HttpBoot
.
Builder
().
retryTimes
(
3
).
build
();
/**
* @Title: getToken
...
...
@@ -43,7 +44,7 @@ public class WechatCodeUtil {
result
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
logger
.
error
(
"获取微信公众号推送token失败,问题为:::{}"
,
e
.
fillInStackTrace
());
log
.
error
(
"获取微信公众号推送token失败,问题为:::{}"
,
e
.
fillInStackTrace
());
return
null
;
}
if
(
result
!=
null
)
{
...
...
@@ -75,7 +76,7 @@ public class WechatCodeUtil {
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
Exception
e
)
{
logger
.
error
(
"消息推送失败,错误为::{}"
,
e
.
fillInStackTrace
());
log
.
error
(
"消息推送失败,错误为::{}"
,
e
.
fillInStackTrace
());
msgid
=
0
;
}
if
(
StringUtils
.
isNotBlank
(
htmlBody
))
{
...
...
@@ -85,7 +86,7 @@ public class WechatCodeUtil {
msgid
=
jsonObject
.
getIntValue
(
"msgid"
);
}
else
{
msgid
=
0
;
logger
.
info
(
"消息推送失败,错误为::{}"
,
jsonObject
.
toString
());
log
.
info
(
"消息推送失败,错误为::{}"
,
jsonObject
.
toString
());
}
}
}
...
...
@@ -117,7 +118,7 @@ public class WechatCodeUtil {
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
){
logger
.
error
(
"页面连接获取失败"
,
e
);
log
.
error
(
"页面连接获取失败"
,
e
);
return
null
;
}
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"data"
))
{
...
...
@@ -126,12 +127,12 @@ public class WechatCodeUtil {
if
(
jsonObject
.
containsKey
(
"data"
))
{
return
(
List
<
String
>)
jsonObject
.
getJSONObject
(
"data"
).
getObject
(
"openid"
,
List
.
class
);
}
else
{
logger
.
info
(
"拉取用户列表时,出现问题{}"
,
jsonObject
);
log
.
info
(
"拉取用户列表时,出现问题{}"
,
jsonObject
);
}
}
}
}
else
{
logger
.
info
(
"token 获取失败"
);
log
.
info
(
"token 获取失败"
);
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
...
...
@@ -156,7 +157,7 @@ public class WechatCodeUtil {
try
(
Response
response
=
httpBoot
.
syncCall
(
request
)){
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
){
logger
.
error
(
"页面链接获取失败"
,
e
);
log
.
error
(
"页面链接获取失败"
,
e
);
return
null
;
}
if
(
StringUtils
.
isNotBlank
(
htmlBody
)
&&
htmlBody
.
contains
(
"data"
))
{
...
...
@@ -165,12 +166,12 @@ public class WechatCodeUtil {
if
(
jsonObject
.
containsKey
(
"data"
))
{
return
(
List
<
String
>)
jsonObject
.
getJSONObject
(
"data"
).
getObject
(
"openid"
,
List
.
class
);
}
else
{
logger
.
info
(
"拉取用户列表时,出现问题{}"
,
jsonObject
);
log
.
info
(
"拉取用户列表时,出现问题{}"
,
jsonObject
);
}
}
}
}
else
{
logger
.
info
(
"token 获取失败"
);
log
.
info
(
"token 获取失败"
);
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
...
...
@@ -199,7 +200,7 @@ public class WechatCodeUtil {
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
logger
.
error
(
"获取分组id时出现错误"
,
e
.
fillInStackTrace
());
log
.
error
(
"获取分组id时出现错误"
,
e
.
fillInStackTrace
());
return
null
;
}
if
(
htmlBody
!=
null
)
{
...
...
@@ -232,7 +233,7 @@ public class WechatCodeUtil {
try
(
Response
response
=
httpBoot
.
syncCall
(
request
))
{
htmlBody
=
response
.
body
().
string
();
}
catch
(
IOException
e
)
{
logger
.
error
(
"获取分组id时出现错误"
,
e
.
fillInStackTrace
());
log
.
error
(
"获取分组id时出现错误"
,
e
.
fillInStackTrace
());
return
null
;
}
if
(
htmlBody
!=
null
)
{
...
...
@@ -245,7 +246,7 @@ public class WechatCodeUtil {
resultMap
.
put
(
name
,
id
);
}
}
else
{
logger
.
info
(
"获取分组id时出现错误,数据为:::{}"
,
htmlBody
);
log
.
info
(
"获取分组id时出现错误,数据为:::{}"
,
htmlBody
);
}
}
return
resultMap
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment