Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
automatic-test
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟涨钱
automatic-test
Commits
94af376c
Commit
94af376c
authored
Aug 24, 2021
by
朽木不可雕也
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加虎牙自动创建任务
parent
cee6683e
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
270 additions
and
4 deletions
+270
-4
barrage-crawler-server/src/main/java/com/zhiweidata/automatictest/barragecrawlerserver/config/BaseConfig.java
+21
-0
barrage-crawler-server/src/main/java/com/zhiweidata/automatictest/barragecrawlerserver/createtasks/HuyaCreateTaskJob.java
+115
-3
barrage-crawler-server/src/main/java/com/zhiweidata/automatictest/barragecrawlerserver/entity/BarrageCollectionTask.java
+63
-0
barrage-crawler-server/src/main/java/com/zhiweidata/automatictest/barragecrawlerserver/exception/BarrageHomePageException.java
+19
-0
pom.xml
+8
-0
public/pom.xml
+11
-0
public/src/main/java/com/zhiweidata/automatictest/publics/BeanContainer.java
+30
-1
public/src/main/resources/log4j2.xml
+3
-0
No files found.
barrage-crawler-server/src/main/java/com/zhiweidata/automatictest/barragecrawlerserver/config/BaseConfig.java
0 → 100644
View file @
94af376c
package
com
.
zhiweidata
.
automatictest
.
barragecrawlerserver
.
config
;
/**
 * Basic configuration constants shared by the barrage crawler server tests.
 *
 * @author aszswaz
 * @createTime 2021-08-24 15:47:13
 * @ide IntelliJ IDEA
 */
@SuppressWarnings("JavaDoc")
public class BaseConfig {
    /** Name of the MongoDB collection identified by the key "live_url". */
    public static final String MONGO_LIVE_COLLECTION = "live_url";

    /** Base URL of the barrage crawler server API. */
    public static final String BASE_URL = "http://192.168.0.119:8080/barrage-crawler-server/";

    /** Endpoint for collection tasks, derived from {@link #BASE_URL}. */
    public static final String TASK_URL = BASE_URL + "tasks/";
}
barrage-crawler-server/src/main/java/com/zhiweidata/automatictest/barragecrawlerserver/createtasks/HuyaCreateTaskJob.java
View file @
94af376c
package
com
.
zhiweidata
.
automatictest
.
barragecrawlerserver
.
createtasks
;
import
com.fasterxml.jackson.core.JsonProcessingException
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
import
com.mongodb.client.MongoCollection
;
import
com.zhiweidata.automatictest.barragecrawlerserver.entity.BarrageCollectionTask
;
import
com.zhiweidata.automatictest.barragecrawlerserver.exception.BarrageHomePageException
;
import
java.io.IOException
;
import
java.util.HashSet
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Set
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
lombok.NonNull
;
import
lombok.extern.slf4j.Slf4j
;
import
org.apache.http.HttpResponse
;
import
org.apache.http.HttpStatus
;
import
org.apache.http.StatusLine
;
import
org.apache.http.client.methods.HttpGet
;
import
org.apache.http.client.methods.HttpPost
;
import
org.apache.http.entity.ByteArrayEntity
;
import
org.apache.http.entity.ContentType
;
import
org.apache.http.util.EntityUtils
;
import
org.bson.Document
;
import
org.jetbrains.annotations.NotNull
;
import
org.jsoup.nodes.Element
;
import
org.quartz.Job
;
import
org.quartz.JobExecutionContext
;
import
org.seimicrawler.xpath.JXDocument
;
import
org.seimicrawler.xpath.JXNode
;
import
static
com
.
zhiweidata
.
automatictest
.
barragecrawlerserver
.
config
.
BaseConfig
.
MONGO_LIVE_COLLECTION
;
import
static
com
.
zhiweidata
.
automatictest
.
barragecrawlerserver
.
config
.
BaseConfig
.
TASK_URL
;
import
static
com
.
zhiweidata
.
automatictest
.
publics
.
BeanContainer
.
HTTP_CLIENT
;
import
static
com
.
zhiweidata
.
automatictest
.
publics
.
BeanContainer
.
MONGO_DATABASE
;
import
static
com
.
zhiweidata
.
automatictest
.
publics
.
BeanContainer
.
STANDARD_DATE_FORMAT
;
/**
* 从虎牙直播的首页获得直播间的地址,并发送到弹幕采集服务器进行测试
...
...
@@ -17,17 +44,102 @@ import static com.zhiweidata.automatictest.publics.BeanContainer.HTTP_CLIENT;
* @ide IntelliJ IDEA
*/
@Slf4j
@SuppressWarnings
({
"JavaDoc"
,
"unused"
,
"SpellCheckingInspection"
})
@SuppressWarnings
({
"JavaDoc"
,
"unused"
,
"SpellCheckingInspection"
,
"GrazieInspection"
})
public
class
HuyaCreateTaskJob
implements
Job
{
/**
 * Matches the inline {@code var slides= [{...}];} JavaScript assignment in the
 * home page; the live-room entries of the slideshow are read from that array.
 */
private static final Pattern LIVE_URL =
        Pattern.compile("var slides= \\[\\{[\\w\\W]+?}];");

/**
 * MongoDB collection of live-room addresses, looked up by
 * {@code MONGO_LIVE_COLLECTION} in the shared database.
 */
private final MongoCollection<Document> liveUrls =
        MONGO_DATABASE.getCollection(MONGO_LIVE_COLLECTION);
/**
 * Job entry point: downloads the Huya home page and hands its HTML to
 * {@code createTaskByHtml} so a collection task is created per live room.
 *
 * <p>A non-200 response is logged and the run ends without creating tasks.
 * Every exception is logged and not rethrown.
 *
 * @param jobExecutionContext execution context supplied by the scheduler; not used
 */
@Override
public void execute(JobExecutionContext jobExecutionContext) {
    try {
        HttpResponse response = HTTP_CLIENT.execute(new HttpGet("https://www.huya.com/"));
        // Read the body exactly once and reuse it: a streamed response entity is
        // consumed by EntityUtils.toString, so a second read would fail.
        String html = EntityUtils.toString(response.getEntity());

        StatusLine line = response.getStatusLine();
        if (line.getStatusCode() != HttpStatus.SC_OK) {
            log.error("status code: {}, message: {}", line.getStatusCode(), line.getReasonPhrase());
            return;
        }

        this.createTaskByHtml(html);
    } catch (Exception e) {
        log.error(e.getMessage(), e);
    }
}
/**
 * Parses live-room URLs out of the home page HTML and creates a collection
 * task for each distinct URL.
 *
 * <p>URLs come from two places: the {@code href} of the anchors selected by
 * three XPath expressions, and the {@code profileRoom} value of every entry in
 * the inline {@code var slides= [...]} JavaScript array. Anchors without an
 * {@code href} and slide entries without a {@code profileRoom} are skipped so
 * that no empty or ".../null" address is submitted.
 *
 * @param html HTML source of the home page
 * @throws JsonProcessingException  if the slides array is not valid JSON
 * @throws BarrageHomePageException if no live-room URL could be extracted
 */
private void createTaskByHtml(@NonNull String html) throws JsonProcessingException {
    JXDocument document = JXDocument.create(html);
    // Named differently from the liveUrls field to avoid shadowing the Mongo collection.
    final Set<String> roomUrls = new HashSet<>();

    List<JXNode> nodes = document.selN("//a[@class='remen-item j_game-classify-remen-item']");
    nodes.addAll(document.selN("//div[@class='box-hd']/ul[@class='more-list']/li/a"));
    nodes.addAll(document.selN("//li[@class='game-live-item']/a[@class='title']"));
    for (JXNode jxNode : nodes) {
        Element element = jxNode.asElement();
        String href = element.attr("href");
        if (href != null && !href.trim().isEmpty()) {
            roomUrls.add(href);
        }
    }

    Matcher matcher = LIVE_URL.matcher(html);
    if (matcher.find()) {
        // Strip the "var slides= " prefix and the trailing ';' to leave a bare JSON array.
        String jsonString = matcher.group().replaceFirst("var slides= ", "");
        jsonString = jsonString.substring(0, jsonString.length() - 1);

        final List<?> jsonList = new ObjectMapper().readValue(jsonString, List.class);
        for (Object element : jsonList) {
            Map<?, ?> elementMap = (Map<?, ?>) element;
            Object profileRoom = elementMap.get("profileRoom");
            if (profileRoom != null) {
                roomUrls.add(String.format("https://www.huya.com/%s", profileRoom));
            }
        }
    }

    if (roomUrls.isEmpty()) {
        throw new BarrageHomePageException("虎牙弹幕获取失败");
    }

    // Fewer than 50 addresses is logged at WARN level instead of INFO.
    int size = roomUrls.size();
    if (size >= 50) {
        log.info("虎牙直播间地址数量:{}", size);
    } else {
        log.warn("虎牙直播间地址数量:{}", size);
    }

    roomUrls.forEach(this::createCollectionTask);
}
/**
 * Asks the barrage collection server to create a collection task for one live
 * room and stores the server's reply.
 *
 * <p>The room title is fetched via {@code getLiveName}, the task is posted as
 * JSON to {@code TASK_URL}, and the JSON reply is parsed into a map. If the
 * reply carries an integer {@code code}, a document holding {@code liveUrl}
 * plus every key/value of the reply is inserted into the {@code liveUrls}
 * collection; otherwise the reply is logged and nothing is stored. Every
 * exception is logged and not rethrown.
 *
 * @param liveUrl address of the live room
 */
private void createCollectionTask(String liveUrl) {
    try {
        BarrageCollectionTask collectionTask = new BarrageCollectionTask(liveUrl, this.getLiveName(liveUrl));
        if (log.isDebugEnabled()) {
            // Task times are epoch seconds; the formatter expects milliseconds.
            String startTime = STANDARD_DATE_FORMAT.format(collectionTask.getStartTime() * 1000L);
            String endTime = STANDARD_DATE_FORMAT.format(collectionTask.getEndTime() * 1000L);
            log.debug("collection start time: {}, collection end time: {}", startTime, endTime);
        }

        ObjectMapper objectMapper = new ObjectMapper();
        HttpPost post = new HttpPost(TASK_URL);
        post.setEntity(new ByteArrayEntity(
                objectMapper.writeValueAsBytes(collectionTask), ContentType.APPLICATION_JSON));

        HttpResponse response = HTTP_CLIENT.execute(post);
        Map<?, ?> jsonMap = objectMapper.readValue(response.getEntity().getContent(), Map.class);

        // Validate explicitly rather than failing on an unboxing cast with no message.
        Object code = jsonMap.get("code");
        if (!(code instanceof Integer)) {
            log.error("unexpected response for {}: missing or non-integer \"code\": {}", liveUrl, jsonMap);
            return;
        }

        Document document = new Document("liveUrl", liveUrl);
        jsonMap.forEach((key, value) -> document.put((String) key, value));
        this.liveUrls.insertOne(document);
    } catch (Exception e) {
        log.error(e.getMessage(), e);
    }
}
/**
 * Downloads a live-room page and returns the text of its
 * {@code <h1 id="J_roomTitle">} element.
 *
 * @param liveUrl address of the live room
 * @return text of the room title element
 * @throws IOException if the request fails or the page has no title element
 */
@NotNull
private String getLiveName(String liveUrl) throws IOException {
    HttpResponse response = HTTP_CLIENT.execute(new HttpGet(liveUrl));
    String html = EntityUtils.toString(response.getEntity());

    JXNode node = JXDocument.create(html).selNOne("//h1[@id='J_roomTitle']");
    if (node == null) {
        // No match would otherwise surface as a NullPointerException with no
        // message, which also contradicts the @NotNull contract.
        throw new IOException("room title not found: url=" + liveUrl
                + ", status=" + response.getStatusLine().getStatusCode());
    }
    return node.asElement().text();
}
}
barrage-crawler-server/src/main/java/com/zhiweidata/automatictest/barragecrawlerserver/entity/BarrageCollectionTask.java
0 → 100644
View file @
94af376c
package
com
.
zhiweidata
.
automatictest
.
barragecrawlerserver
.
entity
;
import
com.fasterxml.jackson.annotation.JsonProperty
;
import
java.util.Calendar
;
import
java.util.Random
;
import
lombok.EqualsAndHashCode
;
import
lombok.Getter
;
import
lombok.Setter
;
import
lombok.ToString
;
/**
 * Describes one barrage collection task: a name, the live-room URL and the
 * collection window, serialized to JSON under the property names
 * {@code name}, {@code url}, {@code startTime} and {@code endTime}.
 *
 * @author aszswaz
 * @createTime 2021-08-24 15:53:39
 * @ide IntelliJ IDEA
 */
@SuppressWarnings("JavaDoc")
@Getter
@Setter
@EqualsAndHashCode
@ToString
public class BarrageCollectionTask {
    /**
     * Task name.
     */
    @JsonProperty(value = "name")
    private String name;
    /**
     * Live-room address.
     */
    @JsonProperty(value = "url")
    private String url;
    /**
     * Collection start time, in epoch seconds.
     */
    @JsonProperty(value = "startTime")
    private Integer startTime;
    /**
     * Collection end time, in epoch seconds.
     */
    @JsonProperty(value = "endTime")
    private Integer endTime;

    /**
     * Creates a task that starts at the top of the next hour (default time
     * zone) and ends a random 1 to 300 minutes after that.
     *
     * @param liveUrl  live-room address, stored as {@code url}
     * @param liveName room title, stored as {@code name}
     */
    public BarrageCollectionTask(String liveUrl, String liveName) {
        this.url = liveUrl;
        this.name = liveName;

        // Truncate to the current hour, then move one hour forward.
        Calendar calendar = Calendar.getInstance();
        calendar.set(Calendar.MINUTE, 0);
        calendar.set(Calendar.SECOND, 0);
        calendar.set(Calendar.MILLISECOND, 0);
        calendar.add(Calendar.HOUR_OF_DAY, 1);
        // NOTE: epoch seconds are narrowed to int, which overflows in 2038.
        this.startTime = (int) (calendar.getTimeInMillis() / 1000);

        // Uniform duration in [1, 300] minutes, drawn directly instead of
        // re-rolling nextInt(301) until it is non-zero.
        int minute = new Random().nextInt(300) + 1;
        calendar.add(Calendar.MINUTE, minute);
        this.endTime = (int) (calendar.getTimeInMillis() / 1000);
    }
}
barrage-crawler-server/src/main/java/com/zhiweidata/automatictest/barragecrawlerserver/exception/BarrageHomePageException.java
0 → 100644
View file @
94af376c
package
com
.
zhiweidata
.
automatictest
.
barragecrawlerserver
.
exception
;
/**
 * Signals that a live-streaming home page could not be parsed.
 *
 * @author aszswaz
 * @createTime 2021-08-24 15:23:45
 * @ide IntelliJ IDEA
 */
@SuppressWarnings({"JavaDoc", "unused"})
public class BarrageHomePageException extends RuntimeException {

    /**
     * @param message description of the parsing failure
     */
    public BarrageHomePageException(String message) {
        super(message);
    }

    /**
     * @param message description of the parsing failure
     * @param cause   underlying exception that triggered the failure
     */
    public BarrageHomePageException(String message, Throwable cause) {
        super(message, cause);
    }
}
pom.xml
View file @
94af376c
...
...
@@ -71,6 +71,13 @@
<version>
${junit.version}
</version>
<scope>
test
</scope>
</dependency>
<dependency>
<groupId>
org.jetbrains
</groupId>
<artifactId>
annotations
</artifactId>
<version>
RELEASE
</version>
<scope>
compile
</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
public/pom.xml
View file @
94af376c
...
...
@@ -18,6 +18,7 @@
<httpclient.version>
4.5.13
</httpclient.version>
<lo4j.version>
2.14.1
</lo4j.version>
<quartz.version>
2.3.2
</quartz.version>
<mongodb.version>
4.3.0
</mongodb.version>
</properties>
<dependencies>
...
...
@@ -27,18 +28,27 @@
<artifactId>
httpclient
</artifactId>
<version>
${httpclient.version}
</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.logging.log4j/log4j-slf4j-impl -->
<dependency>
<groupId>
org.apache.logging.log4j
</groupId>
<artifactId>
log4j-slf4j-impl
</artifactId>
<version>
${lo4j.version}
</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.quartz-scheduler/quartz -->
<dependency>
<groupId>
org.quartz-scheduler
</groupId>
<artifactId>
quartz
</artifactId>
<version>
${quartz.version}
</version>
</dependency>
<!--mongodb client-->
<dependency>
<groupId>
org.mongodb
</groupId>
<artifactId>
mongodb-driver-sync
</artifactId>
<version>
${mongodb.version}
</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
public/src/main/java/com/zhiweidata/automatictest/publics/BeanContainer.java
View file @
94af376c
package
com
.
zhiweidata
.
automatictest
.
publics
;
import
com.mongodb.client.MongoClient
;
import
com.mongodb.client.MongoClients
;
import
com.mongodb.client.MongoDatabase
;
import
lombok.extern.slf4j.Slf4j
;
import
org.apache.commons.lang3.time.FastDateFormat
;
import
org.apache.http.client.HttpClient
;
import
org.apache.http.client.config.RequestConfig
;
import
org.apache.http.impl.client.HttpClientBuilder
;
import
org.apache.http.impl.client.HttpClients
;
import
org.quartz.Scheduler
;
import
org.quartz.SchedulerException
;
...
...
@@ -25,9 +31,20 @@ public class BeanContainer {
* 全局任务调度器
*/
public
static
final
Scheduler
SCHEDULER
;
/**
* mongodb 客户端
*/
public
static
final
MongoClient
MONGO_CLIENT
;
/**
* 整个项目专用数据库
*/
public
static
final
MongoDatabase
MONGO_DATABASE
;
/**
* 标准日期格式化
*/
public
static
final
FastDateFormat
STANDARD_DATE_FORMAT
=
FastDateFormat
.
getInstance
(
"yyyy-MM-dd HH:mm:ss.SSS"
);
static
{
HTTP_CLIENT
=
HttpClients
.
createMinimal
();
Scheduler
scheduler
=
null
;
try
{
...
...
@@ -37,5 +54,17 @@ public class BeanContainer {
log
.
error
(
e
.
getMessage
(),
e
);
}
SCHEDULER
=
scheduler
;
HttpClientBuilder
clientBuilder
=
HttpClients
.
custom
();
RequestConfig
.
Builder
configBuilder
=
RequestConfig
.
custom
();
int
timeout
=
60
*
1000
;
configBuilder
.
setConnectionRequestTimeout
(
timeout
);
configBuilder
.
setConnectTimeout
(
timeout
);
configBuilder
.
setSocketTimeout
(
timeout
);
clientBuilder
.
setDefaultRequestConfig
(
configBuilder
.
build
());
HTTP_CLIENT
=
clientBuilder
.
build
();
MONGO_CLIENT
=
MongoClients
.
create
(
"mongodb://admin:z199809051593@192.168.0.119"
);
MONGO_DATABASE
=
MONGO_CLIENT
.
getDatabase
(
"automatic_test"
);
}
}
public/src/main/resources/log4j2.xml
View file @
94af376c
...
...
@@ -12,6 +12,9 @@
<!-- sync/async -->
<Loggers>
<logger
name=
"org.mongodb"
level=
"ERROR"
/>
<logger
name=
"com.zhiweidata.automatictest.barragecrawlerserver"
level=
"DEBUG"
/>
<Root
level=
"info"
includeLocation=
"true"
>
<AppenderRef
ref=
"console"
/>
</Root>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment