Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
D
datamanage
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
shenjinzhu
datamanage
Commits
d3eb81c6
Commit
d3eb81c6
authored
Jul 25, 2018
by
zhiwei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
处理模版误报问题
parent
d1f1a506
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
30 additions
and
117 deletions
+30
-117
src/main/java/com/zhiwei/manage/handle/MainThread.java
+13
-36
src/main/java/com/zhiwei/manage/handle/Task.java
+0
-64
src/main/java/com/zhiwei/manage/service/CrawTemplateServiceImpl.java
+5
-0
src/main/java/com/zhiwei/manage/util/ESUtil.java
+12
-17
No files found.
src/main/java/com/zhiwei/manage/handle/MainThread.java
View file @
d3eb81c6
...
...
@@ -21,19 +21,16 @@ import com.zhiwei.manage.bean.Channel;
import
com.zhiwei.manage.bean.Data
;
import
com.zhiwei.manage.bean.FieldIntegerity
;
import
com.zhiwei.manage.bean.Message
;
import
com.zhiwei.manage.bean.Messages
;
import
com.zhiwei.manage.bean.NewsDelayed
;
import
com.zhiwei.manage.bean.PingUrl
;
import
com.zhiwei.manage.bean.ServerBean
;
import
com.zhiwei.manage.bean.Template
;
import
com.zhiwei.manage.orm.BeanFactory
;
import
com.zhiwei.manage.service.CrawTemplateServiceImpl
;
import
com.zhiwei.manage.service.DataServiceImpl
;
import
com.zhiwei.manage.service.MessageServiceImpl
;
import
com.zhiwei.manage.service.ServerServiceImpl
;
import
com.zhiwei.manage.
util.Config
;
import
com.zhiwei.manage.
service.TemplateServiceImpl
;
import
com.zhiwei.manage.util.ESUtil
;
import
com.zhiwei.manage.util.SendMailUtil
;
import
com.zhiwei.manage.util.TimeUtil
;
/**
...
...
@@ -56,7 +53,7 @@ public class MainThread extends Thread {
counts
.
put
(
"微博"
,
2000
);
counts
.
put
(
"微信"
,
3500
);
counts
.
put
(
"知乎"
,
300
);
//
counts.put("平媒", 300);
counts
.
put
(
"平媒"
,
300
);
counts
.
put
(
"今日头条"
,
1000
);
counts
.
put
(
"贴吧"
,
250
);
counts
.
put
(
"论坛"
,
30
);
...
...
@@ -174,8 +171,14 @@ public class MainThread extends Thread {
DataServiceImpl
dataService
=
BeanFactory
.
getBean
(
DataServiceImpl
.
class
);
ServerServiceImpl
serverService
=
BeanFactory
.
getBean
(
ServerServiceImpl
.
class
);
MessageServiceImpl
messageService
=
BeanFactory
.
getBean
(
MessageServiceImpl
.
class
);
TemplateServiceImpl
templateService
=
BeanFactory
.
getBean
(
TemplateServiceImpl
.
class
);
SimpleDateFormat
sdf
=
new
SimpleDateFormat
(
"yyyy-MM-dd HH"
);
List
<
Channel
>
channel
=
serverService
.
findDisChannel
();
/**初始化模版列表***/
if
(
allTmp
==
null
)
{
templateService
.
findAll
();
}
Calendar
loop
=
Calendar
.
getInstance
();
while
(
true
)
{
Calendar
start
=
Calendar
.
getInstance
();
...
...
@@ -190,38 +193,10 @@ public class MainThread extends Thread {
start
.
set
(
Calendar
.
HOUR_OF_DAY
,
0
);
start
.
setTime
(
sdf
.
parse
(
sdf
.
format
(
start
.
getTime
())));
end
.
setTime
(
sdf
.
parse
(
sdf
.
format
(
end
.
getTime
())));
int
count
=
0
;
channel
=
serverService
.
findDisChannel
();
for
(
Channel
chan
:
channel
)
{
map
.
put
(
chan
.
getPt
(),
chan
.
getValue
());
try
{
ServerBean
ser
=
serverService
.
findServer
(
chan
.
getPt
());
count
=
es
.
getCountByPt
(
start
.
getTime
(),
end
.
getTime
(),
chan
.
getPt
(),
chan
.
getValue
(),
ser
);
// 当日的数据量
System
.
out
.
println
(
count
);
Data
data
=
new
Data
();
data
.
setCount
(
count
);
data
.
setPt
(
chan
.
getPt
());
data
.
setTime
(
start
.
getTime
());
dataService
.
insert
(
data
);
log
.
info
(
sdf
.
format
(
start
.
getTime
())
+
"|"
+
chan
.
getPt
()
+
"|入库"
);
count
=
es
.
getCountByPt
(
inTime
.
getTime
(),
end
.
getTime
(),
chan
.
getPt
(),
chan
.
getValue
(),
ser
);
// 小时的数据量
if
(
inTime
.
get
(
Calendar
.
HOUR_OF_DAY
)
>
6
)
if
(
counts
.
get
(
chan
.
getPt
())
!=
null
)
if
(
count
<
counts
.
get
(
chan
.
getPt
()))
{
Messages
m
=
new
Messages
();
m
.
setTitle
(
"渠道低数据量预警{"
+
chan
.
getPt
()
+
"}"
);
m
.
setContent
(
"1小时内的数据量为:"
+
count
);
m
.
setDate
(
sdf
.
format
(
start
.
getTime
()));
for
(
String
mail
:
Config
.
getVal
(
"count_mail"
).
split
(
","
))
{
SendMailUtil
.
sendMessage
(
mail
,
m
);
}
}
}
catch
(
Exception
e
)
{
log
.
error
(
chan
.
getPt
()
+
"入库出错,错误信息{}"
,
e
);
}
}
Thread
.
sleep
(
1000
*
120
);
Thread
.
sleep
(
1000
*
10
);
Set
<
String
>
keys
=
allTmp
.
keySet
();
System
.
out
.
println
(
"keys size is :::"
+
keys
.
size
());
List
<
String
>
needRemove
=
new
ArrayList
<>();
for
(
Iterator
<
String
>
it
=
keys
.
iterator
();
it
.
hasNext
();)
{
String
key
=
it
.
next
();
...
...
@@ -231,6 +206,7 @@ public class MainThread extends Thread {
String
pt
=
crawService
.
findPt
(
tmp
.
getSpyderInfoId
());
cwCount
=
es
.
getCounts
(
start
.
getTime
(),
end
.
getTime
(),
pt
,
map
.
get
(
pt
),
tmp
.
getSpyderInfoId
(),
serverService
.
findServer
(
pt
));
log
.
info
(
tmp
.
getTempName
()+
"====="
+
start
.
getTime
()+
"======"
+
end
.
getTime
()+
"======"
+
pt
+
"====="
+
cwCount
);
try
{
Data
data
=
new
Data
();
data
.
setCount
(
cwCount
);
...
...
@@ -243,6 +219,7 @@ public class MainThread extends Thread {
loop
.
add
(
Calendar
.
DATE
,
-
2
);
cwCount
=
es
.
getCounts
(
start
.
getTime
(),
end
.
getTime
(),
pt
,
map
.
get
(
pt
),
tmp
.
getSpyderInfoId
(),
serverService
.
findServer
(
pt
));
log
.
info
(
tmp
.
getTempName
()+
"====="
+
start
.
getTime
()+
"======"
+
end
.
getTime
()+
"======"
+
pt
+
"====="
+
cwCount
);
if
(
cwCount
<
3
)
{
if
(
StringUtils
.
isBlank
(
tmp
.
getTempName
()))
{
continue
;
...
...
src/main/java/com/zhiwei/manage/handle/Task.java
View file @
d3eb81c6
...
...
@@ -133,70 +133,6 @@ public class Task implements Runnable {
}
}
}
// if (field != null) {//字段缺失率,采集部分已经对缺失字段做了处理,没什么必要检测了
// if (MainThread.mainMap.get(field.getTemplateId()) != null) {
// List<Map<String, Object>> list = newsService.findNews(field.getHost(),
// field.getPort(),
// field.getdBName(), field.getCollection(), field.getTestCount(),
// field.getSpyderInfoId(),
// field.getPt());
// if (list.size() ==0) {
// Template tp = MainThread.mainMap.get(field.getTemplateId());
// Message msg = new Message();
// msg.setCreateDate(new Date());
// msg.setErrorMsg("字段检测发现库内的数据为0");
// msg.setHandle(false);
// msg.setPt(field.getPt());
// msg.setTemplateCreator(tp.getCreator());
// msg.setTemplateLv(tp.getTemplateLv());
// msg.setErrorType("count2");
// msg.setTempName(field.getTempName());
// messageService.update(msg,field.getPt());
// } else {
// Message msg = messageService.findMsg(field.getTempName(), "count2");
// if (msg != null) {
// List<String> ids = new ArrayList<>();
// ids.add(msg.getMessageId());
// messageService.delete(ids, field.getPt());
// }
// }
// String[] fields = field.getField().split(",");
// int count = list.size();
// for (String fid : fields) {
// String str = "";
// int num = count;
// for (Map<String, Object> map : list) {
// if (map.get(fid) == null) {
// num--;
// str = String.valueOf(map.get("_id"));
// } else if (String.valueOf(map.get(fid)).length() < 1) {
// num--;
// str = String.valueOf(map.get("_id"));
// }
// }
// if (num / 1.0 / count < field.getFieldIntegrityRate() / 100.0) {
// Template tp = MainThread.mainMap.get(field.getTemplateId());
// Message msg = new Message();
// msg.setCreateDate(new Date());
// msg.setErrorMsg(fid + "字段存在缺失,完整率" + num / 1.0 / count + "其中一条:" + str);
// msg.setHandle(false);
// msg.setPt(field.getPt());
// msg.setErrorType("field");
// msg.setTemplateCreator(tp.getCreator());
// msg.setTemplateLv(tp.getTemplateLv());
// msg.setTempName(field.getTempName());
// messageService.update(msg,field.getPt());
// } else {
// Message msg = messageService.findMsg(field.getTempName(), "field");
// if (msg != null) {
// List<String> ids = new ArrayList<>();
// ids.add(msg.getMessageId());
// messageService.delete(ids, field.getPt());
// }
// }
// }
// }
// }
if
(
delayed
!=
null
)
{
log
.
info
(
delayed
.
getTempName
()
+
"开始检测"
);
if
(
MainThread
.
mainMap
.
get
(
delayed
.
getTemplateId
())
!=
null
)
{
...
...
src/main/java/com/zhiwei/manage/service/CrawTemplateServiceImpl.java
View file @
d3eb81c6
...
...
@@ -28,7 +28,12 @@ public class CrawTemplateServiceImpl implements CrawTemplateService {
/**
 * Fetches the crawl template for {@code spyId} through {@code crawDao.getPt}
 * and hands it to {@code getPtByCt} to obtain the result.
 *
 * @param spyId identifier forwarded unchanged to {@code crawDao.getPt}
 * @return whatever {@code getPtByCt} produces for the template found, or
 *         {@code null} when the DAO returns no template for {@code spyId}
 */
@Override
public String findPt(String spyId) {
    final CrawTemplate template = crawDao.getPt(spyId);
    // Guard clause: without a template there is nothing to convert.
    if (template == null) {
        return null;
    }
    return getPtByCt(template);
}
...
...
src/main/java/com/zhiwei/manage/util/ESUtil.java
View file @
d3eb81c6
...
...
@@ -5,33 +5,23 @@ import java.text.SimpleDateFormat;
import
java.util.ArrayList
;
import
java.util.Calendar
;
import
java.util.Date
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
org.elasticsearch.action.search.SearchRequestBuilder
;
import
org.elasticsearch.action.search.SearchResponse
;
import
org.elasticsearch.client.Client
;
import
org.elasticsearch.common.recycler.Recycler.C
;
import
org.elasticsearch.index.query.BoolQueryBuilder
;
import
org.elasticsearch.index.query.QueryBuilder
;
import
org.elasticsearch.index.query.QueryBuilders
;
import
org.elasticsearch.search.SearchHit
;
import
org.elasticsearch.search.SearchHits
;
import
org.elasticsearch.search.sort.SortBuilder
;
import
org.elasticsearch.search.sort.SortOrder
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.data.domain.Sort
;
import
org.springframework.stereotype.Component
;
import
com.zhiwei.manage.bean.Data
;
import
com.zhiwei.manage.bean.EasyNews
;
import
com.zhiwei.manage.bean.ServerBean
;
import
com.zhiwei.manage.handle.DbDepot
;
import
com.zhiwei.manage.handle.MainThread
;
import
com.zhiwei.manage.orm.BeanFactory
;
import
com.zhiwei.manage.service.CrawTemplateService
;
/**
...
...
@@ -98,7 +88,7 @@ public class ESUtil {
esClient
.
close
();
return
(
int
)
searchHits
.
getTotalHits
();
}
catch
(
Exception
e
)
{
log
.
error
(
"es检索出错,错误信息【{}】"
,
e
);
log
.
error
(
"es检索出错,错误信息【{}】"
,
e
.
fillInStackTrace
()
);
}
return
0
;
}
...
...
@@ -169,7 +159,7 @@ public class ESUtil {
list
.
add
(
map
);
}
}
catch
(
Exception
e
)
{
log
.
error
(
"es检索出错,错误信息【{}】"
,
e
);
log
.
error
(
"es检索出错,错误信息【{}】"
,
e
.
fillInStackTrace
()
);
}
return
list
;
}
...
...
@@ -240,7 +230,7 @@ public class ESUtil {
loopTime
.
add
(
Calendar
.
HOUR_OF_DAY
,
1
);
}
}
catch
(
Exception
e
)
{
log
.
error
(
"es检索出错,错误信息【{}】"
,
e
);
log
.
error
(
"es检索出错,错误信息【{}】"
,
e
.
fillInStackTrace
()
);
}
return
list
;
}
...
...
@@ -248,10 +238,8 @@ public class ESUtil {
public
int
getCounts
(
Date
start
,
Date
end
,
String
pt
,
String
value
,
String
spid
,
ServerBean
ser
)
{
Calendar
startTime
=
Calendar
.
getInstance
();
startTime
.
setTime
(
start
);
// startTime.add(Calendar.HOUR, -8);
Calendar
endTime
=
Calendar
.
getInstance
();
endTime
.
setTime
(
end
);
// endTime.add(Calendar.HOUR, -8);
Client
esClient
=
null
;
try
{
esClient
=
getClient
(
pt
,
ser
);
...
...
@@ -265,9 +253,13 @@ public class ESUtil {
srb
.
setSize
(
1
);
SearchResponse
response
=
srb
.
execute
().
actionGet
();
SearchHits
searchHits
=
response
.
getHits
();
if
(
searchHits
!=
null
)
{
return
(
int
)
searchHits
.
getTotalHits
();
}
return
0
;
}
catch
(
Exception
e
)
{
log
.
error
(
"es检索出错,错误信息【{}】"
,
e
);
e
.
fillInStackTrace
();
log
.
error
(
"es检索出错,错误信息【{}】"
,
e
.
fillInStackTrace
());
}
return
0
;
}
...
...
@@ -299,9 +291,12 @@ public class ESUtil {
srb
.
setSize
(
1
);
SearchResponse
response
=
srb
.
execute
().
actionGet
();
SearchHits
searchHits
=
response
.
getHits
();
if
(
searchHits
!=
null
)
{
return
(
int
)
searchHits
.
getTotalHits
();
}
return
0
;
}
catch
(
Exception
e
)
{
log
.
error
(
"es检索出错,错误信息【{}】"
,
e
);
log
.
error
(
"es检索出错,错误信息【{}】"
,
e
.
fillInStackTrace
()
);
}
return
0
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment