Merge pull request #14835 from influxdata/fix/deadman-durations

feat(deadman): add staleTime duration to deadman check
pull/14843/head
Michael Desa 2019-08-28 11:02:59 -04:00 committed by GitHub
commit 09054e0585
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 30 additions and 24 deletions

View File

@ -159,7 +159,6 @@ func TestService_handleGetChecks(t *testing.T) {
"status": "active",
"statusMessageTemplate": "",
"tags": null,
"timeSince": 0,
"type": "deadman",
"labels": [
{
@ -531,7 +530,6 @@ func TestService_handleGetCheck(t *testing.T) {
"status": "active",
"statusMessageTemplate": "",
"tags": null,
"timeSince": 0,
"type": "deadman",
"orgID": "020f755c3c082000",
"name": "hello"
@ -659,7 +657,7 @@ func TestService_handlePostCheck(t *testing.T) {
{Key: "k2", Value: "v2"},
},
},
TimeSince: 13,
TimeSince: mustDuration("13s"),
ReportZero: true,
Level: notification.Warn,
},
@ -702,7 +700,7 @@ func TestService_handlePostCheck(t *testing.T) {
"text": ""
},
"type": "deadman",
"timeSince": 13,
"timeSince": "13s",
"createdAt": "0001-01-01T00:00:00Z",
"updatedAt": "0001-01-01T00:00:00Z",
"id": "020f755c3c082000",
@ -946,7 +944,6 @@ func TestService_handlePatchCheck(t *testing.T) {
"status": "",
"statusMessageTemplate": "",
"tags": null,
"timeSince": 0,
"type": "deadman",
"labels": []
}
@ -1121,7 +1118,6 @@ func TestService_handleUpdateCheck(t *testing.T) {
"status": "active",
"statusMessageTemplate": "",
"tags": null,
"timeSince": 0,
"type": "deadman",
"labels": []
}

View File

@ -9335,8 +9335,11 @@ components:
type: string
enum: [deadman]
timeSince:
description: seconds before deadman triggers
type: integer
description: duration string (e.g. "13s") before deadman triggers
type: string
staleTime:
description: duration string after which a series is considered stale and should no longer trigger the deadman
type: string
reportZero:
description: if only zero values reported since time, trigger alert
type: boolean

View File

@ -184,7 +184,7 @@ func TestJSON(t *testing.T) {
UpdatedAt: timeGen2.Now(),
},
},
TimeSince: 33,
TimeSince: mustDuration("33s"),
ReportZero: true,
Level: notification.Warn,
},

View File

@ -17,8 +17,8 @@ var _ influxdb.Check = &Deadman{}
// Deadman is the deadman check.
type Deadman struct {
Base
// seconds before deadman triggers
TimeSince uint `json:"timeSince"`
TimeSince *notification.Duration `json:"timeSince,omitempty"`
StaleTime *notification.Duration `json:"staleTime,omitempty"`
// If only zero values reported since time, trigger alert.
// TODO(desa): Is this implemented in Flux?
ReportZero bool `json:"reportZero"`
@ -45,7 +45,7 @@ func (c Deadman) GenerateFlux() (string, error) {
// an error for each error found when the script is parsed.
func (c Deadman) GenerateFluxAST() (*ast.Package, error) {
p := parser.ParseSource(c.Query.Text)
replaceDurationsWithEvery(p, c.Every)
replaceDurationsWithEvery(p, c.StaleTime)
removeStopFromRange(p)
if errs := ast.GetErrors(p); len(errs) != 0 {
@ -86,7 +86,7 @@ func (c Deadman) generateLevelFn() ast.Statement {
}
func (c Deadman) generateFluxASTChecksFunction() ast.Statement {
dur := flux.Duration(int64(c.TimeSince), "s")
dur := (*ast.DurationLiteral)(c.TimeSince)
now := flux.Call(flux.Identifier("now"), flux.Object())
sub := flux.Call(flux.Member("experimental", "subDuration"), flux.Object(flux.Property("from", now), flux.Property("d", dur)))
return flux.ExpressionStatement(flux.Pipe(

View File

@ -35,7 +35,7 @@ func TestDeadman_GenerateFlux(t *testing.T) {
Every: mustDuration("1h"),
StatusMessageTemplate: "whoa! {r.dead}",
Query: influxdb.DashboardQuery{
Text: `from(bucket: "foo") |> range(start: -1d, stop: now()) |> aggregateWindow(every: 1m, fn: mean) |> yield()`,
Text: `from(bucket: "foo") |> range(start: -1d, stop: now()) |> yield()`,
BuilderConfig: influxdb.BuilderConfig{
Tags: []struct {
Key string `json:"key"`
@ -49,7 +49,8 @@ func TestDeadman_GenerateFlux(t *testing.T) {
},
},
},
TimeSince: 60,
TimeSince: mustDuration("60s"),
StaleTime: mustDuration("10m"),
Level: notification.Info,
},
},
@ -59,8 +60,7 @@ import "influxdata/influxdb/monitor"
import "experimental"
data = from(bucket: "foo")
|> range(start: -1h)
|> aggregateWindow(every: 1h, fn: mean)
|> range(start: -10m)
option task = {name: "moo", every: 1h}

View File

@ -67,7 +67,8 @@ var deadman1 = &check.Deadman{
UpdatedAt: time.Date(2006, 5, 4, 1, 2, 3, 0, time.UTC),
},
},
TimeSince: 21,
TimeSince: mustDuration("21s"),
StaleTime: mustDuration("1h"),
ReportZero: true,
Level: notification.Critical,
}
@ -280,7 +281,8 @@ func CreateCheck(
{Key: "k2", Value: "v2"},
},
},
TimeSince: 21,
TimeSince: mustDuration("21s"),
StaleTime: mustDuration("1h"),
ReportZero: true,
Level: notification.Critical,
},
@ -334,7 +336,8 @@ func CreateCheck(
UpdatedAt: time.Date(2006, 5, 4, 1, 2, 3, 0, time.UTC),
},
},
TimeSince: 21,
TimeSince: mustDuration("21s"),
StaleTime: mustDuration("1h"),
ReportZero: true,
Level: notification.Critical,
},
@ -1284,7 +1287,8 @@ data = from(bucket: "telegraf") |> range(start: -1m)`,
UpdatedAt: time.Date(2002, 5, 4, 1, 2, 3, 0, time.UTC),
},
},
TimeSince: 12,
TimeSince: mustDuration("12s"),
StaleTime: mustDuration("1h"),
ReportZero: false,
Level: notification.Warn,
},
@ -1325,7 +1329,8 @@ data = from(bucket: "telegraf") |> range(start: -1m)`,
UpdatedAt: time.Date(2007, 5, 4, 1, 2, 3, 0, time.UTC),
},
},
TimeSince: 12,
TimeSince: mustDuration("12s"),
StaleTime: mustDuration("1h"),
ReportZero: false,
Level: notification.Warn,
},
@ -1393,7 +1398,8 @@ data = from(bucket: "telegraf") |> range(start: -1m)`,
UpdatedAt: time.Date(2002, 5, 4, 1, 2, 3, 0, time.UTC),
},
},
TimeSince: 12,
TimeSince: mustDuration("12s"),
StaleTime: mustDuration("1h"),
ReportZero: false,
Level: notification.Warn,
},
@ -1501,7 +1507,8 @@ func PatchCheck(
UpdatedAt: time.Date(2007, 5, 4, 1, 2, 3, 0, time.UTC),
},
},
TimeSince: 21,
TimeSince: mustDuration("21s"),
StaleTime: mustDuration("1h"),
ReportZero: true,
Level: notification.Critical,
},