Merge pull request #14835 from influxdata/fix/deadman-durations
feat(deadman): add staleTime duration to deadman check (pull/14843/head)
commit
09054e0585
|
@ -159,7 +159,6 @@ func TestService_handleGetChecks(t *testing.T) {
|
|||
"status": "active",
|
||||
"statusMessageTemplate": "",
|
||||
"tags": null,
|
||||
"timeSince": 0,
|
||||
"type": "deadman",
|
||||
"labels": [
|
||||
{
|
||||
|
@ -531,7 +530,6 @@ func TestService_handleGetCheck(t *testing.T) {
|
|||
"status": "active",
|
||||
"statusMessageTemplate": "",
|
||||
"tags": null,
|
||||
"timeSince": 0,
|
||||
"type": "deadman",
|
||||
"orgID": "020f755c3c082000",
|
||||
"name": "hello"
|
||||
|
@ -659,7 +657,7 @@ func TestService_handlePostCheck(t *testing.T) {
|
|||
{Key: "k2", Value: "v2"},
|
||||
},
|
||||
},
|
||||
TimeSince: 13,
|
||||
TimeSince: mustDuration("13s"),
|
||||
ReportZero: true,
|
||||
Level: notification.Warn,
|
||||
},
|
||||
|
@ -702,7 +700,7 @@ func TestService_handlePostCheck(t *testing.T) {
|
|||
"text": ""
|
||||
},
|
||||
"type": "deadman",
|
||||
"timeSince": 13,
|
||||
"timeSince": "13s",
|
||||
"createdAt": "0001-01-01T00:00:00Z",
|
||||
"updatedAt": "0001-01-01T00:00:00Z",
|
||||
"id": "020f755c3c082000",
|
||||
|
@ -946,7 +944,6 @@ func TestService_handlePatchCheck(t *testing.T) {
|
|||
"status": "",
|
||||
"statusMessageTemplate": "",
|
||||
"tags": null,
|
||||
"timeSince": 0,
|
||||
"type": "deadman",
|
||||
"labels": []
|
||||
}
|
||||
|
@ -1121,7 +1118,6 @@ func TestService_handleUpdateCheck(t *testing.T) {
|
|||
"status": "active",
|
||||
"statusMessageTemplate": "",
|
||||
"tags": null,
|
||||
"timeSince": 0,
|
||||
"type": "deadman",
|
||||
"labels": []
|
||||
}
|
||||
|
|
|
@ -9335,8 +9335,11 @@ components:
|
|||
type: string
|
||||
enum: [deadman]
|
||||
timeSince:
|
||||
description: seconds before deadman triggers
|
||||
type: integer
|
||||
description: string duration before deadman triggers
|
||||
type: string
|
||||
staleTime:
|
||||
description: string duration for time that a series is considered stale and should not trigger deadman
|
||||
type: string
|
||||
reportZero:
|
||||
description: if only zero values reported since time, trigger alert
|
||||
type: boolean
|
||||
|
|
|
@ -184,7 +184,7 @@ func TestJSON(t *testing.T) {
|
|||
UpdatedAt: timeGen2.Now(),
|
||||
},
|
||||
},
|
||||
TimeSince: 33,
|
||||
TimeSince: mustDuration("33s"),
|
||||
ReportZero: true,
|
||||
Level: notification.Warn,
|
||||
},
|
||||
|
|
|
@ -17,8 +17,8 @@ var _ influxdb.Check = &Deadman{}
|
|||
// Deadman is the deadman check.
|
||||
type Deadman struct {
|
||||
Base
|
||||
// seconds before deadman triggers
|
||||
TimeSince uint `json:"timeSince"`
|
||||
TimeSince *notification.Duration `json:"timeSince,omitempty"`
|
||||
StaleTime *notification.Duration `json:"staleTime,omitempty"`
|
||||
// If only zero values reported since time, trigger alert.
|
||||
// TODO(desa): Is this implemented in Flux?
|
||||
ReportZero bool `json:"reportZero"`
|
||||
|
@ -45,7 +45,7 @@ func (c Deadman) GenerateFlux() (string, error) {
|
|||
// an error for each error found when the script is parsed.
|
||||
func (c Deadman) GenerateFluxAST() (*ast.Package, error) {
|
||||
p := parser.ParseSource(c.Query.Text)
|
||||
replaceDurationsWithEvery(p, c.Every)
|
||||
replaceDurationsWithEvery(p, c.StaleTime)
|
||||
removeStopFromRange(p)
|
||||
|
||||
if errs := ast.GetErrors(p); len(errs) != 0 {
|
||||
|
@ -86,7 +86,7 @@ func (c Deadman) generateLevelFn() ast.Statement {
|
|||
}
|
||||
|
||||
func (c Deadman) generateFluxASTChecksFunction() ast.Statement {
|
||||
dur := flux.Duration(int64(c.TimeSince), "s")
|
||||
dur := (*ast.DurationLiteral)(c.TimeSince)
|
||||
now := flux.Call(flux.Identifier("now"), flux.Object())
|
||||
sub := flux.Call(flux.Member("experimental", "subDuration"), flux.Object(flux.Property("from", now), flux.Property("d", dur)))
|
||||
return flux.ExpressionStatement(flux.Pipe(
|
||||
|
|
|
@ -35,7 +35,7 @@ func TestDeadman_GenerateFlux(t *testing.T) {
|
|||
Every: mustDuration("1h"),
|
||||
StatusMessageTemplate: "whoa! {r.dead}",
|
||||
Query: influxdb.DashboardQuery{
|
||||
Text: `from(bucket: "foo") |> range(start: -1d, stop: now()) |> aggregateWindow(every: 1m, fn: mean) |> yield()`,
|
||||
Text: `from(bucket: "foo") |> range(start: -1d, stop: now()) |> yield()`,
|
||||
BuilderConfig: influxdb.BuilderConfig{
|
||||
Tags: []struct {
|
||||
Key string `json:"key"`
|
||||
|
@ -49,7 +49,8 @@ func TestDeadman_GenerateFlux(t *testing.T) {
|
|||
},
|
||||
},
|
||||
},
|
||||
TimeSince: 60,
|
||||
TimeSince: mustDuration("60s"),
|
||||
StaleTime: mustDuration("10m"),
|
||||
Level: notification.Info,
|
||||
},
|
||||
},
|
||||
|
@ -59,8 +60,7 @@ import "influxdata/influxdb/monitor"
|
|||
import "experimental"
|
||||
|
||||
data = from(bucket: "foo")
|
||||
|> range(start: -1h)
|
||||
|> aggregateWindow(every: 1h, fn: mean)
|
||||
|> range(start: -10m)
|
||||
|
||||
option task = {name: "moo", every: 1h}
|
||||
|
||||
|
|
|
@ -67,7 +67,8 @@ var deadman1 = &check.Deadman{
|
|||
UpdatedAt: time.Date(2006, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
TimeSince: 21,
|
||||
TimeSince: mustDuration("21s"),
|
||||
StaleTime: mustDuration("1h"),
|
||||
ReportZero: true,
|
||||
Level: notification.Critical,
|
||||
}
|
||||
|
@ -280,7 +281,8 @@ func CreateCheck(
|
|||
{Key: "k2", Value: "v2"},
|
||||
},
|
||||
},
|
||||
TimeSince: 21,
|
||||
TimeSince: mustDuration("21s"),
|
||||
StaleTime: mustDuration("1h"),
|
||||
ReportZero: true,
|
||||
Level: notification.Critical,
|
||||
},
|
||||
|
@ -334,7 +336,8 @@ func CreateCheck(
|
|||
UpdatedAt: time.Date(2006, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
TimeSince: 21,
|
||||
TimeSince: mustDuration("21s"),
|
||||
StaleTime: mustDuration("1h"),
|
||||
ReportZero: true,
|
||||
Level: notification.Critical,
|
||||
},
|
||||
|
@ -1284,7 +1287,8 @@ data = from(bucket: "telegraf") |> range(start: -1m)`,
|
|||
UpdatedAt: time.Date(2002, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
TimeSince: 12,
|
||||
TimeSince: mustDuration("12s"),
|
||||
StaleTime: mustDuration("1h"),
|
||||
ReportZero: false,
|
||||
Level: notification.Warn,
|
||||
},
|
||||
|
@ -1325,7 +1329,8 @@ data = from(bucket: "telegraf") |> range(start: -1m)`,
|
|||
UpdatedAt: time.Date(2007, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
TimeSince: 12,
|
||||
TimeSince: mustDuration("12s"),
|
||||
StaleTime: mustDuration("1h"),
|
||||
ReportZero: false,
|
||||
Level: notification.Warn,
|
||||
},
|
||||
|
@ -1393,7 +1398,8 @@ data = from(bucket: "telegraf") |> range(start: -1m)`,
|
|||
UpdatedAt: time.Date(2002, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
TimeSince: 12,
|
||||
TimeSince: mustDuration("12s"),
|
||||
StaleTime: mustDuration("1h"),
|
||||
ReportZero: false,
|
||||
Level: notification.Warn,
|
||||
},
|
||||
|
@ -1501,7 +1507,8 @@ func PatchCheck(
|
|||
UpdatedAt: time.Date(2007, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||
},
|
||||
},
|
||||
TimeSince: 21,
|
||||
TimeSince: mustDuration("21s"),
|
||||
StaleTime: mustDuration("1h"),
|
||||
ReportZero: true,
|
||||
Level: notification.Critical,
|
||||
},
|
||||
|
|
Loading…
Reference in New Issue