Merge pull request #14835 from influxdata/fix/deadman-durations
feat(deadman): add staleTime duration to deadman check
pull/14843/head
commit
09054e0585
|
@ -159,7 +159,6 @@ func TestService_handleGetChecks(t *testing.T) {
|
||||||
"status": "active",
|
"status": "active",
|
||||||
"statusMessageTemplate": "",
|
"statusMessageTemplate": "",
|
||||||
"tags": null,
|
"tags": null,
|
||||||
"timeSince": 0,
|
|
||||||
"type": "deadman",
|
"type": "deadman",
|
||||||
"labels": [
|
"labels": [
|
||||||
{
|
{
|
||||||
|
@ -531,7 +530,6 @@ func TestService_handleGetCheck(t *testing.T) {
|
||||||
"status": "active",
|
"status": "active",
|
||||||
"statusMessageTemplate": "",
|
"statusMessageTemplate": "",
|
||||||
"tags": null,
|
"tags": null,
|
||||||
"timeSince": 0,
|
|
||||||
"type": "deadman",
|
"type": "deadman",
|
||||||
"orgID": "020f755c3c082000",
|
"orgID": "020f755c3c082000",
|
||||||
"name": "hello"
|
"name": "hello"
|
||||||
|
@ -659,7 +657,7 @@ func TestService_handlePostCheck(t *testing.T) {
|
||||||
{Key: "k2", Value: "v2"},
|
{Key: "k2", Value: "v2"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
TimeSince: 13,
|
TimeSince: mustDuration("13s"),
|
||||||
ReportZero: true,
|
ReportZero: true,
|
||||||
Level: notification.Warn,
|
Level: notification.Warn,
|
||||||
},
|
},
|
||||||
|
@ -702,7 +700,7 @@ func TestService_handlePostCheck(t *testing.T) {
|
||||||
"text": ""
|
"text": ""
|
||||||
},
|
},
|
||||||
"type": "deadman",
|
"type": "deadman",
|
||||||
"timeSince": 13,
|
"timeSince": "13s",
|
||||||
"createdAt": "0001-01-01T00:00:00Z",
|
"createdAt": "0001-01-01T00:00:00Z",
|
||||||
"updatedAt": "0001-01-01T00:00:00Z",
|
"updatedAt": "0001-01-01T00:00:00Z",
|
||||||
"id": "020f755c3c082000",
|
"id": "020f755c3c082000",
|
||||||
|
@ -946,7 +944,6 @@ func TestService_handlePatchCheck(t *testing.T) {
|
||||||
"status": "",
|
"status": "",
|
||||||
"statusMessageTemplate": "",
|
"statusMessageTemplate": "",
|
||||||
"tags": null,
|
"tags": null,
|
||||||
"timeSince": 0,
|
|
||||||
"type": "deadman",
|
"type": "deadman",
|
||||||
"labels": []
|
"labels": []
|
||||||
}
|
}
|
||||||
|
@ -1121,7 +1118,6 @@ func TestService_handleUpdateCheck(t *testing.T) {
|
||||||
"status": "active",
|
"status": "active",
|
||||||
"statusMessageTemplate": "",
|
"statusMessageTemplate": "",
|
||||||
"tags": null,
|
"tags": null,
|
||||||
"timeSince": 0,
|
|
||||||
"type": "deadman",
|
"type": "deadman",
|
||||||
"labels": []
|
"labels": []
|
||||||
}
|
}
|
||||||
|
|
|
@ -9335,8 +9335,11 @@ components:
|
||||||
type: string
|
type: string
|
||||||
enum: [deadman]
|
enum: [deadman]
|
||||||
timeSince:
|
timeSince:
|
||||||
description: seconds before deadman triggers
|
description: string duration before deadman triggers
|
||||||
type: integer
|
type: string
|
||||||
|
staleTime:
|
||||||
|
description: string duration for time that a series is considered stale and should not trigger deadman
|
||||||
|
type: string
|
||||||
reportZero:
|
reportZero:
|
||||||
description: if only zero values reported since time, trigger alert
|
description: if only zero values reported since time, trigger alert
|
||||||
type: boolean
|
type: boolean
|
||||||
|
|
|
@ -184,7 +184,7 @@ func TestJSON(t *testing.T) {
|
||||||
UpdatedAt: timeGen2.Now(),
|
UpdatedAt: timeGen2.Now(),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
TimeSince: 33,
|
TimeSince: mustDuration("33s"),
|
||||||
ReportZero: true,
|
ReportZero: true,
|
||||||
Level: notification.Warn,
|
Level: notification.Warn,
|
||||||
},
|
},
|
||||||
|
|
|
@ -17,8 +17,8 @@ var _ influxdb.Check = &Deadman{}
|
||||||
// Deadman is the deadman check.
|
// Deadman is the deadman check.
|
||||||
type Deadman struct {
|
type Deadman struct {
|
||||||
Base
|
Base
|
||||||
// seconds before deadman triggers
|
TimeSince *notification.Duration `json:"timeSince,omitempty"`
|
||||||
TimeSince uint `json:"timeSince"`
|
StaleTime *notification.Duration `json:"staleTime,omitempty"`
|
||||||
// If only zero values reported since time, trigger alert.
|
// If only zero values reported since time, trigger alert.
|
||||||
// TODO(desa): Is this implemented in Flux?
|
// TODO(desa): Is this implemented in Flux?
|
||||||
ReportZero bool `json:"reportZero"`
|
ReportZero bool `json:"reportZero"`
|
||||||
|
@ -45,7 +45,7 @@ func (c Deadman) GenerateFlux() (string, error) {
|
||||||
// an error for each error found when the script is parsed.
|
// an error for each error found when the script is parsed.
|
||||||
func (c Deadman) GenerateFluxAST() (*ast.Package, error) {
|
func (c Deadman) GenerateFluxAST() (*ast.Package, error) {
|
||||||
p := parser.ParseSource(c.Query.Text)
|
p := parser.ParseSource(c.Query.Text)
|
||||||
replaceDurationsWithEvery(p, c.Every)
|
replaceDurationsWithEvery(p, c.StaleTime)
|
||||||
removeStopFromRange(p)
|
removeStopFromRange(p)
|
||||||
|
|
||||||
if errs := ast.GetErrors(p); len(errs) != 0 {
|
if errs := ast.GetErrors(p); len(errs) != 0 {
|
||||||
|
@ -86,7 +86,7 @@ func (c Deadman) generateLevelFn() ast.Statement {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c Deadman) generateFluxASTChecksFunction() ast.Statement {
|
func (c Deadman) generateFluxASTChecksFunction() ast.Statement {
|
||||||
dur := flux.Duration(int64(c.TimeSince), "s")
|
dur := (*ast.DurationLiteral)(c.TimeSince)
|
||||||
now := flux.Call(flux.Identifier("now"), flux.Object())
|
now := flux.Call(flux.Identifier("now"), flux.Object())
|
||||||
sub := flux.Call(flux.Member("experimental", "subDuration"), flux.Object(flux.Property("from", now), flux.Property("d", dur)))
|
sub := flux.Call(flux.Member("experimental", "subDuration"), flux.Object(flux.Property("from", now), flux.Property("d", dur)))
|
||||||
return flux.ExpressionStatement(flux.Pipe(
|
return flux.ExpressionStatement(flux.Pipe(
|
||||||
|
|
|
@ -35,7 +35,7 @@ func TestDeadman_GenerateFlux(t *testing.T) {
|
||||||
Every: mustDuration("1h"),
|
Every: mustDuration("1h"),
|
||||||
StatusMessageTemplate: "whoa! {r.dead}",
|
StatusMessageTemplate: "whoa! {r.dead}",
|
||||||
Query: influxdb.DashboardQuery{
|
Query: influxdb.DashboardQuery{
|
||||||
Text: `from(bucket: "foo") |> range(start: -1d, stop: now()) |> aggregateWindow(every: 1m, fn: mean) |> yield()`,
|
Text: `from(bucket: "foo") |> range(start: -1d, stop: now()) |> yield()`,
|
||||||
BuilderConfig: influxdb.BuilderConfig{
|
BuilderConfig: influxdb.BuilderConfig{
|
||||||
Tags: []struct {
|
Tags: []struct {
|
||||||
Key string `json:"key"`
|
Key string `json:"key"`
|
||||||
|
@ -49,7 +49,8 @@ func TestDeadman_GenerateFlux(t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
TimeSince: 60,
|
TimeSince: mustDuration("60s"),
|
||||||
|
StaleTime: mustDuration("10m"),
|
||||||
Level: notification.Info,
|
Level: notification.Info,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -59,8 +60,7 @@ import "influxdata/influxdb/monitor"
|
||||||
import "experimental"
|
import "experimental"
|
||||||
|
|
||||||
data = from(bucket: "foo")
|
data = from(bucket: "foo")
|
||||||
|> range(start: -1h)
|
|> range(start: -10m)
|
||||||
|> aggregateWindow(every: 1h, fn: mean)
|
|
||||||
|
|
||||||
option task = {name: "moo", every: 1h}
|
option task = {name: "moo", every: 1h}
|
||||||
|
|
||||||
|
|
|
@ -67,7 +67,8 @@ var deadman1 = &check.Deadman{
|
||||||
UpdatedAt: time.Date(2006, 5, 4, 1, 2, 3, 0, time.UTC),
|
UpdatedAt: time.Date(2006, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
TimeSince: 21,
|
TimeSince: mustDuration("21s"),
|
||||||
|
StaleTime: mustDuration("1h"),
|
||||||
ReportZero: true,
|
ReportZero: true,
|
||||||
Level: notification.Critical,
|
Level: notification.Critical,
|
||||||
}
|
}
|
||||||
|
@ -280,7 +281,8 @@ func CreateCheck(
|
||||||
{Key: "k2", Value: "v2"},
|
{Key: "k2", Value: "v2"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
TimeSince: 21,
|
TimeSince: mustDuration("21s"),
|
||||||
|
StaleTime: mustDuration("1h"),
|
||||||
ReportZero: true,
|
ReportZero: true,
|
||||||
Level: notification.Critical,
|
Level: notification.Critical,
|
||||||
},
|
},
|
||||||
|
@ -334,7 +336,8 @@ func CreateCheck(
|
||||||
UpdatedAt: time.Date(2006, 5, 4, 1, 2, 3, 0, time.UTC),
|
UpdatedAt: time.Date(2006, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
TimeSince: 21,
|
TimeSince: mustDuration("21s"),
|
||||||
|
StaleTime: mustDuration("1h"),
|
||||||
ReportZero: true,
|
ReportZero: true,
|
||||||
Level: notification.Critical,
|
Level: notification.Critical,
|
||||||
},
|
},
|
||||||
|
@ -1284,7 +1287,8 @@ data = from(bucket: "telegraf") |> range(start: -1m)`,
|
||||||
UpdatedAt: time.Date(2002, 5, 4, 1, 2, 3, 0, time.UTC),
|
UpdatedAt: time.Date(2002, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
TimeSince: 12,
|
TimeSince: mustDuration("12s"),
|
||||||
|
StaleTime: mustDuration("1h"),
|
||||||
ReportZero: false,
|
ReportZero: false,
|
||||||
Level: notification.Warn,
|
Level: notification.Warn,
|
||||||
},
|
},
|
||||||
|
@ -1325,7 +1329,8 @@ data = from(bucket: "telegraf") |> range(start: -1m)`,
|
||||||
UpdatedAt: time.Date(2007, 5, 4, 1, 2, 3, 0, time.UTC),
|
UpdatedAt: time.Date(2007, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
TimeSince: 12,
|
TimeSince: mustDuration("12s"),
|
||||||
|
StaleTime: mustDuration("1h"),
|
||||||
ReportZero: false,
|
ReportZero: false,
|
||||||
Level: notification.Warn,
|
Level: notification.Warn,
|
||||||
},
|
},
|
||||||
|
@ -1393,7 +1398,8 @@ data = from(bucket: "telegraf") |> range(start: -1m)`,
|
||||||
UpdatedAt: time.Date(2002, 5, 4, 1, 2, 3, 0, time.UTC),
|
UpdatedAt: time.Date(2002, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
TimeSince: 12,
|
TimeSince: mustDuration("12s"),
|
||||||
|
StaleTime: mustDuration("1h"),
|
||||||
ReportZero: false,
|
ReportZero: false,
|
||||||
Level: notification.Warn,
|
Level: notification.Warn,
|
||||||
},
|
},
|
||||||
|
@ -1501,7 +1507,8 @@ func PatchCheck(
|
||||||
UpdatedAt: time.Date(2007, 5, 4, 1, 2, 3, 0, time.UTC),
|
UpdatedAt: time.Date(2007, 5, 4, 1, 2, 3, 0, time.UTC),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
TimeSince: 21,
|
TimeSince: mustDuration("21s"),
|
||||||
|
StaleTime: mustDuration("1h"),
|
||||||
ReportZero: true,
|
ReportZero: true,
|
||||||
Level: notification.Critical,
|
Level: notification.Critical,
|
||||||
},
|
},
|
||||||
|
|
Loading…
Reference in New Issue