From c2a663dcdd15fd63fdfc19deeca222718142b2a4 Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Mon, 31 Oct 2016 18:11:05 -0500 Subject: [PATCH 01/14] Add kapacitor tickscript template generator for threshold, relative, deadman --- kapacitor/alerts.go | 134 +++++++++++++++++ kapacitor/alerts_test.go | 315 +++++++++++++++++++++++++++++++++++++++ kapacitor/templates.go | 105 +++++++++++++ server/kapacitors.go | 16 ++ server/mux.go | 6 + 5 files changed, 576 insertions(+) create mode 100644 kapacitor/alerts.go create mode 100644 kapacitor/alerts_test.go create mode 100644 kapacitor/templates.go diff --git a/kapacitor/alerts.go b/kapacitor/alerts.go new file mode 100644 index 0000000000..16fc18e8d8 --- /dev/null +++ b/kapacitor/alerts.go @@ -0,0 +1,134 @@ +package tickscripts + +import ( + "bytes" + "fmt" + "log" + "text/template" + "time" + + "github.com/influxdata/kapacitor/pipeline" + "github.com/influxdata/kapacitor/tick/ast" + "github.com/influxdata/kapacitor/tick/stateful" +) + +// Alert defines alerting strings in template rendering +type Alert struct { + Service string // Alerting service + Operator string // Operator for alert comparison + Aggregate string // Statistic aggregate over window of data +} + +// TickTemplate task to be used by kapacitor +type TickTemplate string + +// Threshold generates a tickscript template with an alert +func (a *Alert) Threshold() (TickTemplate, error) { + if err := ValidateAlert(a); err != nil { + return "", err + } + + tickscript, err := execTemplate(ThresholdTemplate, a) + if err != nil { + return "", err + } + + if err := validateTick(tickscript); err != nil { + return "", err + } + + return formatTick(tickscript) +} + +// Relative creates a tickscript that alerts on relative changes over windows of data +func (a *Alert) Relative() (TickTemplate, error) { + if err := ValidateAlert(a); err != nil { + return "", err + } + + tickscript, err := execTemplate(RelativeTemplate, a) + if err != nil { + return "", err + } + + if err 
:= validateTick(tickscript); err != nil { + return "", err + } + + return formatTick(tickscript) +} + +// Deadman creates a tickscript that alerts when no data has been received for a time. +func (a *Alert) Deadman() (TickTemplate, error) { + if err := ValidateAlert(a); err != nil { + return "", err + } + + tickscript, err := execTemplate(DeadmanTemplate, a) + if err != nil { + return "", err + } + + if err := validateTick(tickscript); err != nil { + return "", err + } + + return formatTick(tickscript) +} + +// ValidateAlert checks if the alert is a valid kapacitor alert service. +func ValidateAlert(alert *Alert) error { + // Simple tick script to check alert service. + // If a pipeline cannot be created then we know this is an invalid + // service. At least with this version of kapacitor! + script := fmt.Sprintf("stream|from()|alert().%s()", alert.Service) + return validateTick(script) +} + +func formatTick(tickscript string) (TickTemplate, error) { + node, err := ast.Parse(tickscript) + if err != nil { + log.Fatalf("parse execution: %s", err) + return "", err + } + + output := new(bytes.Buffer) + node.Format(output, "", true) + return TickTemplate(output.String()), nil +} + +func validateTick(script string) error { + scope := stateful.NewScope() + _, err := pipeline.CreateTemplatePipeline(script, pipeline.StreamEdge, scope, &deadman{}) + return err +} + +func execTemplate(tick string, alert *Alert) (string, error) { + p := template.New("template") + t, err := p.Parse(tick) + if err != nil { + log.Fatalf("template parse: %s", err) + return "", err + } + buf := new(bytes.Buffer) + err = t.Execute(buf, &alert) + if err != nil { + log.Fatalf("template execution: %s", err) + return "", err + } + return buf.String(), nil +} + +type deadman struct { + interval time.Duration + threshold float64 + id string + message string + global bool +} + +func (d deadman) Interval() time.Duration { return d.interval } +func (d deadman) Threshold() float64 { return d.threshold } +func 
(d deadman) Id() string { return d.id } +func (d deadman) Message() string { return d.message } +func (d deadman) Global() bool { return d.global } diff --git a/kapacitor/alerts_test.go b/kapacitor/alerts_test.go new file mode 100644 index 0000000000..32ca6fffed --- /dev/null +++ b/kapacitor/alerts_test.go @@ -0,0 +1,315 @@ +package tickscripts + +import "testing" + +func TestValidateAlert(t *testing.T) { + tests := []struct { + name string + alert Alert + wantErr bool + }{ + { + name: "Test valid template alert", + alert: Alert{ + Service: "slack", + }, + wantErr: false, + }, + { + name: "Test invalid template alert", + alert: Alert{ + Service: "invalid", + }, + wantErr: true, + }, + } + for _, tt := range tests { + if err := ValidateAlert(&tt.alert); (err != nil) != tt.wantErr { + t.Errorf("%q. ValidateAlert() error = %v, wantErr %v", tt.name, err, tt.wantErr) + } + } +} + +func Test_validateTick(t *testing.T) { + type args struct { + script string + } + tests := []struct { + name string + args args + wantErr bool + }{ + { + name: "Valid Script", + args: args{ + script: "stream|from()", + }, + wantErr: false, + }, + { + name: "Invalid Script", + args: args{ + script: "stream|nothing", + }, + wantErr: true, + }, + } + for _, tt := range tests { + if err := validateTick(tt.args.script); (err != nil) != tt.wantErr { + t.Errorf("%q. 
validateTick() error = %v, wantErr %v", tt.name, err, tt.wantErr) + } + } +} + +func TestThreshold(t *testing.T) { + tests := []struct { + name string + alert Alert + want TickTemplate + wantErr bool + }{ + { + name: "Test valid template alert", + alert: Alert{ + Service: "slack", + Operator: ">", + }, + want: `var database = 'telegraf' + +var rp = 'autogen' + +var measurement string + +var metric string + +var groupby = ['host'] + +var crit int + +var period duration + +var every duration + +var message string + +var id string + +stream + |from() + .database(database) + .retentionPolicy(rp) + .measurement(measurement) + .groupBy(groupby) + |window() + .period(period) + .every(every) + |mean(metric) + .as('stat') + |alert() + .id(id) + .message(message) + .crit(lambda: "stat" > crit) + .slack() +`, + wantErr: false, + }, + { + name: "Test valid template alert", + alert: Alert{ + Service: "invalid", + Operator: ">", + }, + want: "", + wantErr: true, + }, + } + for _, tt := range tests { + got, err := tt.alert.Threshold() + if (err != nil) != tt.wantErr { + t.Errorf("%q. Threshold() error = %v, wantErr %v", tt.name, err, tt.wantErr) + continue + } + if got != tt.want { + t.Errorf("%q. 
Threshold() = %v, want %v", tt.name, got, tt.want) + } + } +} + +func TestRelative(t *testing.T) { + tests := []struct { + name string + alert Alert + want TickTemplate + wantErr bool + }{ + { + name: "Test valid template alert", + alert: Alert{ + Service: "slack", + Operator: ">", + Aggregate: "mean", + }, + want: `var database = 'telegraf' + +var rp = 'autogen' + +var measurement string + +var metric string + +var groupby = ['host'] + +var crit int + +var period duration + +var every duration + +var shift duration + +var message string + +var id string + +var data = stream + |from() + .database(database) + .retentionPolicy(rp) + .measurement(measurement) + .groupBy(groupby) + +var past = data + |window() + .period(period) + .every(every) + .align() + |mean(metric) + .as('stat') + |shift(shift) + +var current = data + |window() + .period(period) + .every(every) + .align() + |mean(metric) + .as('stat') + +past + |join(current) + .as('past', 'current') + |eval(lambda: abs(float("current.stat" - "past.stat")) / float("past.stat")) + .keep() + .as('perc') + |alert() + .id(id) + .message(message) + .crit(lambda: "perc" > crit) + .slack() +`, + wantErr: false, + }, + { + name: "Test invalid service template", + alert: Alert{ + Service: "invalid", + Operator: ">", + Aggregate: "mean", + }, + want: "", + wantErr: true, + }, + { + name: "Test invalid aggregate template", + alert: Alert{ + Service: "slack", + Operator: ">", + Aggregate: "invalid", + }, + want: "", + wantErr: true, + }, + { + name: "Test invalid operator template", + alert: Alert{ + Service: "slack", + Operator: "invalid", + Aggregate: "mean", + }, + want: "", + wantErr: true, + }, + } + for _, tt := range tests { + got, err := tt.alert.Relative() + if (err != nil) != tt.wantErr { + t.Errorf("%q. Relative() error = %v, wantErr %v", tt.name, err, tt.wantErr) + continue + } + if got != tt.want { + t.Errorf("%q. 
Relative() = %v, want %v", tt.name, got, tt.want) + } + } +} + +func TestDeadman(t *testing.T) { + tests := []struct { + name string + alert Alert + want TickTemplate + wantErr bool + }{ + { + name: "Test valid template alert", + alert: Alert{ + Service: "slack", + }, + want: `var database = 'telegraf' + +var rp = 'autogen' + +var measurement string + +var groupby = ['host'] + +var threshold float + +var period duration + +var id string + +var message string + +stream + |from() + .database(database) + .retentionPolicy(rp) + .measurement(measurement) + .groupBy(groupby) + |deadman(threshold, period) + .id(id) + .message(message) + .slack() +`, + wantErr: false, + }, + { + name: "Test valid template alert", + alert: Alert{ + Service: "invalid", + }, + want: "", + wantErr: true, + }, + } + for _, tt := range tests { + got, err := tt.alert.Deadman() + if (err != nil) != tt.wantErr { + t.Errorf("%q. Deadman() error = %v, wantErr %v", tt.name, err, tt.wantErr) + continue + } + if got != tt.want { + t.Errorf("%q. Deadman() = %v, want %v", tt.name, got, tt.want) + } + } +} diff --git a/kapacitor/templates.go b/kapacitor/templates.go new file mode 100644 index 0000000000..1891b1758a --- /dev/null +++ b/kapacitor/templates.go @@ -0,0 +1,105 @@ +package tickscripts + +// TODO: I don't think mean is correct here. It's probably any value. 
+// TODO: seems like we should only have statechanges + +// ThresholdTemplate is a tickscript template template for threshold alerts +var ThresholdTemplate = `var database = 'telegraf' +var rp = 'autogen' +var measurement string +var metric string +var groupby = ['host'] +var crit int +var period duration +var every duration +var message string +var id string + +stream + |from() + .database(database) + .retentionPolicy(rp) + .measurement(measurement) + .groupBy(groupby) + |window() + .period(period) + .every(every) + |mean(metric) + .as('stat') + |alert() + .id(id) + .message(message) + .crit(lambda: "stat" {{ .Operator }} crit) + .{{ .Service }}()` + +// RelativeTemplate compares one window of data versus another. +var RelativeTemplate = `var database = 'telegraf' +var rp = 'autogen' +var measurement string +var metric string +var groupby = ['host'] +var crit int +var period duration +var every duration +var shift duration +var message string +var id string + +var data = stream + |from() + .database(database) + .retentionPolicy(rp) + .measurement(measurement) + .groupBy(groupby) + +var past = data + |window() + .period(period) + .every(every) + .align() + |{{ .Aggregate }}(metric) + .as('stat') + |shift(shift) + +var current = data + |window() + .period(period) + .every(every) + .align() + |{{ .Aggregate }}(metric) + .as('stat') + +past + |join(current) + .as('past', 'current') + |eval(lambda: abs(float("current.stat" - "past.stat"))/float("past.stat")) + .keep() + .as('perc') + |alert() + .id(id) + .message(message) + .crit(lambda: "perc" {{ .Operator }} crit) + .{{ .Service }}()` + +// DeadmanTemplate checks if any data has been streamed in the last period of time +var DeadmanTemplate = `var database = 'telegraf' +var rp = 'autogen' +var measurement string +var groupby = ['host'] +var threshold float +var period duration + +var id string +var message string + +stream + |from() + .database(database) + .retentionPolicy(rp) + .measurement(measurement) + 
.groupBy(groupby) + |deadman(threshold, period) + .id(id) + .message(message) + .{{ .Service }}() +` diff --git a/server/kapacitors.go b/server/kapacitors.go index edb175c37a..3de3784b1e 100644 --- a/server/kapacitors.go +++ b/server/kapacitors.go @@ -258,3 +258,19 @@ func (h *Service) UpdateKapacitor(w http.ResponseWriter, r *http.Request) { res := newKapacitor(srv) encodeJSON(w, http.StatusOK, res, h.Logger) } + +// KapacitorTasksPost proxies POST to kapacitor +func (h *Service) KapacitorTasksPost(w http.ResponseWriter, r *http.Request) { +} + +// KapacitorTasksPatch proxies PATCH to kapacitor +func (h *Service) KapacitorTasksPatch(w http.ResponseWriter, r *http.Request) { +} + +// KapacitorTasksGet proxies GET to kapacitor +func (h *Service) KapacitorTasksGet(w http.ResponseWriter, r *http.Request) { +} + +// KapacitorTasksDelete proxies DELETE to kapacitor +func (h *Service) KapacitorTasksDelete(w http.ResponseWriter, r *http.Request) { +} diff --git a/server/mux.go b/server/mux.go index 4c9c0a62ac..6966c6562c 100644 --- a/server/mux.go +++ b/server/mux.go @@ -68,6 +68,12 @@ func NewMux(opts MuxOpts, service Service) http.Handler { router.PATCH("/chronograf/v1/sources/:id/kapacitors/:kid", service.UpdateKapacitor) router.DELETE("/chronograf/v1/sources/:id/kapacitors/:kid", service.RemoveKapacitor) + // Kapacitor Tasks + router.GET("/chronograf/v1/sources/:id/kapacitors/:kid/tasks", service.KapacitorTasksGet) + router.POST("/chronograf/v1/sources/:id/kapacitors/:kid/tasks", service.KapacitorTasksPost) + router.PATCH("/chronograf/v1/sources/:id/kapacitors/:kid/tasks", service.KapacitorTasksPatch) + router.DELETE("/chronograf/v1/sources/:id/kapacitors/:kid/tasks", service.KapacitorTasksDelete) + // Kapacitor Proxy router.GET("/chronograf/v1/sources/:id/kapacitors/:kid/proxy", service.KapacitorProxyGet) router.POST("/chronograf/v1/sources/:id/kapacitors/:kid/proxy", service.KapacitorProxyPost) From a4b4107e1457ed4413c0e72da0e02ed64ab57c41 Mon Sep 17 00:00:00 2001 
From: Chris Goller Date: Mon, 31 Oct 2016 19:19:32 -0500 Subject: [PATCH 02/14] Add alert interface --- .gitignore | 1 + chronograf.go | 13 +++++++++++-- kapacitor/alerts.go | 27 ++++++++++++++++++++------- kapacitor/alerts_test.go | 12 ++++++++---- 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 7b86fcf60b..1de95340dc 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ node_modules/ build/ chronograf.db npm-debug.log +.vscode diff --git a/chronograf.go b/chronograf.go index 52f4090121..de1bedaa40 100644 --- a/chronograf.go +++ b/chronograf.go @@ -68,7 +68,7 @@ type Source struct { Name string `json:"name"` // Name is the user-defined name for the source Type string `json:"type,omitempty"` // Type specifies which kinds of source (enterprise vs oss) Username string `json:"username,omitempty"` // Username is the username to connect to the source - Password string `json:"password,omitempty"` // Password is in CLEARTEXT FIXME + Password string `json:"password,omitempty"` // Password is in CLEARTEXT // TODO: fixme URL string `json:"url"` // URL are the connections to the source Default bool `json:"default"` // Default specifies the default source for the application } @@ -87,13 +87,22 @@ type SourcesStore interface { Update(context.Context, Source) error } +// TickTemplate task to be used by kapacitor +type TickTemplate string + +// Alert generates tickscript templates for kapacitor +type Alert interface { + // Generate will create the tickscript to be used as a kapacitor template + Generate() (TickTemplate, error) +} + // Server represents a proxy connection to an HTTP server type Server struct { ID int // ID is the unique ID of the server SrcID int // SrcID of the data source Name string // Name is the user-defined name for the server Username string // Username is the username to connect to the server - Password string // Password is in CLEARTEXT FIXME + Password string // Password is in CLEARTEXT // TODO: FIXME URL 
string // URL are the connections to the server } diff --git a/kapacitor/alerts.go b/kapacitor/alerts.go index 16fc18e8d8..0cca087c2a 100644 --- a/kapacitor/alerts.go +++ b/kapacitor/alerts.go @@ -7,23 +7,36 @@ import ( "text/template" "time" + "github.com/influxdata/chronograf" "github.com/influxdata/kapacitor/pipeline" "github.com/influxdata/kapacitor/tick/ast" "github.com/influxdata/kapacitor/tick/stateful" ) +var _ chronograf.Alert = &Alert{} + // Alert defines alerting strings in template rendering type Alert struct { + Trigger string // Specifies the type of alert Service string // Alerting service Operator string // Operator for alert comparison Aggregate string // Statistic aggregate over window of data } -// TickTemplate task to be used by kapacitor -type TickTemplate string +func (a *Alert) Generate() (chronograf.TickTemplate, error) { + switch a.Trigger { + case "threshold": + return a.Threshold() + case "relative": + return a.Relative() + case "deadman": + return a.Deadman() + } + return "", fmt.Errorf("Unknown tigger mechanism %s", a.Trigger) +} // Threshold generates a tickscript template with an alert -func (a *Alert) Threshold() (TickTemplate, error) { +func (a *Alert) Threshold() (chronograf.TickTemplate, error) { if err := ValidateAlert(a); err != nil { return "", err } @@ -41,7 +54,7 @@ func (a *Alert) Threshold() (TickTemplate, error) { } // Relative creates a tickscript that alerts on relative changes over windows of data -func (a *Alert) Relative() (TickTemplate, error) { +func (a *Alert) Relative() (chronograf.TickTemplate, error) { if err := ValidateAlert(a); err != nil { return "", err } @@ -59,7 +72,7 @@ func (a *Alert) Relative() (TickTemplate, error) { } // Deadman creates a tickscript that alerts when no data has been received for a time. 
-func (a *Alert) Deadman() (TickTemplate, error) { +func (a *Alert) Deadman() (chronograf.TickTemplate, error) { if err := ValidateAlert(a); err != nil { return "", err } @@ -85,7 +98,7 @@ func ValidateAlert(alert *Alert) error { return validateTick(script) } -func formatTick(tickscript string) (TickTemplate, error) { +func formatTick(tickscript string) (chronograf.TickTemplate, error) { node, err := ast.Parse(tickscript) if err != nil { log.Fatalf("parse execution: %s", err) @@ -94,7 +107,7 @@ func formatTick(tickscript string) (TickTemplate, error) { output := new(bytes.Buffer) node.Format(output, "", true) - return TickTemplate(output.String()), nil + return chronograf.TickTemplate(output.String()), nil } func validateTick(script string) error { diff --git a/kapacitor/alerts_test.go b/kapacitor/alerts_test.go index 32ca6fffed..194c9032f8 100644 --- a/kapacitor/alerts_test.go +++ b/kapacitor/alerts_test.go @@ -1,6 +1,10 @@ package tickscripts -import "testing" +import ( + "testing" + + "github.com/influxdata/chronograf" +) func TestValidateAlert(t *testing.T) { tests := []struct { @@ -65,7 +69,7 @@ func TestThreshold(t *testing.T) { tests := []struct { name string alert Alert - want TickTemplate + want chronograf.TickTemplate wantErr bool }{ { @@ -139,7 +143,7 @@ func TestRelative(t *testing.T) { tests := []struct { name string alert Alert - want TickTemplate + want chronograf.TickTemplate wantErr bool }{ { @@ -256,7 +260,7 @@ func TestDeadman(t *testing.T) { tests := []struct { name string alert Alert - want TickTemplate + want chronograf.TickTemplate wantErr bool }{ { From c3b4fa6ddacfd1e5d601d9f81833fc16d07679d6 Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Wed, 2 Nov 2016 19:59:25 -0500 Subject: [PATCH 03/14] Add kapacitor script generation --- chronograf.go | 62 ++++- kapacitor/alerts.go | 150 +---------- kapacitor/alerts_test.go | 320 +++-------------------- kapacitor/data.go | 47 ++++ kapacitor/data_test.go | 57 +++++ kapacitor/influxout.go | 20 ++ 
kapacitor/influxout_test.go | 34 +++ kapacitor/server.go | 61 +++++ kapacitor/templates.go | 117 +++------ kapacitor/tickscripts.go | 40 +++ kapacitor/tickscripts_test.go | 467 ++++++++++++++++++++++++++++++++++ kapacitor/triggers.go | 18 ++ kapacitor/triggers_test.go | 103 ++++++++ kapacitor/validate.go | 54 ++++ kapacitor/validate_test.go | 58 +++++ kapacitor/vars.go | 171 +++++++++++++ 16 files changed, 1271 insertions(+), 508 deletions(-) create mode 100644 kapacitor/data.go create mode 100644 kapacitor/data_test.go create mode 100644 kapacitor/influxout.go create mode 100644 kapacitor/influxout_test.go create mode 100644 kapacitor/server.go create mode 100644 kapacitor/tickscripts.go create mode 100644 kapacitor/tickscripts_test.go create mode 100644 kapacitor/triggers.go create mode 100644 kapacitor/triggers_test.go create mode 100644 kapacitor/validate.go create mode 100644 kapacitor/validate_test.go create mode 100644 kapacitor/vars.go diff --git a/chronograf.go b/chronograf.go index de1bedaa40..85eb043f91 100644 --- a/chronograf.go +++ b/chronograf.go @@ -87,13 +87,63 @@ type SourcesStore interface { Update(context.Context, Source) error } -// TickTemplate task to be used by kapacitor -type TickTemplate string +// QueryConfig represents UI query from the data explorer +type QueryConfig struct { + ID string `json:"id,omitempty"` + Database string `json:"database"` + Measurement string `json:"measurement"` + RetentionPolicy string `json:"retentionPolicy"` + Fields []struct { + Field string `json:"field"` + Funcs []string `json:"funcs"` + } `json:"fields"` + Tags map[string][]string `json:"tags"` + GroupBy struct { + Time string `json:"time"` + Tags []string `json:"tags"` + } `json:"groupBy"` + AreTagsAccepted bool `json:"areTagsAccepted"` + RawText string `json:"rawText,omitempty"` +} -// Alert generates tickscript templates for kapacitor -type Alert interface { - // Generate will create the tickscript to be used as a kapacitor template - Generate() 
(TickTemplate, error) +// AlertRule represents rules for building a tickscript alerting task +type AlertRule struct { + ID string `json:"id,omitempty"` // ID is the unique ID of the alert + Name string `json:"name"` // Name is the user-defined name for the alert + Version string `json:"version"` // Version of the alert + Query QueryConfig `json:"query"` // Query is the filter of data for the alert. + Trigger string `json:"trigger"` // Trigger is a type that defines when to trigger the alert + AlertServices []string `json:"alerts"` // AlertServices name all the services to notify (e.g. pagerduty) + Type string `json:"type"` // Type specifies kind of AlertRule (stream, batch) + Operator string `json:"operator"` // Operator for alert comparison + Aggregate string `json:"aggregate"` // Statistic aggregate over window of data + Period string `json:"period"` // Period is the window to search for alerting criteria + Every string `json:"every"` // Every how often to check for the alerting criteria + Critical string `json:"critical"` // Critical is the boundary value when alert goes critical + Shift string `json:"shift"` // Shift is the amount of time to look into the past for the alert to compare to the present +} + +// AlertRulesStore stores rules for building tickscript alerting tasks +type AlertRulesStore interface { + // All returns all rules in the store + All(context.Context) ([]AlertRule, error) + // Add creates a new rule in the AlertRulesStore and returns AlertRule with ID + Add(context.Context, AlertRule) (AlertRule, error) + // Delete the AlertRule from the store + Delete(context.Context, AlertRule) error + // Get retrieves AlertRule if `ID` exists + Get(ctx context.Context, ID string) (AlertRule, error) + // Update the AlertRule in the store. 
+ Update(context.Context, AlertRule) error +} + +// TICKScript task to be used by kapacitor +type TICKScript string + +// Ticker generates tickscript tasks for kapacitor +type Ticker interface { + // Generate will create the tickscript to be used as a kapacitor task + Generate(AlertRule) (TICKScript, error) } // Server represents a proxy connection to an HTTP server diff --git a/kapacitor/alerts.go b/kapacitor/alerts.go index 0cca087c2a..0236543014 100644 --- a/kapacitor/alerts.go +++ b/kapacitor/alerts.go @@ -1,147 +1,19 @@ -package tickscripts +package kapacitor import ( - "bytes" "fmt" - "log" - "text/template" - "time" "github.com/influxdata/chronograf" - "github.com/influxdata/kapacitor/pipeline" - "github.com/influxdata/kapacitor/tick/ast" - "github.com/influxdata/kapacitor/tick/stateful" ) -var _ chronograf.Alert = &Alert{} - -// Alert defines alerting strings in template rendering -type Alert struct { - Trigger string // Specifies the type of alert - Service string // Alerting service - Operator string // Operator for alert comparison - Aggregate string // Statistic aggregate over window of data +// AlertServices generates alert chaining methods to be attached to an alert from all rule Services +func AlertServices(rule chronograf.AlertRule) (string, error) { + alert := "" + for _, service := range rule.AlertServices { + if err := ValidateAlert(service); err != nil { + return "", err + } + alert = alert + fmt.Sprintf(".%s()", service) + } + return alert, nil } - -func (a *Alert) Generate() (chronograf.TickTemplate, error) { - switch a.Trigger { - case "threshold": - return a.Threshold() - case "relative": - return a.Relative() - case "deadman": - return a.Deadman() - } - return "", fmt.Errorf("Unknown tigger mechanism %s", a.Trigger) -} - -// Threshold generates a tickscript template with an alert -func (a *Alert) Threshold() (chronograf.TickTemplate, error) { - if err := ValidateAlert(a); err != nil { - return "", err - } - - tickscript, err := 
execTemplate(ThresholdTemplate, a) - if err != nil { - return "", err - } - - if err := validateTick(tickscript); err != nil { - return "", err - } - - return formatTick(tickscript) -} - -// Relative creates a tickscript that alerts on relative changes over windows of data -func (a *Alert) Relative() (chronograf.TickTemplate, error) { - if err := ValidateAlert(a); err != nil { - return "", err - } - - tickscript, err := execTemplate(RelativeTemplate, a) - if err != nil { - return "", err - } - - if err := validateTick(tickscript); err != nil { - return "", err - } - - return formatTick(tickscript) -} - -// Deadman creates a tickscript that alerts when no data has been received for a time. -func (a *Alert) Deadman() (chronograf.TickTemplate, error) { - if err := ValidateAlert(a); err != nil { - return "", err - } - - tickscript, err := execTemplate(DeadmanTemplate, a) - if err != nil { - return "", err - } - - if err := validateTick(tickscript); err != nil { - return "", err - } - - return formatTick(tickscript) -} - -// ValidateAlert checks if the alert is a valid kapacitor alert service. -func ValidateAlert(alert *Alert) error { - // Simple tick script to check alert service. - // If a pipeline cannot be created then we know this is an invalid - // service. At least with this version of kapacitor! 
- script := fmt.Sprintf("stream|from()|alert().%s()", alert.Service) - return validateTick(script) -} - -func formatTick(tickscript string) (chronograf.TickTemplate, error) { - node, err := ast.Parse(tickscript) - if err != nil { - log.Fatalf("parse execution: %s", err) - return "", err - } - - output := new(bytes.Buffer) - node.Format(output, "", true) - return chronograf.TickTemplate(output.String()), nil -} - -func validateTick(script string) error { - scope := stateful.NewScope() - _, err := pipeline.CreateTemplatePipeline(script, pipeline.StreamEdge, scope, &deadman{}) - return err -} - -func execTemplate(tick string, alert *Alert) (string, error) { - p := template.New("template") - t, err := p.Parse(tick) - if err != nil { - log.Fatalf("template parse: %s", err) - return "", err - } - buf := new(bytes.Buffer) - err = t.Execute(buf, &alert) - if err != nil { - log.Fatalf("template execution: %s", err) - return "", err - } - return buf.String(), nil -} - -type deadman struct { - interval time.Duration - threshold float64 - id string - message string - global bool -} - -func (d deadman) Interval() time.Duration { return d.interval } -func (d deadman) Threshold() float64 { return d.threshold } -func (d deadman) Id() string { return d.id } -func (d deadman) Message() string { return d.message } -func (d deadman) Global() bool { return d.global } diff --git a/kapacitor/alerts_test.go b/kapacitor/alerts_test.go index 194c9032f8..1672cf7e08 100644 --- a/kapacitor/alerts_test.go +++ b/kapacitor/alerts_test.go @@ -1,4 +1,4 @@ -package tickscripts +package kapacitor import ( "testing" @@ -6,314 +6,66 @@ import ( "github.com/influxdata/chronograf" ) -func TestValidateAlert(t *testing.T) { +func TestAlertServices(t *testing.T) { tests := []struct { name string - alert Alert + rule chronograf.AlertRule + want chronograf.TICKScript wantErr bool }{ { - name: "Test valid template alert", - alert: Alert{ - Service: "slack", + name: "Test several valid services", + rule: 
chronograf.AlertRule{ + AlertServices: []string{"slack", "victorOps", "email"}, }, - wantErr: false, + want: `alert() + .slack() + .victorOps() + .email() +`, }, { - name: "Test invalid template alert", - alert: Alert{ - Service: "invalid", + name: "Test single invalid services amongst several valid", + rule: chronograf.AlertRule{ + AlertServices: []string{"slack", "invalid", "email"}, }, + want: ``, wantErr: true, }, - } - for _, tt := range tests { - if err := ValidateAlert(&tt.alert); (err != nil) != tt.wantErr { - t.Errorf("%q. ValidateAlert() error = %v, wantErr %v", tt.name, err, tt.wantErr) - } - } -} - -func Test_validateTick(t *testing.T) { - type args struct { - script string - } - tests := []struct { - name string - args args - wantErr bool - }{ { - name: "Valid Script", - args: args{ - script: "stream|from()", - }, - wantErr: false, - }, - { - name: "Invalid Script", - args: args{ - script: "stream|nothing", + name: "Test single invalid service", + rule: chronograf.AlertRule{ + AlertServices: []string{"invalid"}, }, + want: ``, wantErr: true, }, - } - for _, tt := range tests { - if err := validateTick(tt.args.script); (err != nil) != tt.wantErr { - t.Errorf("%q. 
validateTick() error = %v, wantErr %v", tt.name, err, tt.wantErr) - } - } -} - -func TestThreshold(t *testing.T) { - tests := []struct { - name string - alert Alert - want chronograf.TickTemplate - wantErr bool - }{ { - name: "Test valid template alert", - alert: Alert{ - Service: "slack", - Operator: ">", + name: "Test single valid service", + rule: chronograf.AlertRule{ + AlertServices: []string{"slack"}, }, - want: `var database = 'telegraf' - -var rp = 'autogen' - -var measurement string - -var metric string - -var groupby = ['host'] - -var crit int - -var period duration - -var every duration - -var message string - -var id string - -stream - |from() - .database(database) - .retentionPolicy(rp) - .measurement(measurement) - .groupBy(groupby) - |window() - .period(period) - .every(every) - |mean(metric) - .as('stat') - |alert() - .id(id) - .message(message) - .crit(lambda: "stat" > crit) + want: `alert() .slack() `, - wantErr: false, - }, - { - name: "Test valid template alert", - alert: Alert{ - Service: "invalid", - Operator: ">", - }, - want: "", - wantErr: true, }, } for _, tt := range tests { - got, err := tt.alert.Threshold() + got, err := AlertServices(tt.rule) if (err != nil) != tt.wantErr { - t.Errorf("%q. Threshold() error = %v, wantErr %v", tt.name, err, tt.wantErr) + t.Errorf("%q. AlertServices() error = %v, wantErr %v", tt.name, err, tt.wantErr) continue } - if got != tt.want { - t.Errorf("%q. 
Threshold() = %v, want %v", tt.name, got, tt.want) - } - } -} - -func TestRelative(t *testing.T) { - tests := []struct { - name string - alert Alert - want chronograf.TickTemplate - wantErr bool - }{ - { - name: "Test valid template alert", - alert: Alert{ - Service: "slack", - Operator: ">", - Aggregate: "mean", - }, - want: `var database = 'telegraf' - -var rp = 'autogen' - -var measurement string - -var metric string - -var groupby = ['host'] - -var crit int - -var period duration - -var every duration - -var shift duration - -var message string - -var id string - -var data = stream - |from() - .database(database) - .retentionPolicy(rp) - .measurement(measurement) - .groupBy(groupby) - -var past = data - |window() - .period(period) - .every(every) - .align() - |mean(metric) - .as('stat') - |shift(shift) - -var current = data - |window() - .period(period) - .every(every) - .align() - |mean(metric) - .as('stat') - -past - |join(current) - .as('past', 'current') - |eval(lambda: abs(float("current.stat" - "past.stat")) / float("past.stat")) - .keep() - .as('perc') - |alert() - .id(id) - .message(message) - .crit(lambda: "perc" > crit) - .slack() -`, - wantErr: false, - }, - { - name: "Test invalid service template", - alert: Alert{ - Service: "invalid", - Operator: ">", - Aggregate: "mean", - }, - want: "", - wantErr: true, - }, - { - name: "Test invalid aggregate template", - alert: Alert{ - Service: "slack", - Operator: ">", - Aggregate: "invalid", - }, - want: "", - wantErr: true, - }, - { - name: "Test invalid operator template", - alert: Alert{ - Service: "slack", - Operator: "invalid", - Aggregate: "mean", - }, - want: "", - wantErr: true, - }, - } - for _, tt := range tests { - got, err := tt.alert.Relative() - if (err != nil) != tt.wantErr { - t.Errorf("%q. Relative() error = %v, wantErr %v", tt.name, err, tt.wantErr) - continue - } - if got != tt.want { - t.Errorf("%q. 
Relative() = %v, want %v", tt.name, got, tt.want) - } - } -} - -func TestDeadman(t *testing.T) { - tests := []struct { - name string - alert Alert - want chronograf.TickTemplate - wantErr bool - }{ - { - name: "Test valid template alert", - alert: Alert{ - Service: "slack", - }, - want: `var database = 'telegraf' - -var rp = 'autogen' - -var measurement string - -var groupby = ['host'] - -var threshold float - -var period duration - -var id string - -var message string - -stream - |from() - .database(database) - .retentionPolicy(rp) - .measurement(measurement) - .groupBy(groupby) - |deadman(threshold, period) - .id(id) - .message(message) - .slack() -`, - wantErr: false, - }, - { - name: "Test valid template alert", - alert: Alert{ - Service: "invalid", - }, - want: "", - wantErr: true, - }, - } - for _, tt := range tests { - got, err := tt.alert.Deadman() - if (err != nil) != tt.wantErr { - t.Errorf("%q. Deadman() error = %v, wantErr %v", tt.name, err, tt.wantErr) - continue - } - if got != tt.want { - t.Errorf("%q. Deadman() = %v, want %v", tt.name, got, tt.want) + if tt.wantErr { + continue + } + formatted, err := formatTick("alert()" + got) + if err != nil { + t.Errorf("%q. formatTick() error = %v", tt.name, err) + continue + } + if formatted != tt.want { + t.Errorf("%q. 
AlertServices() = %v, want %v", tt.name, formatted, tt.want) } } } diff --git a/kapacitor/data.go b/kapacitor/data.go new file mode 100644 index 0000000000..bc8c11f04c --- /dev/null +++ b/kapacitor/data.go @@ -0,0 +1,47 @@ +package kapacitor + +import ( + "fmt" + + "github.com/influxdata/chronograf" +) + +// Data returns the tickscript data section for querying +// TODO: Someone else needs to build the var period and var every +func Data(q chronograf.QueryConfig) (string, error) { + if q.RawText != "" { + batch := ` + var data = batch + |query(''' + %s + ''') + .period(period) + .every(every) + .align()` + batch = fmt.Sprintf(batch, q.RawText) + if q.GroupBy.Time != "" { + batch = batch + fmt.Sprintf(".groupBy(%s)", q.GroupBy.Time) + } + return batch, nil + } + stream := `var data = stream + |from() + .database(db) + .retentionPolicy(rp) + .measurement(measurement) + ` + + stream = fmt.Sprintf("%s\n.groupBy(groupby)\n", stream) + stream = stream + "|window().period(period).every(every).align()\n" + + for _, field := range q.Fields { + for _, fnc := range field.Funcs { + stream = stream + fmt.Sprintf(`|%s(field).as(metric)`, fnc) + break // only support a single field + } + break // only support a single field + } + + stream = stream + "|where(where_filter)\n" + return stream, nil +} diff --git a/kapacitor/data_test.go b/kapacitor/data_test.go new file mode 100644 index 0000000000..0455291477 --- /dev/null +++ b/kapacitor/data_test.go @@ -0,0 +1,57 @@ +package kapacitor + +import ( + "encoding/json" + "fmt" + "testing" + + "github.com/influxdata/chronograf" +) + +var config = `{ + "id": "93e17825-2fb0-4507-87bd-a0c136947f7e", + "database": "telegraf", + "measurement": "cpu", + "retentionPolicy": "default", + "fields": [{ + "field": "usage_user", + "funcs": ["mean"] + }], + "tags": { + "host": [ + "acc-0eabc309-eu-west-1-data-3", + "prod" + ], + "cpu": [ + "cpu_total" + ] + }, + "groupBy": { + "time": null, + "tags": [ + "host", + "cluster_id" + ] + }, + 
"areTagsAccepted": true, + "rawText": null +}` + +func TestData(t *testing.T) { + q := chronograf.QueryConfig{} + err := json.Unmarshal([]byte(config), &q) + if err != nil { + t.Errorf("Error unmarshaling %v", err) + } + if tick, err := Data(q); err != nil { + t.Errorf("Error creating tick %v", err) + } else { + formatted, err := formatTick(tick) + if err != nil { + fmt.Printf(tick) + t.Errorf("Error formatting tick %v", err) + } + fmt.Printf("%s", formatted) + } + +} diff --git a/kapacitor/influxout.go b/kapacitor/influxout.go new file mode 100644 index 0000000000..882c3aac45 --- /dev/null +++ b/kapacitor/influxout.go @@ -0,0 +1,20 @@ +package kapacitor + +import ( + "fmt" + + "github.com/influxdata/chronograf" +) + +// InfluxOut creates a kapacitor influxDBOut node to write alert data to Database, RP, Measurement. +func InfluxOut(rule chronograf.AlertRule) string { + return fmt.Sprintf(` + trigger + |influxDBOut() + .create() + .database(output_db) + .retentionPolicy(output_rp) + .measurement(output_mt) + .tag('name', '%s') + `, rule.Name) +} diff --git a/kapacitor/influxout_test.go b/kapacitor/influxout_test.go new file mode 100644 index 0000000000..84b517c107 --- /dev/null +++ b/kapacitor/influxout_test.go @@ -0,0 +1,34 @@ +package kapacitor + +import "testing" +import "github.com/influxdata/chronograf" + +func TestInfluxOut(t *testing.T) { + tests := []struct { + name string + want chronograf.TICKScript + }{ + { + name: "Test influxDBOut kapacitor node", + want: `trigger + |influxDBOut() + .create() + .database(output_db) + .retentionPolicy(output_rp) + .measurement(output_mt) + .tag('name', 'name') +`, + }, + } + for _, tt := range tests { + got := InfluxOut(chronograf.AlertRule{Name: "name"}) + formatted, err := formatTick(got) + if err != nil { + t.Errorf("%q. formatTick() error = %v", tt.name, err) + continue + } + if formatted != tt.want { + t.Errorf("%q. 
InfluxOut() = %v, want %v", tt.name, formatted, tt.want) + } + } +} diff --git a/kapacitor/server.go b/kapacitor/server.go new file mode 100644 index 0000000000..f9e9d71daf --- /dev/null +++ b/kapacitor/server.go @@ -0,0 +1,61 @@ +package kapacitor + +import ( + "context" + + client "github.com/influxdata/kapacitor/client/v1" +) + +type Server struct { + URL string + Username string + Password string +} + +const ( + templatePrefix = "chronograf_v1_" + templatePattern = "chronograf_v1_*" +) + +// Template plus its read-only attributes. +type Template struct { + ID string `json:"id"` + Type string `json:"type"` + TICKscript string `json:"script"` +} + +func (s *Server) Templates(ctx context.Context) ([]Template, error) { + var creds *client.Credentials + if s.Username != "" { + creds = &client.Credentials{ + Method: client.UserAuthentication, + Username: s.Username, + Password: s.Password, + } + } + + kapa, err := client.New(client.Config{ + URL: s.URL, + Credentials: creds, + }) + if err != nil { + return nil, err + } + + templates, err := kapa.ListTemplates(&client.ListTemplatesOptions{ + Pattern: templatePattern, + }) + if err != nil { + return nil, err + } + + res := []Template{} + for _, t := range templates { + res = append(res, Template{ + ID: t.ID, + Type: t.Type.String(), + TICKscript: t.TICKscript, + }) + } + return res, nil +} diff --git a/kapacitor/templates.go b/kapacitor/templates.go index 1891b1758a..cda3e62041 100644 --- a/kapacitor/templates.go +++ b/kapacitor/templates.go @@ -1,105 +1,64 @@ -package tickscripts +package kapacitor -// TODO: I don't think mean is correct here. It's probably any value. 
-// TODO: seems like we should only have statechanges +import ( + "bytes" + "log" + "text/template" +) -// ThresholdTemplate is a tickscript template template for threshold alerts -var ThresholdTemplate = `var database = 'telegraf' -var rp = 'autogen' -var measurement string -var metric string -var groupby = ['host'] -var crit int -var period duration -var every duration -var message string -var id string - -stream - |from() - .database(database) - .retentionPolicy(rp) - .measurement(measurement) - .groupBy(groupby) - |window() - .period(period) - .every(every) - |mean(metric) - .as('stat') +// ThresholdTrigger is the trickscript trigger for alerts that exceed a value +var ThresholdTrigger = ` + var trigger = data|{{ .Aggregate }}(metric) + .as('value') |alert() + .stateChangesOnly() .id(id) .message(message) - .crit(lambda: "stat" {{ .Operator }} crit) - .{{ .Service }}()` - -// RelativeTemplate compares one window of data versus another. -var RelativeTemplate = `var database = 'telegraf' -var rp = 'autogen' -var measurement string -var metric string -var groupby = ['host'] -var crit int -var period duration -var every duration -var shift duration -var message string -var id string - -var data = stream - |from() - .database(database) - .retentionPolicy(rp) - .measurement(measurement) - .groupBy(groupby) + .crit(lambda: "value" {{ .Operator }} crit)` +// RelativeTrigger compares one window of data versus another. 
+var RelativeTrigger = ` var past = data - |window() - .period(period) - .every(every) - .align() |{{ .Aggregate }}(metric) .as('stat') |shift(shift) var current = data - |window() - .period(period) - .every(every) - .align() |{{ .Aggregate }}(metric) .as('stat') -past +var trigger = past |join(current) .as('past', 'current') |eval(lambda: abs(float("current.stat" - "past.stat"))/float("past.stat")) .keep() - .as('perc') + .as('value') |alert() + .stateChangesOnly() .id(id) .message(message) - .crit(lambda: "perc" {{ .Operator }} crit) - .{{ .Service }}()` + .crit(lambda: "value" {{ .Operator }} crit)` -// DeadmanTemplate checks if any data has been streamed in the last period of time -var DeadmanTemplate = `var database = 'telegraf' -var rp = 'autogen' -var measurement string -var groupby = ['host'] -var threshold float -var period duration - -var id string -var message string - -stream - |from() - .database(database) - .retentionPolicy(rp) - .measurement(measurement) - .groupBy(groupby) - |deadman(threshold, period) +// DeadmanTrigger checks if any data has been streamed in the last period of time +var DeadmanTrigger = ` + var trigger = data|deadman(threshold, period) + .stateChangesOnly() .id(id) .message(message) - .{{ .Service }}() ` + +func execTemplate(tick string, alert interface{}) (string, error) { + p := template.New("template") + t, err := p.Parse(tick) + if err != nil { + log.Fatalf("template parse: %s", err) + return "", err + } + buf := new(bytes.Buffer) + err = t.Execute(buf, alert) + if err != nil { + log.Fatalf("template execution: %s", err) + return "", err + } + return buf.String(), nil +} diff --git a/kapacitor/tickscripts.go b/kapacitor/tickscripts.go new file mode 100644 index 0000000000..827e1a810f --- /dev/null +++ b/kapacitor/tickscripts.go @@ -0,0 +1,40 @@ +package kapacitor + +import ( + "fmt" + + "github.com/influxdata/chronograf" +) + +var _ chronograf.Ticker = &Alert{} + +// Alert defines alerting strings in template rendering +type 
Alert struct { + Trigger string // Specifies the type of alert + Service string // Alerting service + Operator string // Operator for alert comparison + Aggregate string // Statistic aggregate over window of data +} + +// Generate creates a Tickscript from the alertrule +func (a *Alert) Generate(rule chronograf.AlertRule) (chronograf.TICKScript, error) { + vars, err := Vars(rule) + if err != nil { + return "", nil + } + data, err := Data(rule.Query) + if err != nil { + return "", nil + } + trigger, err := Trigger(rule) + if err != nil { + return "", err + } + services, err := AlertServices(rule) + if err != nil { + return "", err + } + output := InfluxOut(rule) + raw := fmt.Sprintf("%s\n%s\n%s%s\n%s", vars, data, trigger, services, output) + return formatTick(raw) +} diff --git a/kapacitor/tickscripts_test.go b/kapacitor/tickscripts_test.go new file mode 100644 index 0000000000..3a088ac85b --- /dev/null +++ b/kapacitor/tickscripts_test.go @@ -0,0 +1,467 @@ +package kapacitor + +import ( + "fmt" + "testing" + + "github.com/influxdata/chronograf" +) + +func TestGenerate(t *testing.T) { + alert := chronograf.AlertRule{ + Name: "name", + Version: "1.0", + Trigger: "relative", + AlertServices: []string{"slack", "victorOps", "email"}, + Type: "stream", + Operator: ">", + Aggregate: "mean", + Period: "10m", + Every: "30s", + Critical: "90", + Shift: "1m", + Query: chronograf.QueryConfig{ + Database: "telegraf", + Measurement: "cpu", + RetentionPolicy: "autogen", + Fields: []struct { + Field string `json:"field"` + Funcs []string `json:"funcs"` + }{ + { + Field: "usage_user", + Funcs: []string{"mean"}, + }, + }, + Tags: map[string][]string{ + "host": []string{ + "acc-0eabc309-eu-west-1-data-3", + "prod", + }, + "cpu": []string{ + "cpu_total", + }, + }, + GroupBy: struct { + Time string `json:"time"` + Tags []string `json:"tags"` + }{ + Time: "", + Tags: []string{"host", "cluster_id"}, + }, + AreTagsAccepted: true, + RawText: "", + }, + } + gen := Alert{} + _, err := 
gen.Generate(alert) + if err != nil { + t.Errorf("Error generating alert: %v", err) + } +} + +func TestThreshold(t *testing.T) { + alert := chronograf.AlertRule{ + Name: "name", + Version: "1.0", + Trigger: "threshold", + AlertServices: []string{"slack", "victorOps", "email"}, + Type: "stream", + Operator: ">", + Aggregate: "mean", + Period: "10m", + Every: "30s", + Critical: "90", + Shift: "1m", + Query: chronograf.QueryConfig{ + Database: "telegraf", + Measurement: "cpu", + RetentionPolicy: "autogen", + Fields: []struct { + Field string `json:"field"` + Funcs []string `json:"funcs"` + }{ + { + Field: "usage_user", + Funcs: []string{"mean"}, + }, + }, + Tags: map[string][]string{ + "host": []string{ + "acc-0eabc309-eu-west-1-data-3", + "prod", + }, + "cpu": []string{ + "cpu_total", + }, + }, + GroupBy: struct { + Time string `json:"time"` + Tags []string `json:"tags"` + }{ + Time: "", + Tags: []string{"host", "cluster_id"}, + }, + AreTagsAccepted: true, + RawText: "", + }, + } + + tests := []struct { + name string + alert chronograf.AlertRule + want chronograf.TICKScript + wantErr bool + }{ + { + name: "Test valid template alert", + alert: alert, + want: `var db = 'telegraf' + +var rp = 'autogen' + +var measurement = 'cpu' + +var field = 'usage_user' + +var groupby = ['host', 'cluster_id'] + +var where_filter = lambda: ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') AND ("cpu" == 'cpu_total') + +var period = 10m + +var every = 30s + +var metric = 'metric' + +var crit = 90 + +var output_db = 'chronograf' + +var output_rp = 'autogen' + +var output_mt = 'alerts' + +var data = stream + |from() + .database(db) + .retentionPolicy(rp) + .measurement(measurement) + .groupBy(groupby) + |window() + .period(period) + .every(every) + .align() + |mean(field) + .as(metric) + |where(where_filter) + +var trigger = data + |mean(metric) + .as('value') + |alert() + .stateChangesOnly() + .id(id) + .message(message) + .crit(lambda: "value" > crit) + .slack() + 
.victorOps() + .email() + +trigger + |influxDBOut() + .create() + .database(output_db) + .retentionPolicy(output_rp) + .measurement(output_mt) + .tag('name', 'name') +`, + wantErr: false, + }, + } + for _, tt := range tests { + gen := Alert{} + got, err := gen.Generate(tt.alert) + if (err != nil) != tt.wantErr { + t.Errorf("%q. Threshold() error = %v, wantErr %v", tt.name, err, tt.wantErr) + continue + } + if got != tt.want { + t.Errorf("%q. Threshold() = %v, want %v", tt.name, got, tt.want) + } + } +} + +func TestRelative(t *testing.T) { + alert := chronograf.AlertRule{ + Name: "name", + Version: "1.0", + Trigger: "relative", + AlertServices: []string{"slack", "victorOps", "email"}, + Type: "stream", + Operator: ">", + Aggregate: "mean", + Period: "10m", + Every: "30s", + Critical: "90", + Shift: "1m", + Query: chronograf.QueryConfig{ + Database: "telegraf", + Measurement: "cpu", + RetentionPolicy: "autogen", + Fields: []struct { + Field string `json:"field"` + Funcs []string `json:"funcs"` + }{ + { + Field: "usage_user", + Funcs: []string{"mean"}, + }, + }, + Tags: map[string][]string{ + "host": []string{ + "acc-0eabc309-eu-west-1-data-3", + "prod", + }, + "cpu": []string{ + "cpu_total", + }, + }, + GroupBy: struct { + Time string `json:"time"` + Tags []string `json:"tags"` + }{ + Time: "", + Tags: []string{"host", "cluster_id"}, + }, + AreTagsAccepted: true, + RawText: "", + }, + } + + tests := []struct { + name string + alert chronograf.AlertRule + want chronograf.TICKScript + wantErr bool + }{ + { + name: "Test valid template alert", + alert: alert, + want: `var db = 'telegraf' + +var rp = 'autogen' + +var measurement = 'cpu' + +var field = 'usage_user' + +var groupby = ['host', 'cluster_id'] + +var where_filter = lambda: ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') AND ("cpu" == 'cpu_total') + +var period = 10m + +var every = 30s + +var metric = 'metric' + +var shift = -1m + +var crit = 90 + +var output_db = 'chronograf' + +var output_rp = 
'autogen' + +var output_mt = 'alerts' + +var data = stream + |from() + .database(db) + .retentionPolicy(rp) + .measurement(measurement) + .groupBy(groupby) + |window() + .period(period) + .every(every) + .align() + |mean(field) + .as(metric) + |where(where_filter) + +var past = data + |mean(metric) + .as('stat') + |shift(shift) + +var current = data + |mean(metric) + .as('stat') + +var trigger = past + |join(current) + .as('past', 'current') + |eval(lambda: abs(float("current.stat" - "past.stat")) / float("past.stat")) + .keep() + .as('value') + |alert() + .stateChangesOnly() + .id(id) + .message(message) + .crit(lambda: "value" > crit) + .slack() + .victorOps() + .email() + +trigger + |influxDBOut() + .create() + .database(output_db) + .retentionPolicy(output_rp) + .measurement(output_mt) + .tag('name', 'name') +`, + wantErr: false, + }, + } + for _, tt := range tests { + gen := Alert{} + got, err := gen.Generate(tt.alert) + if (err != nil) != tt.wantErr { + t.Errorf("%q. Relative() error = %v, wantErr %v", tt.name, err, tt.wantErr) + continue + } + if got != tt.want { + t.Errorf("%q. 
Relative() = %v, want %v", tt.name, got, tt.want) + } + } +} + +func TestDeadman(t *testing.T) { + alert := chronograf.AlertRule{ + Name: "name", + Version: "1.0", + Trigger: "deadman", + AlertServices: []string{"slack", "victorOps", "email"}, + Type: "stream", + Operator: ">", + Aggregate: "mean", + Period: "10m", + Every: "30s", + Critical: "90", + Shift: "1m", + Query: chronograf.QueryConfig{ + Database: "telegraf", + Measurement: "cpu", + RetentionPolicy: "autogen", + Fields: []struct { + Field string `json:"field"` + Funcs []string `json:"funcs"` + }{ + { + Field: "usage_user", + Funcs: []string{"mean"}, + }, + }, + Tags: map[string][]string{ + "host": []string{ + "acc-0eabc309-eu-west-1-data-3", + "prod", + }, + "cpu": []string{ + "cpu_total", + }, + }, + GroupBy: struct { + Time string `json:"time"` + Tags []string `json:"tags"` + }{ + Time: "", + Tags: []string{"host", "cluster_id"}, + }, + AreTagsAccepted: true, + RawText: "", + }, + } + + tests := []struct { + name string + alert chronograf.AlertRule + want chronograf.TICKScript + wantErr bool + }{ + { + name: "Test valid template alert", + alert: alert, + want: `var db = 'telegraf' + +var rp = 'autogen' + +var measurement = 'cpu' + +var field = 'usage_user' + +var groupby = ['host', 'cluster_id'] + +var where_filter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') + +var period = 10m + +var every = 30s + +var threshold = 0 + +var output_db = 'chronograf' + +var output_rp = 'autogen' + +var output_mt = 'alerts' + +var data = stream + |from() + .database(db) + .retentionPolicy(rp) + .measurement(measurement) + .groupBy(groupby) + |window() + .period(period) + .every(every) + .align() + |mean(field) + .as(metric) + |where(where_filter) + +var trigger = data + |deadman(threshold, period) + .stateChangesOnly() + .id(id) + .message(message) + .slack() + .victorOps() + .email() + +trigger + |influxDBOut() + .create() + .database(output_db) + 
.retentionPolicy(output_rp) + .measurement(output_mt) + .tag('name', 'name') +`, + wantErr: false, + }, + } + for _, tt := range tests { + gen := Alert{} + got, err := gen.Generate(tt.alert) + fmt.Printf("%s", got) + if (err != nil) != tt.wantErr { + t.Errorf("%q. Deadman() error = %v, wantErr %v", tt.name, err, tt.wantErr) + continue + } + if got != tt.want { + t.Errorf("%q. Deadman() = %v, want %v", tt.name, got, tt.want) + } + } +} diff --git a/kapacitor/triggers.go b/kapacitor/triggers.go new file mode 100644 index 0000000000..c64d2f70c4 --- /dev/null +++ b/kapacitor/triggers.go @@ -0,0 +1,18 @@ +package kapacitor + +import "github.com/influxdata/chronograf" +import "fmt" + +// Trigger returns the trigger mechanism for a tickscript +func Trigger(rule chronograf.AlertRule) (string, error) { + switch rule.Trigger { + case "deadman": + return DeadmanTrigger, nil + case "relative": + return execTemplate(RelativeTrigger, rule) + case "threshold": + return execTemplate(ThresholdTrigger, rule) + default: + return "", fmt.Errorf("Unknown trigger type: %s", rule.Type) + } +} diff --git a/kapacitor/triggers_test.go b/kapacitor/triggers_test.go new file mode 100644 index 0000000000..7399d880f3 --- /dev/null +++ b/kapacitor/triggers_test.go @@ -0,0 +1,103 @@ +package kapacitor + +import ( + "testing" + + "github.com/influxdata/chronograf" +) + +func TestTrigger(t *testing.T) { + tests := []struct { + name string + rule chronograf.AlertRule + want string + wantErr bool + }{ + { + name: "Test Deadman", + rule: chronograf.AlertRule{ + Trigger: "deadman", + Operator: ">", + Aggregate: "mean", + }, + want: `var trigger = data + |deadman(threshold, period) + .stateChangesOnly() + .id(id) + .message(message) +`, + wantErr: false, + }, + { + name: "Test Relative", + rule: chronograf.AlertRule{ + Trigger: "relative", + Operator: ">", + Aggregate: "mean", + }, + want: `var past = data + |mean(metric) + .as('stat') + |shift(shift) + +var current = data + |mean(metric) + .as('stat') + 
+var trigger = past + |join(current) + .as('past', 'current') + |eval(lambda: abs(float("current.stat" - "past.stat")) / float("past.stat")) + .keep() + .as('value') + |alert() + .stateChangesOnly() + .id(id) + .message(message) + .crit(lambda: "value" > crit) +`, + wantErr: false, + }, + { + name: "Test Threshold", + rule: chronograf.AlertRule{ + Trigger: "threshold", + Operator: ">", + Aggregate: "median", + }, + want: `var trigger = data + |median(metric) + .as('value') + |alert() + .stateChangesOnly() + .id(id) + .message(message) + .crit(lambda: "value" > crit) +`, + wantErr: false, + }, + { + name: "Test Invalid", + rule: chronograf.AlertRule{ + Type: "invalid", + }, + want: ``, + wantErr: true, + }, + } + for _, tt := range tests { + got, err := Trigger(tt.rule) + if (err != nil) != tt.wantErr { + t.Errorf("%q. Trigger() error = %v, wantErr %v", tt.name, err, tt.wantErr) + continue + } + formatted, err := formatTick(got) + if err != nil { + t.Errorf("%q. formatTick() error = %v", tt.name, err) + continue + } + if string(formatted) != tt.want { + t.Errorf("%q. Trigger() = \n%v\n want \n%v\n", tt.name, string(formatted), tt.want) + } + } +} diff --git a/kapacitor/validate.go b/kapacitor/validate.go new file mode 100644 index 0000000000..36a17018f8 --- /dev/null +++ b/kapacitor/validate.go @@ -0,0 +1,54 @@ +package kapacitor + +import ( + "bytes" + "fmt" + "log" + "time" + + "github.com/influxdata/chronograf" + "github.com/influxdata/kapacitor/pipeline" + "github.com/influxdata/kapacitor/tick/ast" + "github.com/influxdata/kapacitor/tick/stateful" +) + +// ValidateAlert checks if the alert is a valid kapacitor alert service. +func ValidateAlert(service string) error { + // Simple tick script to check alert service. + // If a pipeline cannot be created then we know this is an invalid + // service. At least with this version of kapacitor! 
+ script := fmt.Sprintf("stream|from()|alert().%s()", service) + return validateTick(script) +} + +func formatTick(tickscript string) (chronograf.TICKScript, error) { + node, err := ast.Parse(tickscript) + if err != nil { + log.Fatalf("parse execution: %s", err) + return "", err + } + + output := new(bytes.Buffer) + node.Format(output, "", true) + return chronograf.TICKScript(output.String()), nil +} + +func validateTick(script string) error { + scope := stateful.NewScope() + _, err := pipeline.CreateTemplatePipeline(script, pipeline.StreamEdge, scope, &deadman{}) + return err +} + +type deadman struct { + interval time.Duration + threshold float64 + id string + message string + global bool +} + +func (d deadman) Interval() time.Duration { return d.interval } +func (d deadman) Threshold() float64 { return d.threshold } +func (d deadman) Id() string { return d.id } +func (d deadman) Message() string { return d.message } +func (d deadman) Global() bool { return d.global } diff --git a/kapacitor/validate_test.go b/kapacitor/validate_test.go new file mode 100644 index 0000000000..8fc66cd42c --- /dev/null +++ b/kapacitor/validate_test.go @@ -0,0 +1,58 @@ +package kapacitor + +import "testing" + +func TestValidateAlert(t *testing.T) { + tests := []struct { + name string + service string + wantErr bool + }{ + { + name: "Test valid template alert", + service: "slack", + wantErr: false, + }, + { + name: "Test invalid template alert", + service: "invalid", + wantErr: true, + }, + } + for _, tt := range tests { + if err := ValidateAlert(tt.service); (err != nil) != tt.wantErr { + t.Errorf("%q. 
ValidateAlert() error = %v, wantErr %v", tt.name, err, tt.wantErr) + } + } +} + +func Test_validateTick(t *testing.T) { + type args struct { + script string + } + tests := []struct { + name string + args args + wantErr bool + }{ + { + name: "Valid Script", + args: args{ + script: "stream|from()", + }, + wantErr: false, + }, + { + name: "Invalid Script", + args: args{ + script: "stream|nothing", + }, + wantErr: true, + }, + } + for _, tt := range tests { + if err := validateTick(tt.args.script); (err != nil) != tt.wantErr { + t.Errorf("%q. validateTick() error = %v, wantErr %v", tt.name, err, tt.wantErr) + } + } +} diff --git a/kapacitor/vars.go b/kapacitor/vars.go new file mode 100644 index 0000000000..872dc34e99 --- /dev/null +++ b/kapacitor/vars.go @@ -0,0 +1,171 @@ +package kapacitor + +import ( + "fmt" + "strings" + + "github.com/influxdata/chronograf" +) + +var ( + // Database is the output database for alerts. + Database = "chronograf" + // RP will be autogen for alerts because it is default. + RP = "autogen" + // Measurement will be alerts so that the app knows where to get this data. 
+ Measurement = "alerts" +) + +// Vars builds the top level vars for a kapacitor alert script +func Vars(rule chronograf.AlertRule) (string, error) { + fld, err := field(rule.Query) + if err != nil { + return "", err + } + switch rule.Trigger { + case "threshold": + vars := ` + var db = '%s' + var rp = '%s' + var measurement = '%s' + var field = '%s' + var groupby = %s + var where_filter = %s + + var period = %s + var every = %s + var metric = '%s' + var crit = %s + var output_db = '%s' + var output_rp = '%s' + var output_mt = '%s' + ` + return fmt.Sprintf(vars, + rule.Query.Database, + rule.Query.RetentionPolicy, + rule.Query.Measurement, + fld, + groupBy(rule.Query), + whereFilter(rule.Query), + rule.Period, + rule.Every, + metric(rule.Query), + rule.Critical, + Database, + RP, + Measurement, + ), nil + case "relative": + vars := ` + var db = '%s' + var rp = '%s' + var measurement = '%s' + var field = '%s' + var groupby = %s + var where_filter = %s + + var period = %s + var every = %s + var metric = '%s' + var shift = -%s + var crit = %s + var output_db = '%s' + var output_rp = '%s' + var output_mt = '%s' + ` + return fmt.Sprintf(vars, + rule.Query.Database, + rule.Query.RetentionPolicy, + rule.Query.Measurement, + fld, + groupBy(rule.Query), + whereFilter(rule.Query), + rule.Period, + rule.Every, + metric(rule.Query), + rule.Shift, + rule.Critical, + Database, + RP, + Measurement, + ), nil + case "deadman": + vars := ` + var db = '%s' + var rp = '%s' + var measurement = '%s' + var field = '%s' + var groupby = %s + var where_filter = %s + + var period = %s + var every = %s + var threshold = %s + var output_db = '%s' + var output_rp = '%s' + var output_mt = '%s' + ` + return fmt.Sprintf(vars, + rule.Query.Database, + rule.Query.RetentionPolicy, + rule.Query.Measurement, + fld, + groupBy(rule.Query), + whereFilter(rule.Query), + rule.Period, + rule.Every, + "0", // deadman threshold hardcoded to zero + Database, + RP, + Measurement, + ), nil + default: + return "", 
fmt.Errorf("Unknown trigger mechanism") + } +} + +func groupBy(q chronograf.QueryConfig) string { + groups := []string{} + for _, tag := range q.GroupBy.Tags { + groups = append(groups, fmt.Sprintf("'%s'", tag)) + } + return "[" + strings.Join(groups, ",") + "]" +} + +func field(q chronograf.QueryConfig) (string, error) { + for _, field := range q.Fields { + return field.Field, nil + } + return "", fmt.Errorf("No fields set in query") +} + +// metric will be metric unless there are no field aggregates. If no aggregates, then it is the field name. +func metric(q chronograf.QueryConfig) string { + for _, field := range q.Fields { + if field.Field != "" && len(field.Funcs) == 0 { + return field.Field + } + } + return "metric" +} + +func whereFilter(q chronograf.QueryConfig) string { + operator := "==" + if !q.AreTagsAccepted { + operator = "!=" + } + + outer := []string{} + for tag, values := range q.Tags { + inner := []string{} + for _, value := range values { + inner = append(inner, fmt.Sprintf(`"%s" %s '%s'`, tag, operator, value)) + } + outer = append(outer, "("+strings.Join(inner, " OR ")+")") + } + if len(outer) > 0 { + return "lambda: " + strings.Join(outer, " AND ") + } + + return "lambda: TRUE" +} From 463e10a0b7efb0ca0313f22381de81460c28ea4b Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Wed, 2 Nov 2016 20:50:16 -0500 Subject: [PATCH 04/14] Update kapacitor tick generation to validate with pipeline --- chronograf.go | 1 + kapacitor/tickscripts.go | 16 +++++++++------- kapacitor/tickscripts_test.go | 23 +++++++++++++++++++---- kapacitor/validate.go | 8 +++++--- kapacitor/validate_test.go | 20 +++++++------------- kapacitor/vars.go | 13 ++++++++++++- 6 files changed, 53 insertions(+), 28 deletions(-) diff --git a/chronograf.go b/chronograf.go index 85eb043f91..ca872f601a 100644 --- a/chronograf.go +++ b/chronograf.go @@ -121,6 +121,7 @@ type AlertRule struct { Every string `json:"every"` // Every how often to check for the alerting criteria Critical string 
`json:"critical"` // Critical is the boundary value when alert goes critical Shift string `json:"shift"` // Shift is the amount of time to look into the past for the alert to compare to the present + Message string `json:"message"` // Message included with alert } // AlertRulesStore stores rules for building tickscript alerting tasks diff --git a/kapacitor/tickscripts.go b/kapacitor/tickscripts.go index 827e1a810f..8aa97d062d 100644 --- a/kapacitor/tickscripts.go +++ b/kapacitor/tickscripts.go @@ -9,12 +9,7 @@ import ( var _ chronograf.Ticker = &Alert{} // Alert defines alerting strings in template rendering -type Alert struct { - Trigger string // Specifies the type of alert - Service string // Alerting service - Operator string // Operator for alert comparison - Aggregate string // Statistic aggregate over window of data -} +type Alert struct{} // Generate creates a Tickscript from the alertrule func (a *Alert) Generate(rule chronograf.AlertRule) (chronograf.TICKScript, error) { @@ -36,5 +31,12 @@ func (a *Alert) Generate(rule chronograf.AlertRule) (chronograf.TICKScript, erro } output := InfluxOut(rule) raw := fmt.Sprintf("%s\n%s\n%s%s\n%s", vars, data, trigger, services, output) - return formatTick(raw) + tick, err := formatTick(raw) + if err != nil { + return "", err + } + if err := validateTick(tick); err != nil { + return "", err + } + return tick, nil } diff --git a/kapacitor/tickscripts_test.go b/kapacitor/tickscripts_test.go index 3a088ac85b..a14c3d79f9 100644 --- a/kapacitor/tickscripts_test.go +++ b/kapacitor/tickscripts_test.go @@ -1,7 +1,6 @@ package kapacitor import ( - "fmt" "testing" "github.com/influxdata/chronograf" @@ -73,6 +72,7 @@ func TestThreshold(t *testing.T) { Every: "30s", Critical: "90", Shift: "1m", + Message: "message", Query: chronograf.QueryConfig{ Database: "telegraf", Measurement: "cpu", @@ -128,6 +128,10 @@ var groupby = ['host', 'cluster_id'] var where_filter = lambda: ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 
'prod') AND ("cpu" == 'cpu_total') +var id = 'kapacitor/{{ .Name }}/{{ .Group }}' + +var message = 'message' + var period = 10m var every = 30s @@ -205,6 +209,7 @@ func TestRelative(t *testing.T) { Every: "30s", Critical: "90", Shift: "1m", + Message: "message", Query: chronograf.QueryConfig{ Database: "telegraf", Measurement: "cpu", @@ -260,6 +265,10 @@ var groupby = ['host', 'cluster_id'] var where_filter = lambda: ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') AND ("cpu" == 'cpu_total') +var id = 'kapacitor/{{ .Name }}/{{ .Group }}' + +var message = 'message' + var period = 10m var every = 30s @@ -351,6 +360,7 @@ func TestDeadman(t *testing.T) { Every: "30s", Critical: "90", Shift: "1m", + Message: "message", Query: chronograf.QueryConfig{ Database: "telegraf", Measurement: "cpu", @@ -404,13 +414,19 @@ var field = 'usage_user' var groupby = ['host', 'cluster_id'] -var where_filter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') +var where_filter = lambda: ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') AND ("cpu" == 'cpu_total') + +var id = 'kapacitor/{{ .Name }}/{{ .Group }}' + +var message = 'message' var period = 10m var every = 30s -var threshold = 0 +var metric = 'metric' + +var threshold = 0.0 var output_db = 'chronograf' @@ -455,7 +471,6 @@ trigger for _, tt := range tests { gen := Alert{} got, err := gen.Generate(tt.alert) - fmt.Printf("%s", got) if (err != nil) != tt.wantErr { t.Errorf("%q. 
Deadman() error = %v, wantErr %v", tt.name, err, tt.wantErr) continue diff --git a/kapacitor/validate.go b/kapacitor/validate.go index 36a17018f8..da7d0a5903 100644 --- a/kapacitor/validate.go +++ b/kapacitor/validate.go @@ -8,6 +8,7 @@ import ( "github.com/influxdata/chronograf" "github.com/influxdata/kapacitor/pipeline" + "github.com/influxdata/kapacitor/tick" "github.com/influxdata/kapacitor/tick/ast" "github.com/influxdata/kapacitor/tick/stateful" ) @@ -18,7 +19,7 @@ func ValidateAlert(service string) error { // If a pipeline cannot be created then we know this is an invalid // service. At least with this version of kapacitor! script := fmt.Sprintf("stream|from()|alert().%s()", service) - return validateTick(script) + return validateTick(chronograf.TICKScript(script)) } func formatTick(tickscript string) (chronograf.TICKScript, error) { @@ -33,9 +34,10 @@ func formatTick(tickscript string) (chronograf.TICKScript, error) { return chronograf.TICKScript(output.String()), nil } -func validateTick(script string) error { +func validateTick(script chronograf.TICKScript) error { scope := stateful.NewScope() - _, err := pipeline.CreateTemplatePipeline(script, pipeline.StreamEdge, scope, &deadman{}) + predefinedVars := map[string]tick.Var{} + _, err := pipeline.CreatePipeline(string(script), pipeline.StreamEdge, scope, &deadman{}, predefinedVars) return err } diff --git a/kapacitor/validate_test.go b/kapacitor/validate_test.go index 8fc66cd42c..701fe67086 100644 --- a/kapacitor/validate_test.go +++ b/kapacitor/validate_test.go @@ -1,6 +1,7 @@ package kapacitor import "testing" +import "github.com/influxdata/chronograf" func TestValidateAlert(t *testing.T) { tests := []struct { @@ -27,31 +28,24 @@ func TestValidateAlert(t *testing.T) { } func Test_validateTick(t *testing.T) { - type args struct { - script string - } tests := []struct { name string - args args + script chronograf.TICKScript wantErr bool }{ { - name: "Valid Script", - args: args{ - script: "stream|from()", 
- }, + name: "Valid Script", + script: "stream|from()", wantErr: false, }, { - name: "Invalid Script", - args: args{ - script: "stream|nothing", - }, + name: "Invalid Script", + script: "stream|nothing", wantErr: true, }, } for _, tt := range tests { - if err := validateTick(tt.args.script); (err != nil) != tt.wantErr { + if err := validateTick(tt.script); (err != nil) != tt.wantErr { t.Errorf("%q. validateTick() error = %v, wantErr %v", tt.name, err, tt.wantErr) } } diff --git a/kapacitor/vars.go b/kapacitor/vars.go index 872dc34e99..4a94afe567 100644 --- a/kapacitor/vars.go +++ b/kapacitor/vars.go @@ -32,6 +32,8 @@ func Vars(rule chronograf.AlertRule) (string, error) { var groupby = %s var where_filter = %s + var id = 'kapacitor/{{ .Name }}/{{ .Group }}' + var message = '%s' var period = %s var every = %s var metric = '%s' @@ -47,6 +49,7 @@ func Vars(rule chronograf.AlertRule) (string, error) { fld, groupBy(rule.Query), whereFilter(rule.Query), + rule.Message, rule.Period, rule.Every, metric(rule.Query), @@ -64,6 +67,8 @@ func Vars(rule chronograf.AlertRule) (string, error) { var groupby = %s var where_filter = %s + var id = 'kapacitor/{{ .Name }}/{{ .Group }}' + var message = '%s' var period = %s var every = %s var metric = '%s' @@ -80,6 +85,7 @@ func Vars(rule chronograf.AlertRule) (string, error) { fld, groupBy(rule.Query), whereFilter(rule.Query), + rule.Message, rule.Period, rule.Every, metric(rule.Query), @@ -98,8 +104,11 @@ func Vars(rule chronograf.AlertRule) (string, error) { var groupby = %s var where_filter = %s + var id = 'kapacitor/{{ .Name }}/{{ .Group }}' + var message = '%s' var period = %s var every = %s + var metric = '%s' var threshold = %s var output_db = '%s' var output_rp = '%s' @@ -112,9 +121,11 @@ func Vars(rule chronograf.AlertRule) (string, error) { fld, groupBy(rule.Query), whereFilter(rule.Query), + rule.Message, rule.Period, rule.Every, - "0", // deadman threshold hardcoded to zero + metric(rule.Query), + "0.0", // deadman threshold 
hardcoded to zero Database, RP, Measurement, From 7061818605e242fc5429d5eff7141fb30e3a878a Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Wed, 2 Nov 2016 20:55:29 -0500 Subject: [PATCH 05/14] Add kapacitor as a build dependency --- Godeps | 1 + 1 file changed, 1 insertion(+) diff --git a/Godeps b/Godeps index 001f26c705..aaef709f9a 100644 --- a/Godeps +++ b/Godeps @@ -6,6 +6,7 @@ github.com/elazarl/go-bindata-assetfs 9a6736ed45b44bf3835afeebb3034b57ed329f3e github.com/gogo/protobuf 6abcf94fd4c97dcb423fdafd42fe9f96ca7e421b github.com/google/go-github 1bc362c7737e51014af7299e016444b654095ad9 github.com/google/go-querystring 9235644dd9e52eeae6fa48efd539fdc351a0af53 +github.com/influxdata/kapacitor 0eb8c348b210dd3d32cb240a417f9e6ded1b691d github.com/influxdata/usage-client 6d3895376368aa52a3a81d2a16e90f0f52371967 github.com/jessevdk/go-flags 4cc2832a6e6d1d3b815e2b9d544b2a4dfb3ce8fa github.com/satori/go.uuid b061729afc07e77a8aa4fad0a2fd840958f1942a From d2a1f4ce73508f39463f1bfaeb7d3a76f8e1d39c Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Wed, 2 Nov 2016 21:05:21 -0500 Subject: [PATCH 06/14] Update tickscript lambda function generation to be sorted --- kapacitor/tickscripts_test.go | 6 +++--- kapacitor/vars.go | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/kapacitor/tickscripts_test.go b/kapacitor/tickscripts_test.go index a14c3d79f9..2fa1939207 100644 --- a/kapacitor/tickscripts_test.go +++ b/kapacitor/tickscripts_test.go @@ -126,7 +126,7 @@ var field = 'usage_user' var groupby = ['host', 'cluster_id'] -var where_filter = lambda: ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') AND ("cpu" == 'cpu_total') +var where_filter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') var id = 'kapacitor/{{ .Name }}/{{ .Group }}' @@ -263,7 +263,7 @@ var field = 'usage_user' var groupby = ['host', 'cluster_id'] -var where_filter = lambda: ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" 
== 'prod') AND ("cpu" == 'cpu_total') +var where_filter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') var id = 'kapacitor/{{ .Name }}/{{ .Group }}' @@ -414,7 +414,7 @@ var field = 'usage_user' var groupby = ['host', 'cluster_id'] -var where_filter = lambda: ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') AND ("cpu" == 'cpu_total') +var where_filter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') var id = 'kapacitor/{{ .Name }}/{{ .Group }}' diff --git a/kapacitor/vars.go b/kapacitor/vars.go index 4a94afe567..eecafcbaf6 100644 --- a/kapacitor/vars.go +++ b/kapacitor/vars.go @@ -2,6 +2,7 @@ package kapacitor import ( "fmt" + "sort" "strings" "github.com/influxdata/chronograf" @@ -175,6 +176,7 @@ func whereFilter(q chronograf.QueryConfig) string { outer = append(outer, "("+strings.Join(inner, " OR ")+")") } if len(outer) > 0 { + sort.Strings(outer) return "lambda: " + strings.Join(outer, " AND ") } From b65c3708115de57e9d85c2c99ab493899390b0f3 Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Thu, 3 Nov 2016 01:10:02 -0500 Subject: [PATCH 07/14] Add kapacitor task creation/deletion/updating --- kapacitor/client.go | 142 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 kapacitor/client.go diff --git a/kapacitor/client.go b/kapacitor/client.go new file mode 100644 index 0000000000..b6266ae04c --- /dev/null +++ b/kapacitor/client.go @@ -0,0 +1,142 @@ +package kapacitor + +import ( + "context" + "fmt" + + "github.com/influxdata/chronograf" + client "github.com/influxdata/kapacitor/client/v1" +) + +type Client struct { + URL string + Username string + Password string + ID chronograf.ID + Ticker chronograf.Ticker +} + +const ( + Prefix = "chronograf-v1-" + Pattern = "chronograf-v1-*" +) + +// Task represents a running kapacitor task +type Task struct { + ID string // Kapacitor ID + Href string // 
Kapacitor relative URI + TICKScript chronograf.TICKScript // TICKScript is the running script +} + +func (c *Client) Create(ctx context.Context, rule chronograf.AlertRule) (*Task, error) { + kapa, err := c.kapaClient(ctx) + if err != nil { + return nil, err + } + + id, err := c.ID.Generate() + if err != nil { + return nil, err + } + + script, err := c.Ticker.Generate(rule) + if err != nil { + return nil, err + } + + taskType, err := toTask(rule.Type) + if err != nil { + return nil, err + } + + kapaID := Prefix + id + task, err := kapa.CreateTask(client.CreateTaskOptions{ + ID: kapaID, + Type: taskType, + DBRPs: []client.DBRP{{Database: rule.Query.Database, RetentionPolicy: rule.Query.RetentionPolicy}}, + TICKscript: string(script), + Status: client.Enabled, + }) + if err != nil { + return nil, err + } + + return &Task{ + ID: kapaID, + Href: task.Link.Href, + TICKScript: script, + }, nil +} + +func (c *Client) Delete(ctx context.Context, href string) error { + kapa, err := c.kapaClient(ctx) + if err != nil { + return err + } + return kapa.DeleteTask(client.Link{Href: href}) +} + +func (c *Client) Update(ctx context.Context, href string, rule chronograf.AlertRule) (*Task, error) { + kapa, err := c.kapaClient(ctx) + if err != nil { + return nil, err + } + + script, err := c.Ticker.Generate(rule) + if err != nil { + return nil, err + } + + taskType, err := toTask(rule.Type) + if err != nil { + return nil, err + } + + opts := client.UpdateTaskOptions{ + TICKscript: string(script), + Status: client.Enabled, + Type: taskType, + DBRPs: []client.DBRP{ + { + Database: rule.Query.Database, + RetentionPolicy: rule.Query.RetentionPolicy, + }, + }, + } + + task, err := kapa.UpdateTask(client.Link{Href: href}, opts) + if err != nil { + return nil, err + } + + return &Task{ + ID: task.ID, + Href: task.Link.Href, + TICKScript: script, + }, nil +} + +func (c *Client) kapaClient(ctx context.Context) (*client.Client, error) { + var creds *client.Credentials + if c.Username != "" { + 
creds = &client.Credentials{ + Method: client.UserAuthentication, + Username: c.Username, + Password: c.Password, + } + } + + return client.New(client.Config{ + URL: c.URL, + Credentials: creds, + }) +} +func toTask(taskType string) (client.TaskType, error) { + if taskType == "stream" { + return client.StreamTask, nil + } else if taskType == "batch" { + return client.BatchTask, nil + } else { + return 0, fmt.Errorf("Unknown alert type %s", taskType) + } +} From f68491ce31686d4ae6e22708e5d0f0b1186f3ac6 Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Thu, 3 Nov 2016 01:42:52 -0500 Subject: [PATCH 08/14] Add basic kapacitor task routes. Still need schema and storage --- kapacitor/client.go | 4 ++ kapacitor/server.go | 61 ---------------- server/kapacitors.go | 162 ++++++++++++++++++++++++++++++++++++++++++- server/mux.go | 6 +- 4 files changed, 167 insertions(+), 66 deletions(-) delete mode 100644 kapacitor/server.go diff --git a/kapacitor/client.go b/kapacitor/client.go index b6266ae04c..b45ef12115 100644 --- a/kapacitor/client.go +++ b/kapacitor/client.go @@ -28,6 +28,10 @@ type Task struct { TICKScript chronograf.TICKScript // TICKScript is the running script } +func (c *Client) Href(ID string) string { + return fmt.Sprintf("/kapacitor/v1/tasks/%s", ID) +} + func (c *Client) Create(ctx context.Context, rule chronograf.AlertRule) (*Task, error) { kapa, err := c.kapaClient(ctx) if err != nil { diff --git a/kapacitor/server.go b/kapacitor/server.go deleted file mode 100644 index f9e9d71daf..0000000000 --- a/kapacitor/server.go +++ /dev/null @@ -1,61 +0,0 @@ -package kapacitor - -import ( - "context" - - client "github.com/influxdata/kapacitor/client/v1" -) - -type Server struct { - URL string - Username string - Password string -} - -const ( - templatePrefix = "chronograf_v1_" - templatePattern = "chronograf_v1_*" -) - -// Template plus its read-only attributes. 
-type Template struct { - ID string `json:"id"` - Type string `json:"type"` - TICKscript string `json:"script"` -} - -func (s *Server) Templates(ctx context.Context) ([]Template, error) { - var creds *client.Credentials - if s.Username != "" { - creds = &client.Credentials{ - Method: client.UserAuthentication, - Username: s.Username, - Password: s.Password, - } - } - - kapa, err := client.New(client.Config{ - URL: s.URL, - Credentials: creds, - }) - if err != nil { - return nil, err - } - - templates, err := kapa.ListTemplates(&client.ListTemplatesOptions{ - Pattern: templatePattern, - }) - if err != nil { - return nil, err - } - - res := []Template{} - for _, t := range templates { - res = append(res, Template{ - ID: t.ID, - Type: t.Type.String(), - TICKscript: t.TICKscript, - }) - } - return res, nil -} diff --git a/server/kapacitors.go b/server/kapacitors.go index 3de3784b1e..3fa1f425c9 100644 --- a/server/kapacitors.go +++ b/server/kapacitors.go @@ -7,7 +7,9 @@ import ( "net/url" "strconv" + "github.com/bouk/httprouter" "github.com/influxdata/chronograf" + kapa "github.com/influxdata/chronograf/kapacitor" ) type postKapacitorRequest struct { @@ -261,16 +263,170 @@ func (h *Service) UpdateKapacitor(w http.ResponseWriter, r *http.Request) { // KapacitorTasksPost proxies POST to kapacitor func (h *Service) KapacitorTasksPost(w http.ResponseWriter, r *http.Request) { + id, err := paramID("kid", r) + if err != nil { + Error(w, http.StatusUnprocessableEntity, err.Error()) + return + } + + srcID, err := paramID("id", r) + if err != nil { + Error(w, http.StatusUnprocessableEntity, err.Error()) + return + } + + ctx := r.Context() + srv, err := h.ServersStore.Get(ctx, id) + if err != nil || srv.SrcID != srcID { + notFound(w, id) + return + } + + c := kapa.Client{ + URL: srv.URL, + Username: srv.Username, + Password: srv.Password, + } + + var rule chronograf.AlertRule + task, err := c.Create(ctx, rule) + if err != nil { + Error(w, http.StatusInternalServerError, 
err.Error()) + return + } + // TODO: Set the tickscript in the store + // TODO: possibly use the Href in update to the store + _ = task.TICKScript + _ = task.ID + _ = task.Href + // TODO: Add the task to the store + // TODO: Return POST response + w.WriteHeader(http.StatusNoContent) + } -// KapacitorTasksPatch proxies PATCH to kapacitor -func (h *Service) KapacitorTasksPatch(w http.ResponseWriter, r *http.Request) { +// KapacitorTasksPut proxies PUT to kapacitor +func (h *Service) KapacitorTasksPut(w http.ResponseWriter, r *http.Request) { + id, err := paramID("kid", r) + if err != nil { + Error(w, http.StatusUnprocessableEntity, err.Error()) + return + } + + srcID, err := paramID("id", r) + if err != nil { + Error(w, http.StatusUnprocessableEntity, err.Error()) + return + } + + ctx := r.Context() + srv, err := h.ServersStore.Get(ctx, id) + if err != nil || srv.SrcID != srcID { + notFound(w, id) + return + } + + tid := httprouter.GetParamFromContext(ctx, "tid") + c := kapa.Client{ + URL: srv.URL, + Username: srv.Username, + Password: srv.Password, + Ticker: &kapa.Alert{}, + } + // TODO: Pull rule from PUT parameters + var rule chronograf.AlertRule + task, err := c.Update(ctx, c.Href(tid), rule) + if err != nil { + Error(w, http.StatusInternalServerError, err.Error()) + return + } + // TODO: Set the tickscript in the update to the store + // TODO: possibly use the Href in update to the store + _ = task.TICKScript + // TODO: Update the task from the store + // TODO: Return PUT response + w.WriteHeader(http.StatusNoContent) } -// KapacitorTasksGet proxies GET to kapacitor +// KapacitorTasksGet retrieves all tasks func (h *Service) KapacitorTasksGet(w http.ResponseWriter, r *http.Request) { + id, err := paramID("kid", r) + if err != nil { + Error(w, http.StatusUnprocessableEntity, err.Error()) + return + } + + srcID, err := paramID("id", r) + if err != nil { + Error(w, http.StatusUnprocessableEntity, err.Error()) + return + } + + ctx := r.Context() + srv, err := 
h.ServersStore.Get(ctx, id) + if err != nil || srv.SrcID != srcID { + notFound(w, id) + return + } + // TODO: GET tasks from store +} + +// KapacitorTasksID retrieves a specific task +func (h *Service) KapacitorTasksID(w http.ResponseWriter, r *http.Request) { + id, err := paramID("kid", r) + if err != nil { + Error(w, http.StatusUnprocessableEntity, err.Error()) + return + } + + srcID, err := paramID("id", r) + if err != nil { + Error(w, http.StatusUnprocessableEntity, err.Error()) + return + } + + ctx := r.Context() + srv, err := h.ServersStore.Get(ctx, id) + if err != nil || srv.SrcID != srcID { + notFound(w, id) + return + } + tid := httprouter.GetParamFromContext(ctx, "tid") + // TODO: GET task from store + _ = tid } // KapacitorTasksDelete proxies DELETE to kapacitor func (h *Service) KapacitorTasksDelete(w http.ResponseWriter, r *http.Request) { + id, err := paramID("kid", r) + if err != nil { + Error(w, http.StatusUnprocessableEntity, err.Error()) + return + } + + srcID, err := paramID("id", r) + if err != nil { + Error(w, http.StatusUnprocessableEntity, err.Error()) + return + } + + ctx := r.Context() + srv, err := h.ServersStore.Get(ctx, id) + if err != nil || srv.SrcID != srcID { + notFound(w, id) + return + } + + // TODO: Delete the task from the store + tid := httprouter.GetParamFromContext(ctx, "tid") + c := kapa.Client{ + URL: srv.URL, + Username: srv.Username, + Password: srv.Password, + } + if err := c.Delete(ctx, c.Href(tid)); err != nil { + Error(w, http.StatusInternalServerError, err.Error()) + return + } + w.WriteHeader(http.StatusNoContent) } diff --git a/server/mux.go b/server/mux.go index 6966c6562c..77aa6e8387 100644 --- a/server/mux.go +++ b/server/mux.go @@ -71,8 +71,10 @@ func NewMux(opts MuxOpts, service Service) http.Handler { // Kapacitor Tasks router.GET("/chronograf/v1/sources/:id/kapacitors/:kid/tasks", service.KapacitorTasksGet) router.POST("/chronograf/v1/sources/:id/kapacitors/:kid/tasks", service.KapacitorTasksPost) - 
router.PATCH("/chronograf/v1/sources/:id/kapacitors/:kid/tasks", service.KapacitorTasksPatch) - router.DELETE("/chronograf/v1/sources/:id/kapacitors/:kid/tasks", service.KapacitorTasksDelete) + + router.GET("/chronograf/v1/sources/:id/kapacitors/:kid/tasks/:tid", service.KapacitorTasksID) + router.PUT("/chronograf/v1/sources/:id/kapacitors/:kid/tasks/:tid", service.KapacitorTasksPut) + router.DELETE("/chronograf/v1/sources/:id/kapacitors/:kid/tasks/:tid", service.KapacitorTasksDelete) // Kapacitor Proxy router.GET("/chronograf/v1/sources/:id/kapacitors/:kid/proxy", service.KapacitorProxyGet) From 77bde5d7c5e0117a4dbeee873f6182a13da539d1 Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Thu, 3 Nov 2016 01:59:09 -0500 Subject: [PATCH 09/14] Add tasks routes to swagger doc and run through linter. --- server/kapacitors.go | 2 + server/swagger.json | 1014 ++++++++++++++++++++++++------------------ 2 files changed, 585 insertions(+), 431 deletions(-) diff --git a/server/kapacitors.go b/server/kapacitors.go index 3fa1f425c9..e96f88a5c2 100644 --- a/server/kapacitors.go +++ b/server/kapacitors.go @@ -38,6 +38,7 @@ func (p *postKapacitorRequest) Valid() error { type kapaLinks struct { Proxy string `json:"proxy"` // URL location of proxy endpoint for this source Self string `json:"self"` // Self link mapping to this resource + Tasks string `json:"tasks"` // Tasks link for defining task alerts for kapacitor } type kapacitor struct { @@ -104,6 +105,7 @@ func newKapacitor(srv chronograf.Server) kapacitor { Links: kapaLinks{ Self: fmt.Sprintf("%s/%d/kapacitors/%d", httpAPISrcs, srv.SrcID, srv.ID), Proxy: fmt.Sprintf("%s/%d/kapacitors/%d/proxy", httpAPISrcs, srv.SrcID, srv.ID), + Tasks: fmt.Sprintf("%s/%d/kapacitors/%d/tasks", httpAPISrcs, srv.SrcID, srv.ID), }, } } diff --git a/server/swagger.json b/server/swagger.json index 74c589bcc0..e2b1354696 100644 --- a/server/swagger.json +++ b/server/swagger.json @@ -57,16 +57,14 @@ }, "post": { "summary": "Create new data source", - 
"parameters": [ - { - "name": "source", - "in": "body", - "description": "Configuration options for data source", - "schema": { - "$ref": "#/definitions/Source" - } + "parameters": [{ + "name": "source", + "in": "body", + "description": "Configuration options for data source", + "schema": { + "$ref": "#/definitions/Source" } - ], + }], "responses": { "201": { "description": "Successfully create data source", @@ -92,15 +90,13 @@ }, "/sources/{id}": { "get": { - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the data source", - "required": true - } - ], + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the data source", + "required": true + }], "summary": "Configured data sources", "description": "These data sources store time series data.", "responses": { @@ -126,24 +122,21 @@ }, "patch": { "summary": "Update data source configuration", - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of a data source", - "required": true + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of a data source", + "required": true + }, { + "name": "config", + "in": "body", + "description": "data source configuration", + "schema": { + "$ref": "#/definitions/Source" }, - { - "name": "config", - "in": "body", - "description": "data source configuration", - "schema": { - "$ref": "#/definitions/Source" - }, - "required": true - } - ], + "required": true + }], "responses": { "200": { "description": "Data source's configuration was changed", @@ -166,15 +159,13 @@ } }, "delete": { - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the source", - "required": true - } - ], + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }], "summary": "This specific data source will be removed from the data 
store", "responses": { "204": { @@ -198,24 +189,21 @@ "/sources/{id}/proxy": { "post": { "description": "Query the backend time series data source and return the response according to `format`", - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the data source", - "required": true + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the data source", + "required": true + }, { + "name": "query", + "in": "body", + "description": "Query Parameters", + "schema": { + "$ref": "#/definitions/Proxy" }, - { - "name": "query", - "in": "body", - "description": "Query Parameters", - "schema": { - "$ref": "#/definitions/Proxy" - }, - "required": true - } - ], + "required": true + }], "responses": { "200": { "description": "Result of the query from the backend time series data source.", @@ -270,16 +258,14 @@ }, "post": { "summary": "Create new user for this data source", - "parameters": [ - { - "name": "user", - "in": "body", - "description": "Configuration options for new user", - "schema": { - "$ref": "#/definitions/User" - } + "parameters": [{ + "name": "user", + "in": "body", + "description": "Configuration options for new user", + "schema": { + "$ref": "#/definitions/User" } - ], + }], "responses": { "201": { "description": "Successfully created new user", @@ -305,15 +291,13 @@ }, "/users/{user_id}": { "get": { - "parameters": [ - { - "name": "user_id", - "in": "path", - "type": "string", - "description": "ID of the specific user", - "required": true - } - ], + "parameters": [{ + "name": "user_id", + "in": "path", + "type": "string", + "description": "ID of the specific user", + "required": true + }], "summary": "Returns information about a specific user", "description": "Specific User.\n", "responses": { @@ -339,24 +323,21 @@ }, "patch": { "summary": "Update user configuration", - "parameters": [ - { - "name": "user_id", - "in": "path", - "type": "string", - "description": "ID of the 
specific user", - "required": true + "parameters": [{ + "name": "user_id", + "in": "path", + "type": "string", + "description": "ID of the specific user", + "required": true + }, { + "name": "config", + "in": "body", + "description": "user configuration", + "schema": { + "$ref": "#/definitions/User" }, - { - "name": "config", - "in": "body", - "description": "user configuration", - "schema": { - "$ref": "#/definitions/User" - }, - "required": true - } - ], + "required": true + }], "responses": { "200": { "description": "Users's configuration was changed", @@ -379,15 +360,13 @@ } }, "delete": { - "parameters": [ - { - "name": "user_id", - "in": "path", - "type": "string", - "description": "ID of the specific user", - "required": true - } - ], + "parameters": [{ + "name": "user_id", + "in": "path", + "type": "string", + "description": "ID of the specific user", + "required": true + }], "summary": "This specific user will be removed from the data store", "responses": { "204": { @@ -410,15 +389,13 @@ }, "/users/{user_id}/explorations": { "get": { - "parameters": [ - { - "name": "user_id", - "in": "path", - "type": "string", - "description": "All Data Explorations returned only for this user.", - "required": true - } - ], + "parameters": [{ + "name": "user_id", + "in": "path", + "type": "string", + "description": "All Data Explorations returned only for this user.", + "required": true + }], "responses": { "200": { "description": "Data Explorations saved sessions for user are returned.", @@ -442,23 +419,20 @@ }, "post": { "summary": "Create new named exploration for this user", - "parameters": [ - { - "name": "user_id", - "in": "path", - "type": "string", - "description": "ID of user to associate this exploration with.", - "required": true - }, - { - "name": "exploration", - "in": "body", - "description": "Exploration session to save", - "schema": { - "$ref": "#/definitions/Exploration" - } + "parameters": [{ + "name": "user_id", + "in": "path", + "type": "string", + 
"description": "ID of user to associate this exploration with.", + "required": true + }, { + "name": "exploration", + "in": "body", + "description": "Exploration session to save", + "schema": { + "$ref": "#/definitions/Exploration" } - ], + }], "responses": { "201": { "description": "Successfully created new Exploration session", @@ -490,22 +464,19 @@ }, "/users/{user_id}/explorations/{exploration_id}": { "get": { - "parameters": [ - { - "name": "user_id", - "in": "path", - "type": "string", - "description": "ID of user to associate this exploration with.", - "required": true - }, - { - "name": "exploration_id", - "in": "path", - "type": "string", - "description": "ID of the specific exploration.", - "required": true - } - ], + "parameters": [{ + "name": "user_id", + "in": "path", + "type": "string", + "description": "ID of user to associate this exploration with.", + "required": true + }, { + "name": "exploration_id", + "in": "path", + "type": "string", + "description": "ID of the specific exploration.", + "required": true + }], "summary": "Returns the specified data exploration session", "description": "A data exploration session specifies query information.\n", "responses": { @@ -531,31 +502,27 @@ }, "patch": { "summary": "Update exploration configuration", - "parameters": [ - { - "name": "user_id", - "in": "path", - "type": "string", - "description": "ID of user", - "required": true - }, - { - "name": "exploration_id", - "in": "path", - "type": "string", - "description": "ID of the specific exploration.", - "required": true - }, - { - "name": "exploration", - "in": "body", - "description": "Update the exploration information to this.", - "required": true, - "schema": { - "$ref": "#/definitions/Exploration" - } + "parameters": [{ + "name": "user_id", + "in": "path", + "type": "string", + "description": "ID of user", + "required": true + }, { + "name": "exploration_id", + "in": "path", + "type": "string", + "description": "ID of the specific exploration.", + 
"required": true + }, { + "name": "exploration", + "in": "body", + "description": "Update the exploration information to this.", + "required": true, + "schema": { + "$ref": "#/definitions/Exploration" } - ], + }], "responses": { "200": { "description": "Exploration's configuration was changed", @@ -578,22 +545,19 @@ } }, "delete": { - "parameters": [ - { - "name": "user_id", - "in": "path", - "type": "string", - "description": "ID of user to associate this exploration with.", - "required": true - }, - { - "name": "exploration_id", - "in": "path", - "type": "string", - "description": "ID of the specific exploration.", - "required": true - } - ], + "parameters": [{ + "name": "user_id", + "in": "path", + "type": "string", + "description": "ID of user to associate this exploration with.", + "required": true + }, { + "name": "exploration_id", + "in": "path", + "type": "string", + "description": "ID of the specific exploration.", + "required": true + }], "summary": "This specific exporer session will be removed.", "responses": { "204": { @@ -616,15 +580,13 @@ }, "/sources/{id}/kapacitors": { "get": { - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the source", - "required": true - } - ], + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }], "summary": "Configured kapacitors", "responses": { "200": { @@ -643,23 +605,20 @@ }, "post": { "summary": "Create new kapacitor backend", - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the source", - "required": true - }, - { - "name": "kapacitor", - "in": "body", - "description": "Configuration options for kapacitor", - "schema": { - "$ref": "#/definitions/Kapacitor" - } + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapacitor", + "in": "body", + "description": 
"Configuration options for kapacitor", + "schema": { + "$ref": "#/definitions/Kapacitor" } - ], + }], "responses": { "201": { "description": "Successfully created kapacitor source", @@ -685,22 +644,19 @@ }, "/sources/{id}/kapacitors/{kapa_id}": { "get": { - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the source", - "required": true - }, - { - "name": "kapa_id", - "in": "path", - "type": "string", - "description": "ID of the kapacitor", - "required": true - } - ], + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapa_id", + "in": "path", + "type": "string", + "description": "ID of the kapacitor", + "required": true + }], "summary": "Configured kapacitors", "description": "These kapacitors are used for monitoring and alerting.", "responses": { @@ -726,31 +682,27 @@ }, "patch": { "summary": "Update kapacitor configuration", - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the source", - "required": true + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapa_id", + "in": "path", + "type": "string", + "description": "ID of a kapacitor backend", + "required": true + }, { + "name": "config", + "in": "body", + "description": "kapacitor configuration", + "schema": { + "$ref": "#/definitions/Kapacitor" }, - { - "name": "kapa_id", - "in": "path", - "type": "string", - "description": "ID of a kapacitor backend", - "required": true - }, - { - "name": "config", - "in": "body", - "description": "kapacitor configuration", - "schema": { - "$ref": "#/definitions/Kapacitor" - }, - "required": true - } - ], + "required": true + }], "responses": { "200": { "description": "Kapacitor's configuration was changed", @@ -773,22 +725,19 @@ } }, "delete": { - "parameters": [ - { - "name": "id", - "in": 
"path", - "type": "string", - "description": "ID of the source", - "required": true - }, - { - "name": "kapa_id", - "in": "path", - "type": "string", - "description": "ID of the kapacitor", - "required": true - } - ], + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapa_id", + "in": "path", + "type": "string", + "description": "ID of the kapacitor", + "required": true + }], "summary": "This specific kapacitor will be removed.", "responses": { "204": { @@ -809,32 +758,256 @@ } } }, - "/sources/{id}/kapacitors/{kapa_id}/proxy": { + "/sources/{id}/kapacitors/{kapa_id}/tasks": { "get": { - "description": "GET to `path` of kapacitor. The response and status code from kapacitor is directly returned.", - "parameters": [ - { + "description": "Get all defined alert tasks.", + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapa_id", + "in": "path", + "type": "string", + "description": "ID of the kapacitor backend.", + "required": true + }], + "responses": { + "200": { + "description": "All alert tasks for this specific kapacitor are returned", + "schema": { + "$ref": "#/definitions/Tasks" + } + }, + "404": { + "description": "Data source or Kapacitor ID does not exist.", + "schema": { + "$ref": "#/definitions/Error" + } + }, + "default": { + "description": "Internal server error", + "schema": { + "$ref": "#/definitions/Error" + } + } + } + }, + "post": { + "description": "Create kapacitor alert task", + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapa_id", + "in": "path", + "type": "string", + "description": "ID of the kapacitor backend.", + "required": true + }, { + "name": "task", + "in": "body", + "description": "Rule to generate alert task", + "schema": { + "$ref": 
"#/definitions/Task" + }, + "required": true + }], + "responses": { + "201": { + "description": "Successfully created new kapacitor alert task", + "headers": { + "Location": { + "type": "string", + "format": "url", + "description": "Location of the newly created kapacitor task resource." + } + }, + "schema": { + "$ref": "#/definitions/Task" + } + }, + "404": { + "description": "Kapacitor ID does not exist.", + "schema": { + "$ref": "#/definitions/Error" + } + }, + "default": { + "description": "Internal server error", + "schema": { + "$ref": "#/definitions/Error" + } + } + } + } + }, + "/sources/{id}/kapacitors/{kapa_id}/tasks/{task_id}": { + "get": { + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapa_id", + "in": "path", + "type": "string", + "description": "ID of the kapacitor", + "required": true + }, { + "name": "task_id", + "in": "path", + "type": "string", + "description": "ID of the task", + "required": true + }], + "summary": "Specific kapacitor alert task", + "description": "Alerting task for kapacitor", + "responses": { + "200": { + "description": "Alert exists and has a specific TICKscript", + "schema": { + "$ref": "#/definitions/Task" + } + }, + "404": { + "description": "Unknown data source, kapacitor id, or task id", + "schema": { + "$ref": "#/definitions/Error" + } + }, + "default": { + "description": "Unexpected internal service error", + "schema": { + "$ref": "#/definitions/Error" + } + } + } + }, + "put": { + "summary": "Update rule alert task configuration", + "parameters": [{ "name": "id", "in": "path", "type": "string", "description": "ID of the source", "required": true - }, - { + }, { "name": "kapa_id", "in": "path", "type": "string", - "description": "ID of the kapacitor backend.", + "description": "ID of a kapacitor backend", + "required": true + }, { + "name": "task_id", + "in": "path", + "type": "string", + "description": "ID of a task", 
"required": true }, + { - "name": "path", - "in": "query", - "type": "string", - "description": "The kapacitor API path to use in the proxy redirect", + "name": "task", + "in": "body", + "description": "Task update", + "schema": { + "$ref": "#/definitions/Task" + }, "required": true } ], + "responses": { + "200": { + "description": "Alert configuration was changed", + "schema": { + "$ref": "#/definitions/Task" + } + }, + "404": { + "description": "Happens when trying to access a non-existent data source, kapacitor, or task.", + "schema": { + "$ref": "#/definitions/Error" + } + }, + "default": { + "description": "A processing or an unexpected error.", + "schema": { + "$ref": "#/definitions/Error" + } + } + } + }, + "delete": { + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapa_id", + "in": "path", + "type": "string", + "description": "ID of the kapacitor", + "required": true + }, { + "name": "task_id", + "in": "path", + "type": "string", + "description": "ID of the task", + "required": true + } + + ], + "summary": "This specific alert task will be removed.", + "responses": { + "204": { + "description": "Alert task has been removed." + }, + "404": { + "description": "Unknown Data source, Kapacitor id, or alert task", + "schema": { + "$ref": "#/definitions/Error" + } + }, + "default": { + "description": "Unexpected internal service error", + "schema": { + "$ref": "#/definitions/Error" + } + } + } + } + }, + "/sources/{id}/kapacitors/{kapa_id}/proxy": { + "get": { + "description": "GET to `path` of kapacitor. 
The response and status code from kapacitor is directly returned.", + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapa_id", + "in": "path", + "type": "string", + "description": "ID of the kapacitor backend.", + "required": true + }, { + "name": "path", + "in": "query", + "type": "string", + "description": "The kapacitor API path to use in the proxy redirect", + "required": true + }], "responses": { "204": { "description": "Kapacitor returned no content" @@ -855,29 +1028,25 @@ }, "delete": { "description": "DELETE to `path` of kapacitor. The response and status code from kapacitor is directly returned.", - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the source", - "required": true - }, - { - "name": "kapa_id", - "in": "path", - "type": "string", - "description": "ID of the kapacitor backend.", - "required": true - }, - { - "name": "path", - "in": "query", - "type": "string", - "description": "The kapacitor API path to use in the proxy redirect", - "required": true - } - ], + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapa_id", + "in": "path", + "type": "string", + "description": "ID of the kapacitor backend.", + "required": true + }, { + "name": "path", + "in": "query", + "type": "string", + "description": "The kapacitor API path to use in the proxy redirect", + "required": true + }], "responses": { "204": { "description": "Kapacitor returned no content" @@ -898,38 +1067,33 @@ }, "patch": { "description": "PATCH body directly to configured kapacitor. 
The response and status code from kapacitor is directly returned.", - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the source", - "required": true + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapa_id", + "in": "path", + "type": "string", + "description": "ID of the kapacitor backend.", + "required": true + }, { + "name": "path", + "in": "query", + "type": "string", + "description": "The kapacitor API path to use in the proxy redirect", + "required": true + }, { + "name": "query", + "in": "body", + "description": "Kapacitor body", + "schema": { + "$ref": "#/definitions/KapacitorProxy" }, - { - "name": "kapa_id", - "in": "path", - "type": "string", - "description": "ID of the kapacitor backend.", - "required": true - }, - { - "name": "path", - "in": "query", - "type": "string", - "description": "The kapacitor API path to use in the proxy redirect", - "required": true - }, - { - "name": "query", - "in": "body", - "description": "Kapacitor body", - "schema": { - "$ref": "#/definitions/KapacitorProxy" - }, - "required": true - } - ], + "required": true + }], "responses": { "204": { "description": "Kapacitor returned no content" @@ -950,38 +1114,33 @@ }, "post": { "description": "POST body directly to configured kapacitor. 
The response and status code from kapacitor is directly returned.", - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the source", - "required": true + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the source", + "required": true + }, { + "name": "kapa_id", + "in": "path", + "type": "string", + "description": "ID of the kapacitor backend.", + "required": true + }, { + "name": "path", + "in": "query", + "type": "string", + "description": "The kapacitor API path to use in the proxy redirect", + "required": true + }, { + "name": "query", + "in": "body", + "description": "Kapacitor body", + "schema": { + "$ref": "#/definitions/KapacitorProxy" }, - { - "name": "kapa_id", - "in": "path", - "type": "string", - "description": "ID of the kapacitor backend.", - "required": true - }, - { - "name": "path", - "in": "query", - "type": "string", - "description": "The kapacitor API path to use in the proxy redirect", - "required": true - }, - { - "name": "query", - "in": "body", - "description": "Kapacitor body", - "schema": { - "$ref": "#/definitions/KapacitorProxy" - }, - "required": true - } - ], + "required": true + }], "responses": { "204": { "description": "Kapacitor returned no content" @@ -1024,30 +1183,27 @@ "/layouts": { "get": { "summary": "Pre-configured layouts", - "parameters": [ - { - "name": "measurement", - "in": "query", - "description": "Returns layouts with this measurement", - "required": false, - "type": "array", - "items": { - "type": "string" - }, - "collectionFormat": "multi" + "parameters": [{ + "name": "measurement", + "in": "query", + "description": "Returns layouts with this measurement", + "required": false, + "type": "array", + "items": { + "type": "string" }, - { - "name": "app", - "in": "query", - "description": "Returns layouts with this app", - "required": false, - "type": "array", - "items": { - "type": "string" - }, - "collectionFormat": "multi" - } - 
], + "collectionFormat": "multi" + }, { + "name": "app", + "in": "query", + "description": "Returns layouts with this app", + "required": false, + "type": "array", + "items": { + "type": "string" + }, + "collectionFormat": "multi" + }], "description": "Layouts are a collection of `Cells` that visualize time-series data.\n", "responses": { "200": { @@ -1066,16 +1222,14 @@ }, "post": { "summary": "Create new layout", - "parameters": [ - { - "name": "layout", - "in": "body", - "description": "Defines the layout and queries of the cells within the layout.", - "schema": { - "$ref": "#/definitions/Layout" - } + "parameters": [{ + "name": "layout", + "in": "body", + "description": "Defines the layout and queries of the cells within the layout.", + "schema": { + "$ref": "#/definitions/Layout" } - ], + }], "responses": { "201": { "description": "Successfully created new layout", @@ -1101,15 +1255,13 @@ }, "/layouts/{id}": { "get": { - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the layout", - "required": true - } - ], + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the layout", + "required": true + }], "summary": "Specific pre-configured layout containing cells and queries.", "description": "layouts will hold information about how to layout the page of graphs.\n", "responses": { @@ -1134,15 +1286,13 @@ } }, "delete": { - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of the layout", - "required": true - } - ], + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of the layout", + "required": true + }], "summary": "This specific layout will be removed from the data store", "responses": { "204": { @@ -1164,24 +1314,21 @@ }, "put": { "summary": "Replace layout configuration.", - "parameters": [ - { - "name": "id", - "in": "path", - "type": "string", - "description": "ID of a layout", - 
"required": true + "parameters": [{ + "name": "id", + "in": "path", + "type": "string", + "description": "ID of a layout", + "required": true + }, { + "name": "config", + "in": "body", + "description": "layout configuration update parameters", + "schema": { + "$ref": "#/definitions/Layout" }, - { - "name": "config", - "in": "body", - "description": "layout configuration update parameters", - "schema": { - "$ref": "#/definitions/Layout" - }, - "required": true - } - ], + "required": true + }], "responses": { "200": { "description": "Layout has been replaced and the new layout is returned.", @@ -1259,7 +1406,12 @@ }, "proxy": { "type": "string", - "description": "URL location of proxy endpoint for this source", + "description": "URL location of proxy endpoint for this kapacitor", + "format": "url" + }, + "tasks": { + "type": "string", + "description": "URL location of tasks endpoint for this kapacitor", "format": "url" } } From 74713b9f0bb810023798b91c50a115751f333257 Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Thu, 3 Nov 2016 02:07:48 -0500 Subject: [PATCH 10/14] Add ticker and id generator to creation of new kapa alert --- server/kapacitors.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/kapacitors.go b/server/kapacitors.go index e96f88a5c2..3090595915 100644 --- a/server/kapacitors.go +++ b/server/kapacitors.go @@ -10,6 +10,7 @@ import ( "github.com/bouk/httprouter" "github.com/influxdata/chronograf" kapa "github.com/influxdata/chronograf/kapacitor" + "github.com/influxdata/chronograf/uuid" ) type postKapacitorRequest struct { @@ -288,6 +289,8 @@ func (h *Service) KapacitorTasksPost(w http.ResponseWriter, r *http.Request) { URL: srv.URL, Username: srv.Username, Password: srv.Password, + Ticker: &kapa.Alert{}, + ID: &uuid.V4{}, } var rule chronograf.AlertRule From c5d9c342c6aa8e3673597559c1109aea08797ec2 Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Thu, 3 Nov 2016 13:41:34 -0500 Subject: [PATCH 11/14] Update kapacitor scripts based on 
reviews --- kapacitor/data.go | 28 +++--- kapacitor/data_test.go | 6 +- kapacitor/influxout.go | 3 +- kapacitor/influxout_test.go | 6 +- kapacitor/templates.go | 26 ++++-- kapacitor/tickscripts.go | 2 +- kapacitor/tickscripts_test.go | 103 ++++++++++++++++------ kapacitor/triggers_test.go | 24 +++-- kapacitor/vars.go | 160 +++++++++++++++------------------- 9 files changed, 212 insertions(+), 146 deletions(-) diff --git a/kapacitor/data.go b/kapacitor/data.go index bc8c11f04c..71126634b1 100644 --- a/kapacitor/data.go +++ b/kapacitor/data.go @@ -7,9 +7,8 @@ import ( ) // Data returns the tickscript data section for querying -// TODO: Someone else needs to build the var period and var every -func Data(q chronograf.QueryConfig) (string, error) { - if q.RawText != "" { +func Data(rule chronograf.AlertRule) (string, error) { + if rule.Query.RawText != "" { batch := ` var data = batch |query(''' @@ -18,9 +17,9 @@ func Data(q chronograf.QueryConfig) (string, error) { .period(period) .every(every) .align()` - batch = fmt.Sprintf(batch, q.RawText) - if q.GroupBy.Time != "" { - batch = batch + fmt.Sprintf(".groupBy(%s)", q.GroupBy.Time) + batch = fmt.Sprintf(batch, rule.Query.RawText) + if rule.Query.GroupBy.Time != "" { + batch = batch + fmt.Sprintf(".groupBy(%s)", rule.Query.GroupBy.Time) } return batch, nil } @@ -32,16 +31,19 @@ func Data(q chronograf.QueryConfig) (string, error) { ` stream = fmt.Sprintf("%s\n.groupBy(groupby)\n", stream) - stream = stream + "|window().period(period).every(every).align()\n" - - for _, field := range q.Fields { - for _, fnc := range field.Funcs { - stream = stream + fmt.Sprintf(`|%s(field).as(metric)`, fnc) + stream = stream + ".where(where_filter)\n" + // Only need aggregate functions for threshold and relative + if rule.Type != "deadman" { + for _, field := range rule.Query.Fields { + for _, fnc := range field.Funcs { + // Only need a window if we have an aggregate function + stream = stream + 
"|window().period(period).every(every).align()\n" + stream = stream + fmt.Sprintf(`|%s(field).as(metric)`, fnc) + break // only support a single field + } break // only support a single field } - break // only support a single field } - stream = stream + "|where(where_filter)\n" return stream, nil } diff --git a/kapacitor/data_test.go b/kapacitor/data_test.go index 0455291477..6f281cd40e 100644 --- a/kapacitor/data_test.go +++ b/kapacitor/data_test.go @@ -43,7 +43,11 @@ func TestData(t *testing.T) { if err != nil { t.Errorf("Error unmarshaling %v", err) } - if tick, err := Data(q); err != nil { + alert := chronograf.AlertRule{ + Type: "deadman", + Query: q, + } + if tick, err := Data(alert); err != nil { t.Errorf("Error creating tick %v", err) } else { formatted, err := formatTick(tick) diff --git a/kapacitor/influxout.go b/kapacitor/influxout.go index 882c3aac45..6179f45d69 100644 --- a/kapacitor/influxout.go +++ b/kapacitor/influxout.go @@ -16,5 +16,6 @@ func InfluxOut(rule chronograf.AlertRule) string { .retentionPolicy(output_rp) .measurement(output_mt) .tag('name', '%s') - `, rule.Name) + .tag('type', '%s') + `, rule.Name, rule.Type) } diff --git a/kapacitor/influxout_test.go b/kapacitor/influxout_test.go index 84b517c107..61590d434d 100644 --- a/kapacitor/influxout_test.go +++ b/kapacitor/influxout_test.go @@ -17,11 +17,15 @@ func TestInfluxOut(t *testing.T) { .retentionPolicy(output_rp) .measurement(output_mt) .tag('name', 'name') + .tag('type', 'deadman') `, }, } for _, tt := range tests { - got := InfluxOut(chronograf.AlertRule{Name: "name"}) + got := InfluxOut(chronograf.AlertRule{ + Name: "name", + Type: "deadman", + }) formatted, err := formatTick(got) if err != nil { t.Errorf("%q. 
formatTick() error = %v", tt.name, err) diff --git a/kapacitor/templates.go b/kapacitor/templates.go index cda3e62041..3076ffd912 100644 --- a/kapacitor/templates.go +++ b/kapacitor/templates.go @@ -12,9 +12,14 @@ var ThresholdTrigger = ` .as('value') |alert() .stateChangesOnly() - .id(id) + .crit(lambda: "value" {{ .Operator }} crit) .message(message) - .crit(lambda: "value" {{ .Operator }} crit)` + .id(idVar) + .idTag(idtag) + .levelField(levelfield) + .messageField(messagefield) + .durationField(durationfield) +` // RelativeTrigger compares one window of data versus another. var RelativeTrigger = ` @@ -35,16 +40,25 @@ var trigger = past .as('value') |alert() .stateChangesOnly() - .id(id) + .crit(lambda: "value" {{ .Operator }} crit) .message(message) - .crit(lambda: "value" {{ .Operator }} crit)` + .id(idVar) + .idTag(idtag) + .levelField(levelfield) + .messageField(messagefield) + .durationField(durationfield) +` // DeadmanTrigger checks if any data has been streamed in the last period of time var DeadmanTrigger = ` - var trigger = data|deadman(threshold, period) + var trigger = data|deadman(threshold, every) .stateChangesOnly() - .id(id) .message(message) + .id(idVar) + .idTag(idtag) + .levelField(levelfield) + .messageField(messagefield) + .durationField(durationfield) ` func execTemplate(tick string, alert interface{}) (string, error) { diff --git a/kapacitor/tickscripts.go b/kapacitor/tickscripts.go index 8aa97d062d..3437c69e71 100644 --- a/kapacitor/tickscripts.go +++ b/kapacitor/tickscripts.go @@ -17,7 +17,7 @@ func (a *Alert) Generate(rule chronograf.AlertRule) (chronograf.TICKScript, erro if err != nil { return "", nil } - data, err := Data(rule.Query) + data, err := Data(rule) if err != nil { return "", nil } diff --git a/kapacitor/tickscripts_test.go b/kapacitor/tickscripts_test.go index 2fa1939207..5e5dd70a94 100644 --- a/kapacitor/tickscripts_test.go +++ b/kapacitor/tickscripts_test.go @@ -128,17 +128,25 @@ var groupby = ['host', 'cluster_id'] var 
where_filter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') -var id = 'kapacitor/{{ .Name }}/{{ .Group }}' - -var message = 'message' - var period = 10m var every = 30s -var metric = 'metric' +var name = 'name' -var crit = 90 +var idVar = name + ':{{.Group}}' + +var message = 'message' + +var idtag = 'alertID' + +var levelfield = 'level' + +var messagefield = 'message' + +var durationfield = 'duration' + +var metric = 'metric' var output_db = 'chronograf' @@ -146,28 +154,34 @@ var output_rp = 'autogen' var output_mt = 'alerts' +var crit = 90 + var data = stream |from() .database(db) .retentionPolicy(rp) .measurement(measurement) .groupBy(groupby) + .where(where_filter) |window() .period(period) .every(every) .align() |mean(field) .as(metric) - |where(where_filter) var trigger = data |mean(metric) .as('value') |alert() .stateChangesOnly() - .id(id) - .message(message) .crit(lambda: "value" > crit) + .message(message) + .id(idVar) + .idTag(idtag) + .levelField(levelfield) + .messageField(messagefield) + .durationField(durationfield) .slack() .victorOps() .email() @@ -179,6 +193,7 @@ trigger .retentionPolicy(output_rp) .measurement(output_mt) .tag('name', 'name') + .tag('type', 'stream') `, wantErr: false, }, @@ -265,39 +280,49 @@ var groupby = ['host', 'cluster_id'] var where_filter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') -var id = 'kapacitor/{{ .Name }}/{{ .Group }}' - -var message = 'message' - var period = 10m var every = 30s +var name = 'name' + +var idVar = name + ':{{.Group}}' + +var message = 'message' + +var idtag = 'alertID' + +var levelfield = 'level' + +var messagefield = 'message' + +var durationfield = 'duration' + var metric = 'metric' -var shift = -1m - -var crit = 90 - var output_db = 'chronograf' var output_rp = 'autogen' var output_mt = 'alerts' +var shift = -1m + +var crit = 90 + var data = stream |from() .database(db) 
.retentionPolicy(rp) .measurement(measurement) .groupBy(groupby) + .where(where_filter) |window() .period(period) .every(every) .align() |mean(field) .as(metric) - |where(where_filter) var past = data |mean(metric) @@ -316,9 +341,13 @@ var trigger = past .as('value') |alert() .stateChangesOnly() - .id(id) - .message(message) .crit(lambda: "value" > crit) + .message(message) + .id(idVar) + .idTag(idtag) + .levelField(levelfield) + .messageField(messagefield) + .durationField(durationfield) .slack() .victorOps() .email() @@ -330,6 +359,7 @@ trigger .retentionPolicy(output_rp) .measurement(output_mt) .tag('name', 'name') + .tag('type', 'stream') `, wantErr: false, }, @@ -416,17 +446,25 @@ var groupby = ['host', 'cluster_id'] var where_filter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') -var id = 'kapacitor/{{ .Name }}/{{ .Group }}' - -var message = 'message' - var period = 10m var every = 30s -var metric = 'metric' +var name = 'name' -var threshold = 0.0 +var idVar = name + ':{{.Group}}' + +var message = 'message' + +var idtag = 'alertID' + +var levelfield = 'level' + +var messagefield = 'message' + +var durationfield = 'duration' + +var metric = 'metric' var output_db = 'chronograf' @@ -434,25 +472,31 @@ var output_rp = 'autogen' var output_mt = 'alerts' +var threshold = 0.0 + var data = stream |from() .database(db) .retentionPolicy(rp) .measurement(measurement) .groupBy(groupby) + .where(where_filter) |window() .period(period) .every(every) .align() |mean(field) .as(metric) - |where(where_filter) var trigger = data - |deadman(threshold, period) + |deadman(threshold, every) .stateChangesOnly() - .id(id) .message(message) + .id(idVar) + .idTag(idtag) + .levelField(levelfield) + .messageField(messagefield) + .durationField(durationfield) .slack() .victorOps() .email() @@ -464,6 +508,7 @@ trigger .retentionPolicy(output_rp) .measurement(output_mt) .tag('name', 'name') + .tag('type', 'stream') `, wantErr: false, }, 
diff --git a/kapacitor/triggers_test.go b/kapacitor/triggers_test.go index 7399d880f3..e84b2fd7a7 100644 --- a/kapacitor/triggers_test.go +++ b/kapacitor/triggers_test.go @@ -21,10 +21,14 @@ func TestTrigger(t *testing.T) { Aggregate: "mean", }, want: `var trigger = data - |deadman(threshold, period) + |deadman(threshold, every) .stateChangesOnly() - .id(id) .message(message) + .id(idVar) + .idTag(idtag) + .levelField(levelfield) + .messageField(messagefield) + .durationField(durationfield) `, wantErr: false, }, @@ -52,9 +56,13 @@ var trigger = past .as('value') |alert() .stateChangesOnly() - .id(id) - .message(message) .crit(lambda: "value" > crit) + .message(message) + .id(idVar) + .idTag(idtag) + .levelField(levelfield) + .messageField(messagefield) + .durationField(durationfield) `, wantErr: false, }, @@ -70,9 +78,13 @@ var trigger = past .as('value') |alert() .stateChangesOnly() - .id(id) - .message(message) .crit(lambda: "value" > crit) + .message(message) + .id(idVar) + .idTag(idtag) + .levelField(levelfield) + .messageField(messagefield) + .durationField(durationfield) `, wantErr: false, }, diff --git a/kapacitor/vars.go b/kapacitor/vars.go index eecafcbaf6..4452126500 100644 --- a/kapacitor/vars.go +++ b/kapacitor/vars.go @@ -15,127 +15,111 @@ var ( RP = "autogen" // Measurement will be alerts so that the app knows where to get this data. 
Measurement = "alerts" + // IDTag is the output tag key for the ID of the alert + IDTag = "alertID" + //LevelField is the output field key for the alert level information + LevelField = "level" + // MessageField is the output field key for the message in the alert + MessageField = "message" + // DurationField is the output field key for the duration of the alert + DurationField = "duration" ) // Vars builds the top level vars for a kapacitor alert script func Vars(rule chronograf.AlertRule) (string, error) { - fld, err := field(rule.Query) + common, err := commonVars(rule) if err != nil { return "", err } + switch rule.Trigger { case "threshold": vars := ` - var db = '%s' - var rp = '%s' - var measurement = '%s' - var field = '%s' - var groupby = %s - var where_filter = %s - - var id = 'kapacitor/{{ .Name }}/{{ .Group }}' - var message = '%s' - var period = %s - var every = %s - var metric = '%s' + %s var crit = %s - var output_db = '%s' - var output_rp = '%s' - var output_mt = '%s' - ` + ` return fmt.Sprintf(vars, - rule.Query.Database, - rule.Query.RetentionPolicy, - rule.Query.Measurement, - fld, - groupBy(rule.Query), - whereFilter(rule.Query), - rule.Message, - rule.Period, - rule.Every, - metric(rule.Query), + common, rule.Critical, - Database, - RP, - Measurement, ), nil case "relative": vars := ` - var db = '%s' - var rp = '%s' - var measurement = '%s' - var field = '%s' - var groupby = %s - var where_filter = %s - - var id = 'kapacitor/{{ .Name }}/{{ .Group }}' - var message = '%s' - var period = %s - var every = %s - var metric = '%s' + %s var shift = -%s var crit = %s - var output_db = '%s' - var output_rp = '%s' - var output_mt = '%s' - ` + ` return fmt.Sprintf(vars, - rule.Query.Database, - rule.Query.RetentionPolicy, - rule.Query.Measurement, - fld, - groupBy(rule.Query), - whereFilter(rule.Query), - rule.Message, - rule.Period, - rule.Every, - metric(rule.Query), + common, rule.Shift, rule.Critical, - Database, - RP, - Measurement, ), nil case 
"deadman": vars := ` - var db = '%s' - var rp = '%s' - var measurement = '%s' - var field = '%s' - var groupby = %s - var where_filter = %s - - var id = 'kapacitor/{{ .Name }}/{{ .Group }}' - var message = '%s' - var period = %s - var every = %s - var metric = '%s' + %s var threshold = %s - var output_db = '%s' - var output_rp = '%s' - var output_mt = '%s' - ` + ` return fmt.Sprintf(vars, - rule.Query.Database, - rule.Query.RetentionPolicy, - rule.Query.Measurement, - fld, - groupBy(rule.Query), - whereFilter(rule.Query), - rule.Message, - rule.Period, - rule.Every, - metric(rule.Query), + common, "0.0", // deadman threshold hardcoded to zero - Database, - RP, - Measurement, ), nil default: return "", fmt.Errorf("Unknown trigger mechanism") } } +func commonVars(rule chronograf.AlertRule) (string, error) { + fld, err := field(rule.Query) + if err != nil { + return "", err + } + + common := ` + var db = '%s' + var rp = '%s' + var measurement = '%s' + var field = '%s' + var groupby = %s + var where_filter = %s + + var period = %s + var every = %s + + var name = '%s' + var idVar = name + ':{{.Group}}' + var message = '%s' + var idtag = '%s' + var levelfield = '%s' + var messagefield = '%s' + var durationfield = '%s' + + var metric = '%s' + + var output_db = '%s' + var output_rp = '%s' + var output_mt = '%s' + ` + return fmt.Sprintf(common, + rule.Query.Database, + rule.Query.RetentionPolicy, + rule.Query.Measurement, + fld, + groupBy(rule.Query), + whereFilter(rule.Query), + rule.Period, + rule.Every, + rule.Name, + rule.Message, + IDTag, + LevelField, + MessageField, + DurationField, + metric(rule.Query), + Database, + RP, + Measurement, + ), nil +} + func groupBy(q chronograf.QueryConfig) string { groups := []string{} for _, tag := range q.GroupBy.Tags { From 5424bc6e8c1823ebe968c4e7dbf2108dedb8e9f8 Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Thu, 3 Nov 2016 14:21:17 -0500 Subject: [PATCH 12/14] Update triggerType to be a tag in the output of alerts --- 
kapacitor/data.go | 2 +- kapacitor/influxout.go | 14 +++++--------- kapacitor/influxout_test.go | 4 ++-- kapacitor/tickscripts.go | 2 +- kapacitor/tickscripts_test.go | 30 ++++++++++++++++-------------- kapacitor/triggers.go | 2 +- kapacitor/vars.go | 13 +++++++++---- 7 files changed, 35 insertions(+), 32 deletions(-) diff --git a/kapacitor/data.go b/kapacitor/data.go index 71126634b1..a9b3e7f3d9 100644 --- a/kapacitor/data.go +++ b/kapacitor/data.go @@ -33,7 +33,7 @@ func Data(rule chronograf.AlertRule) (string, error) { stream = fmt.Sprintf("%s\n.groupBy(groupby)\n", stream) stream = stream + ".where(where_filter)\n" // Only need aggregate functions for threshold and relative - if rule.Type != "deadman" { + if rule.Trigger != "deadman" { for _, field := range rule.Query.Fields { for _, fnc := range field.Funcs { // Only need a window if we have an aggregate function diff --git a/kapacitor/influxout.go b/kapacitor/influxout.go index 6179f45d69..37b2ee3c7a 100644 --- a/kapacitor/influxout.go +++ b/kapacitor/influxout.go @@ -1,21 +1,17 @@ package kapacitor -import ( - "fmt" - - "github.com/influxdata/chronograf" -) +import "github.com/influxdata/chronograf" // InfluxOut creates a kapacitor influxDBOut node to write alert data to Database, RP, Measurement. 
func InfluxOut(rule chronograf.AlertRule) string { - return fmt.Sprintf(` + return ` trigger |influxDBOut() .create() .database(output_db) .retentionPolicy(output_rp) .measurement(output_mt) - .tag('name', '%s') - .tag('type', '%s') - `, rule.Name, rule.Type) + .tag('name', name) + .tag('triggerType', triggerType) + ` } diff --git a/kapacitor/influxout_test.go b/kapacitor/influxout_test.go index 61590d434d..588ed0cbbf 100644 --- a/kapacitor/influxout_test.go +++ b/kapacitor/influxout_test.go @@ -16,8 +16,8 @@ func TestInfluxOut(t *testing.T) { .database(output_db) .retentionPolicy(output_rp) .measurement(output_mt) - .tag('name', 'name') - .tag('type', 'deadman') + .tag('name', name) + .tag('triggerType', triggerType) `, }, } diff --git a/kapacitor/tickscripts.go b/kapacitor/tickscripts.go index 3437c69e71..b4aa5492ed 100644 --- a/kapacitor/tickscripts.go +++ b/kapacitor/tickscripts.go @@ -36,7 +36,7 @@ func (a *Alert) Generate(rule chronograf.AlertRule) (chronograf.TICKScript, erro return "", err } if err := validateTick(tick); err != nil { - return "", err + return tick, err } return tick, nil } diff --git a/kapacitor/tickscripts_test.go b/kapacitor/tickscripts_test.go index 5e5dd70a94..511de9fca4 100644 --- a/kapacitor/tickscripts_test.go +++ b/kapacitor/tickscripts_test.go @@ -1,6 +1,7 @@ package kapacitor import ( + "fmt" "testing" "github.com/influxdata/chronograf" @@ -53,8 +54,9 @@ func TestGenerate(t *testing.T) { }, } gen := Alert{} - _, err := gen.Generate(alert) + tick, err := gen.Generate(alert) if err != nil { + fmt.Printf("%s", tick) t.Errorf("Error generating alert: %v", err) } } @@ -154,6 +156,8 @@ var output_rp = 'autogen' var output_mt = 'alerts' +var triggerType = 'threshold' + var crit = 90 var data = stream @@ -192,8 +196,8 @@ trigger .database(output_db) .retentionPolicy(output_rp) .measurement(output_mt) - .tag('name', 'name') - .tag('type', 'stream') + .tag('name', name) + .tag('triggerType', triggerType) `, wantErr: false, }, @@ -306,6 
+310,8 @@ var output_rp = 'autogen' var output_mt = 'alerts' +var triggerType = 'relative' + var shift = -1m var crit = 90 @@ -358,8 +364,8 @@ trigger .database(output_db) .retentionPolicy(output_rp) .measurement(output_mt) - .tag('name', 'name') - .tag('type', 'stream') + .tag('name', name) + .tag('triggerType', triggerType) `, wantErr: false, }, @@ -464,7 +470,7 @@ var messagefield = 'message' var durationfield = 'duration' -var metric = 'metric' +var metric = 'usage_user' var output_db = 'chronograf' @@ -472,6 +478,8 @@ var output_rp = 'autogen' var output_mt = 'alerts' +var triggerType = 'deadman' + var threshold = 0.0 var data = stream @@ -481,12 +489,6 @@ var data = stream .measurement(measurement) .groupBy(groupby) .where(where_filter) - |window() - .period(period) - .every(every) - .align() - |mean(field) - .as(metric) var trigger = data |deadman(threshold, every) @@ -507,8 +509,8 @@ trigger .database(output_db) .retentionPolicy(output_rp) .measurement(output_mt) - .tag('name', 'name') - .tag('type', 'stream') + .tag('name', name) + .tag('triggerType', triggerType) `, wantErr: false, }, diff --git a/kapacitor/triggers.go b/kapacitor/triggers.go index c64d2f70c4..7115904aee 100644 --- a/kapacitor/triggers.go +++ b/kapacitor/triggers.go @@ -13,6 +13,6 @@ func Trigger(rule chronograf.AlertRule) (string, error) { case "threshold": return execTemplate(ThresholdTrigger, rule) default: - return "", fmt.Errorf("Unknown trigger type: %s", rule.Type) + return "", fmt.Errorf("Unknown trigger type: %s", rule.Trigger) } } diff --git a/kapacitor/vars.go b/kapacitor/vars.go index 4452126500..ecb2d8386b 100644 --- a/kapacitor/vars.go +++ b/kapacitor/vars.go @@ -97,6 +97,7 @@ func commonVars(rule chronograf.AlertRule) (string, error) { var output_db = '%s' var output_rp = '%s' var output_mt = '%s' + var triggerType = '%s' ` return fmt.Sprintf(common, rule.Query.Database, @@ -113,10 +114,11 @@ func commonVars(rule chronograf.AlertRule) (string, error) { LevelField, 
MessageField, DurationField, - metric(rule.Query), + metric(rule), Database, RP, Measurement, + rule.Trigger, ), nil } @@ -136,9 +138,12 @@ func field(q chronograf.QueryConfig) (string, error) { } // metric will be metric unless there are no field aggregates. If no aggregates, then it is the field name. -func metric(q chronograf.QueryConfig) string { - for _, field := range q.Fields { - if field.Field != "" && len(field.Funcs) == 0 { +func metric(rule chronograf.AlertRule) string { + for _, field := range rule.Query.Fields { + // Deadman triggers do not need any aggregate functions + if field.Field != "" && rule.Trigger == "deadman" { + return field.Field + } else if field.Field != "" && len(field.Funcs) == 0 { return field.Field } } From cd98eb2433f45c3bbf9820060d327bc63a362cb4 Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Thu, 3 Nov 2016 17:27:58 -0500 Subject: [PATCH 13/14] Update kapacitor generator to match with frontend UI changes. --- chronograf.go | 90 ++++++++++++-------- kapacitor/alerts.go | 2 +- kapacitor/alerts_test.go | 8 +- kapacitor/client.go | 35 ++++---- kapacitor/data.go | 12 ++- kapacitor/data_test.go | 7 +- kapacitor/influxout.go | 2 +- kapacitor/influxout_test.go | 6 +- kapacitor/templates.go | 77 ----------------- kapacitor/tickscripts_test.go | 151 +++++++++++++++++----------------- kapacitor/triggers.go | 82 +++++++++++++++++- kapacitor/triggers_test.go | 38 ++++----- kapacitor/vars.go | 32 +++---- 13 files changed, 283 insertions(+), 259 deletions(-) diff --git a/chronograf.go b/chronograf.go index ca872f601a..649bcb022a 100644 --- a/chronograf.go +++ b/chronograf.go @@ -87,41 +87,16 @@ type SourcesStore interface { Update(context.Context, Source) error } -// QueryConfig represents UI query from the data explorer -type QueryConfig struct { - ID string `json:"id,omitempty"` - Database string `json:"database"` - Measurement string `json:"measurement"` - RetentionPolicy string `json:"retentionPolicy"` - Fields []struct { - Field string 
`json:"field"` - Funcs []string `json:"funcs"` - } `json:"fields"` - Tags map[string][]string `json:"tags"` - GroupBy struct { - Time string `json:"time"` - Tags []string `json:"tags"` - } `json:"groupBy"` - AreTagsAccepted bool `json:"areTagsAccepted"` - RawText string `json:"rawText,omitempty"` -} - // AlertRule represents rules for building a tickscript alerting task type AlertRule struct { - ID string `json:"id,omitempty"` // ID is the unique ID of the alert - Name string `json:"name"` // Name is the user-defined name for the alert - Version string `json:"version"` // Version of the alert - Query QueryConfig `json:"query"` // Query is the filter of data for the alert. - Trigger string `json:"trigger"` // Trigger is a type that defines when to trigger the alert - AlertServices []string `json:"alerts"` // AlertServices name all the services to notify (e.g. pagerduty) - Type string `json:"type"` // Type specifies kind of AlertRule (stream, batch) - Operator string `json:"operator"` // Operator for alert comparison - Aggregate string `json:"aggregate"` // Statistic aggregate over window of data - Period string `json:"period"` // Period is the window to search for alerting criteria - Every string `json:"every"` // Every how often to check for the alerting criteria - Critical string `json:"critical"` // Critical is the boundary value when alert goes critical - Shift string `json:"shift"` // Shift is the amount of time to look into the past for the alert to compare to the present - Message string `json:"message"` // Message included with alert + ID string `json:"id,omitempty"` // ID is the unique ID of the alert + Query QueryConfig `json:"query"` // Query is the filter of data for the alert. + Every string `json:"every"` // Every how often to check for the alerting criteria + Alerts []string `json:"alerts"` // AlertServices name all the services to notify (e.g. 
pagerduty) + Message string `json:"message"` // Message included with alert + Trigger string `json:"trigger"` // Trigger is a type that defines when to trigger the alert + TriggerValues TriggerValues `json:"values"` // Defines the values that cause the alert to trigger + Name string `json:"name"` // Name is the user-defined name for the alert } // AlertRulesStore stores rules for building tickscript alerting tasks @@ -147,6 +122,55 @@ type Ticker interface { Generate(AlertRule) (TICKScript, error) } +// DeadmanValue specifies the timeout duration of a deadman alert. +type DeadmanValue struct { + Period string `json:"period, omitempty"` // Period is the max time data can be missed before an alert +} + +// RelativeValue specifies the trigger logic for a relative value change alert. +type RelativeValue struct { + Change string `json:"change,omitempty"` // Change specifies if the change is a percent or absolute + Period string `json:"period,omitempty"` // Period is the window to search for alerting criteria + Shift string `json:"shift,omitempty"` // Shift is the amount of time to look into the past for the alert to compare to the present + Operator string `json:"operator,omitempty"` // Operator for alert comparison + Value string `json:"value,omitempty"` // Value is the boundary value when alert goes critical +} + +// ThresholdValue specifies the trigger logic for a threshold change alert. +type ThresholdValue struct { + Period string `json:"period,omitempty"` // Period is the window to search for the alerting criteria + Operator string `json:"operator,omitempty"` // Operator for alert comparison + Percentile string `json:"percentile,omitempty"` // Percentile is defined only when Relation is not "Once" + Relation string `json:"relation,omitempty"` // Relation defines the logic about how often the threshold is met to be an alert. 
+ Value string `json:"value,omitempty"` // Value is the boundary value when alert goes critical +} + +// TriggerValues specifies which of the trigger types defines the alerting logic. One of these whould not be nil. +type TriggerValues struct { + Deadman *DeadmanValue `json:"deadman,omitempty"` + Relative *RelativeValue `json:"relative,omitempty"` + Threshold *ThresholdValue `json:"threshold,omitempty"` +} + +// QueryConfig represents UI query from the data explorer +type QueryConfig struct { + ID string `json:"id,omitempty"` + Database string `json:"database"` + Measurement string `json:"measurement"` + RetentionPolicy string `json:"retentionPolicy"` + Fields []struct { + Field string `json:"field"` + Funcs []string `json:"funcs"` + } `json:"fields"` + Tags map[string][]string `json:"tags"` + GroupBy struct { + Time string `json:"time"` + Tags []string `json:"tags"` + } `json:"groupBy"` + AreTagsAccepted bool `json:"areTagsAccepted"` + RawText string `json:"rawText,omitempty"` +} + // Server represents a proxy connection to an HTTP server type Server struct { ID int // ID is the unique ID of the server diff --git a/kapacitor/alerts.go b/kapacitor/alerts.go index 0236543014..f84f54c864 100644 --- a/kapacitor/alerts.go +++ b/kapacitor/alerts.go @@ -9,7 +9,7 @@ import ( // AlertServices generates alert chaining methods to be attached to an alert from all rule Services func AlertServices(rule chronograf.AlertRule) (string, error) { alert := "" - for _, service := range rule.AlertServices { + for _, service := range rule.Alerts { if err := ValidateAlert(service); err != nil { return "", err } diff --git a/kapacitor/alerts_test.go b/kapacitor/alerts_test.go index 1672cf7e08..651027af67 100644 --- a/kapacitor/alerts_test.go +++ b/kapacitor/alerts_test.go @@ -16,7 +16,7 @@ func TestAlertServices(t *testing.T) { { name: "Test several valid services", rule: chronograf.AlertRule{ - AlertServices: []string{"slack", "victorOps", "email"}, + Alerts: []string{"slack", 
"victorOps", "email"}, }, want: `alert() .slack() @@ -27,7 +27,7 @@ func TestAlertServices(t *testing.T) { { name: "Test single invalid services amongst several valid", rule: chronograf.AlertRule{ - AlertServices: []string{"slack", "invalid", "email"}, + Alerts: []string{"slack", "invalid", "email"}, }, want: ``, wantErr: true, @@ -35,7 +35,7 @@ func TestAlertServices(t *testing.T) { { name: "Test single invalid service", rule: chronograf.AlertRule{ - AlertServices: []string{"invalid"}, + Alerts: []string{"invalid"}, }, want: ``, wantErr: true, @@ -43,7 +43,7 @@ func TestAlertServices(t *testing.T) { { name: "Test single valid service", rule: chronograf.AlertRule{ - AlertServices: []string{"slack"}, + Alerts: []string{"slack"}, }, want: `alert() .slack() diff --git a/kapacitor/client.go b/kapacitor/client.go index b45ef12115..012a2571bf 100644 --- a/kapacitor/client.go +++ b/kapacitor/client.go @@ -8,6 +8,7 @@ import ( client "github.com/influxdata/kapacitor/client/v1" ) +// Client communicates to kapacitor type Client struct { URL string Username string @@ -17,8 +18,8 @@ type Client struct { } const ( - Prefix = "chronograf-v1-" - Pattern = "chronograf-v1-*" + // Prefix is prepended to the ID of all alerts + Prefix = "chronograf-v1-" ) // Task represents a running kapacitor task @@ -28,10 +29,12 @@ type Task struct { TICKScript chronograf.TICKScript // TICKScript is the running script } +// Href returns the link to a kapacitor task given an id func (c *Client) Href(ID string) string { return fmt.Sprintf("/kapacitor/v1/tasks/%s", ID) } +// Create builds and POSTs a tickscript to kapacitor func (c *Client) Create(ctx context.Context, rule chronograf.AlertRule) (*Task, error) { kapa, err := c.kapaClient(ctx) if err != nil { @@ -48,15 +51,10 @@ func (c *Client) Create(ctx context.Context, rule chronograf.AlertRule) (*Task, return nil, err } - taskType, err := toTask(rule.Type) - if err != nil { - return nil, err - } - kapaID := Prefix + id task, err := 
kapa.CreateTask(client.CreateTaskOptions{ ID: kapaID, - Type: taskType, + Type: toTask(rule.Query), DBRPs: []client.DBRP{{Database: rule.Query.Database, RetentionPolicy: rule.Query.RetentionPolicy}}, TICKscript: string(script), Status: client.Enabled, @@ -72,6 +70,7 @@ func (c *Client) Create(ctx context.Context, rule chronograf.AlertRule) (*Task, }, nil } +// Delete removes tickscript task from kapacitor func (c *Client) Delete(ctx context.Context, href string) error { kapa, err := c.kapaClient(ctx) if err != nil { @@ -80,6 +79,7 @@ func (c *Client) Delete(ctx context.Context, href string) error { return kapa.DeleteTask(client.Link{Href: href}) } +// Update changes the tickscript of a given id. func (c *Client) Update(ctx context.Context, href string, rule chronograf.AlertRule) (*Task, error) { kapa, err := c.kapaClient(ctx) if err != nil { @@ -91,15 +91,10 @@ func (c *Client) Update(ctx context.Context, href string, rule chronograf.AlertR return nil, err } - taskType, err := toTask(rule.Type) - if err != nil { - return nil, err - } - opts := client.UpdateTaskOptions{ TICKscript: string(script), Status: client.Enabled, - Type: taskType, + Type: toTask(rule.Query), DBRPs: []client.DBRP{ { Database: rule.Query.Database, @@ -135,12 +130,10 @@ func (c *Client) kapaClient(ctx context.Context) (*client.Client, error) { Credentials: creds, }) } -func toTask(taskType string) (client.TaskType, error) { - if taskType == "stream" { - return client.StreamTask, nil - } else if taskType == "batch" { - return client.BatchTask, nil - } else { - return 0, fmt.Errorf("Unknown alert type %s", taskType) + +func toTask(q chronograf.QueryConfig) client.TaskType { + if q.RawText == "" { + return client.StreamTask } + return client.BatchTask } diff --git a/kapacitor/data.go b/kapacitor/data.go index a9b3e7f3d9..607d471c98 100644 --- a/kapacitor/data.go +++ b/kapacitor/data.go @@ -33,17 +33,21 @@ func Data(rule chronograf.AlertRule) (string, error) { stream = 
fmt.Sprintf("%s\n.groupBy(groupby)\n", stream) stream = stream + ".where(where_filter)\n" // Only need aggregate functions for threshold and relative + + value := "" if rule.Trigger != "deadman" { for _, field := range rule.Query.Fields { for _, fnc := range field.Funcs { // Only need a window if we have an aggregate function - stream = stream + "|window().period(period).every(every).align()\n" - stream = stream + fmt.Sprintf(`|%s(field).as(metric)`, fnc) + value = value + "|window().period(period).every(every).align()\n" + value = value + fmt.Sprintf(`|%s(field).as(value)`, fnc) break // only support a single field } break // only support a single field } } - - return stream, nil + if value == "" { + value = `|eval(lambda: field).as(value)` + } + return stream + value, nil } diff --git a/kapacitor/data_test.go b/kapacitor/data_test.go index 6f281cd40e..6abbe9548f 100644 --- a/kapacitor/data_test.go +++ b/kapacitor/data_test.go @@ -44,18 +44,17 @@ func TestData(t *testing.T) { t.Errorf("Error unmarshaling %v", err) } alert := chronograf.AlertRule{ - Type: "deadman", - Query: q, + Trigger: "deadman", + Query: q, } if tick, err := Data(alert); err != nil { t.Errorf("Error creating tick %v", err) } else { - formatted, err := formatTick(tick) + _, err := formatTick(tick) if err != nil { fmt.Printf(tick) t.Errorf("Error formatting tick %v", err) } - fmt.Printf("%s", formatted) } } diff --git a/kapacitor/influxout.go b/kapacitor/influxout.go index 37b2ee3c7a..12954f6a14 100644 --- a/kapacitor/influxout.go +++ b/kapacitor/influxout.go @@ -11,7 +11,7 @@ func InfluxOut(rule chronograf.AlertRule) string { .database(output_db) .retentionPolicy(output_rp) .measurement(output_mt) - .tag('name', name) + .tag('alertName', name) .tag('triggerType', triggerType) ` } diff --git a/kapacitor/influxout_test.go b/kapacitor/influxout_test.go index 588ed0cbbf..f3868618e3 100644 --- a/kapacitor/influxout_test.go +++ b/kapacitor/influxout_test.go @@ -16,15 +16,15 @@ func TestInfluxOut(t 
*testing.T) { .database(output_db) .retentionPolicy(output_rp) .measurement(output_mt) - .tag('name', name) + .tag('alertName', name) .tag('triggerType', triggerType) `, }, } for _, tt := range tests { got := InfluxOut(chronograf.AlertRule{ - Name: "name", - Type: "deadman", + Name: "name", + Trigger: "deadman", }) formatted, err := formatTick(got) if err != nil { diff --git a/kapacitor/templates.go b/kapacitor/templates.go index 3076ffd912..ff18178fe6 100644 --- a/kapacitor/templates.go +++ b/kapacitor/templates.go @@ -1,78 +1 @@ package kapacitor - -import ( - "bytes" - "log" - "text/template" -) - -// ThresholdTrigger is the trickscript trigger for alerts that exceed a value -var ThresholdTrigger = ` - var trigger = data|{{ .Aggregate }}(metric) - .as('value') - |alert() - .stateChangesOnly() - .crit(lambda: "value" {{ .Operator }} crit) - .message(message) - .id(idVar) - .idTag(idtag) - .levelField(levelfield) - .messageField(messagefield) - .durationField(durationfield) -` - -// RelativeTrigger compares one window of data versus another. 
-var RelativeTrigger = ` -var past = data - |{{ .Aggregate }}(metric) - .as('stat') - |shift(shift) - -var current = data - |{{ .Aggregate }}(metric) - .as('stat') - -var trigger = past - |join(current) - .as('past', 'current') - |eval(lambda: abs(float("current.stat" - "past.stat"))/float("past.stat")) - .keep() - .as('value') - |alert() - .stateChangesOnly() - .crit(lambda: "value" {{ .Operator }} crit) - .message(message) - .id(idVar) - .idTag(idtag) - .levelField(levelfield) - .messageField(messagefield) - .durationField(durationfield) -` - -// DeadmanTrigger checks if any data has been streamed in the last period of time -var DeadmanTrigger = ` - var trigger = data|deadman(threshold, every) - .stateChangesOnly() - .message(message) - .id(idVar) - .idTag(idtag) - .levelField(levelfield) - .messageField(messagefield) - .durationField(durationfield) -` - -func execTemplate(tick string, alert interface{}) (string, error) { - p := template.New("template") - t, err := p.Parse(tick) - if err != nil { - log.Fatalf("template parse: %s", err) - return "", err - } - buf := new(bytes.Buffer) - err = t.Execute(buf, alert) - if err != nil { - log.Fatalf("template execution: %s", err) - return "", err - } - return buf.String(), nil -} diff --git a/kapacitor/tickscripts_test.go b/kapacitor/tickscripts_test.go index 511de9fca4..960e16161b 100644 --- a/kapacitor/tickscripts_test.go +++ b/kapacitor/tickscripts_test.go @@ -9,17 +9,19 @@ import ( func TestGenerate(t *testing.T) { alert := chronograf.AlertRule{ - Name: "name", - Version: "1.0", - Trigger: "relative", - AlertServices: []string{"slack", "victorOps", "email"}, - Type: "stream", - Operator: ">", - Aggregate: "mean", - Period: "10m", - Every: "30s", - Critical: "90", - Shift: "1m", + Name: "name", + Trigger: "relative", + Alerts: []string{"slack", "victorOps", "email"}, + TriggerValues: chronograf.TriggerValues{ + Relative: &chronograf.RelativeValue{ + Change: "change", + Period: "10m", + Shift: "1m", + Operator: 
"greater than", + Value: "90", + }, + }, + Every: "30s", Query: chronograf.QueryConfig{ Database: "telegraf", Measurement: "cpu", @@ -63,18 +65,20 @@ func TestGenerate(t *testing.T) { func TestThreshold(t *testing.T) { alert := chronograf.AlertRule{ - Name: "name", - Version: "1.0", - Trigger: "threshold", - AlertServices: []string{"slack", "victorOps", "email"}, - Type: "stream", - Operator: ">", - Aggregate: "mean", - Period: "10m", - Every: "30s", - Critical: "90", - Shift: "1m", - Message: "message", + Name: "name", + Trigger: "threshold", + Alerts: []string{"slack", "victorOps", "email"}, + TriggerValues: chronograf.TriggerValues{ + Threshold: &chronograf.ThresholdValue{ + Relation: "once", + Period: "10m", + Percentile: "", // TODO: if relation is not once then this will have a number + Operator: "greater than", + Value: "90", + }, + }, + Every: "30s", + Message: "message", Query: chronograf.QueryConfig{ Database: "telegraf", Measurement: "cpu", @@ -130,8 +134,6 @@ var groupby = ['host', 'cluster_id'] var where_filter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') -var period = 10m - var every = 30s var name = 'name' @@ -142,13 +144,13 @@ var message = 'message' var idtag = 'alertID' -var levelfield = 'level' +var leveltag = 'level' var messagefield = 'message' var durationfield = 'duration' -var metric = 'metric' +var value = 'value' var output_db = 'chronograf' @@ -158,6 +160,8 @@ var output_mt = 'alerts' var triggerType = 'threshold' +var period = 10m + var crit = 90 var data = stream @@ -172,18 +176,16 @@ var data = stream .every(every) .align() |mean(field) - .as(metric) + .as(value) var trigger = data - |mean(metric) - .as('value') |alert() .stateChangesOnly() .crit(lambda: "value" > crit) .message(message) .id(idVar) .idTag(idtag) - .levelField(levelfield) + .levelTag(leveltag) .messageField(messagefield) .durationField(durationfield) .slack() @@ -196,7 +198,7 @@ trigger .database(output_db) 
.retentionPolicy(output_rp) .measurement(output_mt) - .tag('name', name) + .tag('alertName', name) .tag('triggerType', triggerType) `, wantErr: false, @@ -210,6 +212,7 @@ trigger continue } if got != tt.want { + fmt.Printf("%s", got) t.Errorf("%q. Threshold() = %v, want %v", tt.name, got, tt.want) } } @@ -217,18 +220,20 @@ trigger func TestRelative(t *testing.T) { alert := chronograf.AlertRule{ - Name: "name", - Version: "1.0", - Trigger: "relative", - AlertServices: []string{"slack", "victorOps", "email"}, - Type: "stream", - Operator: ">", - Aggregate: "mean", - Period: "10m", - Every: "30s", - Critical: "90", - Shift: "1m", - Message: "message", + Name: "name", + Trigger: "relative", + Alerts: []string{"slack", "victorOps", "email"}, + TriggerValues: chronograf.TriggerValues{ + Relative: &chronograf.RelativeValue{ + Change: "change", + Period: "10m", + Shift: "1m", + Operator: "greater than", + Value: "90", + }, + }, + Every: "30s", + Message: "message", Query: chronograf.QueryConfig{ Database: "telegraf", Measurement: "cpu", @@ -284,8 +289,6 @@ var groupby = ['host', 'cluster_id'] var where_filter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') -var period = 10m - var every = 30s var name = 'name' @@ -296,13 +299,13 @@ var message = 'message' var idtag = 'alertID' -var levelfield = 'level' +var leveltag = 'level' var messagefield = 'message' var durationfield = 'duration' -var metric = 'metric' +var value = 'value' var output_db = 'chronograf' @@ -312,6 +315,8 @@ var output_mt = 'alerts' var triggerType = 'relative' +var period = 10m + var shift = -1m var crit = 90 @@ -328,21 +333,17 @@ var data = stream .every(every) .align() |mean(field) - .as(metric) + .as(value) var past = data - |mean(metric) - .as('stat') |shift(shift) var current = data - |mean(metric) - .as('stat') var trigger = past |join(current) .as('past', 'current') - |eval(lambda: abs(float("current.stat" - "past.stat")) / float("past.stat")) + 
|eval(lambda: abs(float("current.value" - "past.value")) / float("past.value")) .keep() .as('value') |alert() @@ -351,7 +352,7 @@ var trigger = past .message(message) .id(idVar) .idTag(idtag) - .levelField(levelfield) + .levelTag(leveltag) .messageField(messagefield) .durationField(durationfield) .slack() @@ -364,7 +365,7 @@ trigger .database(output_db) .retentionPolicy(output_rp) .measurement(output_mt) - .tag('name', name) + .tag('alertName', name) .tag('triggerType', triggerType) `, wantErr: false, @@ -378,6 +379,7 @@ trigger continue } if got != tt.want { + fmt.Printf("%s", got) t.Errorf("%q. Relative() = %v, want %v", tt.name, got, tt.want) } } @@ -385,18 +387,16 @@ trigger func TestDeadman(t *testing.T) { alert := chronograf.AlertRule{ - Name: "name", - Version: "1.0", - Trigger: "deadman", - AlertServices: []string{"slack", "victorOps", "email"}, - Type: "stream", - Operator: ">", - Aggregate: "mean", - Period: "10m", - Every: "30s", - Critical: "90", - Shift: "1m", - Message: "message", + Name: "name", + Trigger: "deadman", + Alerts: []string{"slack", "victorOps", "email"}, + TriggerValues: chronograf.TriggerValues{ + Deadman: &chronograf.DeadmanValue{ + Period: "10m", + }, + }, + Every: "30s", + Message: "message", Query: chronograf.QueryConfig{ Database: "telegraf", Measurement: "cpu", @@ -452,8 +452,6 @@ var groupby = ['host', 'cluster_id'] var where_filter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') -var period = 10m - var every = 30s var name = 'name' @@ -464,13 +462,13 @@ var message = 'message' var idtag = 'alertID' -var levelfield = 'level' +var leveltag = 'level' var messagefield = 'message' var durationfield = 'duration' -var metric = 'usage_user' +var value = 'value' var output_db = 'chronograf' @@ -482,6 +480,8 @@ var triggerType = 'deadman' var threshold = 0.0 +var period = 10m + var data = stream |from() .database(db) @@ -489,6 +489,8 @@ var data = stream .measurement(measurement) 
.groupBy(groupby) .where(where_filter) + |eval(lambda: field) + .as(value) var trigger = data |deadman(threshold, every) @@ -496,7 +498,7 @@ var trigger = data .message(message) .id(idVar) .idTag(idtag) - .levelField(levelfield) + .levelTag(leveltag) .messageField(messagefield) .durationField(durationfield) .slack() @@ -509,7 +511,7 @@ trigger .database(output_db) .retentionPolicy(output_rp) .measurement(output_mt) - .tag('name', name) + .tag('alertName', name) .tag('triggerType', triggerType) `, wantErr: false, @@ -523,6 +525,7 @@ trigger continue } if got != tt.want { + fmt.Printf("%s", got) t.Errorf("%q. Deadman() = %v, want %v", tt.name, got, tt.want) } } diff --git a/kapacitor/triggers.go b/kapacitor/triggers.go index 7115904aee..d0f2926646 100644 --- a/kapacitor/triggers.go +++ b/kapacitor/triggers.go @@ -3,16 +3,94 @@ package kapacitor import "github.com/influxdata/chronograf" import "fmt" +// ThresholdTrigger is the trickscript trigger for alerts that exceed a value +var ThresholdTrigger = ` + var trigger = data + |alert() + .stateChangesOnly() + .crit(lambda: "value" %s crit) + .message(message) + .id(idVar) + .idTag(idtag) + .levelTag(leveltag) + .messageField(messagefield) + .durationField(durationfield) +` + +// RelativeTrigger compares one window of data versus another. 
+var RelativeTrigger = ` +var past = data + |shift(shift) + +var current = data + +var trigger = past + |join(current) + .as('past', 'current') + |eval(lambda: abs(float("current.value" - "past.value"))/float("past.value")) + .keep() + .as('value') + |alert() + .stateChangesOnly() + .crit(lambda: "value" %s crit) + .message(message) + .id(idVar) + .idTag(idtag) + .levelTag(leveltag) + .messageField(messagefield) + .durationField(durationfield) +` + +// DeadmanTrigger checks if any data has been streamed in the last period of time +var DeadmanTrigger = ` + var trigger = data|deadman(threshold, every) + .stateChangesOnly() + .message(message) + .id(idVar) + .idTag(idtag) + .levelTag(leveltag) + .messageField(messagefield) + .durationField(durationfield) +` + // Trigger returns the trigger mechanism for a tickscript func Trigger(rule chronograf.AlertRule) (string, error) { switch rule.Trigger { case "deadman": return DeadmanTrigger, nil case "relative": - return execTemplate(RelativeTrigger, rule) + op, err := kapaOperator(rule.TriggerValues.Relative.Operator) + if err != nil { + return "", err + } + return fmt.Sprintf(RelativeTrigger, op), nil case "threshold": - return execTemplate(ThresholdTrigger, rule) + op, err := kapaOperator(rule.TriggerValues.Threshold.Operator) + if err != nil { + return "", err + } + return fmt.Sprintf(ThresholdTrigger, op), nil default: return "", fmt.Errorf("Unknown trigger type: %s", rule.Trigger) } } + +// kapaOperator converts UI strings to kapacitor operators +func kapaOperator(operator string) (string, error) { + switch operator { + case "greater than": + return ">", nil + case "less than": + return "<", nil + case "equal to or less than": + return "<=", nil + case "equal to or greater than": + return ">=", nil + case "equal": + return "==", nil + case "not equal": + return "!=", nil + default: + return "", fmt.Errorf("invalid operator: %s is unknown", operator) + } +} diff --git a/kapacitor/triggers_test.go 
b/kapacitor/triggers_test.go index e84b2fd7a7..db1733514a 100644 --- a/kapacitor/triggers_test.go +++ b/kapacitor/triggers_test.go @@ -16,9 +16,7 @@ func TestTrigger(t *testing.T) { { name: "Test Deadman", rule: chronograf.AlertRule{ - Trigger: "deadman", - Operator: ">", - Aggregate: "mean", + Trigger: "deadman", }, want: `var trigger = data |deadman(threshold, every) @@ -26,7 +24,7 @@ func TestTrigger(t *testing.T) { .message(message) .id(idVar) .idTag(idtag) - .levelField(levelfield) + .levelTag(leveltag) .messageField(messagefield) .durationField(durationfield) `, @@ -35,23 +33,22 @@ func TestTrigger(t *testing.T) { { name: "Test Relative", rule: chronograf.AlertRule{ - Trigger: "relative", - Operator: ">", - Aggregate: "mean", + Trigger: "relative", + TriggerValues: chronograf.TriggerValues{ + Relative: &chronograf.RelativeValue{ + Operator: "greater than", + }, + }, }, want: `var past = data - |mean(metric) - .as('stat') |shift(shift) var current = data - |mean(metric) - .as('stat') var trigger = past |join(current) .as('past', 'current') - |eval(lambda: abs(float("current.stat" - "past.stat")) / float("past.stat")) + |eval(lambda: abs(float("current.value" - "past.value")) / float("past.value")) .keep() .as('value') |alert() @@ -60,7 +57,7 @@ var trigger = past .message(message) .id(idVar) .idTag(idtag) - .levelField(levelfield) + .levelTag(leveltag) .messageField(messagefield) .durationField(durationfield) `, @@ -69,20 +66,21 @@ var trigger = past { name: "Test Threshold", rule: chronograf.AlertRule{ - Trigger: "threshold", - Operator: ">", - Aggregate: "median", + Trigger: "threshold", + TriggerValues: chronograf.TriggerValues{ + Threshold: &chronograf.ThresholdValue{ + Operator: "greater than", + }, + }, }, want: `var trigger = data - |median(metric) - .as('value') |alert() .stateChangesOnly() .crit(lambda: "value" > crit) .message(message) .id(idVar) .idTag(idtag) - .levelField(levelfield) + .levelTag(leveltag) .messageField(messagefield) 
.durationField(durationfield) `, @@ -91,7 +89,7 @@ var trigger = past { name: "Test Invalid", rule: chronograf.AlertRule{ - Type: "invalid", + Trigger: "invalid", }, want: ``, wantErr: true, diff --git a/kapacitor/vars.go b/kapacitor/vars.go index ecb2d8386b..b9d2f7dd2c 100644 --- a/kapacitor/vars.go +++ b/kapacitor/vars.go @@ -17,8 +17,8 @@ var ( Measurement = "alerts" // IDTag is the output tag key for the ID of the alert IDTag = "alertID" - //LevelField is the output field key for the alert level information - LevelField = "level" + //LevelTag is the output tag key for the alert level information + LevelTag = "level" // MessageField is the output field key for the message in the alert MessageField = "message" // DurationField is the output field key for the duration of the alert @@ -36,31 +36,36 @@ func Vars(rule chronograf.AlertRule) (string, error) { case "threshold": vars := ` %s + var period = %s var crit = %s ` return fmt.Sprintf(vars, common, - rule.Critical, - ), nil + rule.TriggerValues.Threshold.Period, + rule.TriggerValues.Threshold.Value), nil case "relative": vars := ` %s + var period = %s var shift = -%s var crit = %s ` return fmt.Sprintf(vars, common, - rule.Shift, - rule.Critical, + rule.TriggerValues.Relative.Period, + rule.TriggerValues.Relative.Shift, + rule.TriggerValues.Relative.Value, ), nil case "deadman": vars := ` %s var threshold = %s + var period = %s ` return fmt.Sprintf(vars, common, "0.0", // deadman threshold hardcoded to zero + rule.TriggerValues.Deadman.Period, ), nil default: return "", fmt.Errorf("Unknown trigger mechanism") @@ -81,18 +86,17 @@ func commonVars(rule chronograf.AlertRule) (string, error) { var groupby = %s var where_filter = %s - var period = %s var every = %s var name = '%s' var idVar = name + ':{{.Group}}' var message = '%s' var idtag = '%s' - var levelfield = '%s' + var leveltag = '%s' var messagefield = '%s' var durationfield = '%s' - var metric = '%s' + var value = 'value' var output_db = '%s' var output_rp = 
'%s' @@ -106,15 +110,13 @@ func commonVars(rule chronograf.AlertRule) (string, error) { fld, groupBy(rule.Query), whereFilter(rule.Query), - rule.Period, rule.Every, rule.Name, rule.Message, IDTag, - LevelField, + LevelTag, MessageField, DurationField, - metric(rule), Database, RP, Measurement, @@ -137,8 +139,8 @@ func field(q chronograf.QueryConfig) (string, error) { return "", fmt.Errorf("No fields set in query") } -// metric will be metric unless there are no field aggregates. If no aggregates, then it is the field name. -func metric(rule chronograf.AlertRule) string { +// value will be "value" unless there are no field aggregates. If no aggregates, then it is the field name. +func value(rule chronograf.AlertRule) string { for _, field := range rule.Query.Fields { // Deadman triggers do not need any aggregate functions if field.Field != "" && rule.Trigger == "deadman" { @@ -147,7 +149,7 @@ func metric(rule chronograf.AlertRule) string { return field.Field } } - return "metric" + return "value" } func whereFilter(q chronograf.QueryConfig) string { From a95c9983004db5b06b81bd41eecb6309a3436f46 Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Thu, 3 Nov 2016 19:44:28 -0500 Subject: [PATCH 14/14] Update to store alert information in boltdb --- bolt/alerts.go | 115 +++++++++++++++++++++++++ bolt/client.go | 15 ++-- bolt/internal/internal.go | 27 ++++++ bolt/internal/internal.pb.go | 127 +++++++++++++++------------ bolt/internal/internal.proto | 5 ++ chronograf.go | 32 ++----- kapacitor/alerts.go | 27 +++++- kapacitor/alerts_test.go | 2 +- kapacitor/tickscripts_test.go | 51 +++++------ kapacitor/triggers.go | 4 +- kapacitor/triggers_test.go | 8 +- kapacitor/vars.go | 12 +-- server/kapacitors.go | 158 +++++++++++++++++++++++++++++----- server/server.go | 1 + server/service.go | 1 + 15 files changed, 427 insertions(+), 158 deletions(-) create mode 100644 bolt/alerts.go diff --git a/bolt/alerts.go b/bolt/alerts.go new file mode 100644 index 0000000000..d24abb6b03 ---
/dev/null +++ b/bolt/alerts.go @@ -0,0 +1,115 @@ +package bolt + +import ( + "context" + + "github.com/boltdb/bolt" + "github.com/influxdata/chronograf" + "github.com/influxdata/chronograf/bolt/internal" +) + +// Ensure AlertsStore implements chronograf.AlertRulesStore. +var _ chronograf.AlertRulesStore = &AlertsStore{} + +var AlertsBucket = []byte("Alerts") + +type AlertsStore struct { + client *Client +} + +// All returns all known alerts +func (s *AlertsStore) All(ctx context.Context) ([]chronograf.AlertRule, error) { + var srcs []chronograf.AlertRule + if err := s.client.db.View(func(tx *bolt.Tx) error { + if err := tx.Bucket(AlertsBucket).ForEach(func(k, v []byte) error { + var src chronograf.AlertRule + if err := internal.UnmarshalAlertRule(v, &src); err != nil { + return err + } + srcs = append(srcs, src) + return nil + }); err != nil { + return err + } + return nil + }); err != nil { + return nil, err + } + + return srcs, nil + +} + +// Add creates a new alert rule in the AlertsStore. +func (s *AlertsStore) Add(ctx context.Context, src chronograf.AlertRule) (chronograf.AlertRule, error) { + if err := s.client.db.Update(func(tx *bolt.Tx) error { + b := tx.Bucket(AlertsBucket) + if v, err := internal.MarshalAlertRule(&src); err != nil { + return err + } else if err := b.Put([]byte(src.ID), v); err != nil { + return err + } + return nil + }); err != nil { + return chronograf.AlertRule{}, err + } + + return src, nil +} + +// Delete removes the alert rule from the AlertsStore. +func (s *AlertsStore) Delete(ctx context.Context, src chronograf.AlertRule) error { + _, err := s.Get(ctx, src.ID) + if err != nil { + return err + } + if err := s.client.db.Update(func(tx *bolt.Tx) error { + if err := tx.Bucket(AlertsBucket).Delete([]byte(src.ID)); err != nil { + return err + } + return nil + }); err != nil { + return err + } + + return nil +} + +// Get returns the alert rule if the id exists.
+func (s *AlertsStore) Get(ctx context.Context, id string) (chronograf.AlertRule, error) { + var src chronograf.AlertRule + if err := s.client.db.View(func(tx *bolt.Tx) error { + if v := tx.Bucket(AlertsBucket).Get([]byte(id)); v == nil { + return chronograf.ErrAlertNotFound + } else if err := internal.UnmarshalAlertRule(v, &src); err != nil { + return err + } + return nil + }); err != nil { + return chronograf.AlertRule{}, err + } + + return src, nil +} + +// Update replaces an existing alert rule. +func (s *AlertsStore) Update(ctx context.Context, src chronograf.AlertRule) error { + if err := s.client.db.Update(func(tx *bolt.Tx) error { + // Ensure an alert rule with the same ID already exists. + b := tx.Bucket(AlertsBucket) + if v := b.Get([]byte(src.ID)); v == nil { + return chronograf.ErrAlertNotFound + } + + if v, err := internal.MarshalAlertRule(&src); err != nil { + return err + } else if err := b.Put([]byte(src.ID), v); err != nil { + return err + } + return nil + }); err != nil { + return err + } + + return nil +} diff --git a/bolt/client.go b/bolt/client.go index 542d82905e..93f32d9002 100644 --- a/bolt/client.go +++ b/bolt/client.go @@ -19,6 +19,7 @@ type Client struct { SourcesStore *SourcesStore ServersStore *ServersStore LayoutStore *LayoutStore + AlertsStore *AlertsStore } func NewClient() *Client { @@ -26,6 +27,7 @@ func NewClient() *Client { c.ExplorationStore = &ExplorationStore{client: c} c.SourcesStore = &SourcesStore{client: c} c.ServersStore = &ServersStore{client: c} + c.AlertsStore = &AlertsStore{client: c} c.LayoutStore = &LayoutStore{ client: c, IDs: &uuid.V4{}, @@ -59,20 +61,15 @@ func (c *Client) Open() error { if _, err := tx.CreateBucketIfNotExists(LayoutBucket); err != nil { return err } - + // Always create Alerts bucket.
+ if _, err := tx.CreateBucketIfNotExists(AlertsBucket); err != nil { + return err + } return nil }); err != nil { return err } - // TODO: Ask @gunnar about these - /* - c.ExplorationStore = &ExplorationStore{client: c} - c.SourcesStore = &SourcesStore{client: c} - c.ServersStore = &ServersStore{client: c} - c.LayoutStore = &LayoutStore{client: c} - */ - return nil } diff --git a/bolt/internal/internal.go b/bolt/internal/internal.go index c48e373929..599214daa3 100644 --- a/bolt/internal/internal.go +++ b/bolt/internal/internal.go @@ -1,6 +1,7 @@ package internal import ( + "encoding/json" "time" "github.com/gogo/protobuf/proto" @@ -161,3 +162,29 @@ func UnmarshalLayout(data []byte, l *chronograf.Layout) error { l.Cells = cells return nil } + +// MarshalAlertRule encodes an alert rule to binary protobuf format. +func MarshalAlertRule(r *chronograf.AlertRule) ([]byte, error) { + j, err := json.Marshal(r) + if err != nil { + return nil, err + } + return proto.Marshal(&AlertRule{ + ID: r.ID, + JSON: string(j), + }) +} + +// UnmarshalAlertRule decodes an alert rule from binary protobuf data. 
+func UnmarshalAlertRule(data []byte, r *chronograf.AlertRule) error { + var pb AlertRule + if err := proto.Unmarshal(data, &pb); err != nil { + return err + } + + err := json.Unmarshal([]byte(pb.JSON), r) + if err != nil { + return err + } + return nil +} diff --git a/bolt/internal/internal.pb.go b/bolt/internal/internal.pb.go index a71075cc4f..ea37458e13 100644 --- a/bolt/internal/internal.pb.go +++ b/bolt/internal/internal.pb.go @@ -15,6 +15,7 @@ It has these top-level messages: Layout Cell Query + AlertRule */ package internal @@ -34,13 +35,13 @@ var _ = math.Inf const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package type Exploration struct { - ID int64 `protobuf:"varint,1,opt,name=ID,proto3" json:"ID,omitempty"` - Name string `protobuf:"bytes,2,opt,name=Name,proto3" json:"Name,omitempty"` - UserID int64 `protobuf:"varint,3,opt,name=UserID,proto3" json:"UserID,omitempty"` - Data string `protobuf:"bytes,4,opt,name=Data,proto3" json:"Data,omitempty"` - CreatedAt int64 `protobuf:"varint,5,opt,name=CreatedAt,proto3" json:"CreatedAt,omitempty"` - UpdatedAt int64 `protobuf:"varint,6,opt,name=UpdatedAt,proto3" json:"UpdatedAt,omitempty"` - Default bool `protobuf:"varint,7,opt,name=Default,proto3" json:"Default,omitempty"` + ID int64 `protobuf:"varint,1,opt,name=ID,json=iD,proto3" json:"ID,omitempty"` + Name string `protobuf:"bytes,2,opt,name=Name,json=name,proto3" json:"Name,omitempty"` + UserID int64 `protobuf:"varint,3,opt,name=UserID,json=userID,proto3" json:"UserID,omitempty"` + Data string `protobuf:"bytes,4,opt,name=Data,json=data,proto3" json:"Data,omitempty"` + CreatedAt int64 `protobuf:"varint,5,opt,name=CreatedAt,json=createdAt,proto3" json:"CreatedAt,omitempty"` + UpdatedAt int64 `protobuf:"varint,6,opt,name=UpdatedAt,json=updatedAt,proto3" json:"UpdatedAt,omitempty"` + Default bool `protobuf:"varint,7,opt,name=Default,json=default,proto3" json:"Default,omitempty"` } func (m *Exploration) Reset() { *m = Exploration{} } @@ -49,13 
+50,13 @@ func (*Exploration) ProtoMessage() {} func (*Exploration) Descriptor() ([]byte, []int) { return fileDescriptorInternal, []int{0} } type Source struct { - ID int64 `protobuf:"varint,1,opt,name=ID,proto3" json:"ID,omitempty"` - Name string `protobuf:"bytes,2,opt,name=Name,proto3" json:"Name,omitempty"` - Type string `protobuf:"bytes,3,opt,name=Type,proto3" json:"Type,omitempty"` - Username string `protobuf:"bytes,4,opt,name=Username,proto3" json:"Username,omitempty"` - Password string `protobuf:"bytes,5,opt,name=Password,proto3" json:"Password,omitempty"` - URL string `protobuf:"bytes,6,opt,name=URL,proto3" json:"URL,omitempty"` - Default bool `protobuf:"varint,7,opt,name=Default,proto3" json:"Default,omitempty"` + ID int64 `protobuf:"varint,1,opt,name=ID,json=iD,proto3" json:"ID,omitempty"` + Name string `protobuf:"bytes,2,opt,name=Name,json=name,proto3" json:"Name,omitempty"` + Type string `protobuf:"bytes,3,opt,name=Type,json=type,proto3" json:"Type,omitempty"` + Username string `protobuf:"bytes,4,opt,name=Username,json=username,proto3" json:"Username,omitempty"` + Password string `protobuf:"bytes,5,opt,name=Password,json=password,proto3" json:"Password,omitempty"` + URL string `protobuf:"bytes,6,opt,name=URL,json=uRL,proto3" json:"URL,omitempty"` + Default bool `protobuf:"varint,7,opt,name=Default,json=default,proto3" json:"Default,omitempty"` } func (m *Source) Reset() { *m = Source{} } @@ -64,12 +65,12 @@ func (*Source) ProtoMessage() {} func (*Source) Descriptor() ([]byte, []int) { return fileDescriptorInternal, []int{1} } type Server struct { - ID int64 `protobuf:"varint,1,opt,name=ID,proto3" json:"ID,omitempty"` - Name string `protobuf:"bytes,2,opt,name=Name,proto3" json:"Name,omitempty"` - Username string `protobuf:"bytes,3,opt,name=Username,proto3" json:"Username,omitempty"` - Password string `protobuf:"bytes,4,opt,name=Password,proto3" json:"Password,omitempty"` - URL string `protobuf:"bytes,5,opt,name=URL,proto3" json:"URL,omitempty"` - SrcID 
int64 `protobuf:"varint,6,opt,name=SrcID,proto3" json:"SrcID,omitempty"` + ID int64 `protobuf:"varint,1,opt,name=ID,json=iD,proto3" json:"ID,omitempty"` + Name string `protobuf:"bytes,2,opt,name=Name,json=name,proto3" json:"Name,omitempty"` + Username string `protobuf:"bytes,3,opt,name=Username,json=username,proto3" json:"Username,omitempty"` + Password string `protobuf:"bytes,4,opt,name=Password,json=password,proto3" json:"Password,omitempty"` + URL string `protobuf:"bytes,5,opt,name=URL,json=uRL,proto3" json:"URL,omitempty"` + SrcID int64 `protobuf:"varint,6,opt,name=SrcID,json=srcID,proto3" json:"SrcID,omitempty"` } func (m *Server) Reset() { *m = Server{} } @@ -78,10 +79,10 @@ func (*Server) ProtoMessage() {} func (*Server) Descriptor() ([]byte, []int) { return fileDescriptorInternal, []int{2} } type Layout struct { - ID string `protobuf:"bytes,1,opt,name=ID,proto3" json:"ID,omitempty"` - Application string `protobuf:"bytes,2,opt,name=Application,proto3" json:"Application,omitempty"` - Measurement string `protobuf:"bytes,3,opt,name=Measurement,proto3" json:"Measurement,omitempty"` - Cells []*Cell `protobuf:"bytes,4,rep,name=Cells" json:"Cells,omitempty"` + ID string `protobuf:"bytes,1,opt,name=ID,json=iD,proto3" json:"ID,omitempty"` + Application string `protobuf:"bytes,2,opt,name=Application,json=application,proto3" json:"Application,omitempty"` + Measurement string `protobuf:"bytes,3,opt,name=Measurement,json=measurement,proto3" json:"Measurement,omitempty"` + Cells []*Cell `protobuf:"bytes,4,rep,name=Cells,json=cells" json:"Cells,omitempty"` } func (m *Layout) Reset() { *m = Layout{} } @@ -119,9 +120,9 @@ func (m *Cell) GetQueries() []*Query { } type Query struct { - Command string `protobuf:"bytes,1,opt,name=Command,proto3" json:"Command,omitempty"` - DB string `protobuf:"bytes,2,opt,name=DB,proto3" json:"DB,omitempty"` - RP string `protobuf:"bytes,3,opt,name=RP,proto3" json:"RP,omitempty"` + Command string 
`protobuf:"bytes,1,opt,name=Command,json=command,proto3" json:"Command,omitempty"` + DB string `protobuf:"bytes,2,opt,name=DB,json=dB,proto3" json:"DB,omitempty"` + RP string `protobuf:"bytes,3,opt,name=RP,json=rP,proto3" json:"RP,omitempty"` } func (m *Query) Reset() { *m = Query{} } @@ -129,6 +130,16 @@ func (m *Query) String() string { return proto.CompactTextString(m) } func (*Query) ProtoMessage() {} func (*Query) Descriptor() ([]byte, []int) { return fileDescriptorInternal, []int{5} } +type AlertRule struct { + ID string `protobuf:"bytes,1,opt,name=ID,json=iD,proto3" json:"ID,omitempty"` + JSON string `protobuf:"bytes,2,opt,name=JSON,json=jSON,proto3" json:"JSON,omitempty"` +} + +func (m *AlertRule) Reset() { *m = AlertRule{} } +func (m *AlertRule) String() string { return proto.CompactTextString(m) } +func (*AlertRule) ProtoMessage() {} +func (*AlertRule) Descriptor() ([]byte, []int) { return fileDescriptorInternal, []int{6} } + func init() { proto.RegisterType((*Exploration)(nil), "internal.Exploration") proto.RegisterType((*Source)(nil), "internal.Source") @@ -136,38 +147,42 @@ func init() { proto.RegisterType((*Layout)(nil), "internal.Layout") proto.RegisterType((*Cell)(nil), "internal.Cell") proto.RegisterType((*Query)(nil), "internal.Query") + proto.RegisterType((*AlertRule)(nil), "internal.AlertRule") } func init() { proto.RegisterFile("internal.proto", fileDescriptorInternal) } var fileDescriptorInternal = []byte{ - // 442 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x8c, 0x93, 0xcf, 0x8e, 0xd3, 0x30, - 0x10, 0xc6, 0xe5, 0x24, 0x4e, 0x9b, 0x29, 0x2a, 0xc8, 0x42, 0xc8, 0x42, 0x1c, 0xa2, 0x88, 0x43, - 0xb9, 0xec, 0x01, 0x9e, 0xa0, 0xdb, 0x70, 0xa8, 0xb4, 0xa0, 0xe2, 0xa5, 0x0f, 0x60, 0x5a, 0xa3, - 0x8d, 0x94, 0x26, 0xc1, 0x71, 0x68, 0x73, 0xe5, 0x0a, 0x8f, 0xc1, 0x1b, 0xf0, 0x82, 0x68, 0x26, - 0xee, 0x9f, 0xc3, 0x6a, 0xd5, 0xdb, 0x7c, 0x33, 0x5f, 0x34, 0x3f, 0x7f, 0x76, 0x60, 0x5a, 0x54, - 0xce, 
0xd8, 0x4a, 0x97, 0x37, 0x8d, 0xad, 0x5d, 0x2d, 0xc6, 0x47, 0x9d, 0xfd, 0x63, 0x30, 0xf9, - 0x78, 0x68, 0xca, 0xda, 0x6a, 0x57, 0xd4, 0x95, 0x98, 0x42, 0xb0, 0xcc, 0x25, 0x4b, 0xd9, 0x2c, - 0x54, 0xc1, 0x32, 0x17, 0x02, 0xa2, 0xcf, 0x7a, 0x67, 0x64, 0x90, 0xb2, 0x59, 0xa2, 0xa8, 0x16, - 0xaf, 0x20, 0x5e, 0xb7, 0xc6, 0x2e, 0x73, 0x19, 0x92, 0xcf, 0x2b, 0xf4, 0xe6, 0xda, 0x69, 0x19, - 0x0d, 0x5e, 0xac, 0xc5, 0x1b, 0x48, 0x16, 0xd6, 0x68, 0x67, 0xb6, 0x73, 0x27, 0x39, 0xd9, 0xcf, - 0x0d, 0x9c, 0xae, 0x9b, 0xad, 0x9f, 0xc6, 0xc3, 0xf4, 0xd4, 0x10, 0x12, 0x46, 0xb9, 0xf9, 0xae, - 0xbb, 0xd2, 0xc9, 0x51, 0xca, 0x66, 0x63, 0x75, 0x94, 0xd9, 0x5f, 0x06, 0xf1, 0x7d, 0xdd, 0xd9, - 0x8d, 0xb9, 0x0a, 0x58, 0x40, 0xf4, 0xb5, 0x6f, 0x0c, 0xe1, 0x26, 0x8a, 0x6a, 0xf1, 0x1a, 0xc6, - 0x88, 0x5d, 0xa1, 0x77, 0x00, 0x3e, 0x69, 0x9c, 0xad, 0x74, 0xdb, 0xee, 0x6b, 0xbb, 0x25, 0xe6, - 0x44, 0x9d, 0xb4, 0x78, 0x01, 0xe1, 0x5a, 0xdd, 0x11, 0x6c, 0xa2, 0xb0, 0x7c, 0x02, 0xf3, 0x0f, - 0x62, 0x1a, 0xfb, 0xd3, 0xd8, 0xab, 0x30, 0x2f, 0x91, 0xc2, 0x27, 0x90, 0xa2, 0xc7, 0x91, 0xf8, - 0x19, 0xe9, 0x25, 0xf0, 0x7b, 0xbb, 0x59, 0xe6, 0x3e, 0xd3, 0x41, 0x64, 0xbf, 0x18, 0xc4, 0x77, - 0xba, 0xaf, 0x3b, 0x77, 0x81, 0x93, 0x10, 0x4e, 0x0a, 0x93, 0x79, 0xd3, 0x94, 0xc5, 0x86, 0x5e, - 0x81, 0xa7, 0xba, 0x6c, 0xa1, 0xe3, 0x93, 0xd1, 0x6d, 0x67, 0xcd, 0xce, 0x54, 0xce, 0xf3, 0x5d, - 0xb6, 0xc4, 0x5b, 0xe0, 0x0b, 0x53, 0x96, 0xad, 0x8c, 0xd2, 0x70, 0x36, 0x79, 0x3f, 0xbd, 0x39, - 0x3d, 0x3a, 0x6c, 0xab, 0x61, 0x98, 0xfd, 0x66, 0x10, 0x61, 0x25, 0x9e, 0x01, 0x3b, 0x10, 0x01, - 0x57, 0xec, 0x80, 0xaa, 0xa7, 0xb5, 0x5c, 0xb1, 0x1e, 0xd5, 0x9e, 0x56, 0x70, 0xc5, 0xf6, 0xa8, - 0x1e, 0xe8, 0xd0, 0x5c, 0xb1, 0x07, 0xf1, 0x0e, 0x46, 0x3f, 0x3a, 0x63, 0x0b, 0xd3, 0x4a, 0x4e, - 0x8b, 0x9e, 0x9f, 0x17, 0x7d, 0xe9, 0x8c, 0xed, 0xd5, 0x71, 0x8e, 0x1f, 0x16, 0xfe, 0xa6, 0x58, - 0x81, 0x91, 0x53, 0xb4, 0xa3, 0x21, 0x72, 0xac, 0xb3, 0x39, 0x70, 0xfa, 0x06, 0x2f, 0x71, 0x51, - 0xef, 0x76, 0xba, 0xda, 0xfa, 0x54, 0x8e, 
0x12, 0xa3, 0xca, 0x6f, 0x7d, 0x22, 0x41, 0x7e, 0x8b, - 0x5a, 0xad, 0xfc, 0xf9, 0x03, 0xb5, 0xfa, 0x16, 0xd3, 0x2f, 0xf5, 0xe1, 0x7f, 0x00, 0x00, 0x00, - 0xff, 0xff, 0x85, 0xa7, 0xa7, 0xb1, 0x64, 0x03, 0x00, 0x00, + // 486 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x8c, 0x93, 0xcf, 0x8e, 0xd3, 0x3c, + 0x14, 0xc5, 0xe5, 0x26, 0x4e, 0x9a, 0xdb, 0x4f, 0xfd, 0x90, 0x85, 0x50, 0x84, 0x58, 0x54, 0x11, + 0x8b, 0xb2, 0x19, 0x24, 0x78, 0x82, 0x4e, 0xc3, 0xa2, 0xa8, 0x74, 0x8a, 0x4b, 0x1f, 0xc0, 0x24, + 0x17, 0x4d, 0x50, 0xfe, 0xe1, 0xd8, 0xb4, 0xd9, 0xb2, 0x85, 0xc7, 0xe0, 0x0d, 0x78, 0x41, 0x64, + 0xd7, 0x21, 0x23, 0x81, 0x46, 0xb3, 0x3c, 0xf7, 0xdc, 0xe8, 0xfe, 0xee, 0xb9, 0x0e, 0xcc, 0x8b, + 0x5a, 0xa1, 0xac, 0x45, 0x79, 0xd5, 0xca, 0x46, 0x35, 0x6c, 0x3a, 0xe8, 0xe4, 0x17, 0x81, 0xd9, + 0x9b, 0x73, 0x5b, 0x36, 0x52, 0xa8, 0xa2, 0xa9, 0xd9, 0x1c, 0x26, 0x9b, 0x34, 0x26, 0x0b, 0xb2, + 0xf4, 0xf8, 0xa4, 0x48, 0x19, 0x03, 0x7f, 0x27, 0x2a, 0x8c, 0x27, 0x0b, 0xb2, 0x8c, 0xb8, 0x5f, + 0x8b, 0x0a, 0xd9, 0x13, 0x08, 0x8e, 0x1d, 0xca, 0x4d, 0x1a, 0x7b, 0xb6, 0x2f, 0xd0, 0x56, 0x99, + 0xde, 0x54, 0x28, 0x11, 0xfb, 0x97, 0xde, 0x5c, 0x28, 0xc1, 0x9e, 0x41, 0xb4, 0x96, 0x28, 0x14, + 0xe6, 0x2b, 0x15, 0x53, 0xdb, 0x1e, 0x65, 0x43, 0xc1, 0xb8, 0xc7, 0x36, 0x77, 0x6e, 0x70, 0x71, + 0xf5, 0x50, 0x60, 0x31, 0x84, 0x29, 0x7e, 0x12, 0xba, 0x54, 0x71, 0xb8, 0x20, 0xcb, 0x29, 0x0f, + 0xf3, 0x8b, 0x4c, 0x7e, 0x12, 0x08, 0x0e, 0x8d, 0x96, 0x19, 0x3e, 0x08, 0x98, 0x81, 0xff, 0xa1, + 0x6f, 0xd1, 0xe2, 0x46, 0xdc, 0x57, 0x7d, 0x8b, 0xec, 0x29, 0x4c, 0xcd, 0x12, 0xc6, 0x77, 0xc0, + 0x53, 0xed, 0xb4, 0xf1, 0xf6, 0xa2, 0xeb, 0x4e, 0x8d, 0xcc, 0x2d, 0x73, 0xc4, 0xa7, 0xad, 0xd3, + 0xec, 0x11, 0x78, 0x47, 0xbe, 0xb5, 0xb0, 0x11, 0xf7, 0x34, 0xdf, 0xde, 0x83, 0xf9, 0xc3, 0x60, + 0xa2, 0xfc, 0x8a, 0xf2, 0x41, 0x98, 0x77, 0x91, 0xbc, 0x7b, 0x90, 0xfc, 0x7f, 0x23, 0xd1, 0x11, + 0xe9, 0x31, 0xd0, 0x83, 0xcc, 0x36, 0xa9, 0xcb, 0x94, 0x76, 0x46, 
0x24, 0xdf, 0x08, 0x04, 0x5b, + 0xd1, 0x37, 0x5a, 0xdd, 0xc1, 0x89, 0x2c, 0xce, 0x02, 0x66, 0xab, 0xb6, 0x2d, 0x8b, 0xcc, 0xbe, + 0x02, 0x47, 0x35, 0x13, 0x63, 0xc9, 0x74, 0xbc, 0x43, 0xd1, 0x69, 0x89, 0x15, 0xd6, 0xca, 0xf1, + 0xcd, 0xaa, 0xb1, 0xc4, 0x9e, 0x03, 0x5d, 0x63, 0x59, 0x76, 0xb1, 0xbf, 0xf0, 0x96, 0xb3, 0x57, + 0xf3, 0xab, 0x3f, 0x8f, 0xce, 0x94, 0x39, 0xcd, 0x8c, 0x99, 0x7c, 0x27, 0xe0, 0x1b, 0xcd, 0xfe, + 0x03, 0x72, 0xb6, 0x04, 0x94, 0x93, 0xb3, 0x51, 0xbd, 0x1d, 0x4b, 0x39, 0xe9, 0x8d, 0x3a, 0xd9, + 0x11, 0x94, 0x93, 0x93, 0x51, 0xb7, 0x76, 0x69, 0xca, 0xc9, 0x2d, 0x7b, 0x01, 0xe1, 0x17, 0x8d, + 0xb2, 0xc0, 0x2e, 0xa6, 0x76, 0xd0, 0xff, 0xe3, 0xa0, 0xf7, 0x1a, 0x65, 0xcf, 0x07, 0xdf, 0x7c, + 0x58, 0xb8, 0x4b, 0x91, 0xc2, 0x44, 0x6e, 0xa3, 0x0d, 0xc7, 0xc8, 0x93, 0x15, 0x50, 0xfb, 0x8d, + 0x39, 0xe2, 0xba, 0xa9, 0x2a, 0x51, 0xe7, 0x2e, 0x95, 0x30, 0xbb, 0x48, 0x13, 0x55, 0x7a, 0xed, + 0x12, 0x99, 0xe4, 0xd7, 0x46, 0xf3, 0xbd, 0xdb, 0x7f, 0x22, 0xf7, 0xc9, 0x4b, 0x88, 0x56, 0x25, + 0x4a, 0xc5, 0x75, 0x89, 0x7f, 0xe5, 0xca, 0xc0, 0x7f, 0x7b, 0xb8, 0xd9, 0x0d, 0x67, 0xfe, 0x7c, + 0xb8, 0xd9, 0x7d, 0x0c, 0xec, 0x3f, 0xf8, 0xfa, 0x77, 0x00, 0x00, 0x00, 0xff, 0xff, 0xe0, 0xb6, + 0x3a, 0xf1, 0x95, 0x03, 0x00, 0x00, } diff --git a/bolt/internal/internal.proto b/bolt/internal/internal.proto index b482fe86cd..221b140c28 100644 --- a/bolt/internal/internal.proto +++ b/bolt/internal/internal.proto @@ -52,3 +52,8 @@ message Query { string DB = 2; // DB the database for the query (optional) string RP = 3; // RP is a retention policy and optional; } + +message AlertRule { + string ID = 1; // ID is the unique ID of this alert rule + string JSON = 2; // JSON byte representation of the alert +} diff --git a/chronograf.go b/chronograf.go index 649bcb022a..ff2dc4311a 100644 --- a/chronograf.go +++ b/chronograf.go @@ -13,6 +13,7 @@ const ( ErrSourceNotFound = Error("source not found") ErrServerNotFound = Error("server not found") ErrLayoutNotFound = Error("layout not 
found") + ErrAlertNotFound = Error("alert not found") ErrAuthentication = Error("user not authenticated") ) @@ -122,34 +123,15 @@ type Ticker interface { Generate(AlertRule) (TICKScript, error) } -// DeadmanValue specifies the timeout duration of a deadman alert. -type DeadmanValue struct { - Period string `json:"period, omitempty"` // Period is the max time data can be missed before an alert -} - -// RelativeValue specifies the trigger logic for a relative value change alert. -type RelativeValue struct { - Change string `json:"change,omitempty"` // Change specifies if the change is a percent or absolute - Period string `json:"period,omitempty"` // Period is the window to search for alerting criteria - Shift string `json:"shift,omitempty"` // Shift is the amount of time to look into the past for the alert to compare to the present - Operator string `json:"operator,omitempty"` // Operator for alert comparison - Value string `json:"value,omitempty"` // Value is the boundary value when alert goes critical -} - -// ThresholdValue specifies the trigger logic for a threshold change alert. 
-type ThresholdValue struct { - Period string `json:"period,omitempty"` // Period is the window to search for the alerting criteria +// TriggerValues specifies the alerting logic for a specific trigger type +type TriggerValues struct { + Change string `json:"change,omitempty"` // Change specifies if the change is a percent or absolute + Period string `json:"period,omitempty"` // Period is the window to search for alerting criteria + Shift string `json:"shift,omitempty"` // Shift is the amount of time to look into the past for the alert to compare to the present Operator string `json:"operator,omitempty"` // Operator for alert comparison + Value string `json:"value,omitempty"` // Value is the boundary value when alert goes critical Percentile string `json:"percentile,omitempty"` // Percentile is defined only when Relation is not "Once" Relation string `json:"relation,omitempty"` // Relation defines the logic about how often the threshold is met to be an alert. - Value string `json:"value,omitempty"` // Value is the boundary value when alert goes critical -} - -// TriggerValues specifies which of the trigger types defines the alerting logic. One of these whould not be nil. 
-type TriggerValues struct { - Deadman *DeadmanValue `json:"deadman,omitempty"` - Relative *RelativeValue `json:"relative,omitempty"` - Threshold *ThresholdValue `json:"threshold,omitempty"` } // QueryConfig represents UI query from the data explorer diff --git a/kapacitor/alerts.go b/kapacitor/alerts.go index f84f54c864..861ea598e4 100644 --- a/kapacitor/alerts.go +++ b/kapacitor/alerts.go @@ -6,14 +6,37 @@ import ( "github.com/influxdata/chronograf" ) +func kapaService(alert string) (string, error) { + switch alert { + case "hipchat": + return "hipChat", nil + case "opsgenie": + return "opsGenie", nil + case "pagerduty": + return "pagerDuty", nil + case "victorops": + return "victorOps", nil + case "smtp": + return "email", nil + case "sensu", "slack", "email", "talk", "telegram": + return alert, nil + default: + return "", fmt.Errorf("Unsupport alert %s", alert) + } +} + // AlertServices generates alert chaining methods to be attached to an alert from all rule Services func AlertServices(rule chronograf.AlertRule) (string, error) { alert := "" for _, service := range rule.Alerts { - if err := ValidateAlert(service); err != nil { + srv, err := kapaService(service) + if err != nil { return "", err } - alert = alert + fmt.Sprintf(".%s()", service) + if err := ValidateAlert(srv); err != nil { + return "", err + } + alert = alert + fmt.Sprintf(".%s()", srv) } return alert, nil } diff --git a/kapacitor/alerts_test.go b/kapacitor/alerts_test.go index 651027af67..5dbb33b77e 100644 --- a/kapacitor/alerts_test.go +++ b/kapacitor/alerts_test.go @@ -16,7 +16,7 @@ func TestAlertServices(t *testing.T) { { name: "Test several valid services", rule: chronograf.AlertRule{ - Alerts: []string{"slack", "victorOps", "email"}, + Alerts: []string{"slack", "victorops", "email"}, }, want: `alert() .slack() diff --git a/kapacitor/tickscripts_test.go b/kapacitor/tickscripts_test.go index 960e16161b..848c7999e1 100644 --- a/kapacitor/tickscripts_test.go +++ b/kapacitor/tickscripts_test.go 
@@ -1,6 +1,7 @@ package kapacitor import ( + "encoding/json" "fmt" "testing" @@ -11,15 +12,13 @@ func TestGenerate(t *testing.T) { alert := chronograf.AlertRule{ Name: "name", Trigger: "relative", - Alerts: []string{"slack", "victorOps", "email"}, + Alerts: []string{"slack", "victorops", "email"}, TriggerValues: chronograf.TriggerValues{ - Relative: &chronograf.RelativeValue{ - Change: "change", - Period: "10m", - Shift: "1m", - Operator: "greater than", - Value: "90", - }, + Change: "change", + Period: "10m", + Shift: "1m", + Operator: "greater than", + Value: "90", }, Every: "30s", Query: chronograf.QueryConfig{ @@ -67,15 +66,13 @@ func TestThreshold(t *testing.T) { alert := chronograf.AlertRule{ Name: "name", Trigger: "threshold", - Alerts: []string{"slack", "victorOps", "email"}, + Alerts: []string{"slack", "victorops", "email"}, TriggerValues: chronograf.TriggerValues{ - Threshold: &chronograf.ThresholdValue{ - Relation: "once", - Period: "10m", - Percentile: "", // TODO: if relation is not once then this will have a number - Operator: "greater than", - Value: "90", - }, + Relation: "once", + Period: "10m", + Percentile: "", // TODO: if relation is not once then this will have a number + Operator: "greater than", + Value: "90", }, Every: "30s", Message: "message", @@ -222,15 +219,13 @@ func TestRelative(t *testing.T) { alert := chronograf.AlertRule{ Name: "name", Trigger: "relative", - Alerts: []string{"slack", "victorOps", "email"}, + Alerts: []string{"slack", "victorops", "email"}, TriggerValues: chronograf.TriggerValues{ - Relative: &chronograf.RelativeValue{ - Change: "change", - Period: "10m", - Shift: "1m", - Operator: "greater than", - Value: "90", - }, + Change: "change", + Period: "10m", + Shift: "1m", + Operator: "greater than", + Value: "90", }, Every: "30s", Message: "message", @@ -382,6 +377,8 @@ trigger fmt.Printf("%s", got) t.Errorf("%q. 
Relative() = %v, want %v", tt.name, got, tt.want) } + b, _ := json.Marshal(tt.alert) + fmt.Printf("%s", string(b)) } } @@ -389,11 +386,9 @@ func TestDeadman(t *testing.T) { alert := chronograf.AlertRule{ Name: "name", Trigger: "deadman", - Alerts: []string{"slack", "victorOps", "email"}, + Alerts: []string{"slack", "victorops", "email"}, TriggerValues: chronograf.TriggerValues{ - Deadman: &chronograf.DeadmanValue{ - Period: "10m", - }, + Period: "10m", }, Every: "30s", Message: "message", diff --git a/kapacitor/triggers.go b/kapacitor/triggers.go index d0f2926646..d0c233c493 100644 --- a/kapacitor/triggers.go +++ b/kapacitor/triggers.go @@ -59,13 +59,13 @@ func Trigger(rule chronograf.AlertRule) (string, error) { case "deadman": return DeadmanTrigger, nil case "relative": - op, err := kapaOperator(rule.TriggerValues.Relative.Operator) + op, err := kapaOperator(rule.TriggerValues.Operator) if err != nil { return "", err } return fmt.Sprintf(RelativeTrigger, op), nil case "threshold": - op, err := kapaOperator(rule.TriggerValues.Threshold.Operator) + op, err := kapaOperator(rule.TriggerValues.Operator) if err != nil { return "", err } diff --git a/kapacitor/triggers_test.go b/kapacitor/triggers_test.go index db1733514a..8665d5a3e8 100644 --- a/kapacitor/triggers_test.go +++ b/kapacitor/triggers_test.go @@ -35,9 +35,7 @@ func TestTrigger(t *testing.T) { rule: chronograf.AlertRule{ Trigger: "relative", TriggerValues: chronograf.TriggerValues{ - Relative: &chronograf.RelativeValue{ - Operator: "greater than", - }, + Operator: "greater than", }, }, want: `var past = data @@ -68,9 +66,7 @@ var trigger = past rule: chronograf.AlertRule{ Trigger: "threshold", TriggerValues: chronograf.TriggerValues{ - Threshold: &chronograf.ThresholdValue{ - Operator: "greater than", - }, + Operator: "greater than", }, }, want: `var trigger = data diff --git a/kapacitor/vars.go b/kapacitor/vars.go index b9d2f7dd2c..73b4d09c45 100644 --- a/kapacitor/vars.go +++ b/kapacitor/vars.go @@ -41,8 
+41,8 @@ func Vars(rule chronograf.AlertRule) (string, error) { ` return fmt.Sprintf(vars, common, - rule.TriggerValues.Threshold.Period, - rule.TriggerValues.Threshold.Value), nil + rule.TriggerValues.Period, + rule.TriggerValues.Value), nil case "relative": vars := ` %s @@ -52,9 +52,9 @@ func Vars(rule chronograf.AlertRule) (string, error) { ` return fmt.Sprintf(vars, common, - rule.TriggerValues.Relative.Period, - rule.TriggerValues.Relative.Shift, - rule.TriggerValues.Relative.Value, + rule.TriggerValues.Period, + rule.TriggerValues.Shift, + rule.TriggerValues.Value, ), nil case "deadman": vars := ` @@ -65,7 +65,7 @@ func Vars(rule chronograf.AlertRule) (string, error) { return fmt.Sprintf(vars, common, "0.0", // deadman threshold hardcoded to zero - rule.TriggerValues.Deadman.Period, + rule.TriggerValues.Period, ), nil default: return "", fmt.Errorf("Unknown trigger mechanism") diff --git a/server/kapacitors.go b/server/kapacitors.go index 3090595915..bf5535db01 100644 --- a/server/kapacitors.go +++ b/server/kapacitors.go @@ -293,21 +293,53 @@ func (h *Service) KapacitorTasksPost(w http.ResponseWriter, r *http.Request) { ID: &uuid.V4{}, } - var rule chronograf.AlertRule - task, err := c.Create(ctx, rule) + var req chronograf.AlertRule + if err = json.NewDecoder(r.Body).Decode(&req); err != nil { + invalidJSON(w) + return + } + // TODO: validate this data + /* + if err := req.Valid(); err != nil { + invalidData(w, err) + return + } + */ + + task, err := c.Create(ctx, req) if err != nil { Error(w, http.StatusInternalServerError, err.Error()) return } - // TODO: Set the tickscript the store - // TODO: possibly use the Href in update to the store - _ = task.TICKScript - _ = task.ID - _ = task.Href - // TODO: Add the task from the store - // TODO: Return POST response - w.WriteHeader(http.StatusNoContent) + req.ID = task.ID + rule, err := h.AlertRulesStore.Add(ctx, req) + if err != nil { + Error(w, http.StatusInternalServerError, err.Error()) + return + } + res := 
alertResponse{ + AlertRule: rule, + Links: alertLinks{ + Self: fmt.Sprintf("/chronograf/v1/sources/%d/kapacitors/%d/tasks/%s", srv.SrcID, srv.ID, req.ID), + Kapacitor: fmt.Sprintf("/chronograf/v1/sources/%d/kapacitors/%d/proxy?path=%s", srv.SrcID, srv.ID, url.QueryEscape(task.Href)), + }, + TICKScript: string(task.TICKScript), + } + + w.Header().Add("Location", res.Links.Self) + encodeJSON(w, http.StatusCreated, res, h.Logger) +} + +type alertLinks struct { + Self string `json:"self"` + Kapacitor string `json:"kapacitor"` +} + +type alertResponse struct { + chronograf.AlertRule + TICKScript string `json:"tickscript"` + Links alertLinks `json:"links"` } // KapacitorTasksPut proxies PATCH to kapacitor @@ -338,19 +370,40 @@ func (h *Service) KapacitorTasksPut(w http.ResponseWriter, r *http.Request) { Password: srv.Password, Ticker: &kapa.Alert{}, } - // TODO: Pull rule from PUT parameters - var rule chronograf.AlertRule - task, err := c.Update(ctx, c.Href(tid), rule) + var req chronograf.AlertRule + if err = json.NewDecoder(r.Body).Decode(&req); err != nil { + invalidJSON(w) + return + } + // TODO: validate this data + /* + if err := req.Valid(); err != nil { + invalidData(w, err) + return + } + */ + + req.ID = tid + task, err := c.Update(ctx, c.Href(tid), req) if err != nil { Error(w, http.StatusInternalServerError, err.Error()) return } - // TODO: Set the tickscript in the update to the store - // TODO: possibly use the Href in update to the store - _ = task.TICKScript - // TODO: Update the task from the store - // TODO: Return Patch response - w.WriteHeader(http.StatusNoContent) + + if err := h.AlertRulesStore.Update(ctx, req); err != nil { + Error(w, http.StatusInternalServerError, err.Error()) + return + } + + res := alertResponse{ + AlertRule: req, + Links: alertLinks{ + Self: fmt.Sprintf("/chronograf/v1/sources/%d/kapacitors/%d/tasks/%s", srv.SrcID, srv.ID, req.ID), + Kapacitor: fmt.Sprintf("/chronograf/v1/sources/%d/kapacitors/%d/proxy?path=%s", srv.SrcID, 
srv.ID, url.QueryEscape(task.Href)), + }, + TICKScript: string(task.TICKScript), + } + encodeJSON(w, http.StatusOK, res, h.Logger) } // KapacitorTasksGet retrieves all tasks @@ -373,7 +426,40 @@ func (h *Service) KapacitorTasksGet(w http.ResponseWriter, r *http.Request) { notFound(w, id) return } - // TODO: GET tasks from store + + rules, err := h.AlertRulesStore.All(ctx) + if err != nil { + Error(w, http.StatusInternalServerError, err.Error()) + return + } + + ticker := &kapa.Alert{} + c := kapa.Client{} + res := allAlertsResponse{ + Tasks: []alertResponse{}, + } + for _, rule := range rules { + tickscript, err := ticker.Generate(rule) + if err != nil { + Error(w, http.StatusInternalServerError, err.Error()) + return + } + + ar := alertResponse{ + AlertRule: rule, + Links: alertLinks{ + Self: fmt.Sprintf("/chronograf/v1/sources/%d/kapacitors/%d/tasks/%s", srv.SrcID, srv.ID, rule.ID), + Kapacitor: fmt.Sprintf("/chronograf/v1/sources/%d/kapacitors/%d/proxy?path=%s", srv.SrcID, srv.ID, url.QueryEscape(c.Href(rule.ID))), + }, + TICKScript: string(tickscript), + } + res.Tasks = append(res.Tasks, ar) + } + encodeJSON(w, http.StatusOK, res, h.Logger) +} + +type allAlertsResponse struct { + Tasks []alertResponse `json:"tasks"` } // KapacitorTasksGet retrieves specific task @@ -397,8 +483,29 @@ func (h *Service) KapacitorTasksID(w http.ResponseWriter, r *http.Request) { return } tid := httprouter.GetParamFromContext(ctx, "tid") - // TODO: GET task from store - _ = tid + rule, err := h.AlertRulesStore.Get(ctx, tid) + if err != nil { + Error(w, http.StatusInternalServerError, err.Error()) + return + } + + ticker := &kapa.Alert{} + c := kapa.Client{} + tickscript, err := ticker.Generate(rule) + if err != nil { + Error(w, http.StatusInternalServerError, err.Error()) + return + } + + res := alertResponse{ + AlertRule: rule, + Links: alertLinks{ + Self: fmt.Sprintf("/chronograf/v1/sources/%d/kapacitors/%d/tasks/%s", srv.SrcID, srv.ID, rule.ID), + Kapacitor: 
fmt.Sprintf("/chronograf/v1/sources/%d/kapacitors/%d/proxy?path=%s", srv.SrcID, srv.ID, url.QueryEscape(c.Href(rule.ID))), + }, + TICKScript: string(tickscript), + } + encodeJSON(w, http.StatusOK, res, h.Logger) } // KapacitorTasksDelete proxies DELETE to kapacitor @@ -422,7 +529,6 @@ func (h *Service) KapacitorTasksDelete(w http.ResponseWriter, r *http.Request) { return } - // TODO: Delete the task from the store tid := httprouter.GetParamFromContext(ctx, "tid") c := kapa.Client{ URL: srv.URL, @@ -433,5 +539,11 @@ func (h *Service) KapacitorTasksDelete(w http.ResponseWriter, r *http.Request) { Error(w, http.StatusInternalServerError, err.Error()) return } + + if err := h.AlertRulesStore.Delete(ctx, chronograf.AlertRule{ID: tid}); err != nil { + Error(w, http.StatusInternalServerError, err.Error()) + return + } + w.WriteHeader(http.StatusNoContent) } diff --git a/server/server.go b/server/server.go index e27f43f8e4..7107b9246f 100644 --- a/server/server.go +++ b/server/server.go @@ -128,6 +128,7 @@ func openService(boltPath, cannedPath string, logger chronograf.Logger) Service ServersStore: db.ServersStore, TimeSeries: &influx.Client{}, LayoutStore: layouts, + AlertRulesStore: db.AlertsStore, } } diff --git a/server/service.go b/server/service.go index 5486f00eb4..198b342a7c 100644 --- a/server/service.go +++ b/server/service.go @@ -8,6 +8,7 @@ type Service struct { SourcesStore chronograf.SourcesStore ServersStore chronograf.ServersStore LayoutStore chronograf.LayoutStore + AlertRulesStore chronograf.AlertRulesStore TimeSeries chronograf.TimeSeries Logger chronograf.Logger }