From 98198dbf5b33b8ddfbe7e3d6ce91db0789d7e81c Mon Sep 17 00:00:00 2001 From: Chris Goller Date: Tue, 7 Feb 2017 17:09:14 -0600 Subject: [PATCH] Update kapacitor alert rule to have detail field --- bolt/alerts_test.go | 108 +++++++++++++++++++++++++ chronograf.go | 13 +-- kapacitor/tickscripts_test.go | 148 ++++++++++++++++++++++++++++++++++ kapacitor/triggers.go | 16 +++- kapacitor/vars.go | 11 ++- server/swagger.json | 4 + 6 files changed, 289 insertions(+), 11 deletions(-) create mode 100644 bolt/alerts_test.go diff --git a/bolt/alerts_test.go b/bolt/alerts_test.go new file mode 100644 index 000000000..51d8b8aea --- /dev/null +++ b/bolt/alerts_test.go @@ -0,0 +1,108 @@ +package bolt_test + +import ( + "context" + "reflect" + "testing" + + "github.com/influxdata/chronograf" +) + +func setupTestClient() (*TestClient, error) { + if c, err := NewTestClient(); err != nil { + return nil, err + } else if err := c.Open(); err != nil { + return nil, err + } else { + return c, nil + } +} + +// Ensure an AlertRuleStore can be stored. +func TestAlertRuleStoreAdd(t *testing.T) { + c, err := setupTestClient() + if err != nil { + t.Fatal(err) + } + defer c.Close() + s := c.AlertsStore + + alerts := []chronograf.AlertRule{ + chronograf.AlertRule{ + ID: "one", + }, + chronograf.AlertRule{ + ID: "two", + Details: "howdy", + }, + } + + // Add new alert. + ctx := context.Background() + for i, a := range alerts { + // Adding should return an identical copy + actual, err := s.Add(ctx, 0, 0, a) + if err != nil { + t.Errorf("erroring adding alert to store: %v", err) + } + if !reflect.DeepEqual(actual, alerts[i]) { + t.Fatalf("alert returned is different then alert saved; actual: %v, expected %v", actual, alerts[i]) + } + } +} + +func setupWithRule(ctx context.Context, alert chronograf.AlertRule) (*TestClient, error) { + c, err := setupTestClient() + if err != nil { + return nil, err + } + + // Add test alert + if _, err := c.AlertsStore.Add(ctx, 0, 0, alert); err != nil { + return nil, err + } + return c, nil +} + +// Ensure an AlertRuleStore can be loaded. +func TestAlertRuleStoreGet(t *testing.T) { + ctx := context.Background() + alert := chronograf.AlertRule{ + ID: "one", + } + c, err := setupWithRule(ctx, alert) + if err != nil { + t.Fatalf("Error adding test alert to store: %v", err) + } + defer c.Close() + actual, err := c.AlertsStore.Get(ctx, 0, 0, "one") + if err != nil { + t.Fatalf("Error loading rule from store: %v", err) + } + + if !reflect.DeepEqual(actual, alert) { + t.Fatalf("alert returned is different then alert saved; actual: %v, expected %v", actual, alert) + } +} + +// Ensure an AlertRuleStore can be load with a detail. +func TestAlertRuleStoreGetDetail(t *testing.T) { + ctx := context.Background() + alert := chronograf.AlertRule{ + ID: "one", + Details: "my details", + } + c, err := setupWithRule(ctx, alert) + if err != nil { + t.Fatalf("Error adding test alert to store: %v", err) + } + defer c.Close() + actual, err := c.AlertsStore.Get(ctx, 0, 0, "one") + if err != nil { + t.Fatalf("Error loading rule from store: %v", err) + } + + if !reflect.DeepEqual(actual, alert) { + t.Fatalf("alert returned is different then alert saved; actual: %v, expected %v", actual, alert) + } +} diff --git a/chronograf.go b/chronograf.go index 25f53c870..d51fe2291 100644 --- a/chronograf.go +++ b/chronograf.go @@ -110,6 +110,7 @@ type AlertRule struct { Every string `json:"every"` // Every how often to check for the alerting criteria Alerts []string `json:"alerts"` // AlertServices name all the services to notify (e.g. pagerduty) Message string `json:"message"` // Message included with alert + Details string `json:"details"` // Details is generally used for the Email alert. If empty will not be added. Trigger string `json:"trigger"` // Trigger is a type that defines when to trigger the alert TriggerValues TriggerValues `json:"values"` // Defines the values that cause the alert to trigger Name string `json:"name"` // Name is the user-defined name for the alert @@ -238,13 +239,13 @@ type Dashboard struct { // DashboardCell holds visual and query information for a cell type DashboardCell struct { - X int32 `json:"x"` - Y int32 `json:"y"` - W int32 `json:"w"` - H int32 `json:"h"` - Name string `json:"name"` + X int32 `json:"x"` + Y int32 `json:"y"` + W int32 `json:"w"` + H int32 `json:"h"` + Name string `json:"name"` Queries []Query `json:"queries"` - Type string `json:"type"` + Type string `json:"type"` } // DashboardsStore is the storage and retrieval of dashboards diff --git a/kapacitor/tickscripts_test.go b/kapacitor/tickscripts_test.go index 465812543..c00e04378 100644 --- a/kapacitor/tickscripts_test.go +++ b/kapacitor/tickscripts_test.go @@ -199,6 +199,154 @@ trigger } } +func TestThresholdDetail(t *testing.T) { + alert := chronograf.AlertRule{ + Name: "name", + Trigger: "threshold", + Alerts: []string{"slack", "victorops", "email"}, + TriggerValues: chronograf.TriggerValues{ + Operator: "greater than", + Value: "90", + }, + Every: "30s", + Message: "message", + Details: "details", + Query: chronograf.QueryConfig{ + Database: "telegraf", + Measurement: "cpu", + RetentionPolicy: "autogen", + Fields: []chronograf.Field{ + { + Field: "usage_user", + Funcs: []string{"mean"}, + }, + }, + Tags: map[string][]string{ + "host": []string{ + "acc-0eabc309-eu-west-1-data-3", + "prod", + }, + "cpu": []string{ + "cpu_total", + }, + }, + GroupBy: chronograf.GroupBy{ + Time: "10m", + Tags: []string{"host", "cluster_id"}, + }, + AreTagsAccepted: true, + RawText: "", + }, + } + + tests := []struct { + name string + alert chronograf.AlertRule + want chronograf.TICKScript + wantErr bool + }{ + { + name: "Test valid template alert", + alert: alert, + want: `var db = 'telegraf' + +var rp = 'autogen' + +var measurement = 'cpu' + +var groupBy = ['host', 'cluster_id'] + +var whereFilter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod') + +var period = 10m + +var every = 30s + +var name = 'name' + +var idVar = name + ':{{.Group}}' + +var message = 'message' + +var idTag = 'alertID' + +var levelTag = 'level' + +var messageField = 'message' + +var durationField = 'duration' + +var outputDB = 'chronograf' + +var outputRP = 'autogen' + +var outputMeasurement = 'alerts' + +var triggerType = 'threshold' + +var details = 'details' + +var crit = 90 + +var data = stream + |from() + .database(db) + .retentionPolicy(rp) + .measurement(measurement) + .groupBy(groupBy) + .where(whereFilter) + |window() + .period(period) + .every(every) + .align() + |mean('usage_user') + .as('value') + +var trigger = data + |alert() + .crit(lambda: "value" > crit) + .stateChangesOnly() + .message(message) + .id(idVar) + .idTag(idTag) + .levelTag(levelTag) + .messageField(messageField) + .durationField(durationField) + .details(details) + .slack() + .victorOps() + .email() + +trigger + |influxDBOut() + .create() + .database(outputDB) + .retentionPolicy(outputRP) + .measurement(outputMeasurement) + .tag('alertName', name) + .tag('triggerType', triggerType) + +trigger + |httpOut('output') +`, + wantErr: false, + }, + } + for _, tt := range tests { + gen := Alert{} + got, err := gen.Generate(tt.alert) + if (err != nil) != tt.wantErr { + t.Errorf("%q. Threshold() error = %v, wantErr %v", tt.name, err, tt.wantErr) + continue + } + if got != tt.want { + diff := diffmatchpatch.New() + delta := diff.DiffMain(string(tt.want), string(got), true) + t.Errorf("%q\n%s", tt.name, diff.DiffPrettyText(delta)) + } + } +} + func TestThresholdInsideRange(t *testing.T) { alert := chronograf.AlertRule{ Name: "name", diff --git a/kapacitor/triggers.go b/kapacitor/triggers.go index 6efab3cbc..b52772b68 100644 --- a/kapacitor/triggers.go +++ b/kapacitor/triggers.go @@ -27,13 +27,19 @@ var AllAlerts = ` .durationField(durationField) ` -// ThresholdTrigger is the trickscript trigger for alerts that exceed a value +// Details is used only for alerts that specify detail string +var Details = ` + .details(details) +` + +// ThresholdTrigger is the tickscript trigger for alerts that exceed a value var ThresholdTrigger = ` var trigger = data |alert() .crit(lambda: "value" %s crit) ` +// ThresholdRangeTrigger is the alert when data does not intersect the range. var ThresholdRangeTrigger = ` var trigger = data |alert() @@ -102,7 +108,11 @@ func Trigger(rule chronograf.AlertRule) (string, error) { return "", err } - return trigger + AllAlerts, nil + trigger += AllAlerts + if rule.Details != "" { + trigger += Details + } + return trigger, nil } func relativeTrigger(rule chronograf.AlertRule) (string, error) { @@ -132,7 +142,7 @@ func thresholdRangeTrigger(rule chronograf.AlertRule) (string, error) { if err != nil { return "", err } - var iops []interface{} = make([]interface{}, len(ops)) + var iops = make([]interface{}, len(ops)) for i, o := range ops { iops[i] = o } diff --git a/kapacitor/vars.go b/kapacitor/vars.go index 4692fbd82..f9239e6bf 100644 --- a/kapacitor/vars.go +++ b/kapacitor/vars.go @@ -100,7 +100,7 @@ func commonVars(rule chronograf.AlertRule) (string, error) { var outputMeasurement = '%s' var triggerType = '%s' ` - return fmt.Sprintf(common, + res := fmt.Sprintf(common, rule.Query.Database, rule.Query.RetentionPolicy, rule.Query.Measurement, @@ -117,7 +117,14 @@ func commonVars(rule chronograf.AlertRule) (string, error) { RP, Measurement, rule.Trigger, - ), nil + ) + + if rule.Details != "" { + res += fmt.Sprintf(` + var details = '%s' + `, rule.Details) + } + return res, nil } // window is only used if deadman or threshold/relative with aggregate. Will return empty diff --git a/server/swagger.json b/server/swagger.json index 19d556be4..a590ca78e 100644 --- a/server/swagger.json +++ b/server/swagger.json @@ -1958,6 +1958,10 @@ "type": "string", "description": "Message to send when alert occurs." }, + "details": { + "type": "string", + "description": "Template for constructing a detailed HTML message for the alert. (Currently, only used for email/smtp" + }, "trigger": { "type": "string", "description": "Trigger defines the alerting structure; deadman alert if no data are received for the specified time range; relative alert if the data change relative to the data in a different time range; threshold alert if the data cross a boundary",