Update kapacitor alert rule to have detail field

pull/852/head
Chris Goller 2017-02-07 17:09:14 -06:00
parent 9db24ab075
commit 98198dbf5b
6 changed files with 289 additions and 11 deletions

108
bolt/alerts_test.go Normal file
View File

@ -0,0 +1,108 @@
package bolt_test
import (
"context"
"reflect"
"testing"
"github.com/influxdata/chronograf"
)
// setupTestClient creates a new TestClient and opens it, returning the first
// error encountered from either step.
func setupTestClient() (*TestClient, error) {
	client, err := NewTestClient()
	if err != nil {
		return nil, err
	}
	if openErr := client.Open(); openErr != nil {
		return nil, openErr
	}
	return client, nil
}
// TestAlertRuleStoreAdd ensures alert rules can be added to the AlertsStore
// and that Add returns a rule identical to the one passed in, both with and
// without a Details value.
func TestAlertRuleStoreAdd(t *testing.T) {
	c, err := setupTestClient()
	if err != nil {
		t.Fatal(err)
	}
	defer c.Close()

	s := c.AlertsStore
	alerts := []chronograf.AlertRule{
		{
			ID: "one",
		},
		{
			ID:      "two",
			Details: "howdy",
		},
	}

	// Add new alerts.
	ctx := context.Background()
	for _, a := range alerts {
		// Adding should return an identical copy.
		actual, err := s.Add(ctx, 0, 0, a)
		if err != nil {
			// Stop here: comparing the result of a failed Add would only
			// produce a second, misleading failure.
			t.Fatalf("erroring adding alert to store: %v", err)
		}
		if !reflect.DeepEqual(actual, a) {
			t.Fatalf("alert returned is different then alert saved; actual: %v, expected %v", actual, a)
		}
	}
}
// setupWithRule opens a test client via setupTestClient and stores alert in
// its AlertsStore. On success the caller owns the returned client and is
// responsible for closing it. If the rule cannot be added, the client is
// closed before the error is returned so a failed setup does not leak it.
func setupWithRule(ctx context.Context, alert chronograf.AlertRule) (*TestClient, error) {
	c, err := setupTestClient()
	if err != nil {
		return nil, err
	}
	// Add test alert
	if _, err := c.AlertsStore.Add(ctx, 0, 0, alert); err != nil {
		// Best-effort cleanup: the caller receives nil and cannot close the
		// client itself. The Add error is the one worth reporting.
		c.Close()
		return nil, err
	}
	return c, nil
}
// TestAlertRuleStoreGet ensures an alert rule that was added to the store can
// be fetched by its ID and is identical to what was saved.
func TestAlertRuleStoreGet(t *testing.T) {
	ctx := context.Background()
	expected := chronograf.AlertRule{ID: "one"}

	client, err := setupWithRule(ctx, expected)
	if err != nil {
		t.Fatalf("Error adding test alert to store: %v", err)
	}
	defer client.Close()

	got, err := client.AlertsStore.Get(ctx, 0, 0, "one")
	if err != nil {
		t.Fatalf("Error loading rule from store: %v", err)
	}

	if same := reflect.DeepEqual(got, expected); !same {
		t.Fatalf("alert returned is different then alert saved; actual: %v, expected %v", got, expected)
	}
}
// TestAlertRuleStoreGetDetail ensures an alert rule carrying a Details value
// can be fetched by its ID and comes back with that value intact.
func TestAlertRuleStoreGetDetail(t *testing.T) {
	ctx := context.Background()

	var want chronograf.AlertRule
	want.ID = "one"
	want.Details = "my details"

	store, err := setupWithRule(ctx, want)
	if err != nil {
		t.Fatalf("Error adding test alert to store: %v", err)
	}
	defer store.Close()

	have, err := store.AlertsStore.Get(ctx, 0, 0, "one")
	if err != nil {
		t.Fatalf("Error loading rule from store: %v", err)
	}

	if reflect.DeepEqual(have, want) {
		return
	}
	t.Fatalf("alert returned is different then alert saved; actual: %v, expected %v", have, want)
}

View File

@ -110,6 +110,7 @@ type AlertRule struct {
Every string `json:"every"` // Every how often to check for the alerting criteria
Alerts []string `json:"alerts"` // AlertServices name all the services to notify (e.g. pagerduty)
Message string `json:"message"` // Message included with alert
Details string `json:"details"` // Details is generally used for the Email alert. If empty will not be added.
Trigger string `json:"trigger"` // Trigger is a type that defines when to trigger the alert
TriggerValues TriggerValues `json:"values"` // Defines the values that cause the alert to trigger
Name string `json:"name"` // Name is the user-defined name for the alert
@ -238,13 +239,13 @@ type Dashboard struct {
// DashboardCell holds visual and query information for a cell
type DashboardCell struct {
X int32 `json:"x"`
Y int32 `json:"y"`
W int32 `json:"w"`
H int32 `json:"h"`
Name string `json:"name"`
X int32 `json:"x"`
Y int32 `json:"y"`
W int32 `json:"w"`
H int32 `json:"h"`
Name string `json:"name"`
Queries []Query `json:"queries"`
Type string `json:"type"`
Type string `json:"type"`
}
// DashboardsStore is the storage and retrieval of dashboards

View File

@ -199,6 +199,154 @@ trigger
}
}
// TestThresholdDetail checks that generating a TICKscript for a threshold
// alert rule with a non-empty Details field produces a script that declares
// the details variable and attaches .details(details) to the alert node.
// The generated script must match the expected text exactly.
func TestThresholdDetail(t *testing.T) {
// Threshold rule over mean(usage_user) with three alert services; the
// Details field is the part this test is about.
alert := chronograf.AlertRule{
Name: "name",
Trigger: "threshold",
Alerts: []string{"slack", "victorops", "email"},
TriggerValues: chronograf.TriggerValues{
Operator: "greater than",
Value: "90",
},
Every: "30s",
Message: "message",
// Non-empty, so the expected script below contains both
// `var details = 'details'` and `.details(details)`.
Details: "details",
Query: chronograf.QueryConfig{
Database: "telegraf",
Measurement: "cpu",
RetentionPolicy: "autogen",
Fields: []chronograf.Field{
{
Field: "usage_user",
Funcs: []string{"mean"},
},
},
Tags: map[string][]string{
"host": []string{
"acc-0eabc309-eu-west-1-data-3",
"prod",
},
"cpu": []string{
"cpu_total",
},
},
GroupBy: chronograf.GroupBy{
Time: "10m",
Tags: []string{"host", "cluster_id"},
},
AreTagsAccepted: true,
RawText: "",
},
}
// Table of cases: each pairs an input rule with the exact script expected
// from Generate and whether an error is expected.
tests := []struct {
name string
alert chronograf.AlertRule
want chronograf.TICKScript
wantErr bool
}{
{
name: "Test valid template alert",
alert: alert,
want: `var db = 'telegraf'
var rp = 'autogen'
var measurement = 'cpu'
var groupBy = ['host', 'cluster_id']
var whereFilter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod')
var period = 10m
var every = 30s
var name = 'name'
var idVar = name + ':{{.Group}}'
var message = 'message'
var idTag = 'alertID'
var levelTag = 'level'
var messageField = 'message'
var durationField = 'duration'
var outputDB = 'chronograf'
var outputRP = 'autogen'
var outputMeasurement = 'alerts'
var triggerType = 'threshold'
var details = 'details'
var crit = 90
var data = stream
|from()
.database(db)
.retentionPolicy(rp)
.measurement(measurement)
.groupBy(groupBy)
.where(whereFilter)
|window()
.period(period)
.every(every)
.align()
|mean('usage_user')
.as('value')
var trigger = data
|alert()
.crit(lambda: "value" > crit)
.stateChangesOnly()
.message(message)
.id(idVar)
.idTag(idTag)
.levelTag(levelTag)
.messageField(messageField)
.durationField(durationField)
.details(details)
.slack()
.victorOps()
.email()
trigger
|influxDBOut()
.create()
.database(outputDB)
.retentionPolicy(outputRP)
.measurement(outputMeasurement)
.tag('alertName', name)
.tag('triggerType', triggerType)
trigger
|httpOut('output')
`,
wantErr: false,
},
}
for _, tt := range tests {
gen := Alert{}
got, err := gen.Generate(tt.alert)
// Fail the case when the presence of an error differs from wantErr, and
// skip the script comparison for it.
if (err != nil) != tt.wantErr {
t.Errorf("%q. Threshold() error = %v, wantErr %v", tt.name, err, tt.wantErr)
continue
}
if got != tt.want {
// Report a rendered diff of expected vs. generated script rather than
// dumping both scripts in full.
diff := diffmatchpatch.New()
delta := diff.DiffMain(string(tt.want), string(got), true)
t.Errorf("%q\n%s", tt.name, diff.DiffPrettyText(delta))
}
}
}
func TestThresholdInsideRange(t *testing.T) {
alert := chronograf.AlertRule{
Name: "name",

View File

@ -27,13 +27,19 @@ var AllAlerts = `
.durationField(durationField)
`
// ThresholdTrigger is the trickscript trigger for alerts that exceed a value
// Details is the TICKscript fragment that sets the details property on the
// alert node from the `details` script variable. It is appended to the
// trigger only for rules whose Details string is non-empty.
var Details = `
.details(details)
`
// ThresholdTrigger is the tickscript trigger for alerts that exceed a value.
// It is a format template: the %s verb sits between "value" and crit in the
// crit lambda.
// NOTE(review): presumably %s is filled with the comparison operator (e.g. >)
// by the code that formats this template — confirm against the caller.
var ThresholdTrigger = `
var trigger = data
|alert()
.crit(lambda: "value" %s crit)
`
// ThresholdRangeTrigger is the alert when data does not intersect the range.
var ThresholdRangeTrigger = `
var trigger = data
|alert()
@ -102,7 +108,11 @@ func Trigger(rule chronograf.AlertRule) (string, error) {
return "", err
}
return trigger + AllAlerts, nil
trigger += AllAlerts
if rule.Details != "" {
trigger += Details
}
return trigger, nil
}
func relativeTrigger(rule chronograf.AlertRule) (string, error) {
@ -132,7 +142,7 @@ func thresholdRangeTrigger(rule chronograf.AlertRule) (string, error) {
if err != nil {
return "", err
}
var iops []interface{} = make([]interface{}, len(ops))
var iops = make([]interface{}, len(ops))
for i, o := range ops {
iops[i] = o
}

View File

@ -100,7 +100,7 @@ func commonVars(rule chronograf.AlertRule) (string, error) {
var outputMeasurement = '%s'
var triggerType = '%s'
`
return fmt.Sprintf(common,
res := fmt.Sprintf(common,
rule.Query.Database,
rule.Query.RetentionPolicy,
rule.Query.Measurement,
@ -117,7 +117,14 @@ func commonVars(rule chronograf.AlertRule) (string, error) {
RP,
Measurement,
rule.Trigger,
), nil
)
if rule.Details != "" {
res += fmt.Sprintf(`
var details = '%s'
`, rule.Details)
}
return res, nil
}
// window is only used if deadman or threshold/relative with aggregate. Will return empty

View File

@ -1958,6 +1958,10 @@
"type": "string",
"description": "Message to send when alert occurs."
},
"details": {
"type": "string",
"description": "Template for constructing a detailed HTML message for the alert. (Currently, only used for email/smtp"
},
"trigger": {
"type": "string",
"description": "Trigger defines the alerting structure; deadman alert if no data are received for the specified time range; relative alert if the data change relative to the data in a different time range; threshold alert if the data cross a boundary",