add kapacitor range alerting to API
parent
ca377f87d2
commit
2f39cb771c
|
@ -139,11 +139,13 @@ type Ticker interface {
|
||||||
|
|
||||||
// TriggerValues specifies the alerting logic for a specific trigger type
|
// TriggerValues specifies the alerting logic for a specific trigger type
|
||||||
type TriggerValues struct {
|
type TriggerValues struct {
|
||||||
Change string `json:"change,omitempty"` // Change specifies if the change is a percent or absolute
|
Change string `json:"change,omitempty"` // Change specifies if the change is a percent or absolute
|
||||||
Period string `json:"period,omitempty"` // Period is the length of time before deadman is alerted
|
Period string `json:"period,omitempty"` // Period is the length of time before deadman is alerted
|
||||||
Shift string `json:"shift,omitempty"` // Shift is the amount of time to look into the past for the alert to compare to the present
|
Shift string `json:"shift,omitempty"` // Shift is the amount of time to look into the past for the alert to compare to the present
|
||||||
Operator string `json:"operator,omitempty"` // Operator for alert comparison
|
Operator string `json:"operator,omitempty"` // Operator for alert comparison
|
||||||
Value string `json:"value,omitempty"` // Value is the boundary value when alert goes critical
|
RangeOperator string `json:"range_operator,omitempty"` // RangeOperator is an optional operator for range comparisons; the range form of a threshold alert is only used when both RangeOperator and RangeValue are non-empty
|
||||||
|
Value string `json:"value,omitempty"` // Value is the boundary value when alert goes critical
|
||||||
|
RangeValue string `json:"range_value,omitempty"` // RangeValue is an optional value for range comparisons; it is compared using RangeOperator
|
||||||
}
|
}
|
||||||
|
|
||||||
// Field represents influxql fields and functions from the UI
|
// Field represents influxql fields and functions from the UI
|
||||||
|
@ -247,7 +249,7 @@ type DashboardCell struct {
|
||||||
|
|
||||||
// DashboardsStore is the storage and retrieval of dashboards
|
// DashboardsStore is the storage and retrieval of dashboards
|
||||||
type DashboardsStore interface {
|
type DashboardsStore interface {
|
||||||
// All lists all dashboards from the DashboardsStore
|
// All lists all dashboards from the DashboardsStore
|
||||||
All(context.Context) ([]Dashboard, error)
|
All(context.Context) ([]Dashboard, error)
|
||||||
// Add creates a new Dashboard in the DashboardsStore
|
// Add creates a new Dashboard in the DashboardsStore
|
||||||
Add(context.Context, Dashboard) (Dashboard, error)
|
Add(context.Context, Dashboard) (Dashboard, error)
|
||||||
|
|
|
@ -199,6 +199,154 @@ trigger
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestThresholdRange checks that Alert.Generate turns a "threshold" rule that
// sets both RangeOperator and RangeValue into the expected TICKscript: the
// script declares lower and upper vars and its crit lambda compares "value"
// against both of them.
func TestThresholdRange(t *testing.T) {
|
||||||
|
alert := chronograf.AlertRule{
|
||||||
|
Name: "name",
|
||||||
|
Trigger: "threshold",
|
||||||
|
Alerts: []string{"slack", "victorops", "email"},
|
||||||
|
TriggerValues: chronograf.TriggerValues{
|
||||||
|
Operator: "greater than",
|
||||||
|
Value: "90",
|
||||||
|
RangeOperator: "less than",
|
||||||
|
RangeValue: "100",
|
||||||
|
},
|
||||||
|
Every: "30s",
|
||||||
|
Message: "message",
|
||||||
|
Query: chronograf.QueryConfig{
|
||||||
|
Database: "telegraf",
|
||||||
|
Measurement: "cpu",
|
||||||
|
RetentionPolicy: "autogen",
|
||||||
|
Fields: []chronograf.Field{
|
||||||
|
{
|
||||||
|
Field: "usage_user",
|
||||||
|
Funcs: []string{"mean"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Tags: map[string][]string{
|
||||||
|
"host": []string{
|
||||||
|
"acc-0eabc309-eu-west-1-data-3",
|
||||||
|
"prod",
|
||||||
|
},
|
||||||
|
"cpu": []string{
|
||||||
|
"cpu_total",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
GroupBy: chronograf.GroupBy{
|
||||||
|
Time: "10m",
|
||||||
|
Tags: []string{"host", "cluster_id"},
|
||||||
|
},
|
||||||
|
AreTagsAccepted: true,
|
||||||
|
RawText: "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|

||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
alert chronograf.AlertRule
|
||||||
|
want chronograf.TICKScript
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Test valid template alert",
|
||||||
|
alert: alert,
|
||||||
|
want: `var db = 'telegraf'
|
||||||
|

||||||
|
var rp = 'autogen'
|
||||||
|

||||||
|
var measurement = 'cpu'
|
||||||
|

||||||
|
var groupBy = ['host', 'cluster_id']
|
||||||
|

||||||
|
var whereFilter = lambda: ("cpu" == 'cpu_total') AND ("host" == 'acc-0eabc309-eu-west-1-data-3' OR "host" == 'prod')
|
||||||
|

||||||
|
var period = 10m
|
||||||
|

||||||
|
var every = 30s
|
||||||
|

||||||
|
var name = 'name'
|
||||||
|

||||||
|
var idVar = name + ':{{.Group}}'
|
||||||
|

||||||
|
var message = 'message'
|
||||||
|

||||||
|
var idTag = 'alertID'
|
||||||
|

||||||
|
var levelTag = 'level'
|
||||||
|

||||||
|
var messageField = 'message'
|
||||||
|

||||||
|
var durationField = 'duration'
|
||||||
|

||||||
|
var outputDB = 'chronograf'
|
||||||
|

||||||
|
var outputRP = 'autogen'
|
||||||
|

||||||
|
var outputMeasurement = 'alerts'
|
||||||
|

||||||
|
var triggerType = 'threshold'
|
||||||
|

||||||
|
var lower = 90
|
||||||
|

||||||
|
var upper = 100
|
||||||
|

||||||
|
var data = stream
|
||||||
|
|from()
|
||||||
|
.database(db)
|
||||||
|
.retentionPolicy(rp)
|
||||||
|
.measurement(measurement)
|
||||||
|
.groupBy(groupBy)
|
||||||
|
.where(whereFilter)
|
||||||
|
|window()
|
||||||
|
.period(period)
|
||||||
|
.every(every)
|
||||||
|
.align()
|
||||||
|
|mean('usage_user')
|
||||||
|
.as('value')
|
||||||
|

||||||
|
var trigger = data
|
||||||
|
|alert()
|
||||||
|
.crit(lambda: "value" > lower AND "value" < upper)
|
||||||
|
.stateChangesOnly()
|
||||||
|
.message(message)
|
||||||
|
.id(idVar)
|
||||||
|
.idTag(idTag)
|
||||||
|
.levelTag(levelTag)
|
||||||
|
.messageField(messageField)
|
||||||
|
.durationField(durationField)
|
||||||
|
.slack()
|
||||||
|
.victorOps()
|
||||||
|
.email()
|
||||||
|

||||||
|
trigger
|
||||||
|
|influxDBOut()
|
||||||
|
.create()
|
||||||
|
.database(outputDB)
|
||||||
|
.retentionPolicy(outputRP)
|
||||||
|
.measurement(outputMeasurement)
|
||||||
|
.tag('alertName', name)
|
||||||
|
.tag('triggerType', triggerType)
|
||||||
|

||||||
|
trigger
|
||||||
|
|httpOut('output')
|
||||||
|
`,
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
gen := Alert{}
|
||||||
|
got, err := gen.Generate(tt.alert)
|
||||||
|
// An error/no-error mismatch is reported and the script comparison is skipped for this case.
if (err != nil) != tt.wantErr {
|
||||||
|
t.Errorf("%q. Threshold() error = %v, wantErr %v", tt.name, err, tt.wantErr)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// On a script mismatch, report a pretty-printed diff of wanted versus generated text.
if got != tt.want {
|
||||||
|
diff := diffmatchpatch.New()
|
||||||
|
delta := diff.DiffMain(string(tt.want), string(got), true)
|
||||||
|
t.Errorf("%q\n%s", tt.name, diff.DiffPrettyText(delta))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestThresholdNoAggregate(t *testing.T) {
|
func TestThresholdNoAggregate(t *testing.T) {
|
||||||
alert := chronograf.AlertRule{
|
alert := chronograf.AlertRule{
|
||||||
Name: "name",
|
Name: "name",
|
||||||
|
|
|
@ -34,6 +34,12 @@ var ThresholdTrigger = `
|
||||||
.crit(lambda: "value" %s crit)
|
.crit(lambda: "value" %s crit)
|
||||||
`
|
`
|
||||||
|
|
||||||
|
// ThresholdRangeTrigger is the alert template for a threshold rule with a range:
// it goes critical when "value" satisfies both comparisons, the first against
// lower and the second against upper. The two %s verbs receive the comparison
// operators, in that order.
var ThresholdRangeTrigger = `
|
||||||
|
var trigger = data
|
||||||
|
|alert()
|
||||||
|
.crit(lambda: "value" %s lower AND "value" %s upper)
|
||||||
|
`
|
||||||
|
|
||||||
// RelativeAbsoluteTrigger compares one window of data versus another (current - past)
|
// RelativeAbsoluteTrigger compares one window of data versus another (current - past)
|
||||||
var RelativeAbsoluteTrigger = `
|
var RelativeAbsoluteTrigger = `
|
||||||
var past = data
|
var past = data
|
||||||
|
@ -83,7 +89,11 @@ func Trigger(rule chronograf.AlertRule) (string, error) {
|
||||||
case Relative:
|
case Relative:
|
||||||
trigger, err = relativeTrigger(rule)
|
trigger, err = relativeTrigger(rule)
|
||||||
case Threshold:
|
case Threshold:
|
||||||
trigger, err = thresholdTrigger(rule)
|
if rule.TriggerValues.RangeOperator == "" || rule.TriggerValues.RangeValue == "" {
|
||||||
|
trigger, err = thresholdTrigger(rule)
|
||||||
|
} else {
|
||||||
|
trigger, err = thresholdRangeTrigger(rule)
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
trigger, err = "", fmt.Errorf("Unknown trigger type: %s", rule.Trigger)
|
trigger, err = "", fmt.Errorf("Unknown trigger type: %s", rule.Trigger)
|
||||||
}
|
}
|
||||||
|
@ -116,3 +126,15 @@ func thresholdTrigger(rule chronograf.AlertRule) (string, error) {
|
||||||
}
|
}
|
||||||
return fmt.Sprintf(ThresholdTrigger, op), nil
|
return fmt.Sprintf(ThresholdTrigger, op), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// thresholdRangeTrigger renders the ThresholdRangeTrigger template for rule.
// Both rule.TriggerValues.Operator and rule.TriggerValues.RangeOperator are
// translated with kapaOperator; if either translation fails, that error is
// returned unchanged together with an empty script.
// NOTE(review): the template always joins the two comparisons with AND, so an
// operator pair meant to express "outside the range" could presumably never
// match — confirm this is intended.
func thresholdRangeTrigger(rule chronograf.AlertRule) (string, error) {
|
||||||
|
op, err := kapaOperator(rule.TriggerValues.Operator)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
rangeOp, err := kapaOperator(rule.TriggerValues.RangeOperator)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
// op fills the comparison against lower, rangeOp the comparison against upper.
return fmt.Sprintf(ThresholdRangeTrigger, op, rangeOp), nil
|
||||||
|
}
|
||||||
|
|
|
@ -34,13 +34,25 @@ func Vars(rule chronograf.AlertRule) (string, error) {
|
||||||
|
|
||||||
switch rule.Trigger {
|
switch rule.Trigger {
|
||||||
case Threshold:
|
case Threshold:
|
||||||
vars := `
|
if rule.TriggerValues.RangeOperator == "" || rule.TriggerValues.RangeValue == "" {
|
||||||
|
vars := `
|
||||||
%s
|
%s
|
||||||
var crit = %s
|
var crit = %s
|
||||||
`
|
`
|
||||||
return fmt.Sprintf(vars,
|
return fmt.Sprintf(vars,
|
||||||
common,
|
common,
|
||||||
rule.TriggerValues.Value), nil
|
rule.TriggerValues.Value), nil
|
||||||
|
} else {
|
||||||
|
vars := `
|
||||||
|
%s
|
||||||
|
var lower = %s
|
||||||
|
var upper = %s
|
||||||
|
`
|
||||||
|
return fmt.Sprintf(vars,
|
||||||
|
common,
|
||||||
|
rule.TriggerValues.Value,
|
||||||
|
rule.TriggerValues.RangeValue), nil
|
||||||
|
}
|
||||||
case Relative:
|
case Relative:
|
||||||
vars := `
|
vars := `
|
||||||
%s
|
%s
|
||||||
|
|
Loading…
Reference in New Issue