diff --git a/query/docs/SPEC.md b/query/docs/SPEC.md index 4640f2ebfa..3d89088095 100644 --- a/query/docs/SPEC.md +++ b/query/docs/SPEC.md @@ -1404,6 +1404,41 @@ from(bucket:"telegraf/autogen") r.service == "app-server") ``` +#### Histogram + +Histogram approximates the cumulative distribution function of a dataset by counting data frequencies for a list of buckets. +A bucket is defined by an upper bound where all data points that are less than or equal to the bound are counted in the bucket. +The bucket counts are cumulative. + +Each input table is converted into a single output table representing a single histogram. +The output table will have a the same group key as the input table. +The columns not part of the group key will be removed and an upper bound column and a count column will be added. + +Histogram has the following properties: + +* `column` string + Column is the name of a column containing the input data values. + The column type must be float. + Defaults to `_value`. +* `upperBoundColumn` string + UpperBoundColumn is the name of the column in which to store the histogram upper bounds. + Defaults to `le`. +* `countColumn` string + CountColumn is the name of the column in which to store the histogram counts. + Defaults to `_value`. +* `buckets` array of floats + Buckets is a list of upper bounds to use when computing the histogram frequencies. + Buckets should contain a bucket whose bound is the maximum value of the data set, this value can be set to positive infinity if no maximum is known. +* `normalize` bool + Normalize when true will convert the counts into frequencies values between 0 and 1. + Normalized histograms cannot be aggregated by summing their counts. + Defaults to `false`. + + +Example: + + histogram(buckets:linearBuckets(start:0.0,width:10.0,count:10)) // compute the histogram of the data using 10 buckets from 0,10,20,...,100 + #### HistogramQuantile HistogramQuantile approximates a quantile given an histogram that approximates the cumulative distribution of the dataset. @@ -1432,6 +1467,7 @@ HistogramQuantile has the following properties: * `upperBoundColumn` string UpperBoundColumn is the name of the column containing the histogram upper bounds. The upper bound column type must be float. + Defaults to `le`. * `valueColumn` string ValueColumn is the name of the output column which will contain the computed quantile. Defaults to `_value`. @@ -1444,7 +1480,39 @@ HistogramQuantile has the following properties: Example: - histogramQuantile(quantile:0.9, upperBoundColumn:"le") // compute the 90th quantile using histogram data. + histogramQuantile(quantile:0.9) // compute the 90th quantile using histogram data. + +#### LinearBuckets + +LinearBuckets produces a list of linearly separated floats. + +LinearBuckets has the following properties: + +* `start` float + Start is the first value in the returned list. +* `width` float + Width is the distance between subsequent bucket values. +* `count` int + Count is the number of buckets to create. +* `inifinity` bool + Infinity when true adds an additional bucket with a value of positive infinity. + Defaults to `true`. + +#### LogrithmicBuckets + +LogrithmicBuckets produces a list of exponentially separated floats. + +LogrithmicBuckets has the following properties: + +* `start` float + Start is the first value in the returned bucket list. +* `factor` float + Factor is the multiplier applied to each subsequent bucket. +* `count` int + Count is the number of buckets to create. +* `inifinity` bool + Infinity when true adds an additional bucket with a value of positive infinity. + Defaults to `true`. #### Limit diff --git a/query/functions/histogram.go b/query/functions/histogram.go new file mode 100644 index 0000000000..1146fe75a8 --- /dev/null +++ b/query/functions/histogram.go @@ -0,0 +1,464 @@ +package functions + +import ( + "fmt" + "math" + "regexp" + "sort" + + "github.com/influxdata/platform/query" + "github.com/influxdata/platform/query/execute" + "github.com/influxdata/platform/query/interpreter" + "github.com/influxdata/platform/query/plan" + "github.com/influxdata/platform/query/semantic" + "github.com/influxdata/platform/query/values" + "github.com/pkg/errors" +) + +const HistogramKind = "histogram" + +type HistogramOpSpec struct { + Column string `json:"column"` + UpperBoundColumn string `json:"upperBoundColumn"` + CountColumn string `json:"countColumn"` + Buckets []float64 `json:"buckets"` + Normalize bool `json:"normalize"` +} + +var histogramSignature = execute.DefaultAggregateSignature() + +func init() { + histogramSignature.Params["column"] = semantic.String + histogramSignature.Params["upperBoundColumn"] = semantic.String + histogramSignature.Params["buckets"] = semantic.NewArrayType(semantic.Float) + histogramSignature.Params["normalize"] = semantic.Bool + + query.RegisterFunction(HistogramKind, createHistogramOpSpec, histogramSignature) + query.RegisterBuiltInValue("linearBuckets", linearBuckets{}) + query.RegisterBuiltInValue("logarithmicBuckets", logarithmicBuckets{}) + query.RegisterOpSpec(HistogramKind, newHistogramOp) + plan.RegisterProcedureSpec(HistogramKind, newHistogramProcedure, HistogramKind) + execute.RegisterTransformation(HistogramKind, createHistogramTransformation) +} + +func createHistogramOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) { + if err := a.AddParentFromArgs(args); err != nil { + return nil, err + } + + spec := new(HistogramOpSpec) + + if col, ok, err := args.GetString("column"); err != nil { + return nil, err + } else if ok { + spec.Column = col + } else { + spec.Column = execute.DefaultValueColLabel + } + if col, ok, err := args.GetString("upperBoundColumn"); err != nil { + return nil, err + } else if ok { + spec.UpperBoundColumn = col + } else { + spec.UpperBoundColumn = DefaultUpperBoundColumnLabel + } + if col, ok, err := args.GetString("countColumn"); err != nil { + return nil, err + } else if ok { + spec.CountColumn = col + } else { + spec.CountColumn = execute.DefaultValueColLabel + } + bucketsArry, err := args.GetRequiredArray("buckets", semantic.Float) + if err != nil { + return nil, err + } + spec.Buckets, err = interpreter.ToFloatArray(bucketsArry) + if err != nil { + return nil, err + } + if normalize, ok, err := args.GetBool("normalize"); err != nil { + return nil, err + } else if ok { + spec.Normalize = normalize + } + + return spec, nil +} + +func newHistogramOp() query.OperationSpec { + return new(HistogramOpSpec) +} + +func (s *HistogramOpSpec) Kind() query.OperationKind { + return HistogramKind +} + +type HistogramProcedureSpec struct { + HistogramOpSpec +} + +func newHistogramProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) { + spec, ok := qs.(*HistogramOpSpec) + if !ok { + return nil, fmt.Errorf("invalid spec type %T", qs) + } + + return &HistogramProcedureSpec{ + HistogramOpSpec: *spec, + }, nil +} + +func (s *HistogramProcedureSpec) Kind() plan.ProcedureKind { + return HistogramKind +} +func (s *HistogramProcedureSpec) Copy() plan.ProcedureSpec { + ns := new(HistogramProcedureSpec) + *ns = *s + if len(s.Buckets) > 0 { + ns.Buckets = make([]float64, len(s.Buckets)) + copy(ns.Buckets, s.Buckets) + } + return ns +} + +func createHistogramTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) { + s, ok := spec.(*HistogramProcedureSpec) + if !ok { + return nil, nil, fmt.Errorf("invalid spec type %T", spec) + } + cache := execute.NewTableBuilderCache(a.Allocator()) + d := execute.NewDataset(id, mode, cache) + t := NewHistogramTransformation(d, cache, s) + return t, d, nil +} + +type histogramTransformation struct { + d execute.Dataset + cache execute.TableBuilderCache + + spec HistogramProcedureSpec +} + +func NewHistogramTransformation(d execute.Dataset, cache execute.TableBuilderCache, spec *HistogramProcedureSpec) *histogramTransformation { + sort.Float64s(spec.Buckets) + return &histogramTransformation{ + d: d, + cache: cache, + spec: *spec, + } +} + +func (t *histogramTransformation) RetractTable(id execute.DatasetID, key query.GroupKey) error { + return t.d.RetractTable(key) +} + +func (t *histogramTransformation) Process(id execute.DatasetID, tbl query.Table) error { + builder, created := t.cache.TableBuilder(tbl.Key()) + if !created { + return fmt.Errorf("histogram found duplicate table with key: %v", tbl.Key()) + } + valueIdx := execute.ColIdx(t.spec.Column, tbl.Cols()) + if valueIdx < 0 { + return fmt.Errorf("column %q is missing", t.spec.Column) + } + if col := tbl.Cols()[valueIdx]; col.Type != query.TFloat { + return fmt.Errorf("column %q must be a float got %v", t.spec.Column, col.Type) + } + + execute.AddTableKeyCols(tbl.Key(), builder) + boundIdx := builder.AddCol(query.ColMeta{ + Label: t.spec.UpperBoundColumn, + Type: query.TFloat, + }) + countIdx := builder.AddCol(query.ColMeta{ + Label: t.spec.CountColumn, + Type: query.TFloat, + }) + totalRows := 0.0 + counts := make([]float64, len(t.spec.Buckets)) + err := tbl.Do(func(cr query.ColReader) error { + totalRows += float64(cr.Len()) + for _, v := range cr.Floats(valueIdx) { + idx := sort.Search(len(t.spec.Buckets), func(i int) bool { + return v <= t.spec.Buckets[i] + }) + if idx >= len(t.spec.Buckets) { + // Greater than highest bucket, or not found + return fmt.Errorf("found value greater than any bucket, %d %d %f %f", idx, len(t.spec.Buckets), v, t.spec.Buckets[len(t.spec.Buckets)-1]) + } + // Increment counter + counts[idx]++ + } + return nil + }) + if err != nil { + return err + } + + // Add records making counts cumulative + total := 0.0 + for i, v := range counts { + execute.AppendKeyValues(tbl.Key(), builder) + count := v + total + if t.spec.Normalize { + count /= totalRows + } + builder.AppendFloat(countIdx, count) + builder.AppendFloat(boundIdx, t.spec.Buckets[i]) + total += v + } + return nil +} + +func (t *histogramTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error { + return t.d.UpdateWatermark(mark) +} +func (t *histogramTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error { + return t.d.UpdateProcessingTime(pt) +} +func (t *histogramTransformation) Finish(id execute.DatasetID, err error) { + t.d.Finish(err) +} + +// linearBuckets is a helper function for creating buckets spaced linearly +type linearBuckets struct{} + +func (b linearBuckets) Type() semantic.Type { + return semantic.NewFunctionType(semantic.FunctionSignature{ + Params: map[string]semantic.Type{ + "start": semantic.Float, + "width": semantic.Float, + "count": semantic.Int, + "infinity": semantic.Bool, + }, + ReturnType: semantic.String, + }) +} + +func (b linearBuckets) Str() string { + panic(values.UnexpectedKind(semantic.String, semantic.Function)) +} + +func (b linearBuckets) Int() int64 { + panic(values.UnexpectedKind(semantic.Int, semantic.Function)) +} + +func (b linearBuckets) UInt() uint64 { + panic(values.UnexpectedKind(semantic.UInt, semantic.Function)) +} + +func (b linearBuckets) Float() float64 { + panic(values.UnexpectedKind(semantic.Float, semantic.Function)) +} + +func (b linearBuckets) Bool() bool { + panic(values.UnexpectedKind(semantic.Bool, semantic.Function)) +} + +func (b linearBuckets) Time() values.Time { + panic(values.UnexpectedKind(semantic.Time, semantic.Function)) +} + +func (b linearBuckets) Duration() values.Duration { + panic(values.UnexpectedKind(semantic.Duration, semantic.Function)) +} + +func (b linearBuckets) Regexp() *regexp.Regexp { + panic(values.UnexpectedKind(semantic.Regexp, semantic.Function)) +} + +func (b linearBuckets) Array() values.Array { + panic(values.UnexpectedKind(semantic.Array, semantic.Function)) +} + +func (b linearBuckets) Object() values.Object { + panic(values.UnexpectedKind(semantic.Object, semantic.Function)) +} + +func (b linearBuckets) Function() values.Function { + return b +} + +func (b linearBuckets) Equal(rhs values.Value) bool { + if b.Type() != rhs.Type() { + return false + } + _, ok := rhs.(linearBuckets) + return ok +} + +func (b linearBuckets) HasSideEffect() bool { + return false +} + +func (b linearBuckets) Call(args values.Object) (values.Value, error) { + startV, ok := args.Get("start") + if !ok { + return nil, errors.New("start is required") + } + if startV.Type() != semantic.Float { + return nil, errors.New("start must be a float") + } + widthV, ok := args.Get("width") + if !ok { + return nil, errors.New("width is required") + } + if widthV.Type() != semantic.Float { + return nil, errors.New("width must be a float") + } + countV, ok := args.Get("count") + if !ok { + return nil, errors.New("count is required") + } + if countV.Type() != semantic.Int { + return nil, errors.New("count must be an int") + } + infV, ok := args.Get("infinity") + if !ok { + infV = values.NewBoolValue(true) + } + if infV.Type() != semantic.Bool { + return nil, errors.New("infinity must be a bool") + } + start := startV.Float() + width := widthV.Float() + count := countV.Int() + inf := infV.Bool() + l := int(count) + if inf { + l++ + } + elements := make([]values.Value, l) + bound := start + for i := 0; i < l; i++ { + elements[i] = values.NewFloatValue(bound) + bound += width + } + if inf { + elements[l-1] = values.NewFloatValue(math.Inf(1)) + } + counts := values.NewArrayWithBacking(semantic.Float, elements) + return counts, nil +} + +// logarithmicBuckets is a helper function for creating buckets spaced by an logarithmic factor. +type logarithmicBuckets struct{} + +func (b logarithmicBuckets) Type() semantic.Type { + return semantic.NewFunctionType(semantic.FunctionSignature{ + Params: map[string]semantic.Type{ + "start": semantic.Float, + "factor": semantic.Float, + "count": semantic.Int, + "infinity": semantic.Bool, + }, + ReturnType: semantic.String, + }) +} + +func (b logarithmicBuckets) Str() string { + panic(values.UnexpectedKind(semantic.String, semantic.Function)) +} + +func (b logarithmicBuckets) Int() int64 { + panic(values.UnexpectedKind(semantic.Int, semantic.Function)) +} + +func (b logarithmicBuckets) UInt() uint64 { + panic(values.UnexpectedKind(semantic.UInt, semantic.Function)) +} + +func (b logarithmicBuckets) Float() float64 { + panic(values.UnexpectedKind(semantic.Float, semantic.Function)) +} + +func (b logarithmicBuckets) Bool() bool { + panic(values.UnexpectedKind(semantic.Bool, semantic.Function)) +} + +func (b logarithmicBuckets) Time() values.Time { + panic(values.UnexpectedKind(semantic.Time, semantic.Function)) +} + +func (b logarithmicBuckets) Duration() values.Duration { + panic(values.UnexpectedKind(semantic.Duration, semantic.Function)) +} + +func (b logarithmicBuckets) Regexp() *regexp.Regexp { + panic(values.UnexpectedKind(semantic.Regexp, semantic.Function)) +} + +func (b logarithmicBuckets) Array() values.Array { + panic(values.UnexpectedKind(semantic.Array, semantic.Function)) +} + +func (b logarithmicBuckets) Object() values.Object { + panic(values.UnexpectedKind(semantic.Object, semantic.Function)) +} + +func (b logarithmicBuckets) Function() values.Function { + return b +} + +func (b logarithmicBuckets) Equal(rhs values.Value) bool { + if b.Type() != rhs.Type() { + return false + } + _, ok := rhs.(logarithmicBuckets) + return ok +} + +func (b logarithmicBuckets) HasSideEffect() bool { + return false +} + +func (b logarithmicBuckets) Call(args values.Object) (values.Value, error) { + startV, ok := args.Get("start") + if !ok { + return nil, errors.New("start is required") + } + if startV.Type() != semantic.Float { + return nil, errors.New("start must be a float") + } + factorV, ok := args.Get("factor") + if !ok { + return nil, errors.New("factor is required") + } + if factorV.Type() != semantic.Float { + return nil, errors.New("factor must be a float") + } + countV, ok := args.Get("count") + if !ok { + return nil, errors.New("count is required") + } + if countV.Type() != semantic.Int { + return nil, errors.New("count must be an int") + } + infV, ok := args.Get("infinity") + if !ok { + infV = values.NewBoolValue(true) + } + if infV.Type() != semantic.Bool { + return nil, errors.New("infinity must be a bool") + } + start := startV.Float() + factor := factorV.Float() + count := countV.Int() + inf := infV.Bool() + l := int(count) + if inf { + l++ + } + elements := make([]values.Value, l) + bound := start + for i := 0; i < l; i++ { + elements[i] = values.NewFloatValue(bound) + bound *= factor + } + if inf { + elements[l-1] = values.NewFloatValue(math.Inf(1)) + } + counts := values.NewArrayWithBacking(semantic.Float, elements) + return counts, nil +} diff --git a/query/functions/histogram_quantile.go b/query/functions/histogram_quantile.go index e41bc2ba50..4db1ab4a21 100644 --- a/query/functions/histogram_quantile.go +++ b/query/functions/histogram_quantile.go @@ -14,6 +14,8 @@ import ( const HistogramQuantileKind = "histogramQuantile" +const DefaultUpperBoundColumnLabel = "le" + type HistogramQuantileOpSpec struct { Quantile float64 `json:"quantile"` CountColumn string `json:"countColumn"` @@ -60,6 +62,8 @@ func createHistogramQuantileOpSpec(args query.Arguments, a *query.Administration return nil, err } else if ok { s.UpperBoundColumn = col + } else { + s.UpperBoundColumn = DefaultUpperBoundColumnLabel } if col, ok, err := args.GetString("valueColumn"); err != nil { diff --git a/query/functions/histogram_test.go b/query/functions/histogram_test.go new file mode 100644 index 0000000000..68a5fc09a3 --- /dev/null +++ b/query/functions/histogram_test.go @@ -0,0 +1,345 @@ +package functions_test + +import ( + "math" + "testing" + + "github.com/influxdata/platform/query" + "github.com/influxdata/platform/query/execute" + "github.com/influxdata/platform/query/execute/executetest" + "github.com/influxdata/platform/query/functions" + "github.com/influxdata/platform/query/querytest" +) + +func TestHistogramOperation_Marshaling(t *testing.T) { + data := []byte(`{"id":"histogram","kind":"histogram","spec":{"column":"_value"}}`) + op := &query.Operation{ + ID: "histogram", + Spec: &functions.HistogramOpSpec{ + Column: "_value", + }, + } + querytest.OperationMarshalingTestHelper(t, data, op) +} + +func TestHistogram_PassThrough(t *testing.T) { + executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.TableBuilderCache) execute.Transformation { + s := functions.NewHistogramTransformation( + d, + c, + &functions.HistogramProcedureSpec{}, + ) + return s + }) +} + +func TestHistogram_Process(t *testing.T) { + testCases := []struct { + name string + spec *functions.HistogramProcedureSpec + data []query.Table + want []*executetest.Table + }{ + { + name: "linear", + spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{ + Column: "_value", + UpperBoundColumn: "le", + CountColumn: "_value", + Buckets: []float64{0, 10, 20, 30, 40}, + }}, + data: []query.Table{&executetest.Table{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "_time", Type: query.TTime}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), execute.Time(1), 02.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 31.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 12.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 38.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 24.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 40.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 30.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 28.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 17.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 08.0}, + }, + }}, + want: []*executetest.Table{{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "le", Type: query.TFloat}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), 0.0, 0.0}, + {execute.Time(1), execute.Time(3), 10.0, 2.0}, + {execute.Time(1), execute.Time(3), 20.0, 4.0}, + {execute.Time(1), execute.Time(3), 30.0, 7.0}, + {execute.Time(1), execute.Time(3), 40.0, 10.0}, + }, + }}, + }, + { + name: "linear+infinity", + spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{ + Column: "_value", + UpperBoundColumn: "le", + CountColumn: "_value", + Buckets: []float64{0, 10, 20, 30, 40, math.Inf(1)}, + }}, + data: []query.Table{&executetest.Table{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "_time", Type: query.TTime}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), execute.Time(1), 02.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 31.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 12.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 38.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 24.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 40.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 30.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 28.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 17.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 08.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 68.0}, + }, + }}, + want: []*executetest.Table{{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "le", Type: query.TFloat}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), 0.0, 0.0}, + {execute.Time(1), execute.Time(3), 10.0, 2.0}, + {execute.Time(1), execute.Time(3), 20.0, 4.0}, + {execute.Time(1), execute.Time(3), 30.0, 7.0}, + {execute.Time(1), execute.Time(3), 40.0, 10.0}, + {execute.Time(1), execute.Time(3), math.Inf(1), 11.0}, + }, + }}, + }, + { + name: "linear+normalize", + spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{ + Column: "_value", + UpperBoundColumn: "le", + CountColumn: "_value", + Buckets: []float64{0, 10, 20, 30, 40}, + Normalize: true, + }}, + data: []query.Table{&executetest.Table{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "_time", Type: query.TTime}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), execute.Time(1), 02.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 31.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 12.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 38.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 24.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 40.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 30.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 28.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 17.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 08.0}, + }, + }}, + want: []*executetest.Table{{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "le", Type: query.TFloat}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), 0.0, 0.0}, + {execute.Time(1), execute.Time(3), 10.0, 0.2}, + {execute.Time(1), execute.Time(3), 20.0, 0.4}, + {execute.Time(1), execute.Time(3), 30.0, 0.7}, + {execute.Time(1), execute.Time(3), 40.0, 1.0}, + }, + }}, + }, + { + name: "logarithmic", + spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{ + Column: "_value", + UpperBoundColumn: "le", + CountColumn: "_value", + Buckets: []float64{1, 2, 4, 8, 16, 32, 64}, + }}, + data: []query.Table{&executetest.Table{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "_time", Type: query.TTime}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), execute.Time(1), 02.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 31.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 12.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 38.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 24.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 40.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 30.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 28.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 17.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 08.0}, + }, + }}, + want: []*executetest.Table{{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "le", Type: query.TFloat}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), 1.0, 0.0}, + {execute.Time(1), execute.Time(3), 2.0, 1.0}, + {execute.Time(1), execute.Time(3), 4.0, 1.0}, + {execute.Time(1), execute.Time(3), 8.0, 2.0}, + {execute.Time(1), execute.Time(3), 16.0, 3.0}, + {execute.Time(1), execute.Time(3), 32.0, 8.0}, + {execute.Time(1), execute.Time(3), 64.0, 10.0}, + }, + }}, + }, + { + name: "logarithmic unsorted", + spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{ + Column: "_value", + UpperBoundColumn: "le", + CountColumn: "_value", + Buckets: []float64{1, 64, 2, 4, 16, 8, 32}, + }}, + data: []query.Table{&executetest.Table{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "_time", Type: query.TTime}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), execute.Time(1), 02.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 31.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 12.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 38.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 24.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 40.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 30.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 28.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 17.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 08.0}, + }, + }}, + want: []*executetest.Table{{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "le", Type: query.TFloat}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), 1.0, 0.0}, + {execute.Time(1), execute.Time(3), 2.0, 1.0}, + {execute.Time(1), execute.Time(3), 4.0, 1.0}, + {execute.Time(1), execute.Time(3), 8.0, 2.0}, + {execute.Time(1), execute.Time(3), 16.0, 3.0}, + {execute.Time(1), execute.Time(3), 32.0, 8.0}, + {execute.Time(1), execute.Time(3), 64.0, 10.0}, + }, + }}, + }, + { + name: "fibonacci", + spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{ + Column: "_value", + UpperBoundColumn: "le", + CountColumn: "_value", + Buckets: []float64{1, 2, 3, 5, 8, 13, 21, 34, 55}, + }}, + data: []query.Table{&executetest.Table{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "_time", Type: query.TTime}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), execute.Time(1), 02.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 31.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 12.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 38.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 24.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 40.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 30.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 28.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 17.0}, + {execute.Time(1), execute.Time(3), execute.Time(2), 08.0}, + }, + }}, + want: []*executetest.Table{{ + KeyCols: []string{"_start", "_stop"}, + ColMeta: []query.ColMeta{ + {Label: "_start", Type: query.TTime}, + {Label: "_stop", Type: query.TTime}, + {Label: "le", Type: query.TFloat}, + {Label: "_value", Type: query.TFloat}, + }, + Data: [][]interface{}{ + {execute.Time(1), execute.Time(3), 1.0, 0.0}, + {execute.Time(1), execute.Time(3), 2.0, 1.0}, + {execute.Time(1), execute.Time(3), 3.0, 1.0}, + {execute.Time(1), execute.Time(3), 5.0, 1.0}, + {execute.Time(1), execute.Time(3), 8.0, 2.0}, + {execute.Time(1), execute.Time(3), 13.0, 3.0}, + {execute.Time(1), execute.Time(3), 21.0, 4.0}, + {execute.Time(1), execute.Time(3), 34.0, 8.0}, + {execute.Time(1), execute.Time(3), 55.0, 10.0}, + }, + }}, + }, + } + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + executetest.ProcessTestHelper( + t, + tc.data, + tc.want, + nil, + func(d execute.Dataset, c execute.TableBuilderCache) execute.Transformation { + return functions.NewHistogramTransformation(d, c, tc.spec) + }, + ) + }) + } +} diff --git a/query/interpreter/interpreter.go b/query/interpreter/interpreter.go index 32ad59dc0b..2bec88fe18 100644 --- a/query/interpreter/interpreter.go +++ b/query/interpreter/interpreter.go @@ -914,6 +914,16 @@ func ToStringArray(a values.Array) ([]string, error) { }) return strs, nil } +func ToFloatArray(a values.Array) ([]float64, error) { + if a.Type().ElementType() != semantic.Float { + return nil, fmt.Errorf("cannot convert array of %v to an array of floats", a.Type().ElementType()) + } + vs := make([]float64, a.Len()) + a.Range(func(i int, v values.Value) { + vs[i] = v.Float() + }) + return vs, nil +} // Arguments provides access to the keyword arguments passed to a function. // semantic.The Get{Type} methods return three values: the typed value of the arg,