Merged pull request #738 from influxdata/nc-histogram
feat: Add histogram function for computing bucket based histograms
commit
35002fa90f
|
|
@ -1404,6 +1404,41 @@ from(bucket:"telegraf/autogen")
|
|||
r.service == "app-server")
|
||||
```
|
||||
|
||||
#### Histogram
|
||||
|
||||
Histogram approximates the cumulative distribution function of a dataset by counting data frequencies for a list of buckets.
|
||||
A bucket is defined by an upper bound where all data points that are less than or equal to the bound are counted in the bucket.
|
||||
The bucket counts are cumulative.
|
||||
|
||||
Each input table is converted into a single output table representing a single histogram.
|
||||
The output table will have the same group key as the input table.
|
||||
The columns not part of the group key will be removed and an upper bound column and a count column will be added.
|
||||
|
||||
Histogram has the following properties:
|
||||
|
||||
* `column` string
|
||||
Column is the name of a column containing the input data values.
|
||||
The column type must be float.
|
||||
Defaults to `_value`.
|
||||
* `upperBoundColumn` string
|
||||
UpperBoundColumn is the name of the column in which to store the histogram upper bounds.
|
||||
Defaults to `le`.
|
||||
* `countColumn` string
|
||||
CountColumn is the name of the column in which to store the histogram counts.
|
||||
Defaults to `_value`.
|
||||
* `buckets` array of floats
|
||||
Buckets is a list of upper bounds to use when computing the histogram frequencies.
|
||||
Buckets should contain a bucket whose bound is the maximum value of the data set, this value can be set to positive infinity if no maximum is known.
|
||||
* `normalize` bool
|
||||
Normalize when true will convert the counts into frequency values between 0 and 1.
|
||||
Normalized histograms cannot be aggregated by summing their counts.
|
||||
Defaults to `false`.
|
||||
|
||||
|
||||
Example:
|
||||
|
||||
histogram(buckets:linearBuckets(start:0.0,width:10.0,count:10)) // compute the histogram of the data using 10 buckets from 0,10,20,...,100
|
||||
|
||||
#### HistogramQuantile
|
||||
|
||||
HistogramQuantile approximates a quantile given a histogram that approximates the cumulative distribution of the dataset.
|
||||
|
|
@ -1432,6 +1467,7 @@ HistogramQuantile has the following properties:
|
|||
* `upperBoundColumn` string
|
||||
UpperBoundColumn is the name of the column containing the histogram upper bounds.
|
||||
The upper bound column type must be float.
|
||||
Defaults to `le`.
|
||||
* `valueColumn` string
|
||||
ValueColumn is the name of the output column which will contain the computed quantile.
|
||||
Defaults to `_value`.
|
||||
|
|
@ -1444,7 +1480,39 @@ HistogramQuantile has the following properties:
|
|||
|
||||
Example:
|
||||
|
||||
histogramQuantile(quantile:0.9, upperBoundColumn:"le") // compute the 90th quantile using histogram data.
|
||||
histogramQuantile(quantile:0.9) // compute the 90th quantile using histogram data.
|
||||
|
||||
#### LinearBuckets
|
||||
|
||||
LinearBuckets produces a list of linearly separated floats.
|
||||
|
||||
LinearBuckets has the following properties:
|
||||
|
||||
* `start` float
|
||||
Start is the first value in the returned list.
|
||||
* `width` float
|
||||
Width is the distance between subsequent bucket values.
|
||||
* `count` int
|
||||
Count is the number of buckets to create.
|
||||
* `infinity` bool
|
||||
Infinity when true adds an additional bucket with a value of positive infinity.
|
||||
Defaults to `true`.
|
||||
|
||||
#### LogarithmicBuckets
|
||||
|
||||
LogarithmicBuckets produces a list of exponentially separated floats.
|
||||
|
||||
LogarithmicBuckets has the following properties:
|
||||
|
||||
* `start` float
|
||||
Start is the first value in the returned bucket list.
|
||||
* `factor` float
|
||||
Factor is the multiplier applied to each subsequent bucket.
|
||||
* `count` int
|
||||
Count is the number of buckets to create.
|
||||
* `infinity` bool
|
||||
Infinity when true adds an additional bucket with a value of positive infinity.
|
||||
Defaults to `true`.
|
||||
|
||||
#### Limit
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,464 @@
|
|||
package functions
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"regexp"
|
||||
"sort"
|
||||
|
||||
"github.com/influxdata/platform/query"
|
||||
"github.com/influxdata/platform/query/execute"
|
||||
"github.com/influxdata/platform/query/interpreter"
|
||||
"github.com/influxdata/platform/query/plan"
|
||||
"github.com/influxdata/platform/query/semantic"
|
||||
"github.com/influxdata/platform/query/values"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
const HistogramKind = "histogram"
|
||||
|
||||
// HistogramOpSpec is the operation spec for the histogram function.
// It holds the user-supplied arguments parsed by createHistogramOpSpec.
type HistogramOpSpec struct {
	Column           string    `json:"column"`           // input column holding the float values to bucket
	UpperBoundColumn string    `json:"upperBoundColumn"` // output column for bucket upper bounds
	CountColumn      string    `json:"countColumn"`      // output column for cumulative counts
	Buckets          []float64 `json:"buckets"`          // list of bucket upper bounds
	Normalize        bool      `json:"normalize"`        // when true, counts are divided by the total row count
}
|
||||
|
||||
var histogramSignature = execute.DefaultAggregateSignature()
|
||||
|
||||
func init() {
|
||||
histogramSignature.Params["column"] = semantic.String
|
||||
histogramSignature.Params["upperBoundColumn"] = semantic.String
|
||||
histogramSignature.Params["buckets"] = semantic.NewArrayType(semantic.Float)
|
||||
histogramSignature.Params["normalize"] = semantic.Bool
|
||||
|
||||
query.RegisterFunction(HistogramKind, createHistogramOpSpec, histogramSignature)
|
||||
query.RegisterBuiltInValue("linearBuckets", linearBuckets{})
|
||||
query.RegisterBuiltInValue("logarithmicBuckets", logarithmicBuckets{})
|
||||
query.RegisterOpSpec(HistogramKind, newHistogramOp)
|
||||
plan.RegisterProcedureSpec(HistogramKind, newHistogramProcedure, HistogramKind)
|
||||
execute.RegisterTransformation(HistogramKind, createHistogramTransformation)
|
||||
}
|
||||
|
||||
func createHistogramOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
|
||||
if err := a.AddParentFromArgs(args); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
spec := new(HistogramOpSpec)
|
||||
|
||||
if col, ok, err := args.GetString("column"); err != nil {
|
||||
return nil, err
|
||||
} else if ok {
|
||||
spec.Column = col
|
||||
} else {
|
||||
spec.Column = execute.DefaultValueColLabel
|
||||
}
|
||||
if col, ok, err := args.GetString("upperBoundColumn"); err != nil {
|
||||
return nil, err
|
||||
} else if ok {
|
||||
spec.UpperBoundColumn = col
|
||||
} else {
|
||||
spec.UpperBoundColumn = DefaultUpperBoundColumnLabel
|
||||
}
|
||||
if col, ok, err := args.GetString("countColumn"); err != nil {
|
||||
return nil, err
|
||||
} else if ok {
|
||||
spec.CountColumn = col
|
||||
} else {
|
||||
spec.CountColumn = execute.DefaultValueColLabel
|
||||
}
|
||||
bucketsArry, err := args.GetRequiredArray("buckets", semantic.Float)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
spec.Buckets, err = interpreter.ToFloatArray(bucketsArry)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if normalize, ok, err := args.GetBool("normalize"); err != nil {
|
||||
return nil, err
|
||||
} else if ok {
|
||||
spec.Normalize = normalize
|
||||
}
|
||||
|
||||
return spec, nil
|
||||
}
|
||||
|
||||
func newHistogramOp() query.OperationSpec {
|
||||
return new(HistogramOpSpec)
|
||||
}
|
||||
|
||||
func (s *HistogramOpSpec) Kind() query.OperationKind {
|
||||
return HistogramKind
|
||||
}
|
||||
|
||||
type HistogramProcedureSpec struct {
|
||||
HistogramOpSpec
|
||||
}
|
||||
|
||||
func newHistogramProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
|
||||
spec, ok := qs.(*HistogramOpSpec)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid spec type %T", qs)
|
||||
}
|
||||
|
||||
return &HistogramProcedureSpec{
|
||||
HistogramOpSpec: *spec,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *HistogramProcedureSpec) Kind() plan.ProcedureKind {
|
||||
return HistogramKind
|
||||
}
|
||||
func (s *HistogramProcedureSpec) Copy() plan.ProcedureSpec {
|
||||
ns := new(HistogramProcedureSpec)
|
||||
*ns = *s
|
||||
if len(s.Buckets) > 0 {
|
||||
ns.Buckets = make([]float64, len(s.Buckets))
|
||||
copy(ns.Buckets, s.Buckets)
|
||||
}
|
||||
return ns
|
||||
}
|
||||
|
||||
func createHistogramTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
|
||||
s, ok := spec.(*HistogramProcedureSpec)
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
|
||||
}
|
||||
cache := execute.NewTableBuilderCache(a.Allocator())
|
||||
d := execute.NewDataset(id, mode, cache)
|
||||
t := NewHistogramTransformation(d, cache, s)
|
||||
return t, d, nil
|
||||
}
|
||||
|
||||
// histogramTransformation computes one cumulative histogram per input table.
type histogramTransformation struct {
	d     execute.Dataset           // downstream dataset receiving results
	cache execute.TableBuilderCache // builders keyed by group key

	spec HistogramProcedureSpec // copy of the procedure spec (buckets sorted)
}

// NewHistogramTransformation constructs a histogram transformation.
// NOTE(review): Buckets is sorted in place, mutating the caller's spec
// slice — confirm no caller relies on the original bucket ordering.
func NewHistogramTransformation(d execute.Dataset, cache execute.TableBuilderCache, spec *HistogramProcedureSpec) *histogramTransformation {
	sort.Float64s(spec.Buckets)
	return &histogramTransformation{
		d:     d,
		cache: cache,
		spec:  *spec,
	}
}

// RetractTable delegates retraction of a group key to the downstream dataset.
func (t *histogramTransformation) RetractTable(id execute.DatasetID, key query.GroupKey) error {
	return t.d.RetractTable(key)
}
|
||||
|
||||
func (t *histogramTransformation) Process(id execute.DatasetID, tbl query.Table) error {
|
||||
builder, created := t.cache.TableBuilder(tbl.Key())
|
||||
if !created {
|
||||
return fmt.Errorf("histogram found duplicate table with key: %v", tbl.Key())
|
||||
}
|
||||
valueIdx := execute.ColIdx(t.spec.Column, tbl.Cols())
|
||||
if valueIdx < 0 {
|
||||
return fmt.Errorf("column %q is missing", t.spec.Column)
|
||||
}
|
||||
if col := tbl.Cols()[valueIdx]; col.Type != query.TFloat {
|
||||
return fmt.Errorf("column %q must be a float got %v", t.spec.Column, col.Type)
|
||||
}
|
||||
|
||||
execute.AddTableKeyCols(tbl.Key(), builder)
|
||||
boundIdx := builder.AddCol(query.ColMeta{
|
||||
Label: t.spec.UpperBoundColumn,
|
||||
Type: query.TFloat,
|
||||
})
|
||||
countIdx := builder.AddCol(query.ColMeta{
|
||||
Label: t.spec.CountColumn,
|
||||
Type: query.TFloat,
|
||||
})
|
||||
totalRows := 0.0
|
||||
counts := make([]float64, len(t.spec.Buckets))
|
||||
err := tbl.Do(func(cr query.ColReader) error {
|
||||
totalRows += float64(cr.Len())
|
||||
for _, v := range cr.Floats(valueIdx) {
|
||||
idx := sort.Search(len(t.spec.Buckets), func(i int) bool {
|
||||
return v <= t.spec.Buckets[i]
|
||||
})
|
||||
if idx >= len(t.spec.Buckets) {
|
||||
// Greater than highest bucket, or not found
|
||||
return fmt.Errorf("found value greater than any bucket, %d %d %f %f", idx, len(t.spec.Buckets), v, t.spec.Buckets[len(t.spec.Buckets)-1])
|
||||
}
|
||||
// Increment counter
|
||||
counts[idx]++
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Add records making counts cumulative
|
||||
total := 0.0
|
||||
for i, v := range counts {
|
||||
execute.AppendKeyValues(tbl.Key(), builder)
|
||||
count := v + total
|
||||
if t.spec.Normalize {
|
||||
count /= totalRows
|
||||
}
|
||||
builder.AppendFloat(countIdx, count)
|
||||
builder.AppendFloat(boundIdx, t.spec.Buckets[i])
|
||||
total += v
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateWatermark forwards the watermark to the downstream dataset.
func (t *histogramTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
	return t.d.UpdateWatermark(mark)
}

// UpdateProcessingTime forwards the processing time to the downstream dataset.
func (t *histogramTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
	return t.d.UpdateProcessingTime(pt)
}

// Finish propagates completion (and any error) to the downstream dataset.
func (t *histogramTransformation) Finish(id execute.DatasetID, err error) {
	t.d.Finish(err)
}
|
||||
|
||||
// linearBuckets is a helper function for creating buckets spaced linearly
|
||||
type linearBuckets struct{}
|
||||
|
||||
func (b linearBuckets) Type() semantic.Type {
|
||||
return semantic.NewFunctionType(semantic.FunctionSignature{
|
||||
Params: map[string]semantic.Type{
|
||||
"start": semantic.Float,
|
||||
"width": semantic.Float,
|
||||
"count": semantic.Int,
|
||||
"infinity": semantic.Bool,
|
||||
},
|
||||
ReturnType: semantic.String,
|
||||
})
|
||||
}
|
||||
|
||||
// The following methods implement values.Value for linearBuckets. Only
// Function returns a meaningful value; every other kind accessor panics,
// the convention used for built-in function values.

func (b linearBuckets) Str() string {
	panic(values.UnexpectedKind(semantic.String, semantic.Function))
}

func (b linearBuckets) Int() int64 {
	panic(values.UnexpectedKind(semantic.Int, semantic.Function))
}

func (b linearBuckets) UInt() uint64 {
	panic(values.UnexpectedKind(semantic.UInt, semantic.Function))
}

func (b linearBuckets) Float() float64 {
	panic(values.UnexpectedKind(semantic.Float, semantic.Function))
}

func (b linearBuckets) Bool() bool {
	panic(values.UnexpectedKind(semantic.Bool, semantic.Function))
}

func (b linearBuckets) Time() values.Time {
	panic(values.UnexpectedKind(semantic.Time, semantic.Function))
}

func (b linearBuckets) Duration() values.Duration {
	panic(values.UnexpectedKind(semantic.Duration, semantic.Function))
}

func (b linearBuckets) Regexp() *regexp.Regexp {
	panic(values.UnexpectedKind(semantic.Regexp, semantic.Function))
}

func (b linearBuckets) Array() values.Array {
	panic(values.UnexpectedKind(semantic.Array, semantic.Function))
}

func (b linearBuckets) Object() values.Object {
	panic(values.UnexpectedKind(semantic.Object, semantic.Function))
}

// Function returns the receiver, which itself implements values.Function.
func (b linearBuckets) Function() values.Function {
	return b
}

// Equal reports whether rhs is also the linearBuckets built-in.
func (b linearBuckets) Equal(rhs values.Value) bool {
	if b.Type() != rhs.Type() {
		return false
	}
	_, ok := rhs.(linearBuckets)
	return ok
}

// HasSideEffect reports that calling linearBuckets has no side effects.
func (b linearBuckets) HasSideEffect() bool {
	return false
}
|
||||
|
||||
func (b linearBuckets) Call(args values.Object) (values.Value, error) {
|
||||
startV, ok := args.Get("start")
|
||||
if !ok {
|
||||
return nil, errors.New("start is required")
|
||||
}
|
||||
if startV.Type() != semantic.Float {
|
||||
return nil, errors.New("start must be a float")
|
||||
}
|
||||
widthV, ok := args.Get("width")
|
||||
if !ok {
|
||||
return nil, errors.New("width is required")
|
||||
}
|
||||
if widthV.Type() != semantic.Float {
|
||||
return nil, errors.New("width must be a float")
|
||||
}
|
||||
countV, ok := args.Get("count")
|
||||
if !ok {
|
||||
return nil, errors.New("count is required")
|
||||
}
|
||||
if countV.Type() != semantic.Int {
|
||||
return nil, errors.New("count must be an int")
|
||||
}
|
||||
infV, ok := args.Get("infinity")
|
||||
if !ok {
|
||||
infV = values.NewBoolValue(true)
|
||||
}
|
||||
if infV.Type() != semantic.Bool {
|
||||
return nil, errors.New("infinity must be a bool")
|
||||
}
|
||||
start := startV.Float()
|
||||
width := widthV.Float()
|
||||
count := countV.Int()
|
||||
inf := infV.Bool()
|
||||
l := int(count)
|
||||
if inf {
|
||||
l++
|
||||
}
|
||||
elements := make([]values.Value, l)
|
||||
bound := start
|
||||
for i := 0; i < l; i++ {
|
||||
elements[i] = values.NewFloatValue(bound)
|
||||
bound += width
|
||||
}
|
||||
if inf {
|
||||
elements[l-1] = values.NewFloatValue(math.Inf(1))
|
||||
}
|
||||
counts := values.NewArrayWithBacking(semantic.Float, elements)
|
||||
return counts, nil
|
||||
}
|
||||
|
||||
// logarithmicBuckets is a helper function for creating buckets spaced by a logarithmic factor.
|
||||
type logarithmicBuckets struct{}
|
||||
|
||||
func (b logarithmicBuckets) Type() semantic.Type {
|
||||
return semantic.NewFunctionType(semantic.FunctionSignature{
|
||||
Params: map[string]semantic.Type{
|
||||
"start": semantic.Float,
|
||||
"factor": semantic.Float,
|
||||
"count": semantic.Int,
|
||||
"infinity": semantic.Bool,
|
||||
},
|
||||
ReturnType: semantic.String,
|
||||
})
|
||||
}
|
||||
|
||||
// The following methods implement values.Value for logarithmicBuckets. Only
// Function returns a meaningful value; every other kind accessor panics,
// the convention used for built-in function values.

func (b logarithmicBuckets) Str() string {
	panic(values.UnexpectedKind(semantic.String, semantic.Function))
}

func (b logarithmicBuckets) Int() int64 {
	panic(values.UnexpectedKind(semantic.Int, semantic.Function))
}

func (b logarithmicBuckets) UInt() uint64 {
	panic(values.UnexpectedKind(semantic.UInt, semantic.Function))
}

func (b logarithmicBuckets) Float() float64 {
	panic(values.UnexpectedKind(semantic.Float, semantic.Function))
}

func (b logarithmicBuckets) Bool() bool {
	panic(values.UnexpectedKind(semantic.Bool, semantic.Function))
}

func (b logarithmicBuckets) Time() values.Time {
	panic(values.UnexpectedKind(semantic.Time, semantic.Function))
}

func (b logarithmicBuckets) Duration() values.Duration {
	panic(values.UnexpectedKind(semantic.Duration, semantic.Function))
}

func (b logarithmicBuckets) Regexp() *regexp.Regexp {
	panic(values.UnexpectedKind(semantic.Regexp, semantic.Function))
}

func (b logarithmicBuckets) Array() values.Array {
	panic(values.UnexpectedKind(semantic.Array, semantic.Function))
}

func (b logarithmicBuckets) Object() values.Object {
	panic(values.UnexpectedKind(semantic.Object, semantic.Function))
}

// Function returns the receiver, which itself implements values.Function.
func (b logarithmicBuckets) Function() values.Function {
	return b
}

// Equal reports whether rhs is also the logarithmicBuckets built-in.
func (b logarithmicBuckets) Equal(rhs values.Value) bool {
	if b.Type() != rhs.Type() {
		return false
	}
	_, ok := rhs.(logarithmicBuckets)
	return ok
}

// HasSideEffect reports that calling logarithmicBuckets has no side effects.
func (b logarithmicBuckets) HasSideEffect() bool {
	return false
}
|
||||
|
||||
func (b logarithmicBuckets) Call(args values.Object) (values.Value, error) {
|
||||
startV, ok := args.Get("start")
|
||||
if !ok {
|
||||
return nil, errors.New("start is required")
|
||||
}
|
||||
if startV.Type() != semantic.Float {
|
||||
return nil, errors.New("start must be a float")
|
||||
}
|
||||
factorV, ok := args.Get("factor")
|
||||
if !ok {
|
||||
return nil, errors.New("factor is required")
|
||||
}
|
||||
if factorV.Type() != semantic.Float {
|
||||
return nil, errors.New("factor must be a float")
|
||||
}
|
||||
countV, ok := args.Get("count")
|
||||
if !ok {
|
||||
return nil, errors.New("count is required")
|
||||
}
|
||||
if countV.Type() != semantic.Int {
|
||||
return nil, errors.New("count must be an int")
|
||||
}
|
||||
infV, ok := args.Get("infinity")
|
||||
if !ok {
|
||||
infV = values.NewBoolValue(true)
|
||||
}
|
||||
if infV.Type() != semantic.Bool {
|
||||
return nil, errors.New("infinity must be a bool")
|
||||
}
|
||||
start := startV.Float()
|
||||
factor := factorV.Float()
|
||||
count := countV.Int()
|
||||
inf := infV.Bool()
|
||||
l := int(count)
|
||||
if inf {
|
||||
l++
|
||||
}
|
||||
elements := make([]values.Value, l)
|
||||
bound := start
|
||||
for i := 0; i < l; i++ {
|
||||
elements[i] = values.NewFloatValue(bound)
|
||||
bound *= factor
|
||||
}
|
||||
if inf {
|
||||
elements[l-1] = values.NewFloatValue(math.Inf(1))
|
||||
}
|
||||
counts := values.NewArrayWithBacking(semantic.Float, elements)
|
||||
return counts, nil
|
||||
}
|
||||
|
|
@ -14,6 +14,8 @@ import (
|
|||
|
||||
const HistogramQuantileKind = "histogramQuantile"
|
||||
|
||||
const DefaultUpperBoundColumnLabel = "le"
|
||||
|
||||
type HistogramQuantileOpSpec struct {
|
||||
Quantile float64 `json:"quantile"`
|
||||
CountColumn string `json:"countColumn"`
|
||||
|
|
@ -60,6 +62,8 @@ func createHistogramQuantileOpSpec(args query.Arguments, a *query.Administration
|
|||
return nil, err
|
||||
} else if ok {
|
||||
s.UpperBoundColumn = col
|
||||
} else {
|
||||
s.UpperBoundColumn = DefaultUpperBoundColumnLabel
|
||||
}
|
||||
|
||||
if col, ok, err := args.GetString("valueColumn"); err != nil {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,345 @@
|
|||
package functions_test
|
||||
|
||||
import (
|
||||
"math"
|
||||
"testing"
|
||||
|
||||
"github.com/influxdata/platform/query"
|
||||
"github.com/influxdata/platform/query/execute"
|
||||
"github.com/influxdata/platform/query/execute/executetest"
|
||||
"github.com/influxdata/platform/query/functions"
|
||||
"github.com/influxdata/platform/query/querytest"
|
||||
)
|
||||
|
||||
// TestHistogramOperation_Marshaling verifies that a histogram operation
// round-trips through its JSON encoding.
func TestHistogramOperation_Marshaling(t *testing.T) {
	data := []byte(`{"id":"histogram","kind":"histogram","spec":{"column":"_value"}}`)
	op := &query.Operation{
		ID: "histogram",
		Spec: &functions.HistogramOpSpec{
			Column: "_value",
		},
	}
	querytest.OperationMarshalingTestHelper(t, data, op)
}
|
||||
|
||||
// TestHistogram_PassThrough checks that the histogram transformation forwards
// watermarks, processing time, and finish signals to its dataset.
func TestHistogram_PassThrough(t *testing.T) {
	executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.TableBuilderCache) execute.Transformation {
		s := functions.NewHistogramTransformation(
			d,
			c,
			&functions.HistogramProcedureSpec{},
		)
		return s
	})
}
|
||||
|
||||
func TestHistogram_Process(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
spec *functions.HistogramProcedureSpec
|
||||
data []query.Table
|
||||
want []*executetest.Table
|
||||
}{
|
||||
{
|
||||
name: "linear",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{0, 10, 20, 30, 40},
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 0.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 10.0, 2.0},
|
||||
{execute.Time(1), execute.Time(3), 20.0, 4.0},
|
||||
{execute.Time(1), execute.Time(3), 30.0, 7.0},
|
||||
{execute.Time(1), execute.Time(3), 40.0, 10.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "linear+infinity",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{0, 10, 20, 30, 40, math.Inf(1)},
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 68.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 0.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 10.0, 2.0},
|
||||
{execute.Time(1), execute.Time(3), 20.0, 4.0},
|
||||
{execute.Time(1), execute.Time(3), 30.0, 7.0},
|
||||
{execute.Time(1), execute.Time(3), 40.0, 10.0},
|
||||
{execute.Time(1), execute.Time(3), math.Inf(1), 11.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "linear+normalize",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{0, 10, 20, 30, 40},
|
||||
Normalize: true,
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 0.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 10.0, 0.2},
|
||||
{execute.Time(1), execute.Time(3), 20.0, 0.4},
|
||||
{execute.Time(1), execute.Time(3), 30.0, 0.7},
|
||||
{execute.Time(1), execute.Time(3), 40.0, 1.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "logarithmic",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{1, 2, 4, 8, 16, 32, 64},
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 1.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 2.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 4.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 8.0, 2.0},
|
||||
{execute.Time(1), execute.Time(3), 16.0, 3.0},
|
||||
{execute.Time(1), execute.Time(3), 32.0, 8.0},
|
||||
{execute.Time(1), execute.Time(3), 64.0, 10.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "logarithmic unsorted",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{1, 64, 2, 4, 16, 8, 32},
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 1.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 2.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 4.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 8.0, 2.0},
|
||||
{execute.Time(1), execute.Time(3), 16.0, 3.0},
|
||||
{execute.Time(1), execute.Time(3), 32.0, 8.0},
|
||||
{execute.Time(1), execute.Time(3), 64.0, 10.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "fibonacci",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{1, 2, 3, 5, 8, 13, 21, 34, 55},
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 1.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 2.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 3.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 5.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 8.0, 2.0},
|
||||
{execute.Time(1), execute.Time(3), 13.0, 3.0},
|
||||
{execute.Time(1), execute.Time(3), 21.0, 4.0},
|
||||
{execute.Time(1), execute.Time(3), 34.0, 8.0},
|
||||
{execute.Time(1), execute.Time(3), 55.0, 10.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
tc := tc
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
executetest.ProcessTestHelper(
|
||||
t,
|
||||
tc.data,
|
||||
tc.want,
|
||||
nil,
|
||||
func(d execute.Dataset, c execute.TableBuilderCache) execute.Transformation {
|
||||
return functions.NewHistogramTransformation(d, c, tc.spec)
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -914,6 +914,16 @@ func ToStringArray(a values.Array) ([]string, error) {
|
|||
})
|
||||
return strs, nil
|
||||
}
|
||||
func ToFloatArray(a values.Array) ([]float64, error) {
|
||||
if a.Type().ElementType() != semantic.Float {
|
||||
return nil, fmt.Errorf("cannot convert array of %v to an array of floats", a.Type().ElementType())
|
||||
}
|
||||
vs := make([]float64, a.Len())
|
||||
a.Range(func(i int, v values.Value) {
|
||||
vs[i] = v.Float()
|
||||
})
|
||||
return vs, nil
|
||||
}
|
||||
|
||||
// Arguments provides access to the keyword arguments passed to a function.
|
||||
// The Get{Type} methods return three values: the typed value of the arg,
|
||||
|
|
|
|||
Loading…
Reference in New Issue