Merged pull request #738 from influxdata/nc-histogram

feat: Add histogram function for computing bucket based histograms
pull/10616/head
Nathaniel Cook 2018-09-04 09:51:32 -06:00
commit 35002fa90f
5 changed files with 892 additions and 1 deletions

View File

@ -1404,6 +1404,41 @@ from(bucket:"telegraf/autogen")
r.service == "app-server")
```
#### Histogram
Histogram approximates the cumulative distribution function of a dataset by counting data frequencies for a list of buckets.
A bucket is defined by an upper bound where all data points that are less than or equal to the bound are counted in the bucket.
The bucket counts are cumulative.
Each input table is converted into a single output table representing a single histogram.
The output table will have the same group key as the input table.
The columns not part of the group key will be removed and an upper bound column and a count column will be added.
Histogram has the following properties:
* `column` string
Column is the name of a column containing the input data values.
The column type must be float.
Defaults to `_value`.
* `upperBoundColumn` string
UpperBoundColumn is the name of the column in which to store the histogram upper bounds.
Defaults to `le`.
* `countColumn` string
CountColumn is the name of the column in which to store the histogram counts.
Defaults to `_value`.
* `buckets` array of floats
Buckets is a list of upper bounds to use when computing the histogram frequencies.
    Buckets should contain a bucket whose bound is the maximum value of the data set; this value can be set to positive infinity if no maximum is known.
* `normalize` bool
    Normalize when true will convert the counts into frequency values between 0 and 1.
Normalized histograms cannot be aggregated by summing their counts.
Defaults to `false`.
Example:
histogram(buckets:linearBuckets(start:0.0,width:10.0,count:10)) // compute the histogram of the data using 10 buckets from 0,10,20,...,100
#### HistogramQuantile
HistogramQuantile approximates a quantile given a histogram that approximates the cumulative distribution of the dataset.
@ -1432,6 +1467,7 @@ HistogramQuantile has the following properties:
* `upperBoundColumn` string
UpperBoundColumn is the name of the column containing the histogram upper bounds.
The upper bound column type must be float.
Defaults to `le`.
* `valueColumn` string
ValueColumn is the name of the output column which will contain the computed quantile.
Defaults to `_value`.
@ -1444,7 +1480,39 @@ HistogramQuantile has the following properties:
Example:
histogramQuantile(quantile:0.9, upperBoundColumn:"le") // compute the 90th quantile using histogram data.
histogramQuantile(quantile:0.9) // compute the 90th quantile using histogram data.
#### LinearBuckets
LinearBuckets produces a list of linearly separated floats.
LinearBuckets has the following properties:
* `start` float
Start is the first value in the returned list.
* `width` float
Width is the distance between subsequent bucket values.
* `count` int
Count is the number of buckets to create.
* `infinity` bool
Infinity when true adds an additional bucket with a value of positive infinity.
Defaults to `true`.
#### LogarithmicBuckets
LogarithmicBuckets produces a list of exponentially separated floats.
LogarithmicBuckets has the following properties:
* `start` float
Start is the first value in the returned bucket list.
* `factor` float
Factor is the multiplier applied to each subsequent bucket.
* `count` int
Count is the number of buckets to create.
* `infinity` bool
Infinity when true adds an additional bucket with a value of positive infinity.
Defaults to `true`.
#### Limit

View File

@ -0,0 +1,464 @@
package functions
import (
"fmt"
"math"
"regexp"
"sort"
"github.com/influxdata/platform/query"
"github.com/influxdata/platform/query/execute"
"github.com/influxdata/platform/query/interpreter"
"github.com/influxdata/platform/query/plan"
"github.com/influxdata/platform/query/semantic"
"github.com/influxdata/platform/query/values"
"github.com/pkg/errors"
)
// HistogramKind is the registered name of the histogram operation.
const HistogramKind = "histogram"

// HistogramOpSpec carries the user-supplied arguments of a histogram() call.
type HistogramOpSpec struct {
	// Column is the name of the input column to bucket; Process requires it
	// to be a float column.
	Column string `json:"column"`
	// UpperBoundColumn is the output column holding the bucket upper bounds.
	UpperBoundColumn string `json:"upperBoundColumn"`
	// CountColumn is the output column holding the cumulative counts.
	CountColumn string `json:"countColumn"`
	// Buckets lists the upper bounds used when counting frequencies.
	Buckets []float64 `json:"buckets"`
	// Normalize, when true, divides counts by the total row count so the
	// output values fall between 0 and 1.
	Normalize bool `json:"normalize"`
}
// histogramSignature declares the argument types accepted by histogram().
var histogramSignature = execute.DefaultAggregateSignature()

// init registers the histogram operation, its builtin bucket-generator
// values, and the planner/executor hooks.
func init() {
	histogramSignature.Params["column"] = semantic.String
	histogramSignature.Params["upperBoundColumn"] = semantic.String
	// countColumn is read by createHistogramOpSpec, so it must also be
	// declared in the signature; it was previously missing here.
	histogramSignature.Params["countColumn"] = semantic.String
	histogramSignature.Params["buckets"] = semantic.NewArrayType(semantic.Float)
	histogramSignature.Params["normalize"] = semantic.Bool

	query.RegisterFunction(HistogramKind, createHistogramOpSpec, histogramSignature)
	query.RegisterBuiltInValue("linearBuckets", linearBuckets{})
	query.RegisterBuiltInValue("logarithmicBuckets", logarithmicBuckets{})
	query.RegisterOpSpec(HistogramKind, newHistogramOp)
	plan.RegisterProcedureSpec(HistogramKind, newHistogramProcedure, HistogramKind)
	execute.RegisterTransformation(HistogramKind, createHistogramTransformation)
}
// createHistogramOpSpec parses the arguments of a histogram() call into a
// HistogramOpSpec. Optional string arguments fall back to their documented
// defaults; buckets is required and must be an array of floats.
func createHistogramOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
	if err := a.AddParentFromArgs(args); err != nil {
		return nil, err
	}

	// Start from the documented defaults and override with any explicitly
	// provided arguments.
	spec := &HistogramOpSpec{
		Column:           execute.DefaultValueColLabel,
		UpperBoundColumn: DefaultUpperBoundColumnLabel,
		CountColumn:      execute.DefaultValueColLabel,
	}

	if col, ok, err := args.GetString("column"); err != nil {
		return nil, err
	} else if ok {
		spec.Column = col
	}
	if col, ok, err := args.GetString("upperBoundColumn"); err != nil {
		return nil, err
	} else if ok {
		spec.UpperBoundColumn = col
	}
	if col, ok, err := args.GetString("countColumn"); err != nil {
		return nil, err
	} else if ok {
		spec.CountColumn = col
	}

	bucketsArray, err := args.GetRequiredArray("buckets", semantic.Float)
	if err != nil {
		return nil, err
	}
	buckets, err := interpreter.ToFloatArray(bucketsArray)
	if err != nil {
		return nil, err
	}
	spec.Buckets = buckets

	if normalize, ok, err := args.GetBool("normalize"); err != nil {
		return nil, err
	} else if ok {
		spec.Normalize = normalize
	}
	return spec, nil
}
// newHistogramOp returns an empty spec for the operation decoder to fill in.
func newHistogramOp() query.OperationSpec {
	return &HistogramOpSpec{}
}
// Kind reports the operation kind of this spec.
func (s *HistogramOpSpec) Kind() query.OperationKind {
	return HistogramKind
}
// HistogramProcedureSpec is the planner-side spec for histogram; it reuses
// the operation spec's fields unchanged.
type HistogramProcedureSpec struct {
	HistogramOpSpec
}
// newHistogramProcedure converts the histogram operation spec into its
// procedure spec. A non-histogram spec indicates a planner bug.
func newHistogramProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
	spec, ok := qs.(*HistogramOpSpec)
	if !ok {
		return nil, fmt.Errorf("invalid spec type %T", qs)
	}
	p := new(HistogramProcedureSpec)
	p.HistogramOpSpec = *spec
	return p, nil
}
// Kind reports the procedure kind of this spec.
func (s *HistogramProcedureSpec) Kind() plan.ProcedureKind {
	return HistogramKind
}
// Copy returns a deep copy of the procedure spec so the planner can mutate
// it independently of the original.
func (s *HistogramProcedureSpec) Copy() plan.ProcedureSpec {
	ns := &HistogramProcedureSpec{}
	*ns = *s
	// Detach the buckets slice so the copy does not alias the original's
	// backing array.
	if len(s.Buckets) > 0 {
		ns.Buckets = append([]float64(nil), s.Buckets...)
	}
	return ns
}
// createHistogramTransformation wires a histogram transformation to a fresh
// dataset and table-builder cache for the executor.
func createHistogramTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
	s, ok := spec.(*HistogramProcedureSpec)
	if !ok {
		return nil, nil, fmt.Errorf("invalid spec type %T", spec)
	}
	cache := execute.NewTableBuilderCache(a.Allocator())
	dataset := execute.NewDataset(id, mode, cache)
	return NewHistogramTransformation(dataset, cache, s), dataset, nil
}
// histogramTransformation converts each input table into a cumulative
// histogram table according to spec.
type histogramTransformation struct {
	d     execute.Dataset
	cache execute.TableBuilderCache
	spec  HistogramProcedureSpec
}
// NewHistogramTransformation constructs a histogram transformation.
// NOTE: it sorts spec.Buckets in place, so the caller's spec is mutated.
func NewHistogramTransformation(d execute.Dataset, cache execute.TableBuilderCache, spec *HistogramProcedureSpec) *histogramTransformation {
	// Process relies on sorted bounds for its binary search.
	sort.Float64s(spec.Buckets)
	return &histogramTransformation{
		d:     d,
		cache: cache,
		spec:  *spec,
	}
}
// RetractTable forwards the retraction to the downstream dataset.
func (t *histogramTransformation) RetractTable(id execute.DatasetID, key query.GroupKey) error {
	return t.d.RetractTable(key)
}
// Process converts one input table into a single cumulative histogram table.
// The output keeps the input's group key, drops all other input columns, and
// adds an upper-bound column and a count column. It returns an error if the
// input column is missing or not a float, or if any value exceeds the
// largest bucket bound.
func (t *histogramTransformation) Process(id execute.DatasetID, tbl query.Table) error {
	builder, created := t.cache.TableBuilder(tbl.Key())
	if !created {
		return fmt.Errorf("histogram found duplicate table with key: %v", tbl.Key())
	}
	valueIdx := execute.ColIdx(t.spec.Column, tbl.Cols())
	if valueIdx < 0 {
		return fmt.Errorf("column %q is missing", t.spec.Column)
	}
	if col := tbl.Cols()[valueIdx]; col.Type != query.TFloat {
		return fmt.Errorf("column %q must be a float got %v", t.spec.Column, col.Type)
	}
	// Output schema: group-key columns + upper bound + count.
	execute.AddTableKeyCols(tbl.Key(), builder)
	boundIdx := builder.AddCol(query.ColMeta{
		Label: t.spec.UpperBoundColumn,
		Type:  query.TFloat,
	})
	countIdx := builder.AddCol(query.ColMeta{
		Label: t.spec.CountColumn,
		Type:  query.TFloat,
	})
	totalRows := 0.0
	// counts[i] holds the (non-cumulative) number of values that landed in
	// bucket i; Buckets is sorted by NewHistogramTransformation.
	counts := make([]float64, len(t.spec.Buckets))
	err := tbl.Do(func(cr query.ColReader) error {
		totalRows += float64(cr.Len())
		for _, v := range cr.Floats(valueIdx) {
			// Binary search for the first bound >= v.
			idx := sort.Search(len(t.spec.Buckets), func(i int) bool {
				return v <= t.spec.Buckets[i]
			})
			if idx >= len(t.spec.Buckets) {
				// Greater than highest bucket, or not found
				return fmt.Errorf("found value greater than any bucket, %d %d %f %f", idx, len(t.spec.Buckets), v, t.spec.Buckets[len(t.spec.Buckets)-1])
			}
			// Increment counter
			counts[idx]++
		}
		return nil
	})
	if err != nil {
		return err
	}
	// Add records making counts cumulative
	// NOTE(review): when Normalize is set and the input table is empty,
	// totalRows is 0 and the division below yields NaN counts — confirm
	// whether that case can reach here.
	total := 0.0
	for i, v := range counts {
		execute.AppendKeyValues(tbl.Key(), builder)
		count := v + total
		if t.spec.Normalize {
			count /= totalRows
		}
		builder.AppendFloat(countIdx, count)
		builder.AppendFloat(boundIdx, t.spec.Buckets[i])
		total += v
	}
	return nil
}
// UpdateWatermark forwards the watermark to the downstream dataset.
func (t *histogramTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
	return t.d.UpdateWatermark(mark)
}
// UpdateProcessingTime forwards the processing time to the downstream dataset.
func (t *histogramTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
	return t.d.UpdateProcessingTime(pt)
}
// Finish propagates completion (and any error) to the downstream dataset.
func (t *histogramTransformation) Finish(id execute.DatasetID, err error) {
	t.d.Finish(err)
}
// linearBuckets is the builtin linearBuckets() function value.
// It produces an array of linearly spaced float bucket bounds:
// start, start+width, ..., start+(count-1)*width. When the optional
// infinity argument is true (the default) a final +Inf bound is appended
// so that every possible value falls in some bucket.
type linearBuckets struct{}

// Type reports the function signature of linearBuckets.
func (b linearBuckets) Type() semantic.Type {
	return semantic.NewFunctionType(semantic.FunctionSignature{
		Params: map[string]semantic.Type{
			"start":    semantic.Float,
			"width":    semantic.Float,
			"count":    semantic.Int,
			"infinity": semantic.Bool,
		},
		// Call returns an array of floats; the previous declaration of
		// semantic.String did not match the actual return value.
		ReturnType: semantic.NewArrayType(semantic.Float),
	})
}

// The kind accessors below all panic: linearBuckets is only ever a function.
func (b linearBuckets) Str() string {
	panic(values.UnexpectedKind(semantic.String, semantic.Function))
}
func (b linearBuckets) Int() int64 {
	panic(values.UnexpectedKind(semantic.Int, semantic.Function))
}
func (b linearBuckets) UInt() uint64 {
	panic(values.UnexpectedKind(semantic.UInt, semantic.Function))
}
func (b linearBuckets) Float() float64 {
	panic(values.UnexpectedKind(semantic.Float, semantic.Function))
}
func (b linearBuckets) Bool() bool {
	panic(values.UnexpectedKind(semantic.Bool, semantic.Function))
}
func (b linearBuckets) Time() values.Time {
	panic(values.UnexpectedKind(semantic.Time, semantic.Function))
}
func (b linearBuckets) Duration() values.Duration {
	panic(values.UnexpectedKind(semantic.Duration, semantic.Function))
}
func (b linearBuckets) Regexp() *regexp.Regexp {
	panic(values.UnexpectedKind(semantic.Regexp, semantic.Function))
}
func (b linearBuckets) Array() values.Array {
	panic(values.UnexpectedKind(semantic.Array, semantic.Function))
}
func (b linearBuckets) Object() values.Object {
	panic(values.UnexpectedKind(semantic.Object, semantic.Function))
}

// Function returns the receiver itself; linearBuckets is a function value.
func (b linearBuckets) Function() values.Function {
	return b
}

// Equal reports whether rhs is also the linearBuckets builtin.
func (b linearBuckets) Equal(rhs values.Value) bool {
	if b.Type() != rhs.Type() {
		return false
	}
	_, ok := rhs.(linearBuckets)
	return ok
}

// HasSideEffect reports that calling linearBuckets is pure.
func (b linearBuckets) HasSideEffect() bool {
	return false
}

// Call evaluates linearBuckets(start, width, count, infinity) and returns
// the generated array of bucket bounds, or an error for missing or
// mistyped arguments.
func (b linearBuckets) Call(args values.Object) (values.Value, error) {
	startV, ok := args.Get("start")
	if !ok {
		return nil, errors.New("start is required")
	}
	if startV.Type() != semantic.Float {
		return nil, errors.New("start must be a float")
	}
	widthV, ok := args.Get("width")
	if !ok {
		return nil, errors.New("width is required")
	}
	if widthV.Type() != semantic.Float {
		return nil, errors.New("width must be a float")
	}
	countV, ok := args.Get("count")
	if !ok {
		return nil, errors.New("count is required")
	}
	if countV.Type() != semantic.Int {
		return nil, errors.New("count must be an int")
	}
	infV, ok := args.Get("infinity")
	if !ok {
		// infinity defaults to true.
		infV = values.NewBoolValue(true)
	}
	if infV.Type() != semantic.Bool {
		return nil, errors.New("infinity must be a bool")
	}
	start := startV.Float()
	width := widthV.Float()
	count := countV.Int()
	inf := infV.Bool()
	if count < 0 {
		// A negative count previously caused a panic in make; report it as
		// a user error instead.
		return nil, errors.New("count must be non-negative")
	}
	l := int(count)
	if inf {
		l++
	}
	elements := make([]values.Value, l)
	bound := start
	for i := 0; i < l; i++ {
		elements[i] = values.NewFloatValue(bound)
		bound += width
	}
	if inf {
		// Overwrite the extra slot with the +Inf catch-all bound.
		elements[l-1] = values.NewFloatValue(math.Inf(1))
	}
	return values.NewArrayWithBacking(semantic.Float, elements), nil
}
// logarithmicBuckets is the builtin logarithmicBuckets() function value.
// It produces an array of exponentially spaced float bucket bounds:
// start, start*factor, start*factor^2, ... When the optional infinity
// argument is true (the default) a final +Inf bound is appended so that
// every possible value falls in some bucket.
type logarithmicBuckets struct{}

// Type reports the function signature of logarithmicBuckets.
func (b logarithmicBuckets) Type() semantic.Type {
	return semantic.NewFunctionType(semantic.FunctionSignature{
		Params: map[string]semantic.Type{
			"start":    semantic.Float,
			"factor":   semantic.Float,
			"count":    semantic.Int,
			"infinity": semantic.Bool,
		},
		// Call returns an array of floats; the previous declaration of
		// semantic.String did not match the actual return value.
		ReturnType: semantic.NewArrayType(semantic.Float),
	})
}

// The kind accessors below all panic: logarithmicBuckets is only a function.
func (b logarithmicBuckets) Str() string {
	panic(values.UnexpectedKind(semantic.String, semantic.Function))
}
func (b logarithmicBuckets) Int() int64 {
	panic(values.UnexpectedKind(semantic.Int, semantic.Function))
}
func (b logarithmicBuckets) UInt() uint64 {
	panic(values.UnexpectedKind(semantic.UInt, semantic.Function))
}
func (b logarithmicBuckets) Float() float64 {
	panic(values.UnexpectedKind(semantic.Float, semantic.Function))
}
func (b logarithmicBuckets) Bool() bool {
	panic(values.UnexpectedKind(semantic.Bool, semantic.Function))
}
func (b logarithmicBuckets) Time() values.Time {
	panic(values.UnexpectedKind(semantic.Time, semantic.Function))
}
func (b logarithmicBuckets) Duration() values.Duration {
	panic(values.UnexpectedKind(semantic.Duration, semantic.Function))
}
func (b logarithmicBuckets) Regexp() *regexp.Regexp {
	panic(values.UnexpectedKind(semantic.Regexp, semantic.Function))
}
func (b logarithmicBuckets) Array() values.Array {
	panic(values.UnexpectedKind(semantic.Array, semantic.Function))
}
func (b logarithmicBuckets) Object() values.Object {
	panic(values.UnexpectedKind(semantic.Object, semantic.Function))
}

// Function returns the receiver itself; logarithmicBuckets is a function value.
func (b logarithmicBuckets) Function() values.Function {
	return b
}

// Equal reports whether rhs is also the logarithmicBuckets builtin.
func (b logarithmicBuckets) Equal(rhs values.Value) bool {
	if b.Type() != rhs.Type() {
		return false
	}
	_, ok := rhs.(logarithmicBuckets)
	return ok
}

// HasSideEffect reports that calling logarithmicBuckets is pure.
func (b logarithmicBuckets) HasSideEffect() bool {
	return false
}

// Call evaluates logarithmicBuckets(start, factor, count, infinity) and
// returns the generated array of bucket bounds, or an error for missing or
// mistyped arguments.
func (b logarithmicBuckets) Call(args values.Object) (values.Value, error) {
	startV, ok := args.Get("start")
	if !ok {
		return nil, errors.New("start is required")
	}
	if startV.Type() != semantic.Float {
		return nil, errors.New("start must be a float")
	}
	factorV, ok := args.Get("factor")
	if !ok {
		return nil, errors.New("factor is required")
	}
	if factorV.Type() != semantic.Float {
		return nil, errors.New("factor must be a float")
	}
	countV, ok := args.Get("count")
	if !ok {
		return nil, errors.New("count is required")
	}
	if countV.Type() != semantic.Int {
		return nil, errors.New("count must be an int")
	}
	infV, ok := args.Get("infinity")
	if !ok {
		// infinity defaults to true.
		infV = values.NewBoolValue(true)
	}
	if infV.Type() != semantic.Bool {
		return nil, errors.New("infinity must be a bool")
	}
	start := startV.Float()
	factor := factorV.Float()
	count := countV.Int()
	inf := infV.Bool()
	if count < 0 {
		// A negative count previously caused a panic in make; report it as
		// a user error instead.
		return nil, errors.New("count must be non-negative")
	}
	l := int(count)
	if inf {
		l++
	}
	elements := make([]values.Value, l)
	bound := start
	for i := 0; i < l; i++ {
		elements[i] = values.NewFloatValue(bound)
		bound *= factor
	}
	if inf {
		// Overwrite the extra slot with the +Inf catch-all bound.
		elements[l-1] = values.NewFloatValue(math.Inf(1))
	}
	return values.NewArrayWithBacking(semantic.Float, elements), nil
}

View File

@ -14,6 +14,8 @@ import (
const HistogramQuantileKind = "histogramQuantile"
const DefaultUpperBoundColumnLabel = "le"
type HistogramQuantileOpSpec struct {
Quantile float64 `json:"quantile"`
CountColumn string `json:"countColumn"`
@ -60,6 +62,8 @@ func createHistogramQuantileOpSpec(args query.Arguments, a *query.Administration
return nil, err
} else if ok {
s.UpperBoundColumn = col
} else {
s.UpperBoundColumn = DefaultUpperBoundColumnLabel
}
if col, ok, err := args.GetString("valueColumn"); err != nil {

View File

@ -0,0 +1,345 @@
package functions_test
import (
"math"
"testing"
"github.com/influxdata/platform/query"
"github.com/influxdata/platform/query/execute"
"github.com/influxdata/platform/query/execute/executetest"
"github.com/influxdata/platform/query/functions"
"github.com/influxdata/platform/query/querytest"
)
// TestHistogramOperation_Marshaling checks that a histogram operation
// round-trips through its JSON encoding.
func TestHistogramOperation_Marshaling(t *testing.T) {
	data := []byte(`{"id":"histogram","kind":"histogram","spec":{"column":"_value"}}`)
	op := &query.Operation{
		ID: "histogram",
		Spec: &functions.HistogramOpSpec{
			Column: "_value",
		},
	}
	querytest.OperationMarshalingTestHelper(t, data, op)
}
// TestHistogram_PassThrough checks the transformation forwards watermark,
// processing-time, and finish signals via the shared pass-through helper.
func TestHistogram_PassThrough(t *testing.T) {
	executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.TableBuilderCache) execute.Transformation {
		s := functions.NewHistogramTransformation(
			d,
			c,
			&functions.HistogramProcedureSpec{},
		)
		return s
	})
}
// TestHistogram_Process exercises the histogram transformation over a fixed
// ten-value input table (2, 31, 12, 38, 24, 40, 30, 28, 17, 8) with linear,
// infinite, normalized, logarithmic, unsorted, and fibonacci bucket layouts,
// asserting the cumulative per-bucket counts in the output table.
func TestHistogram_Process(t *testing.T) {
	testCases := []struct {
		name string
		spec *functions.HistogramProcedureSpec
		data []query.Table
		want []*executetest.Table
	}{
		{
			// Evenly spaced buckets; counts accumulate to the row total (10).
			name: "linear",
			spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
				Column:           "_value",
				UpperBoundColumn: "le",
				CountColumn:      "_value",
				Buckets:          []float64{0, 10, 20, 30, 40},
			}},
			data: []query.Table{&executetest.Table{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "_time", Type: query.TTime},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
				},
			}},
			want: []*executetest.Table{{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "le", Type: query.TFloat},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), 0.0, 0.0},
					{execute.Time(1), execute.Time(3), 10.0, 2.0},
					{execute.Time(1), execute.Time(3), 20.0, 4.0},
					{execute.Time(1), execute.Time(3), 30.0, 7.0},
					{execute.Time(1), execute.Time(3), 40.0, 10.0},
				},
			}},
		},
		{
			// Adds an extra 68.0 value that only the +Inf bucket captures.
			name: "linear+infinity",
			spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
				Column:           "_value",
				UpperBoundColumn: "le",
				CountColumn:      "_value",
				Buckets:          []float64{0, 10, 20, 30, 40, math.Inf(1)},
			}},
			data: []query.Table{&executetest.Table{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "_time", Type: query.TTime},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 68.0},
				},
			}},
			want: []*executetest.Table{{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "le", Type: query.TFloat},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), 0.0, 0.0},
					{execute.Time(1), execute.Time(3), 10.0, 2.0},
					{execute.Time(1), execute.Time(3), 20.0, 4.0},
					{execute.Time(1), execute.Time(3), 30.0, 7.0},
					{execute.Time(1), execute.Time(3), 40.0, 10.0},
					{execute.Time(1), execute.Time(3), math.Inf(1), 11.0},
				},
			}},
		},
		{
			// Normalize divides cumulative counts by the row total.
			name: "linear+normalize",
			spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
				Column:           "_value",
				UpperBoundColumn: "le",
				CountColumn:      "_value",
				Buckets:          []float64{0, 10, 20, 30, 40},
				Normalize:        true,
			}},
			data: []query.Table{&executetest.Table{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "_time", Type: query.TTime},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
				},
			}},
			want: []*executetest.Table{{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "le", Type: query.TFloat},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), 0.0, 0.0},
					{execute.Time(1), execute.Time(3), 10.0, 0.2},
					{execute.Time(1), execute.Time(3), 20.0, 0.4},
					{execute.Time(1), execute.Time(3), 30.0, 0.7},
					{execute.Time(1), execute.Time(3), 40.0, 1.0},
				},
			}},
		},
		{
			// Power-of-two bucket bounds.
			name: "logarithmic",
			spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
				Column:           "_value",
				UpperBoundColumn: "le",
				CountColumn:      "_value",
				Buckets:          []float64{1, 2, 4, 8, 16, 32, 64},
			}},
			data: []query.Table{&executetest.Table{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "_time", Type: query.TTime},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
				},
			}},
			want: []*executetest.Table{{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "le", Type: query.TFloat},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), 1.0, 0.0},
					{execute.Time(1), execute.Time(3), 2.0, 1.0},
					{execute.Time(1), execute.Time(3), 4.0, 1.0},
					{execute.Time(1), execute.Time(3), 8.0, 2.0},
					{execute.Time(1), execute.Time(3), 16.0, 3.0},
					{execute.Time(1), execute.Time(3), 32.0, 8.0},
					{execute.Time(1), execute.Time(3), 64.0, 10.0},
				},
			}},
		},
		{
			// Same bounds supplied out of order; NewHistogramTransformation
			// sorts them, so the output matches the sorted case above.
			name: "logarithmic unsorted",
			spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
				Column:           "_value",
				UpperBoundColumn: "le",
				CountColumn:      "_value",
				Buckets:          []float64{1, 64, 2, 4, 16, 8, 32},
			}},
			data: []query.Table{&executetest.Table{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "_time", Type: query.TTime},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
				},
			}},
			want: []*executetest.Table{{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "le", Type: query.TFloat},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), 1.0, 0.0},
					{execute.Time(1), execute.Time(3), 2.0, 1.0},
					{execute.Time(1), execute.Time(3), 4.0, 1.0},
					{execute.Time(1), execute.Time(3), 8.0, 2.0},
					{execute.Time(1), execute.Time(3), 16.0, 3.0},
					{execute.Time(1), execute.Time(3), 32.0, 8.0},
					{execute.Time(1), execute.Time(3), 64.0, 10.0},
				},
			}},
		},
		{
			// Irregularly spaced (fibonacci) bounds.
			name: "fibonacci",
			spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
				Column:           "_value",
				UpperBoundColumn: "le",
				CountColumn:      "_value",
				Buckets:          []float64{1, 2, 3, 5, 8, 13, 21, 34, 55},
			}},
			data: []query.Table{&executetest.Table{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "_time", Type: query.TTime},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
					{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
				},
			}},
			want: []*executetest.Table{{
				KeyCols: []string{"_start", "_stop"},
				ColMeta: []query.ColMeta{
					{Label: "_start", Type: query.TTime},
					{Label: "_stop", Type: query.TTime},
					{Label: "le", Type: query.TFloat},
					{Label: "_value", Type: query.TFloat},
				},
				Data: [][]interface{}{
					{execute.Time(1), execute.Time(3), 1.0, 0.0},
					{execute.Time(1), execute.Time(3), 2.0, 1.0},
					{execute.Time(1), execute.Time(3), 3.0, 1.0},
					{execute.Time(1), execute.Time(3), 5.0, 1.0},
					{execute.Time(1), execute.Time(3), 8.0, 2.0},
					{execute.Time(1), execute.Time(3), 13.0, 3.0},
					{execute.Time(1), execute.Time(3), 21.0, 4.0},
					{execute.Time(1), execute.Time(3), 34.0, 8.0},
					{execute.Time(1), execute.Time(3), 55.0, 10.0},
				},
			}},
		},
	}
	for _, tc := range testCases {
		tc := tc // capture range variable for the subtest closure
		t.Run(tc.name, func(t *testing.T) {
			executetest.ProcessTestHelper(
				t,
				tc.data,
				tc.want,
				nil,
				func(d execute.Dataset, c execute.TableBuilderCache) execute.Transformation {
					return functions.NewHistogramTransformation(d, c, tc.spec)
				},
			)
		})
	}
}

View File

@ -914,6 +914,16 @@ func ToStringArray(a values.Array) ([]string, error) {
})
return strs, nil
}
// ToFloatArray converts a values.Array whose element type is float into a
// native []float64 slice. Any other element type yields an error.
func ToFloatArray(a values.Array) ([]float64, error) {
	if elemType := a.Type().ElementType(); elemType != semantic.Float {
		return nil, fmt.Errorf("cannot convert array of %v to an array of floats", elemType)
	}
	floats := make([]float64, a.Len())
	a.Range(func(i int, v values.Value) {
		floats[i] = v.Float()
	})
	return floats, nil
}
// Arguments provides access to the keyword arguments passed to a function.
// semantic.The Get{Type} methods return three values: the typed value of the arg,