Merged pull request #738 from influxdata/nc-histogram
feat: Add histogram function for computing bucket based histograms
commit
35002fa90f
|
|
@ -1404,6 +1404,41 @@ from(bucket:"telegraf/autogen")
|
|||
r.service == "app-server")
|
||||
```
|
||||
|
||||
#### Histogram
|
||||
|
||||
Histogram approximates the cumulative distribution function of a dataset by counting data frequencies for a list of buckets.
|
||||
A bucket is defined by an upper bound where all data points that are less than or equal to the bound are counted in the bucket.
|
||||
The bucket counts are cumulative.
|
||||
|
||||
Each input table is converted into a single output table representing a single histogram.
|
||||
The output table will have the same group key as the input table.
|
||||
The columns not part of the group key will be removed and an upper bound column and a count column will be added.
|
||||
|
||||
Histogram has the following properties:
|
||||
|
||||
* `column` string
|
||||
Column is the name of a column containing the input data values.
|
||||
The column type must be float.
|
||||
Defaults to `_value`.
|
||||
* `upperBoundColumn` string
|
||||
UpperBoundColumn is the name of the column in which to store the histogram upper bounds.
|
||||
Defaults to `le`.
|
||||
* `countColumn` string
|
||||
CountColumn is the name of the column in which to store the histogram counts.
|
||||
Defaults to `_value`.
|
||||
* `buckets` array of floats
|
||||
Buckets is a list of upper bounds to use when computing the histogram frequencies.
|
||||
Buckets should contain a bucket whose bound is the maximum value of the data set, this value can be set to positive infinity if no maximum is known.
|
||||
* `normalize` bool
|
||||
Normalize when true will convert the counts into frequency values between 0 and 1.
|
||||
Normalized histograms cannot be aggregated by summing their counts.
|
||||
Defaults to `false`.
|
||||
|
||||
|
||||
Example:
|
||||
|
||||
histogram(buckets:linearBuckets(start:0.0,width:10.0,count:10)) // compute the histogram of the data using 10 buckets from 0,10,20,...,100
|
||||
|
||||
#### HistogramQuantile
|
||||
|
||||
HistogramQuantile approximates a quantile given a histogram that approximates the cumulative distribution of the dataset.
|
||||
|
|
@ -1432,6 +1467,7 @@ HistogramQuantile has the following properties:
|
|||
* `upperBoundColumn` string
|
||||
UpperBoundColumn is the name of the column containing the histogram upper bounds.
|
||||
The upper bound column type must be float.
|
||||
Defaults to `le`.
|
||||
* `valueColumn` string
|
||||
ValueColumn is the name of the output column which will contain the computed quantile.
|
||||
Defaults to `_value`.
|
||||
|
|
@ -1444,7 +1480,39 @@ HistogramQuantile has the following properties:
|
|||
|
||||
Example:
|
||||
|
||||
histogramQuantile(quantile:0.9, upperBoundColumn:"le") // compute the 90th quantile using histogram data.
|
||||
histogramQuantile(quantile:0.9) // compute the 90th quantile using histogram data.
|
||||
|
||||
#### LinearBuckets
|
||||
|
||||
LinearBuckets produces a list of linearly separated floats.
|
||||
|
||||
LinearBuckets has the following properties:
|
||||
|
||||
* `start` float
|
||||
Start is the first value in the returned list.
|
||||
* `width` float
|
||||
Width is the distance between subsequent bucket values.
|
||||
* `count` int
|
||||
Count is the number of buckets to create.
|
||||
* `infinity` bool
|
||||
Infinity when true adds an additional bucket with a value of positive infinity.
|
||||
Defaults to `true`.
|
||||
|
||||
#### LogarithmicBuckets
|
||||
|
||||
LogarithmicBuckets produces a list of exponentially separated floats.
|
||||
|
||||
LogarithmicBuckets has the following properties:
|
||||
|
||||
* `start` float
|
||||
Start is the first value in the returned bucket list.
|
||||
* `factor` float
|
||||
Factor is the multiplier applied to each subsequent bucket.
|
||||
* `count` int
|
||||
Count is the number of buckets to create.
|
||||
* `infinity` bool
|
||||
Infinity when true adds an additional bucket with a value of positive infinity.
|
||||
Defaults to `true`.
|
||||
|
||||
#### Limit
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,464 @@
|
|||
package functions
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"regexp"
|
||||
"sort"
|
||||
|
||||
"github.com/influxdata/platform/query"
|
||||
"github.com/influxdata/platform/query/execute"
|
||||
"github.com/influxdata/platform/query/interpreter"
|
||||
"github.com/influxdata/platform/query/plan"
|
||||
"github.com/influxdata/platform/query/semantic"
|
||||
"github.com/influxdata/platform/query/values"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
const HistogramKind = "histogram"
|
||||
|
||||
// HistogramOpSpec is the operation spec for the histogram function.
// It holds the user-supplied arguments parsed by createHistogramOpSpec.
type HistogramOpSpec struct {
	Column           string    `json:"column"`           // input column holding the float values to bucket
	UpperBoundColumn string    `json:"upperBoundColumn"` // output column for bucket upper bounds
	CountColumn      string    `json:"countColumn"`      // output column for cumulative counts
	Buckets          []float64 `json:"buckets"`          // list of bucket upper bounds
	Normalize        bool      `json:"normalize"`        // when true, counts are divided by the total row count
}
|
||||
|
||||
var histogramSignature = execute.DefaultAggregateSignature()
|
||||
|
||||
func init() {
|
||||
histogramSignature.Params["column"] = semantic.String
|
||||
histogramSignature.Params["upperBoundColumn"] = semantic.String
|
||||
histogramSignature.Params["buckets"] = semantic.NewArrayType(semantic.Float)
|
||||
histogramSignature.Params["normalize"] = semantic.Bool
|
||||
|
||||
query.RegisterFunction(HistogramKind, createHistogramOpSpec, histogramSignature)
|
||||
query.RegisterBuiltInValue("linearBuckets", linearBuckets{})
|
||||
query.RegisterBuiltInValue("logarithmicBuckets", logarithmicBuckets{})
|
||||
query.RegisterOpSpec(HistogramKind, newHistogramOp)
|
||||
plan.RegisterProcedureSpec(HistogramKind, newHistogramProcedure, HistogramKind)
|
||||
execute.RegisterTransformation(HistogramKind, createHistogramTransformation)
|
||||
}
|
||||
|
||||
func createHistogramOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
|
||||
if err := a.AddParentFromArgs(args); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
spec := new(HistogramOpSpec)
|
||||
|
||||
if col, ok, err := args.GetString("column"); err != nil {
|
||||
return nil, err
|
||||
} else if ok {
|
||||
spec.Column = col
|
||||
} else {
|
||||
spec.Column = execute.DefaultValueColLabel
|
||||
}
|
||||
if col, ok, err := args.GetString("upperBoundColumn"); err != nil {
|
||||
return nil, err
|
||||
} else if ok {
|
||||
spec.UpperBoundColumn = col
|
||||
} else {
|
||||
spec.UpperBoundColumn = DefaultUpperBoundColumnLabel
|
||||
}
|
||||
if col, ok, err := args.GetString("countColumn"); err != nil {
|
||||
return nil, err
|
||||
} else if ok {
|
||||
spec.CountColumn = col
|
||||
} else {
|
||||
spec.CountColumn = execute.DefaultValueColLabel
|
||||
}
|
||||
bucketsArry, err := args.GetRequiredArray("buckets", semantic.Float)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
spec.Buckets, err = interpreter.ToFloatArray(bucketsArry)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if normalize, ok, err := args.GetBool("normalize"); err != nil {
|
||||
return nil, err
|
||||
} else if ok {
|
||||
spec.Normalize = normalize
|
||||
}
|
||||
|
||||
return spec, nil
|
||||
}
|
||||
|
||||
func newHistogramOp() query.OperationSpec {
|
||||
return new(HistogramOpSpec)
|
||||
}
|
||||
|
||||
func (s *HistogramOpSpec) Kind() query.OperationKind {
|
||||
return HistogramKind
|
||||
}
|
||||
|
||||
type HistogramProcedureSpec struct {
|
||||
HistogramOpSpec
|
||||
}
|
||||
|
||||
func newHistogramProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
|
||||
spec, ok := qs.(*HistogramOpSpec)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid spec type %T", qs)
|
||||
}
|
||||
|
||||
return &HistogramProcedureSpec{
|
||||
HistogramOpSpec: *spec,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *HistogramProcedureSpec) Kind() plan.ProcedureKind {
|
||||
return HistogramKind
|
||||
}
|
||||
func (s *HistogramProcedureSpec) Copy() plan.ProcedureSpec {
|
||||
ns := new(HistogramProcedureSpec)
|
||||
*ns = *s
|
||||
if len(s.Buckets) > 0 {
|
||||
ns.Buckets = make([]float64, len(s.Buckets))
|
||||
copy(ns.Buckets, s.Buckets)
|
||||
}
|
||||
return ns
|
||||
}
|
||||
|
||||
func createHistogramTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
|
||||
s, ok := spec.(*HistogramProcedureSpec)
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("invalid spec type %T", spec)
|
||||
}
|
||||
cache := execute.NewTableBuilderCache(a.Allocator())
|
||||
d := execute.NewDataset(id, mode, cache)
|
||||
t := NewHistogramTransformation(d, cache, s)
|
||||
return t, d, nil
|
||||
}
|
||||
|
||||
// histogramTransformation computes one cumulative histogram per input table.
type histogramTransformation struct {
	d     execute.Dataset           // downstream dataset receiving results
	cache execute.TableBuilderCache // builders keyed by group key

	spec HistogramProcedureSpec // copy of the procedure spec (buckets sorted)
}

// NewHistogramTransformation constructs a histogram transformation.
// NOTE(review): Buckets is sorted in place, mutating the caller's spec
// slice — confirm no caller relies on the original bucket ordering.
func NewHistogramTransformation(d execute.Dataset, cache execute.TableBuilderCache, spec *HistogramProcedureSpec) *histogramTransformation {
	sort.Float64s(spec.Buckets)
	return &histogramTransformation{
		d:     d,
		cache: cache,
		spec:  *spec,
	}
}

// RetractTable delegates retraction of a group key to the downstream dataset.
func (t *histogramTransformation) RetractTable(id execute.DatasetID, key query.GroupKey) error {
	return t.d.RetractTable(key)
}
|
||||
|
||||
func (t *histogramTransformation) Process(id execute.DatasetID, tbl query.Table) error {
|
||||
builder, created := t.cache.TableBuilder(tbl.Key())
|
||||
if !created {
|
||||
return fmt.Errorf("histogram found duplicate table with key: %v", tbl.Key())
|
||||
}
|
||||
valueIdx := execute.ColIdx(t.spec.Column, tbl.Cols())
|
||||
if valueIdx < 0 {
|
||||
return fmt.Errorf("column %q is missing", t.spec.Column)
|
||||
}
|
||||
if col := tbl.Cols()[valueIdx]; col.Type != query.TFloat {
|
||||
return fmt.Errorf("column %q must be a float got %v", t.spec.Column, col.Type)
|
||||
}
|
||||
|
||||
execute.AddTableKeyCols(tbl.Key(), builder)
|
||||
boundIdx := builder.AddCol(query.ColMeta{
|
||||
Label: t.spec.UpperBoundColumn,
|
||||
Type: query.TFloat,
|
||||
})
|
||||
countIdx := builder.AddCol(query.ColMeta{
|
||||
Label: t.spec.CountColumn,
|
||||
Type: query.TFloat,
|
||||
})
|
||||
totalRows := 0.0
|
||||
counts := make([]float64, len(t.spec.Buckets))
|
||||
err := tbl.Do(func(cr query.ColReader) error {
|
||||
totalRows += float64(cr.Len())
|
||||
for _, v := range cr.Floats(valueIdx) {
|
||||
idx := sort.Search(len(t.spec.Buckets), func(i int) bool {
|
||||
return v <= t.spec.Buckets[i]
|
||||
})
|
||||
if idx >= len(t.spec.Buckets) {
|
||||
// Greater than highest bucket, or not found
|
||||
return fmt.Errorf("found value greater than any bucket, %d %d %f %f", idx, len(t.spec.Buckets), v, t.spec.Buckets[len(t.spec.Buckets)-1])
|
||||
}
|
||||
// Increment counter
|
||||
counts[idx]++
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Add records making counts cumulative
|
||||
total := 0.0
|
||||
for i, v := range counts {
|
||||
execute.AppendKeyValues(tbl.Key(), builder)
|
||||
count := v + total
|
||||
if t.spec.Normalize {
|
||||
count /= totalRows
|
||||
}
|
||||
builder.AppendFloat(countIdx, count)
|
||||
builder.AppendFloat(boundIdx, t.spec.Buckets[i])
|
||||
total += v
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateWatermark forwards the watermark to the downstream dataset.
func (t *histogramTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
	return t.d.UpdateWatermark(mark)
}

// UpdateProcessingTime forwards the processing time to the downstream dataset.
func (t *histogramTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
	return t.d.UpdateProcessingTime(pt)
}

// Finish propagates completion (and any error) to the downstream dataset.
func (t *histogramTransformation) Finish(id execute.DatasetID, err error) {
	t.d.Finish(err)
}
|
||||
|
||||
// linearBuckets is a helper function for creating buckets spaced linearly
|
||||
type linearBuckets struct{}
|
||||
|
||||
func (b linearBuckets) Type() semantic.Type {
|
||||
return semantic.NewFunctionType(semantic.FunctionSignature{
|
||||
Params: map[string]semantic.Type{
|
||||
"start": semantic.Float,
|
||||
"width": semantic.Float,
|
||||
"count": semantic.Int,
|
||||
"infinity": semantic.Bool,
|
||||
},
|
||||
ReturnType: semantic.String,
|
||||
})
|
||||
}
|
||||
|
||||
// The following methods implement values.Value for linearBuckets. Only
// Function returns a meaningful value; every other kind accessor panics,
// the convention used for built-in function values.

func (b linearBuckets) Str() string {
	panic(values.UnexpectedKind(semantic.String, semantic.Function))
}

func (b linearBuckets) Int() int64 {
	panic(values.UnexpectedKind(semantic.Int, semantic.Function))
}

func (b linearBuckets) UInt() uint64 {
	panic(values.UnexpectedKind(semantic.UInt, semantic.Function))
}

func (b linearBuckets) Float() float64 {
	panic(values.UnexpectedKind(semantic.Float, semantic.Function))
}

func (b linearBuckets) Bool() bool {
	panic(values.UnexpectedKind(semantic.Bool, semantic.Function))
}

func (b linearBuckets) Time() values.Time {
	panic(values.UnexpectedKind(semantic.Time, semantic.Function))
}

func (b linearBuckets) Duration() values.Duration {
	panic(values.UnexpectedKind(semantic.Duration, semantic.Function))
}

func (b linearBuckets) Regexp() *regexp.Regexp {
	panic(values.UnexpectedKind(semantic.Regexp, semantic.Function))
}

func (b linearBuckets) Array() values.Array {
	panic(values.UnexpectedKind(semantic.Array, semantic.Function))
}

func (b linearBuckets) Object() values.Object {
	panic(values.UnexpectedKind(semantic.Object, semantic.Function))
}

// Function returns the receiver, which itself implements values.Function.
func (b linearBuckets) Function() values.Function {
	return b
}

// Equal reports whether rhs is also the linearBuckets built-in.
func (b linearBuckets) Equal(rhs values.Value) bool {
	if b.Type() != rhs.Type() {
		return false
	}
	_, ok := rhs.(linearBuckets)
	return ok
}

// HasSideEffect reports that calling linearBuckets has no side effects.
func (b linearBuckets) HasSideEffect() bool {
	return false
}
|
||||
|
||||
func (b linearBuckets) Call(args values.Object) (values.Value, error) {
|
||||
startV, ok := args.Get("start")
|
||||
if !ok {
|
||||
return nil, errors.New("start is required")
|
||||
}
|
||||
if startV.Type() != semantic.Float {
|
||||
return nil, errors.New("start must be a float")
|
||||
}
|
||||
widthV, ok := args.Get("width")
|
||||
if !ok {
|
||||
return nil, errors.New("width is required")
|
||||
}
|
||||
if widthV.Type() != semantic.Float {
|
||||
return nil, errors.New("width must be a float")
|
||||
}
|
||||
countV, ok := args.Get("count")
|
||||
if !ok {
|
||||
return nil, errors.New("count is required")
|
||||
}
|
||||
if countV.Type() != semantic.Int {
|
||||
return nil, errors.New("count must be an int")
|
||||
}
|
||||
infV, ok := args.Get("infinity")
|
||||
if !ok {
|
||||
infV = values.NewBoolValue(true)
|
||||
}
|
||||
if infV.Type() != semantic.Bool {
|
||||
return nil, errors.New("infinity must be a bool")
|
||||
}
|
||||
start := startV.Float()
|
||||
width := widthV.Float()
|
||||
count := countV.Int()
|
||||
inf := infV.Bool()
|
||||
l := int(count)
|
||||
if inf {
|
||||
l++
|
||||
}
|
||||
elements := make([]values.Value, l)
|
||||
bound := start
|
||||
for i := 0; i < l; i++ {
|
||||
elements[i] = values.NewFloatValue(bound)
|
||||
bound += width
|
||||
}
|
||||
if inf {
|
||||
elements[l-1] = values.NewFloatValue(math.Inf(1))
|
||||
}
|
||||
counts := values.NewArrayWithBacking(semantic.Float, elements)
|
||||
return counts, nil
|
||||
}
|
||||
|
||||
// logarithmicBuckets is a helper function for creating buckets spaced by a logarithmic factor.
|
||||
type logarithmicBuckets struct{}
|
||||
|
||||
func (b logarithmicBuckets) Type() semantic.Type {
|
||||
return semantic.NewFunctionType(semantic.FunctionSignature{
|
||||
Params: map[string]semantic.Type{
|
||||
"start": semantic.Float,
|
||||
"factor": semantic.Float,
|
||||
"count": semantic.Int,
|
||||
"infinity": semantic.Bool,
|
||||
},
|
||||
ReturnType: semantic.String,
|
||||
})
|
||||
}
|
||||
|
||||
// The following methods implement values.Value for logarithmicBuckets. Only
// Function returns a meaningful value; every other kind accessor panics,
// the convention used for built-in function values.

func (b logarithmicBuckets) Str() string {
	panic(values.UnexpectedKind(semantic.String, semantic.Function))
}

func (b logarithmicBuckets) Int() int64 {
	panic(values.UnexpectedKind(semantic.Int, semantic.Function))
}

func (b logarithmicBuckets) UInt() uint64 {
	panic(values.UnexpectedKind(semantic.UInt, semantic.Function))
}

func (b logarithmicBuckets) Float() float64 {
	panic(values.UnexpectedKind(semantic.Float, semantic.Function))
}

func (b logarithmicBuckets) Bool() bool {
	panic(values.UnexpectedKind(semantic.Bool, semantic.Function))
}

func (b logarithmicBuckets) Time() values.Time {
	panic(values.UnexpectedKind(semantic.Time, semantic.Function))
}

func (b logarithmicBuckets) Duration() values.Duration {
	panic(values.UnexpectedKind(semantic.Duration, semantic.Function))
}

func (b logarithmicBuckets) Regexp() *regexp.Regexp {
	panic(values.UnexpectedKind(semantic.Regexp, semantic.Function))
}

func (b logarithmicBuckets) Array() values.Array {
	panic(values.UnexpectedKind(semantic.Array, semantic.Function))
}

func (b logarithmicBuckets) Object() values.Object {
	panic(values.UnexpectedKind(semantic.Object, semantic.Function))
}

// Function returns the receiver, which itself implements values.Function.
func (b logarithmicBuckets) Function() values.Function {
	return b
}

// Equal reports whether rhs is also the logarithmicBuckets built-in.
func (b logarithmicBuckets) Equal(rhs values.Value) bool {
	if b.Type() != rhs.Type() {
		return false
	}
	_, ok := rhs.(logarithmicBuckets)
	return ok
}

// HasSideEffect reports that calling logarithmicBuckets has no side effects.
func (b logarithmicBuckets) HasSideEffect() bool {
	return false
}
|
||||
|
||||
func (b logarithmicBuckets) Call(args values.Object) (values.Value, error) {
|
||||
startV, ok := args.Get("start")
|
||||
if !ok {
|
||||
return nil, errors.New("start is required")
|
||||
}
|
||||
if startV.Type() != semantic.Float {
|
||||
return nil, errors.New("start must be a float")
|
||||
}
|
||||
factorV, ok := args.Get("factor")
|
||||
if !ok {
|
||||
return nil, errors.New("factor is required")
|
||||
}
|
||||
if factorV.Type() != semantic.Float {
|
||||
return nil, errors.New("factor must be a float")
|
||||
}
|
||||
countV, ok := args.Get("count")
|
||||
if !ok {
|
||||
return nil, errors.New("count is required")
|
||||
}
|
||||
if countV.Type() != semantic.Int {
|
||||
return nil, errors.New("count must be an int")
|
||||
}
|
||||
infV, ok := args.Get("infinity")
|
||||
if !ok {
|
||||
infV = values.NewBoolValue(true)
|
||||
}
|
||||
if infV.Type() != semantic.Bool {
|
||||
return nil, errors.New("infinity must be a bool")
|
||||
}
|
||||
start := startV.Float()
|
||||
factor := factorV.Float()
|
||||
count := countV.Int()
|
||||
inf := infV.Bool()
|
||||
l := int(count)
|
||||
if inf {
|
||||
l++
|
||||
}
|
||||
elements := make([]values.Value, l)
|
||||
bound := start
|
||||
for i := 0; i < l; i++ {
|
||||
elements[i] = values.NewFloatValue(bound)
|
||||
bound *= factor
|
||||
}
|
||||
if inf {
|
||||
elements[l-1] = values.NewFloatValue(math.Inf(1))
|
||||
}
|
||||
counts := values.NewArrayWithBacking(semantic.Float, elements)
|
||||
return counts, nil
|
||||
}
|
||||
|
|
@ -14,6 +14,8 @@ import (
|
|||
|
||||
const HistogramQuantileKind = "histogramQuantile"
|
||||
|
||||
const DefaultUpperBoundColumnLabel = "le"
|
||||
|
||||
type HistogramQuantileOpSpec struct {
|
||||
Quantile float64 `json:"quantile"`
|
||||
CountColumn string `json:"countColumn"`
|
||||
|
|
@ -60,6 +62,8 @@ func createHistogramQuantileOpSpec(args query.Arguments, a *query.Administration
|
|||
return nil, err
|
||||
} else if ok {
|
||||
s.UpperBoundColumn = col
|
||||
} else {
|
||||
s.UpperBoundColumn = DefaultUpperBoundColumnLabel
|
||||
}
|
||||
|
||||
if col, ok, err := args.GetString("valueColumn"); err != nil {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,345 @@
|
|||
package functions_test
|
||||
|
||||
import (
|
||||
"math"
|
||||
"testing"
|
||||
|
||||
"github.com/influxdata/platform/query"
|
||||
"github.com/influxdata/platform/query/execute"
|
||||
"github.com/influxdata/platform/query/execute/executetest"
|
||||
"github.com/influxdata/platform/query/functions"
|
||||
"github.com/influxdata/platform/query/querytest"
|
||||
)
|
||||
|
||||
// TestHistogramOperation_Marshaling verifies that a histogram operation
// round-trips through its JSON encoding.
func TestHistogramOperation_Marshaling(t *testing.T) {
	data := []byte(`{"id":"histogram","kind":"histogram","spec":{"column":"_value"}}`)
	op := &query.Operation{
		ID: "histogram",
		Spec: &functions.HistogramOpSpec{
			Column: "_value",
		},
	}
	querytest.OperationMarshalingTestHelper(t, data, op)
}
|
||||
|
||||
// TestHistogram_PassThrough checks that the histogram transformation forwards
// watermarks, processing time, and finish signals to its dataset.
func TestHistogram_PassThrough(t *testing.T) {
	executetest.TransformationPassThroughTestHelper(t, func(d execute.Dataset, c execute.TableBuilderCache) execute.Transformation {
		s := functions.NewHistogramTransformation(
			d,
			c,
			&functions.HistogramProcedureSpec{},
		)
		return s
	})
}
|
||||
|
||||
func TestHistogram_Process(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
spec *functions.HistogramProcedureSpec
|
||||
data []query.Table
|
||||
want []*executetest.Table
|
||||
}{
|
||||
{
|
||||
name: "linear",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{0, 10, 20, 30, 40},
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 0.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 10.0, 2.0},
|
||||
{execute.Time(1), execute.Time(3), 20.0, 4.0},
|
||||
{execute.Time(1), execute.Time(3), 30.0, 7.0},
|
||||
{execute.Time(1), execute.Time(3), 40.0, 10.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "linear+infinity",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{0, 10, 20, 30, 40, math.Inf(1)},
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 68.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 0.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 10.0, 2.0},
|
||||
{execute.Time(1), execute.Time(3), 20.0, 4.0},
|
||||
{execute.Time(1), execute.Time(3), 30.0, 7.0},
|
||||
{execute.Time(1), execute.Time(3), 40.0, 10.0},
|
||||
{execute.Time(1), execute.Time(3), math.Inf(1), 11.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "linear+normalize",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{0, 10, 20, 30, 40},
|
||||
Normalize: true,
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 0.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 10.0, 0.2},
|
||||
{execute.Time(1), execute.Time(3), 20.0, 0.4},
|
||||
{execute.Time(1), execute.Time(3), 30.0, 0.7},
|
||||
{execute.Time(1), execute.Time(3), 40.0, 1.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "logarithmic",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{1, 2, 4, 8, 16, 32, 64},
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 1.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 2.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 4.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 8.0, 2.0},
|
||||
{execute.Time(1), execute.Time(3), 16.0, 3.0},
|
||||
{execute.Time(1), execute.Time(3), 32.0, 8.0},
|
||||
{execute.Time(1), execute.Time(3), 64.0, 10.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "logarithmic unsorted",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{1, 64, 2, 4, 16, 8, 32},
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 1.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 2.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 4.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 8.0, 2.0},
|
||||
{execute.Time(1), execute.Time(3), 16.0, 3.0},
|
||||
{execute.Time(1), execute.Time(3), 32.0, 8.0},
|
||||
{execute.Time(1), execute.Time(3), 64.0, 10.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
{
|
||||
name: "fibonacci",
|
||||
spec: &functions.HistogramProcedureSpec{HistogramOpSpec: functions.HistogramOpSpec{
|
||||
Column: "_value",
|
||||
UpperBoundColumn: "le",
|
||||
CountColumn: "_value",
|
||||
Buckets: []float64{1, 2, 3, 5, 8, 13, 21, 34, 55},
|
||||
}},
|
||||
data: []query.Table{&executetest.Table{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "_time", Type: query.TTime},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), execute.Time(1), 02.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 31.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 12.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 38.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 24.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 40.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 30.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 28.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 17.0},
|
||||
{execute.Time(1), execute.Time(3), execute.Time(2), 08.0},
|
||||
},
|
||||
}},
|
||||
want: []*executetest.Table{{
|
||||
KeyCols: []string{"_start", "_stop"},
|
||||
ColMeta: []query.ColMeta{
|
||||
{Label: "_start", Type: query.TTime},
|
||||
{Label: "_stop", Type: query.TTime},
|
||||
{Label: "le", Type: query.TFloat},
|
||||
{Label: "_value", Type: query.TFloat},
|
||||
},
|
||||
Data: [][]interface{}{
|
||||
{execute.Time(1), execute.Time(3), 1.0, 0.0},
|
||||
{execute.Time(1), execute.Time(3), 2.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 3.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 5.0, 1.0},
|
||||
{execute.Time(1), execute.Time(3), 8.0, 2.0},
|
||||
{execute.Time(1), execute.Time(3), 13.0, 3.0},
|
||||
{execute.Time(1), execute.Time(3), 21.0, 4.0},
|
||||
{execute.Time(1), execute.Time(3), 34.0, 8.0},
|
||||
{execute.Time(1), execute.Time(3), 55.0, 10.0},
|
||||
},
|
||||
}},
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
tc := tc
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
executetest.ProcessTestHelper(
|
||||
t,
|
||||
tc.data,
|
||||
tc.want,
|
||||
nil,
|
||||
func(d execute.Dataset, c execute.TableBuilderCache) execute.Transformation {
|
||||
return functions.NewHistogramTransformation(d, c, tc.spec)
|
||||
},
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -914,6 +914,16 @@ func ToStringArray(a values.Array) ([]string, error) {
|
|||
})
|
||||
return strs, nil
|
||||
}
|
||||
func ToFloatArray(a values.Array) ([]float64, error) {
|
||||
if a.Type().ElementType() != semantic.Float {
|
||||
return nil, fmt.Errorf("cannot convert array of %v to an array of floats", a.Type().ElementType())
|
||||
}
|
||||
vs := make([]float64, a.Len())
|
||||
a.Range(func(i int, v values.Value) {
|
||||
vs[i] = v.Float()
|
||||
})
|
||||
return vs, nil
|
||||
}
|
||||
|
||||
// Arguments provides access to the keyword arguments passed to a function.
|
||||
// The Get{Type} methods return three values: the typed value of the arg,
|
||||
|
|
|
|||
Loading…
Reference in New Issue