influxdb/query/functions/percentile.go

470 lines
13 KiB
Go

package functions
import (
"fmt"
"math"
"sort"
"github.com/influxdata/platform/query"
"github.com/influxdata/platform/query/execute"
"github.com/influxdata/platform/query/plan"
"github.com/influxdata/platform/query/semantic"
"github.com/influxdata/tdigest"
"github.com/pkg/errors"
)
const PercentileKind = "percentile"
const ExactPercentileAggKind = "exact-percentile-aggregate"
const ExactPercentileSelectKind = "exact-percentile-selector"
const (
methodEstimateTdigest = "estimate_tdigest"
methodExactMean = "exact_mean"
methodExactSelector = "exact_selector"
)
type PercentileOpSpec struct {
Percentile float64 `json:"percentile"`
Compression float64 `json:"compression"`
Method string `json:"method"`
execute.AggregateConfig
}
var percentileSignature = query.DefaultFunctionSignature()
func init() {
percentileSignature.Params["p"] = semantic.Float
query.RegisterFunction(PercentileKind, createPercentileOpSpec, percentileSignature)
query.RegisterBuiltIn("median", medianBuiltin)
query.RegisterOpSpec(PercentileKind, newPercentileOp)
plan.RegisterProcedureSpec(PercentileKind, newPercentileProcedure, PercentileKind)
execute.RegisterTransformation(PercentileKind, createPercentileTransformation)
execute.RegisterTransformation(ExactPercentileAggKind, createExactPercentileAggTransformation)
execute.RegisterTransformation(ExactPercentileSelectKind, createExactPercentileSelectTransformation)
}
var medianBuiltin = `
// median returns the 50th percentile.
// By default an approximate percentile is computed, this can be disabled by passing exact:true.
// Using the exact method requires that the entire data set can fit in memory.
median = (method="estimate_tdigest", compression=0.0, table=<-) => percentile(table:table, p:0.5, method:method, compression:compression)
`
func createPercentileOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
spec := new(PercentileOpSpec)
p, err := args.GetRequiredFloat("p")
if err != nil {
return nil, err
}
spec.Percentile = p
if spec.Percentile < 0 || spec.Percentile > 1 {
return nil, errors.New("percentile must be between 0 and 1.")
}
if m, ok, err := args.GetString("method"); err != nil {
return nil, err
} else if ok {
spec.Method = m
}
if c, ok, err := args.GetFloat("compression"); err != nil {
return nil, err
} else if ok {
spec.Compression = c
}
if spec.Compression > 0 && spec.Method != methodEstimateTdigest {
return nil, errors.New("compression parameter is only valid for method estimate_tdigest.")
}
// Set default Compression if not exact
if spec.Method == methodEstimateTdigest && spec.Compression == 0 {
spec.Compression = 1000
}
if err := spec.AggregateConfig.ReadArgs(args); err != nil {
return nil, err
}
return spec, nil
}
func newPercentileOp() query.OperationSpec {
return new(PercentileOpSpec)
}
func (s *PercentileOpSpec) Kind() query.OperationKind {
return PercentileKind
}
type TDigestPercentileProcedureSpec struct {
Percentile float64 `json:"percentile"`
Compression float64 `json:"compression"`
execute.AggregateConfig
}
func (s *TDigestPercentileProcedureSpec) Kind() plan.ProcedureKind {
return PercentileKind
}
func (s *TDigestPercentileProcedureSpec) Copy() plan.ProcedureSpec {
return &TDigestPercentileProcedureSpec{
Percentile: s.Percentile,
Compression: s.Compression,
AggregateConfig: s.AggregateConfig,
}
}
type ExactPercentileAggProcedureSpec struct {
Percentile float64 `json:"percentile"`
execute.AggregateConfig
}
func (s *ExactPercentileAggProcedureSpec) Kind() plan.ProcedureKind {
return ExactPercentileAggKind
}
func (s *ExactPercentileAggProcedureSpec) Copy() plan.ProcedureSpec {
return &ExactPercentileAggProcedureSpec{Percentile: s.Percentile, AggregateConfig: s.AggregateConfig}
}
type ExactPercentileSelectProcedureSpec struct {
Percentile float64 `json:"percentile"`
execute.SelectorConfig
}
func (s *ExactPercentileSelectProcedureSpec) Kind() plan.ProcedureKind {
return ExactPercentileSelectKind
}
func (s *ExactPercentileSelectProcedureSpec) Copy() plan.ProcedureSpec {
return &ExactPercentileSelectProcedureSpec{Percentile: s.Percentile}
}
func newPercentileProcedure(qs query.OperationSpec, a plan.Administration) (plan.ProcedureSpec, error) {
spec, ok := qs.(*PercentileOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
switch spec.Method {
case methodExactMean:
return &ExactPercentileAggProcedureSpec{
Percentile: spec.Percentile,
AggregateConfig: spec.AggregateConfig,
}, nil
case methodExactSelector:
return &ExactPercentileSelectProcedureSpec{
Percentile: spec.Percentile,
}, nil
case methodEstimateTdigest:
fallthrough
default:
// default to estimated percentile
return &TDigestPercentileProcedureSpec{
Percentile: spec.Percentile,
Compression: spec.Compression,
AggregateConfig: spec.AggregateConfig,
}, nil
}
}
type PercentileAgg struct {
Quantile,
Compression float64
digest *tdigest.TDigest
}
func createPercentileTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
ps, ok := spec.(*TDigestPercentileProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", ps)
}
agg := &PercentileAgg{
Quantile: ps.Percentile,
Compression: ps.Compression,
}
t, d := execute.NewAggregateTransformationAndDataset(id, mode, agg, ps.AggregateConfig, a.Allocator())
return t, d, nil
}
func (a *PercentileAgg) Copy() *PercentileAgg {
na := new(PercentileAgg)
*na = *a
na.digest = tdigest.NewWithCompression(na.Compression)
return na
}
func (a *PercentileAgg) NewBoolAgg() execute.DoBoolAgg {
return nil
}
func (a *PercentileAgg) NewIntAgg() execute.DoIntAgg {
return nil
}
func (a *PercentileAgg) NewUIntAgg() execute.DoUIntAgg {
return nil
}
func (a *PercentileAgg) NewFloatAgg() execute.DoFloatAgg {
return a.Copy()
}
func (a *PercentileAgg) NewStringAgg() execute.DoStringAgg {
return nil
}
func (a *PercentileAgg) DoFloat(vs []float64) {
for _, v := range vs {
a.digest.Add(v, 1)
}
}
func (a *PercentileAgg) Type() query.DataType {
return query.TFloat
}
func (a *PercentileAgg) ValueFloat() float64 {
return a.digest.Quantile(a.Quantile)
}
type ExactPercentileAgg struct {
Quantile float64
data []float64
}
func createExactPercentileAggTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
ps, ok := spec.(*ExactPercentileAggProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", ps)
}
agg := &ExactPercentileAgg{
Quantile: ps.Percentile,
}
t, d := execute.NewAggregateTransformationAndDataset(id, mode, agg, ps.AggregateConfig, a.Allocator())
return t, d, nil
}
func (a *ExactPercentileAgg) Copy() *ExactPercentileAgg {
na := new(ExactPercentileAgg)
*na = *a
na.data = nil
return na
}
func (a *ExactPercentileAgg) NewBoolAgg() execute.DoBoolAgg {
return nil
}
func (a *ExactPercentileAgg) NewIntAgg() execute.DoIntAgg {
return nil
}
func (a *ExactPercentileAgg) NewUIntAgg() execute.DoUIntAgg {
return nil
}
func (a *ExactPercentileAgg) NewFloatAgg() execute.DoFloatAgg {
return a.Copy()
}
func (a *ExactPercentileAgg) NewStringAgg() execute.DoStringAgg {
return nil
}
func (a *ExactPercentileAgg) DoFloat(vs []float64) {
a.data = append(a.data, vs...)
}
func (a *ExactPercentileAgg) Type() query.DataType {
return query.TFloat
}
func (a *ExactPercentileAgg) ValueFloat() float64 {
sort.Float64s(a.data)
x := a.Quantile * float64(len(a.data)-1)
x0 := math.Floor(x)
x1 := math.Ceil(x)
if x0 == x1 {
return a.data[int(x0)]
}
// Linear interpolate
y0 := a.data[int(x0)]
y1 := a.data[int(x1)]
y := y0*(x1-x) + y1*(x-x0)
return y
}
type floatRowPair struct {
row execute.Row
Value float64
}
type exactPercentileFloatSorter []floatRowPair
func (rows exactPercentileFloatSorter) Len() int { return len(rows) }
func (rows exactPercentileFloatSorter) Swap(i, j int) { rows[i], rows[j] = rows[j], rows[i] }
func (rows exactPercentileFloatSorter) Less(i, j int) bool { return rows[i].Value < rows[j].Value }
type intRowPair struct {
row execute.Row
Value int64
}
type exactPercentileIntSorter []intRowPair
func (rows exactPercentileIntSorter) Len() int { return len(rows) }
func (rows exactPercentileIntSorter) Swap(i, j int) { rows[i], rows[j] = rows[j], rows[i] }
func (rows exactPercentileIntSorter) Less(i, j int) bool { return rows[i].Value < rows[j].Value }
type uintRowPair struct {
row execute.Row
Value uint64
}
type exactPercentileUintSorter []uintRowPair
func (rows exactPercentileUintSorter) Len() int { return len(rows) }
func (rows exactPercentileUintSorter) Swap(i, j int) { rows[i], rows[j] = rows[j], rows[i] }
func (rows exactPercentileUintSorter) Less(i, j int) bool { return rows[i].Value < rows[j].Value }
type boolRowPair struct {
row execute.Row
Value bool
}
type exactPercentileBoolSorter []boolRowPair
func toInt(b bool) int {
if b {
return 1
}
return 0
}
func (rows exactPercentileBoolSorter) Len() int { return len(rows) }
func (rows exactPercentileBoolSorter) Swap(i, j int) { rows[i], rows[j] = rows[j], rows[i] }
func (rows exactPercentileBoolSorter) Less(i, j int) bool {
return toInt(rows[i].Value) < toInt(rows[j].Value)
}
type ExactPercentileSelector struct {
Quantile float64
}
type ExactPercentileFloatSelector struct {
ExactPercentileSelector
rows exactPercentileFloatSorter
}
type ExactPercentileIntSelector struct {
ExactPercentileSelector
rows exactPercentileIntSorter
}
type ExactPercentileUintSelector struct {
ExactPercentileSelector
rows exactPercentileUintSorter
}
type ExactPercentileBoolSelector struct {
ExactPercentileSelector
rows exactPercentileBoolSorter
}
func createExactPercentileSelectTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
ps, ok := spec.(*ExactPercentileSelectProcedureSpec)
if !ok {
return nil, nil, fmt.Errorf("invalid spec type %T", ps)
}
sel := &ExactPercentileSelector{
Quantile: ps.Percentile,
}
t, d := execute.NewRowSelectorTransformationAndDataset(id, mode, sel, ps.SelectorConfig, a.Allocator())
return t, d, nil
}
func (s *ExactPercentileSelector) NewBoolSelector() execute.DoBoolRowSelector {
return &ExactPercentileBoolSelector{ExactPercentileSelector: ExactPercentileSelector{Quantile: s.Quantile}}
}
func (s *ExactPercentileSelector) NewIntSelector() execute.DoIntRowSelector {
return &ExactPercentileIntSelector{ExactPercentileSelector: ExactPercentileSelector{Quantile: s.Quantile}}
}
func (s *ExactPercentileSelector) NewUIntSelector() execute.DoUIntRowSelector {
return &ExactPercentileUintSelector{ExactPercentileSelector: ExactPercentileSelector{Quantile: s.Quantile}}
}
func (s *ExactPercentileSelector) NewFloatSelector() execute.DoFloatRowSelector {
return &ExactPercentileFloatSelector{ExactPercentileSelector: ExactPercentileSelector{Quantile: s.Quantile}}
}
func (s *ExactPercentileSelector) NewStringSelector() execute.DoStringRowSelector {
return nil
}
func getQuantileIndex(quantile float64, len int) int {
x := quantile * float64(len)
index := int(math.Ceil(x))
if index > 0 {
index--
}
return index
}
func (s *ExactPercentileFloatSelector) Rows() []execute.Row {
sort.Sort(s.rows)
index := getQuantileIndex(s.Quantile, len(s.rows))
return []execute.Row{s.rows[index].row}
}
func (s *ExactPercentileIntSelector) Rows() []execute.Row {
sort.Sort(s.rows)
index := getQuantileIndex(s.Quantile, len(s.rows))
return []execute.Row{s.rows[index].row}
}
func (s *ExactPercentileUintSelector) Rows() []execute.Row {
sort.Sort(s.rows)
index := getQuantileIndex(s.Quantile, len(s.rows))
return []execute.Row{s.rows[index].row}
}
func (s *ExactPercentileBoolSelector) Rows() []execute.Row {
sort.Sort(s.rows)
index := getQuantileIndex(s.Quantile, len(s.rows))
return []execute.Row{s.rows[index].row}
}
func (s *ExactPercentileFloatSelector) DoFloat(vs []float64, cr query.ColReader) {
for i, v := range vs {
s.rows = append(s.rows, floatRowPair{execute.ReadRow(i, cr), v})
}
}
func (s *ExactPercentileBoolSelector) DoBool(vs []bool, cr query.ColReader) {
for i, v := range vs {
s.rows = append(s.rows, boolRowPair{execute.ReadRow(i, cr), v})
}
}
func (s *ExactPercentileIntSelector) DoInt(vs []int64, cr query.ColReader) {
for i, v := range vs {
s.rows = append(s.rows, intRowPair{execute.ReadRow(i, cr), v})
}
}
func (s *ExactPercentileUintSelector) DoUInt(vs []uint64, cr query.ColReader) {
for i, v := range vs {
s.rows = append(s.rows, uintRowPair{execute.ReadRow(i, cr), v})
}
}