influxdb/query/functions/pivot.go

388 lines
11 KiB
Go

package functions
import (
"fmt"
"strconv"
"github.com/influxdata/platform/query"
"github.com/influxdata/platform/query/execute"
"github.com/influxdata/platform/query/interpreter"
"github.com/influxdata/platform/query/plan"
"github.com/influxdata/platform/query/semantic"
"github.com/influxdata/platform/query/values"
)
// PivotKind is the kind name under which the pivot function, operation spec,
// procedure spec and transformation are registered in init.
const PivotKind = "pivot"
// PivotOpSpec holds the arguments of a pivot call as parsed by
// createPivotOpSpec. createPivotOpSpec rejects specs in which a column name
// appears in both RowKey and ColKey.
type PivotOpSpec struct {
// RowKey lists the columns whose values together identify an output row.
RowKey []string `json:"rowKey"`
// ColKey lists the columns whose values together name a pivoted output column.
ColKey []string `json:"colKey"`
// ValueCol names the column whose values fill the pivoted cells.
ValueCol string `json:"valueCol"`
}
// pivotSignature starts from the default function signature; init adds the
// rowKey, colKey and valueCol parameters before registering it.
var pivotSignature = query.DefaultFunctionSignature()
// fromRowsBuiltin is the query-language source registered in init as the
// "fromRows" builtin. It defines fromRows(db) as from(db:db) piped into pivot
// with rowKey ["_time"], colKey ["_field"] and valueCol "_value".
var fromRowsBuiltin = `
// fromRows will access a database and retrieve data aligned into time-aligned tuples, grouped by measurement.
fromRows = (db) => from(db:db) |> pivot(rowKey:["_time"], colKey: ["_field"], valueCol: "_value")
`
// init declares the parameter types of the pivot signature and registers the
// pivot function, the fromRows builtin, the operation spec constructor, the
// procedure spec constructor and the transformation constructor.
func init() {
// rowKey and colKey are array parameters; valueCol is a string parameter.
pivotSignature.Params["rowKey"] = semantic.Array
pivotSignature.Params["colKey"] = semantic.Array
pivotSignature.Params["valueCol"] = semantic.String
query.RegisterFunction(PivotKind, createPivotOpSpec, pivotSignature)
query.RegisterBuiltIn("fromRows", fromRowsBuiltin)
query.RegisterOpSpec(PivotKind, newPivotOp)
plan.RegisterProcedureSpec(PivotKind, newPivotProcedure, PivotKind)
execute.RegisterTransformation(PivotKind, createPivotTransformation)
}
// createPivotOpSpec builds a PivotOpSpec from the arguments of a pivot call.
//
// It first attaches the parent operation taken from args, then reads the
// required "rowKey" and "colKey" string arrays and the required "valueCol"
// string. An error is returned if any argument is missing or malformed, or if
// the same column name is listed in both rowKey and colKey.
func createPivotOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
	if err := a.AddParentFromArgs(args); err != nil {
		return nil, err
	}

	rowKeyArg, err := args.GetRequiredArray("rowKey", semantic.String)
	if err != nil {
		return nil, err
	}
	rowKey, err := interpreter.ToStringArray(rowKeyArg)
	if err != nil {
		return nil, err
	}

	colKeyArg, err := args.GetRequiredArray("colKey", semantic.String)
	if err != nil {
		return nil, err
	}
	colKey, err := interpreter.ToStringArray(colKeyArg)
	if err != nil {
		return nil, err
	}

	// A column cannot identify both a row and a pivoted column.
	inRowKey := make(map[string]bool)
	for _, name := range rowKey {
		inRowKey[name] = true
	}
	for _, name := range colKey {
		if inRowKey[name] {
			return nil, fmt.Errorf("column name found in both rowKey and colKey: %s", name)
		}
	}

	valueCol, err := args.GetRequiredString("valueCol")
	if err != nil {
		return nil, err
	}

	return &PivotOpSpec{
		RowKey:   rowKey,
		ColKey:   colKey,
		ValueCol: valueCol,
	}, nil
}
// newPivotOp returns an empty PivotOpSpec. It is the constructor registered
// for PivotKind via query.RegisterOpSpec in init.
func newPivotOp() query.OperationSpec {
	return &PivotOpSpec{}
}
// Kind reports PivotKind as the operation kind of this spec.
func (s *PivotOpSpec) Kind() query.OperationKind {
return PivotKind
}
// PivotProcedureSpec is the planner-level description of a pivot. Its fields
// are filled from the PivotOpSpec fields of the same names by
// newPivotProcedure.
type PivotProcedureSpec struct {
// RowKey lists the columns whose values together identify an output row.
RowKey []string
// ColKey lists the columns whose values together name a pivoted output column.
ColKey []string
// ValueCol names the column whose values fill the pivoted cells.
ValueCol string
}
// newPivotProcedure converts a *PivotOpSpec into a *PivotProcedureSpec with
// the same row key, column key and value column. The key slices are shared
// with the operation spec, not copied. It returns an error if qs is not a
// *PivotOpSpec.
func newPivotProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
	opSpec, isPivot := qs.(*PivotOpSpec)
	if !isPivot {
		return nil, fmt.Errorf("invalid spec type %T", qs)
	}

	return &PivotProcedureSpec{
		RowKey:   opSpec.RowKey,
		ColKey:   opSpec.ColKey,
		ValueCol: opSpec.ValueCol,
	}, nil
}
// Kind reports PivotKind as the procedure kind of this spec.
func (s *PivotProcedureSpec) Kind() plan.ProcedureKind {
return PivotKind
}
// Copy returns a new PivotProcedureSpec whose RowKey and ColKey slices are
// freshly allocated copies, so changes to the copy do not affect s.
func (s *PivotProcedureSpec) Copy() plan.ProcedureSpec {
	rowKey := make([]string, len(s.RowKey))
	copy(rowKey, s.RowKey)

	colKey := make([]string, len(s.ColKey))
	copy(colKey, s.ColKey)

	return &PivotProcedureSpec{
		RowKey:   rowKey,
		ColKey:   colKey,
		ValueCol: s.ValueCol,
	}
}
// createPivotTransformation builds the pivot transformation and its output
// dataset for the given procedure spec. The dataset and the transformation
// share one table builder cache allocated from a.Allocator(). It returns an
// error if spec is not a *PivotProcedureSpec.
func createPivotTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
	pivotSpec, isPivot := spec.(*PivotProcedureSpec)
	if !isPivot {
		return nil, nil, fmt.Errorf("invalid spec type %T", spec)
	}

	builderCache := execute.NewTableBuilderCache(a.Allocator())
	dataset := execute.NewDataset(id, mode, builderCache)
	transformation := NewPivotTransformation(dataset, builderCache, pivotSpec)
	return transformation, dataset, nil
}
// pivotTransformation pivots incoming tables into builders obtained from
// cache and forwards retractions, watermarks, processing times and the finish
// signal to the dataset d.
type pivotTransformation struct {
// d is the downstream dataset.
d execute.Dataset
// cache supplies one table builder per output group key.
cache execute.TableBuilderCache
// spec is a copy of the procedure spec passed to NewPivotTransformation.
spec PivotProcedureSpec
// for each table, we need to store a map to keep track of which rows/columns have already been created.
// Both maps are keyed first by the output group key's string form, then by
// the row/column key string; the value is the row/column index in the builder.
colKeyMaps map[string]map[string]int
rowKeyMaps map[string]map[string]int
// nextCol and nextRow are the indices handed to the next newly seen column
// key and row key in Process. They are single counters on the
// transformation, not stored per group key.
nextCol int
nextRow int
}
// NewPivotTransformation returns a pivot transformation that writes into the
// builders of cache and reports to d. The spec is copied by value, and the
// per-group-key bookkeeping maps start out empty.
func NewPivotTransformation(d execute.Dataset, cache execute.TableBuilderCache, spec *PivotProcedureSpec) *pivotTransformation {
	return &pivotTransformation{
		d:          d,
		cache:      cache,
		spec:       *spec,
		colKeyMaps: map[string]map[string]int{},
		rowKeyMaps: map[string]map[string]int{},
	}
}
// RetractTable forwards the retraction of the table with the given group key
// to the downstream dataset. The id argument is not used.
func (t *pivotTransformation) RetractTable(id execute.DatasetID, key query.GroupKey) error {
return t.d.RetractTable(key)
}
// Process pivots one input table into the output builder that corresponds to
// its reduced group key.
//
// The output keeps the input's group key columns (except colKey columns and
// the value column) plus the rowKey columns. Every distinct combination of
// colKey values becomes a new output column, labelled with those values
// joined by "_", with the type of the value column. Every distinct
// combination of rowKey values becomes one output row. The value column's
// value of each input row is stored at the cell addressed by that row's row
// key and column key.
//
// Process returns an error if a rowKey column, a colKey column or the value
// column is missing from tbl, or if reading tbl fails. The id argument is not
// used.
func (t *pivotTransformation) Process(id execute.DatasetID, tbl query.Table) error {
	rowKeyIndex := make(map[string]int, len(t.spec.RowKey))
	for _, v := range t.spec.RowKey {
		idx := execute.ColIdx(v, tbl.Cols())
		if idx < 0 {
			return fmt.Errorf("specified column does not exist in table: %v", v)
		}
		rowKeyIndex[v] = idx
	}

	// Unlike the row key columns, the colKey column indices are discovered
	// below while the output schema is determined; -1 marks "not found yet".
	colKeyIndex := make(map[string]int, len(t.spec.ColKey))
	for _, v := range t.spec.ColKey {
		colKeyIndex[v] = -1
	}
	valueColIndex := -1
	var valueColType query.DataType

	cols := make([]query.ColMeta, 0, len(tbl.Cols()))
	keyCols := make([]query.ColMeta, 0, len(tbl.Key().Cols()))
	keyValues := make([]values.Value, 0, len(tbl.Key().Cols()))
	// colMap[i] is the input column index that feeds output column i.
	colMap := make([]int, len(tbl.Cols()))
	newIDX := 0
	for colIDX, v := range tbl.Cols() {
		if _, ok := colKeyIndex[v.Label]; !ok && v.Label != t.spec.ValueCol {
			// The columns we keep are: group key columns not in the column
			// key, and row key columns.
			if tbl.Key().HasCol(v.Label) {
				colMap[newIDX] = colIDX
				newIDX++
				keyCols = append(keyCols, tbl.Cols()[colIDX])
				cols = append(cols, tbl.Cols()[colIDX])
				keyValues = append(keyValues, tbl.Key().LabelValue(v.Label))
			} else if _, ok := rowKeyIndex[v.Label]; ok {
				cols = append(cols, tbl.Cols()[colIDX])
				colMap[newIDX] = colIDX
				newIDX++
			}
		} else if v.Label == t.spec.ValueCol {
			valueColIndex = colIDX
			valueColType = tbl.Cols()[colIDX].Type
		} else {
			// We need the location of the colKey columns in the input table.
			colKeyIndex[v.Label] = colIDX
		}
	}

	for k, v := range colKeyIndex {
		if v < 0 {
			return fmt.Errorf("specified column does not exist in table: %v", k)
		}
	}
	// Without this check a missing value column would reach the builder
	// helpers with index -1 and an unset column type.
	if valueColIndex < 0 {
		return fmt.Errorf("specified column does not exist in table: %v", t.spec.ValueCol)
	}

	newGroupKey := execute.NewGroupKey(keyCols, keyValues)
	builder, created := t.cache.TableBuilder(newGroupKey)
	groupKeyString := newGroupKey.String()
	if created {
		for _, c := range cols {
			builder.AddCol(c)
		}
		t.colKeyMaps[groupKeyString] = make(map[string]int)
		t.rowKeyMaps[groupKeyString] = make(map[string]int)
	}
	colKeys := t.colKeyMaps[groupKeyString]
	rowKeys := t.rowKeyMaps[groupKeyString]

	// nextCol/nextRow are shared by all builders, so re-derive them for this
	// builder on every call. Pivoted columns are appended after the len(cols)
	// carried-over columns and rows are appended from 0, which makes the next
	// free indices a function of the keys already recorded. Without this,
	// interleaved tables of different group keys would reuse each other's
	// counters.
	t.nextCol = len(cols) + len(colKeys)
	t.nextRow = len(rowKeys)

	// Propagate read errors instead of dropping them.
	return tbl.Do(func(cr query.ColReader) error {
		for row := 0; row < cr.Len(); row++ {
			// NOTE(review): row key parts are concatenated without a
			// separator, so different value tuples can yield the same key
			// (e.g. "1"+"23" and "12"+"3") — confirm for multi-column row keys.
			rowKey := ""
			colKey := ""
			for j, c := range cr.Cols() {
				if _, ok := rowKeyIndex[c.Label]; ok {
					rowKey += valueToStr(cr, c, row, j)
				} else if _, ok := colKeyIndex[c.Label]; ok {
					if colKey == "" {
						colKey = valueToStr(cr, c, row, j)
					} else {
						colKey = colKey + "_" + valueToStr(cr, c, row, j)
					}
				}
			}

			// 0. A column key seen for the first time gets a new column,
			// back-filled for all rows that already exist.
			colIdx, ok := colKeys[colKey]
			if !ok {
				newCol := query.ColMeta{
					Label: colKey,
					Type:  valueColType,
				}
				builder.AddCol(newCol)
				growColumn(builder, newCol.Type, t.nextCol, builder.NRows())
				colIdx = t.nextCol
				colKeys[colKey] = colIdx
				t.nextCol++
			}

			// 1. A row key seen for the first time gets a new row: copied
			// values for the carried-over columns and one grown slot in each
			// pivoted column discovered so far.
			rowIdx, ok := rowKeys[rowKey]
			if !ok {
				for cidx, c := range cols {
					appendBuilderValue(cr, builder, c.Type, row, colMap[cidx], cidx)
				}
				for _, v := range colKeys {
					growColumn(builder, valueColType, v, 1)
				}
				rowIdx = t.nextRow
				rowKeys[rowKey] = rowIdx
				t.nextRow++
			}

			// Both the row and the column exist now, so store the value
			// column's value at its pivoted position.
			setBuilderValue(cr, builder, valueColType, row, valueColIndex, rowIdx, colIdx)
		}
		return nil
	})
}
// growColumn grows column colIdx of builder by nRows entries, calling the
// Grow method that matches colType. It panics via execute.PanicUnknownType
// for any other column type.
func growColumn(builder execute.TableBuilder, colType query.DataType, colIdx, nRows int) {
	switch colType {
	case query.TBool:
		builder.GrowBools(colIdx, nRows)

	case query.TInt:
		builder.GrowInts(colIdx, nRows)

	case query.TUInt:
		builder.GrowUInts(colIdx, nRows)

	case query.TFloat:
		builder.GrowFloats(colIdx, nRows)

	case query.TString:
		builder.GrowStrings(colIdx, nRows)

	case query.TTime:
		builder.GrowTimes(colIdx, nRows)

	default:
		execute.PanicUnknownType(colType)
	}
}
// setBuilderValue copies one value from the reader into an existing builder
// cell. The value is read from column readerColIndex, row readerRowIndex of cr
// and written to (builderRow, builderCol). readerColType selects the typed
// accessor on both sides. It panics via execute.PanicUnknownType for any
// other column type.
func setBuilderValue(cr query.ColReader, builder execute.TableBuilder, readerColType query.DataType, readerRowIndex, readerColIndex, builderRow, builderCol int) {
	switch readerColType {
	case query.TBool:
		val := cr.Bools(readerColIndex)[readerRowIndex]
		builder.SetBool(builderRow, builderCol, val)
	case query.TInt:
		val := cr.Ints(readerColIndex)[readerRowIndex]
		builder.SetInt(builderRow, builderCol, val)
	case query.TUInt:
		val := cr.UInts(readerColIndex)[readerRowIndex]
		builder.SetUInt(builderRow, builderCol, val)
	case query.TFloat:
		val := cr.Floats(readerColIndex)[readerRowIndex]
		builder.SetFloat(builderRow, builderCol, val)
	case query.TString:
		val := cr.Strings(readerColIndex)[readerRowIndex]
		builder.SetString(builderRow, builderCol, val)
	case query.TTime:
		val := cr.Times(readerColIndex)[readerRowIndex]
		builder.SetTime(builderRow, builderCol, val)
	default:
		execute.PanicUnknownType(readerColType)
	}
}
// appendBuilderValue appends one value from the reader to a builder column.
// The value is read from column readerColIndex, row readerRowIndex of cr and
// appended to column builderColIndex. readerColType selects the typed
// accessor on both sides. It panics via execute.PanicUnknownType for any
// other column type.
func appendBuilderValue(cr query.ColReader, builder execute.TableBuilder, readerColType query.DataType, readerRowIndex, readerColIndex, builderColIndex int) {
	switch readerColType {
	case query.TBool:
		val := cr.Bools(readerColIndex)[readerRowIndex]
		builder.AppendBool(builderColIndex, val)
	case query.TInt:
		val := cr.Ints(readerColIndex)[readerRowIndex]
		builder.AppendInt(builderColIndex, val)
	case query.TUInt:
		val := cr.UInts(readerColIndex)[readerRowIndex]
		builder.AppendUInt(builderColIndex, val)
	case query.TFloat:
		val := cr.Floats(readerColIndex)[readerRowIndex]
		builder.AppendFloat(builderColIndex, val)
	case query.TString:
		val := cr.Strings(readerColIndex)[readerRowIndex]
		builder.AppendString(builderColIndex, val)
	case query.TTime:
		val := cr.Times(readerColIndex)[readerRowIndex]
		builder.AppendTime(builderColIndex, val)
	default:
		execute.PanicUnknownType(readerColType)
	}
}
// valueToStr renders the value at (row, col) of cr as a string, using c.Type
// to choose the typed accessor. Integers are printed in base 10, floats with
// strconv's 'E' format at the shortest precision that round-trips a float64,
// and times via their String method. It panics via execute.PanicUnknownType
// for any other column type.
func valueToStr(cr query.ColReader, c query.ColMeta, row, col int) string {
	var text string
	switch c.Type {
	case query.TBool:
		text = strconv.FormatBool(cr.Bools(col)[row])
	case query.TInt:
		text = strconv.FormatInt(cr.Ints(col)[row], 10)
	case query.TUInt:
		text = strconv.FormatUint(cr.UInts(col)[row], 10)
	case query.TFloat:
		text = strconv.FormatFloat(cr.Floats(col)[row], 'E', -1, 64)
	case query.TString:
		text = cr.Strings(col)[row]
	case query.TTime:
		text = cr.Times(col)[row].String()
	default:
		execute.PanicUnknownType(c.Type)
	}
	return text
}
// UpdateWatermark forwards the watermark to the downstream dataset. The id
// argument is not used.
func (t *pivotTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
return t.d.UpdateWatermark(mark)
}
// UpdateProcessingTime forwards the processing time to the downstream
// dataset. The id argument is not used.
func (t *pivotTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
return t.d.UpdateProcessingTime(pt)
}
// Finish passes err on to the downstream dataset's Finish. The id argument is
// not used.
func (t *pivotTransformation) Finish(id execute.DatasetID, err error) {
t.d.Finish(err)
}