package functions

import (
	"fmt"
	"strconv"

	"github.com/influxdata/platform/query"
	"github.com/influxdata/platform/query/execute"
	"github.com/influxdata/platform/query/interpreter"
	"github.com/influxdata/platform/query/plan"
	"github.com/influxdata/platform/query/semantic"
	"github.com/influxdata/platform/query/values"
)

// PivotKind is the name under which the pivot function, operation spec,
// procedure spec and transformation are registered.
const PivotKind = "pivot"

// PivotOpSpec holds the arguments of a pivot call.
type PivotOpSpec struct {
	// RowKey lists the columns whose combined values identify one output row.
	RowKey []string `json:"rowKey"`
	// ColKey lists the columns whose values, joined with "_", become the
	// label of a new output column.
	ColKey []string `json:"colKey"`
	// ValueCol names the column whose values fill the pivoted columns.
	ValueCol string `json:"valueCol"`
}

var pivotSignature = query.DefaultFunctionSignature()

var fromRowsBuiltin = `
// fromRows will access a database and retrieve data aligned into time-aligned tuples, grouped by measurement.
fromRows = (db) => from(db:db) |> pivot(rowKey:["_time"], colKey: ["_field"], valueCol: "_value")
`

func init() {
	pivotSignature.Params["rowKey"] = semantic.Array
	pivotSignature.Params["colKey"] = semantic.Array
	pivotSignature.Params["valueCol"] = semantic.String

	query.RegisterFunction(PivotKind, createPivotOpSpec, pivotSignature)
	query.RegisterBuiltIn("fromRows", fromRowsBuiltin)
	query.RegisterOpSpec(PivotKind, newPivotOp)

	plan.RegisterProcedureSpec(PivotKind, newPivotProcedure, PivotKind)
	execute.RegisterTransformation(PivotKind, createPivotTransformation)
}

// createPivotOpSpec builds a PivotOpSpec from the call arguments. All of
// rowKey, colKey and valueCol are required, and a column name may not appear
// in both rowKey and colKey.
func createPivotOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
	if err := a.AddParentFromArgs(args); err != nil {
		return nil, err
	}

	spec := &PivotOpSpec{}

	array, err := args.GetRequiredArray("rowKey", semantic.String)
	if err != nil {
		return nil, err
	}
	spec.RowKey, err = interpreter.ToStringArray(array)
	if err != nil {
		return nil, err
	}

	array, err = args.GetRequiredArray("colKey", semantic.String)
	if err != nil {
		return nil, err
	}
	spec.ColKey, err = interpreter.ToStringArray(array)
	if err != nil {
		return nil, err
	}

	// Reject any column that is asked to act as both a row key and a column key.
	rowKeys := make(map[string]bool, len(spec.RowKey))
	for _, v := range spec.RowKey {
		rowKeys[v] = true
	}
	for _, v := range spec.ColKey {
		if _, ok := rowKeys[v]; ok {
			return nil, fmt.Errorf("column name found in both rowKey and colKey: %s", v)
		}
	}

	valueCol, err := args.GetRequiredString("valueCol")
	if err != nil {
		return nil, err
	}
	spec.ValueCol = valueCol

	return spec, nil
}

func newPivotOp() query.OperationSpec {
	return new(PivotOpSpec)
}

// Kind returns PivotKind.
func (s *PivotOpSpec) Kind() query.OperationKind {
	return PivotKind
}

// PivotProcedureSpec is the plan-level counterpart of PivotOpSpec and carries
// the same three settings.
type PivotProcedureSpec struct {
	RowKey   []string
	ColKey   []string
	ValueCol string
}

// newPivotProcedure converts a *PivotOpSpec into a *PivotProcedureSpec. It
// returns an error if qs has any other concrete type.
func newPivotProcedure(qs query.OperationSpec, pa plan.Administration) (plan.ProcedureSpec, error) {
	spec, ok := qs.(*PivotOpSpec)
	if !ok {
		return nil, fmt.Errorf("invalid spec type %T", qs)
	}

	p := &PivotProcedureSpec{
		RowKey:   spec.RowKey,
		ColKey:   spec.ColKey,
		ValueCol: spec.ValueCol,
	}
	return p, nil
}

// Kind returns PivotKind.
func (s *PivotProcedureSpec) Kind() plan.ProcedureKind {
	return PivotKind
}

// Copy returns a copy of s whose RowKey and ColKey slices do not share
// backing storage with the original.
func (s *PivotProcedureSpec) Copy() plan.ProcedureSpec {
	ns := new(PivotProcedureSpec)
	ns.RowKey = make([]string, len(s.RowKey))
	copy(ns.RowKey, s.RowKey)
	ns.ColKey = make([]string, len(s.ColKey))
	copy(ns.ColKey, s.ColKey)
	ns.ValueCol = s.ValueCol
	return ns
}

// createPivotTransformation wires a pivot transformation to a new dataset
// backed by a fresh table builder cache. It returns an error if spec is not a
// *PivotProcedureSpec.
func createPivotTransformation(id execute.DatasetID, mode execute.AccumulationMode, spec plan.ProcedureSpec, a execute.Administration) (execute.Transformation, execute.Dataset, error) {
	s, ok := spec.(*PivotProcedureSpec)
	if !ok {
		return nil, nil, fmt.Errorf("invalid spec type %T", spec)
	}
	cache := execute.NewTableBuilderCache(a.Allocator())
	d := execute.NewDataset(id, mode, cache)
	t := NewPivotTransformation(d, cache, s)
	return t, d, nil
}

type pivotTransformation struct {
	d     execute.Dataset
	cache execute.TableBuilderCache
	spec  PivotProcedureSpec

	// For each output table (keyed by the string form of its group key) we
	// keep track of which rows/columns have already been created, and of the
	// index the next new column/row will receive. The counters are kept per
	// output table so that input tables feeding different output tables can
	// arrive interleaved.
	colKeyMaps map[string]map[string]int
	rowKeyMaps map[string]map[string]int
	nextCol    map[string]int
	nextRow    map[string]int
}

// NewPivotTransformation returns a pivot transformation that writes into the
// builders of cache and forwards dataset events to d. The spec is copied by
// value.
func NewPivotTransformation(d execute.Dataset, cache execute.TableBuilderCache, spec *PivotProcedureSpec) *pivotTransformation {
	t := &pivotTransformation{
		d:          d,
		cache:      cache,
		spec:       *spec,
		colKeyMaps: make(map[string]map[string]int),
		rowKeyMaps: make(map[string]map[string]int),
		nextCol:    make(map[string]int),
		nextRow:    make(map[string]int),
	}
	return t
}

// RetractTable forwards the retraction of key to the downstream dataset.
func (t *pivotTransformation) RetractTable(id execute.DatasetID, key query.GroupKey) error {
	return t.d.RetractTable(key)
}

// Process pivots tbl into the output table selected by tbl's group key with
// the colKey and valueCol columns removed. It returns an error if any rowKey,
// colKey or valueCol column is missing from tbl, or if reading tbl fails.
func (t *pivotTransformation) Process(id execute.DatasetID, tbl query.Table) error {
	rowKeyIndex := make(map[string]int)
	for _, v := range t.spec.RowKey {
		idx := execute.ColIdx(v, tbl.Cols())
		if idx < 0 {
			return fmt.Errorf("specified column does not exist in table: %v", v)
		}
		rowKeyIndex[v] = idx
	}

	// different from above because we'll get the column indices below when we
	// determine the initial column schema
	colKeyIndex := make(map[string]int)
	valueColIndex := -1
	var valueColType query.DataType
	for _, v := range t.spec.ColKey {
		colKeyIndex[v] = -1
	}

	cols := make([]query.ColMeta, 0, len(tbl.Cols()))
	keyCols := make([]query.ColMeta, 0, len(tbl.Key().Cols()))
	keyValues := make([]values.Value, 0, len(tbl.Key().Cols()))
	newIDX := 0
	// colMap[i] is the index in tbl of the i-th column carried over to the output.
	colMap := make([]int, len(tbl.Cols()))

	for colIDX, v := range tbl.Cols() {
		if _, ok := colKeyIndex[v.Label]; !ok && v.Label != t.spec.ValueCol {
			// the columns we keep are: group key columns not in the column key and row key columns
			if tbl.Key().HasCol(v.Label) {
				colMap[newIDX] = colIDX
				newIDX++
				keyCols = append(keyCols, tbl.Cols()[colIDX])
				cols = append(cols, tbl.Cols()[colIDX])
				keyValues = append(keyValues, tbl.Key().LabelValue(v.Label))
			} else if _, ok := rowKeyIndex[v.Label]; ok {
				cols = append(cols, tbl.Cols()[colIDX])
				colMap[newIDX] = colIDX
				newIDX++
			}
		} else if v.Label == t.spec.ValueCol {
			valueColIndex = colIDX
			valueColType = tbl.Cols()[colIDX].Type
		} else {
			// we need the location of the colKey columns in the original table
			colKeyIndex[v.Label] = colIDX
		}
	}

	for k, v := range colKeyIndex {
		if v < 0 {
			return fmt.Errorf("specified column does not exist in table: %v", k)
		}
	}
	// Without this check a missing value column would later be used as
	// column index -1 when reading values.
	if valueColIndex < 0 {
		return fmt.Errorf("specified column does not exist in table: %v", t.spec.ValueCol)
	}

	newGroupKey := execute.NewGroupKey(keyCols, keyValues)
	builder, created := t.cache.TableBuilder(newGroupKey)
	groupKeyString := newGroupKey.String()
	if created {
		for _, c := range cols {
			builder.AddCol(c)
		}
		t.colKeyMaps[groupKeyString] = make(map[string]int)
		t.rowKeyMaps[groupKeyString] = make(map[string]int)
		t.nextCol[groupKeyString] = len(cols)
		t.nextRow[groupKeyString] = 0
	}
	colKeyMap := t.colKeyMaps[groupKeyString]
	rowKeyMap := t.rowKeyMaps[groupKeyString]

	// Propagate any error raised while reading the table instead of dropping it.
	return tbl.Do(func(cr query.ColReader) error {
		for row := 0; row < cr.Len(); row++ {
			rowKey := ""
			colKey := ""
			for j, c := range cr.Cols() {
				if _, ok := rowKeyIndex[c.Label]; ok {
					// NOTE(review): row key parts are concatenated without a
					// separator, so ("a","bc") and ("ab","c") map to the same
					// row — confirm whether that is acceptable.
					rowKey += valueToStr(cr, c, row, j)
				} else if _, ok := colKeyIndex[c.Label]; ok {
					if colKey == "" {
						colKey = valueToStr(cr, c, row, j)
					} else {
						colKey = colKey + "_" + valueToStr(cr, c, row, j)
					}
				}
			}

			// we have columns for the copy-over in place;
			// we know the row key;
			// we know the col key;
			// 0. If we've not seen the colKey before, then we need to add a new column and backfill it.
			if _, ok := colKeyMap[colKey]; !ok {
				newCol := query.ColMeta{
					Label: colKey,
					Type:  valueColType,
				}
				nextCol := t.nextCol[groupKeyString]
				builder.AddCol(newCol)
				growColumn(builder, newCol.Type, nextCol, builder.NRows())
				colKeyMap[colKey] = nextCol
				t.nextCol[groupKeyString] = nextCol + 1
			}

			// 1. if we've not seen rowKey before, then we need to append a new row, with copied values for the
			// existing columns, as well as zero values for the pivoted columns.
			if _, ok := rowKeyMap[rowKey]; !ok {
				// rowkey U groupKey cols
				for cidx, c := range cols {
					appendBuilderValue(cr, builder, c.Type, row, colMap[cidx], cidx)
				}

				// zero-out the known key columns we've already discovered.
				// NOTE(review): this uses the current table's value type for
				// every pivoted column, including ones created from earlier
				// tables — confirm all inputs share one value type.
				for _, v := range colKeyMap {
					growColumn(builder, valueColType, v, 1)
				}
				nextRow := t.nextRow[groupKeyString]
				rowKeyMap[rowKey] = nextRow
				t.nextRow[groupKeyString] = nextRow + 1
			}

			// at this point, we've created, added and back-filled all the columns we know about
			// if we found a new row key, we added a new row with zeroes set for all the value columns
			// so in all cases we know the row exists, and the column exists. we need to grab the
			// value from valueCol and assign it to its pivoted position.
			setBuilderValue(cr, builder, valueColType, row, valueColIndex, rowKeyMap[rowKey], colKeyMap[colKey])
		}
		return nil
	})
}

// growColumn extends column colIdx of builder by nRows entries using the
// Grow* method that matches colType. It panics on an unknown type.
func growColumn(builder execute.TableBuilder, colType query.DataType, colIdx, nRows int) {
	switch colType {
	case query.TBool:
		builder.GrowBools(colIdx, nRows)
	case query.TInt:
		builder.GrowInts(colIdx, nRows)
	case query.TUInt:
		builder.GrowUInts(colIdx, nRows)
	case query.TFloat:
		builder.GrowFloats(colIdx, nRows)
	case query.TString:
		builder.GrowStrings(colIdx, nRows)
	case query.TTime:
		builder.GrowTimes(colIdx, nRows)
	default:
		execute.PanicUnknownType(colType)
	}
}

// setBuilderValue copies the value at (readerRowIndex, readerColIndex) of cr
// into cell (builderRow, builderCol) of builder, using the accessor and setter
// that match readerColType. It panics on an unknown type.
func setBuilderValue(cr query.ColReader, builder execute.TableBuilder, readerColType query.DataType, readerRowIndex, readerColIndex, builderRow, builderCol int) {
	switch readerColType {
	case query.TBool:
		builder.SetBool(builderRow, builderCol, cr.Bools(readerColIndex)[readerRowIndex])
	case query.TInt:
		builder.SetInt(builderRow, builderCol, cr.Ints(readerColIndex)[readerRowIndex])
	case query.TUInt:
		builder.SetUInt(builderRow, builderCol, cr.UInts(readerColIndex)[readerRowIndex])
	case query.TFloat:
		builder.SetFloat(builderRow, builderCol, cr.Floats(readerColIndex)[readerRowIndex])
	case query.TString:
		builder.SetString(builderRow, builderCol, cr.Strings(readerColIndex)[readerRowIndex])
	case query.TTime:
		builder.SetTime(builderRow, builderCol, cr.Times(readerColIndex)[readerRowIndex])
	default:
		execute.PanicUnknownType(readerColType)
	}
}

// appendBuilderValue appends the value at (readerRowIndex, readerColIndex) of
// cr to column builderColIndex of builder, using the accessor and appender
// that match readerColType. It panics on an unknown type.
func appendBuilderValue(cr query.ColReader, builder execute.TableBuilder, readerColType query.DataType, readerRowIndex, readerColIndex, builderColIndex int) {
	switch readerColType {
	case query.TBool:
		builder.AppendBool(builderColIndex, cr.Bools(readerColIndex)[readerRowIndex])
	case query.TInt:
		builder.AppendInt(builderColIndex, cr.Ints(readerColIndex)[readerRowIndex])
	case query.TUInt:
		builder.AppendUInt(builderColIndex, cr.UInts(readerColIndex)[readerRowIndex])
	case query.TFloat:
		builder.AppendFloat(builderColIndex, cr.Floats(readerColIndex)[readerRowIndex])
	case query.TString:
		builder.AppendString(builderColIndex, cr.Strings(readerColIndex)[readerRowIndex])
	case query.TTime:
		builder.AppendTime(builderColIndex, cr.Times(readerColIndex)[readerRowIndex])
	default:
		execute.PanicUnknownType(readerColType)
	}
}

// valueToStr renders the value at (row, col) of cr as a string according to
// c.Type. Floats use strconv's 'E' format with the shortest exact precision;
// integers are base 10. It panics on an unknown type.
func valueToStr(cr query.ColReader, c query.ColMeta, row, col int) string {
	switch c.Type {
	case query.TBool:
		return strconv.FormatBool(cr.Bools(col)[row])
	case query.TInt:
		return strconv.FormatInt(cr.Ints(col)[row], 10)
	case query.TUInt:
		return strconv.FormatUint(cr.UInts(col)[row], 10)
	case query.TFloat:
		return strconv.FormatFloat(cr.Floats(col)[row], 'E', -1, 64)
	case query.TString:
		return cr.Strings(col)[row]
	case query.TTime:
		return cr.Times(col)[row].String()
	default:
		execute.PanicUnknownType(c.Type)
	}
	return ""
}

// UpdateWatermark forwards the watermark to the downstream dataset.
func (t *pivotTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
	return t.d.UpdateWatermark(mark)
}

// UpdateProcessingTime forwards the processing time to the downstream dataset.
func (t *pivotTransformation) UpdateProcessingTime(id execute.DatasetID, pt execute.Time) error {
	return t.d.UpdateProcessingTime(pt)
}

// Finish forwards completion, and any error, to the downstream dataset.
func (t *pivotTransformation) Finish(id execute.DatasetID, err error) {
	t.d.Finish(err)
}