Add Duplicate function (#575)

* Add duplicate schema mutation
pull/10616/head
Adam Perlin 2018-08-17 12:12:00 -07:00 committed by GitHub
parent ce6195150a
commit 96d11c3d9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 289 additions and 19 deletions

View File

@ -1145,6 +1145,26 @@ Additionally exactly two columns must be provided to the `columns` property.
Count is an aggregate operation. Count is an aggregate operation.
For each aggregated column, it outputs the number of non null records as an integer. For each aggregated column, it outputs the number of non null records as an integer.
#### Duplicate
Duplicate will duplicate a specified column in a table
Duplicate has the following properties:
* `column` string
The column to duplicate
* `as` string
The name that should be assigned to the duplicate column
Example usage:
Duplicate column `server` under the name `host`:
```
from(db: "telegraf")
|> range(start:-5m)
|> filter(fn: (r) => r._measurement == "cpu")
|> duplicate(column: "host", as: "server")
```
##### Integral ##### Integral
Integral is an aggregate operation. Integral is an aggregate operation.

View File

@ -15,6 +15,7 @@ import (
const RenameKind = "rename" const RenameKind = "rename"
const DropKind = "drop" const DropKind = "drop"
const KeepKind = "keep" const KeepKind = "keep"
const DuplicateKind = "duplicate"
type RenameOpSpec struct { type RenameOpSpec struct {
Cols map[string]string `json:"columns"` Cols map[string]string `json:"columns"`
@ -31,6 +32,11 @@ type KeepOpSpec struct {
Predicate *semantic.FunctionExpression `json:"fn"` Predicate *semantic.FunctionExpression `json:"fn"`
} }
type DuplicateOpSpec struct {
Col string `json:"columns"`
As string `json:"as"`
}
// The base kind for SchemaMutations // The base kind for SchemaMutations
const SchemaMutationKind = "SchemaMutation" const SchemaMutationKind = "SchemaMutation"
@ -95,6 +101,15 @@ var Registrars = []MutationRegistrar{
Create: createKeepOpSpec, Create: createKeepOpSpec,
New: newKeepOp, New: newKeepOp,
}, },
{
Kind: DuplicateKind,
Args: map[string]semantic.Type{
"column": semantic.String,
"as": semantic.String,
},
Create: createDuplicateOpSpec,
New: newDuplicateOp,
},
} }
func init() { func init() {
@ -257,6 +272,27 @@ func createKeepOpSpec(args query.Arguments, a *query.Administration) (query.Oper
}, nil }, nil
} }
func createDuplicateOpSpec(args query.Arguments, a *query.Administration) (query.OperationSpec, error) {
if err := a.AddParentFromArgs(args); err != nil {
return nil, err
}
col, err := args.GetRequiredString("column")
if err != nil {
return nil, err
}
newName, err := args.GetRequiredString("as")
if err != nil {
return nil, err
}
return &DuplicateOpSpec{
Col: col,
As: newName,
}, nil
}
func newRenameOp() query.OperationSpec { func newRenameOp() query.OperationSpec {
return new(RenameOpSpec) return new(RenameOpSpec)
} }
@ -281,6 +317,14 @@ func (s *KeepOpSpec) Kind() query.OperationKind {
return KeepKind return KeepKind
} }
func newDuplicateOp() query.OperationSpec {
return new(DuplicateOpSpec)
}
func (s *DuplicateOpSpec) Kind() query.OperationKind {
return DuplicateKind
}
func (s *RenameOpSpec) Copy() SchemaMutation { func (s *RenameOpSpec) Copy() SchemaMutation {
newCols := make(map[string]string, len(s.Cols)) newCols := make(map[string]string, len(s.Cols))
for k, v := range s.Cols { for k, v := range s.Cols {
@ -317,6 +361,13 @@ func (s *KeepOpSpec) Copy() SchemaMutation {
} }
} }
func (s *DuplicateOpSpec) Copy() SchemaMutation {
return &DuplicateOpSpec{
Col: s.Col,
As: s.As,
}
}
func (s *RenameOpSpec) Mutator() (SchemaMutator, error) { func (s *RenameOpSpec) Mutator() (SchemaMutator, error) {
m, err := NewRenameMutator(s) m, err := NewRenameMutator(s)
if err != nil { if err != nil {
@ -341,6 +392,14 @@ func (s *KeepOpSpec) Mutator() (SchemaMutator, error) {
return m, nil return m, nil
} }
func (s *DuplicateOpSpec) Mutator() (SchemaMutator, error) {
m, err := NewDuplicateMutator(s)
if err != nil {
return nil, err
}
return m, nil
}
type SchemaMutationProcedureSpec struct { type SchemaMutationProcedureSpec struct {
Mutations []SchemaMutation Mutations []SchemaMutation
} }

View File

@ -17,7 +17,7 @@ import (
"github.com/influxdata/platform/query/querytest" "github.com/influxdata/platform/query/querytest"
) )
func TestDropRenameKeep_NewQueries(t *testing.T) { func TestSchemaMutions_NewQueries(t *testing.T) {
tests := []querytest.NewQueryTestCase{ tests := []querytest.NewQueryTestCase{
{ {
Name: "test rename query", Name: "test rename query",
@ -111,6 +111,37 @@ func TestDropRenameKeep_NewQueries(t *testing.T) {
}, },
}, },
}, },
{
Name: "test duplicate query",
Raw: `from(db:"mydb") |> duplicate(column: "col1", as: "col1_new") |> sum()`,
Want: &query.Spec{
Operations: []*query.Operation{
{
ID: "from0",
Spec: &functions.FromOpSpec{
Database: "mydb",
},
},
{
ID: "duplicate1",
Spec: &functions.DuplicateOpSpec{
Col: "col1",
As: "col1_new",
},
},
{
ID: "sum2",
Spec: &functions.SumOpSpec{
AggregateConfig: execute.DefaultAggregateConfig,
},
},
},
Edges: []query.Edge{
{Parent: "from0", Child: "duplicate1"},
{Parent: "duplicate1", Child: "sum2"},
},
},
},
{ {
Name: "test drop query fn param", Name: "test drop query fn param",
Raw: `from(db:"mydb") |> drop(fn: (col) => col =~ /reg*/) |> sum()`, Raw: `from(db:"mydb") |> drop(fn: (col) => col =~ /reg*/) |> sum()`,
@ -246,7 +277,14 @@ func TestDropRenameKeep_NewQueries(t *testing.T) {
Want: nil, Want: nil,
WantErr: true, WantErr: true,
}, },
{
Name: "test duplicate query invalid",
Raw: `from(db:"mydb") |> duplicate(columns: ["a", "b"], n: -1) |> sum()`,
Want: nil,
WantErr: true,
},
} }
for _, tc := range tests { for _, tc := range tests {
tc := tc tc := tc
t.Run(tc.Name, func(t *testing.T) { t.Run(tc.Name, func(t *testing.T) {
@ -367,6 +405,42 @@ func TestDropRenameKeep_Process(t *testing.T) {
}, },
}}, }},
}, },
{
name: "duplicate single col",
spec: &functions.SchemaMutationProcedureSpec{
Mutations: []functions.SchemaMutation{
&functions.DuplicateOpSpec{
Col: "a",
As: "a_1",
},
},
},
data: []query.Table{&executetest.Table{
ColMeta: []query.ColMeta{
{Label: "a", Type: query.TFloat},
{Label: "b", Type: query.TFloat},
{Label: "c", Type: query.TFloat},
},
Data: [][]interface{}{
{1.0, 2.0, 3.0},
{11.0, 12.0, 13.0},
{21.0, 22.0, 23.0},
},
}},
want: []*executetest.Table{{
ColMeta: []query.ColMeta{
{Label: "a", Type: query.TFloat},
{Label: "a_1", Type: query.TFloat},
{Label: "b", Type: query.TFloat},
{Label: "c", Type: query.TFloat},
},
Data: [][]interface{}{
{1.0, 1.0, 2.0, 3.0},
{11.0, 11.0, 12.0, 13.0},
{21.0, 21.0, 22.0, 23.0},
},
}},
},
{ {
name: "rename map fn (col) => name", name: "rename map fn (col) => name",
spec: &functions.SchemaMutationProcedureSpec{ spec: &functions.SchemaMutationProcedureSpec{
@ -604,6 +678,31 @@ func TestDropRenameKeep_Process(t *testing.T) {
want: []*executetest.Table(nil), want: []*executetest.Table(nil),
wantErr: errors.New(`keep error: column "no_exist" doesn't exist`), wantErr: errors.New(`keep error: column "no_exist" doesn't exist`),
}, },
{
name: "duplicate no exist",
spec: &functions.SchemaMutationProcedureSpec{
Mutations: []functions.SchemaMutation{
&functions.DuplicateOpSpec{
Col: "no_exist",
As: "no_exist_2",
},
},
},
data: []query.Table{&executetest.Table{
ColMeta: []query.ColMeta{
{Label: "server1", Type: query.TFloat},
{Label: "local", Type: query.TFloat},
{Label: "server2", Type: query.TFloat},
},
Data: [][]interface{}{
{1.0, 2.0, 3.0},
{11.0, 12.0, 13.0},
{21.0, 22.0, 23.0},
},
}},
want: []*executetest.Table(nil),
wantErr: errors.New(`duplicate error: column "no_exist" doesn't exist`),
},
{ {
name: "rename group key", name: "rename group key",
spec: &functions.SchemaMutationProcedureSpec{ spec: &functions.SchemaMutationProcedureSpec{

View File

@ -88,6 +88,13 @@ func toStringSet(arr []string) map[string]bool {
return set return set
} }
func checkCol(label string, cols []query.ColMeta) error {
if execute.ColIdx(label, cols) < 0 {
return fmt.Errorf(`column "%s" doesn't exist`, label)
}
return nil
}
type RenameMutator struct { type RenameMutator struct {
Cols map[string]string Cols map[string]string
Fn compiler.Func Fn compiler.Func
@ -120,17 +127,6 @@ func NewRenameMutator(qs query.OperationSpec) (*RenameMutator, error) {
return m, nil return m, nil
} }
func (m *RenameMutator) checkColumnReferences(cols []query.ColMeta) error {
if m.Cols != nil {
for c := range m.Cols {
if execute.ColIdx(c, cols) < 0 {
return fmt.Errorf(`rename error: column "%s" doesn't exist`, c)
}
}
}
return nil
}
func (m *RenameMutator) renameCol(col *query.ColMeta) error { func (m *RenameMutator) renameCol(col *query.ColMeta) error {
if col == nil { if col == nil {
return errors.New("rename error: cannot rename nil column") return errors.New("rename error: cannot rename nil column")
@ -150,8 +146,17 @@ func (m *RenameMutator) renameCol(col *query.ColMeta) error {
return nil return nil
} }
func (m *RenameMutator) checkColumns(tableCols []query.ColMeta) error {
for c := range m.Cols {
if err := checkCol(c, tableCols); err != nil {
return errors.Wrap(err, "rename error")
}
}
return nil
}
func (m *RenameMutator) Mutate(ctx *BuilderContext) error { func (m *RenameMutator) Mutate(ctx *BuilderContext) error {
if err := m.checkColumnReferences(ctx.Cols()); err != nil { if err := m.checkColumns(ctx.Cols()); err != nil {
return err return err
} }
@ -225,21 +230,23 @@ func NewDropKeepMutator(qs query.OperationSpec) (*DropKeepMutator, error) {
return m, nil return m, nil
} }
func (m *DropKeepMutator) checkColumnReferences(cols []query.ColMeta) error { func (m *DropKeepMutator) checkColumns(tableCols []query.ColMeta) error {
if m.DropCols != nil { if m.DropCols != nil {
for c := range m.DropCols { for c := range m.DropCols {
if execute.ColIdx(c, cols) < 0 { if err := checkCol(c, tableCols); err != nil {
return fmt.Errorf(`drop error: column "%s" doesn't exist`, c) return errors.Wrap(err, "drop error")
} }
} }
} }
if m.KeepCols != nil { if m.KeepCols != nil {
for c := range m.KeepCols { for c := range m.KeepCols {
if execute.ColIdx(c, cols) < 0 { if err := checkCol(c, tableCols); err != nil {
return fmt.Errorf(`keep error: column "%s" doesn't exist`, c) return errors.Wrap(err, "keep error")
} }
} }
} }
return nil return nil
} }
@ -282,7 +289,7 @@ func (m *DropKeepMutator) keepToDropCols(cols []query.ColMeta) {
} }
func (m *DropKeepMutator) Mutate(ctx *BuilderContext) error { func (m *DropKeepMutator) Mutate(ctx *BuilderContext) error {
if err := m.checkColumnReferences(ctx.Cols()); err != nil { if err := m.checkColumns(ctx.Cols()); err != nil {
return err return err
} }
@ -318,6 +325,55 @@ func (m *DropKeepMutator) Mutate(ctx *BuilderContext) error {
return nil return nil
} }
type DuplicateMutator struct {
Col string
As string
}
func NewDuplicateMutator(qs query.OperationSpec) (*DuplicateMutator, error) {
s, ok := qs.(*DuplicateOpSpec)
if !ok {
return nil, fmt.Errorf("invalid spec type %T", qs)
}
return &DuplicateMutator{
Col: s.Col,
As: s.As,
}, nil
}
func (m *DuplicateMutator) Mutate(ctx *BuilderContext) error {
if err := checkCol(m.Col, ctx.Cols()); err != nil {
return errors.Wrap(err, "duplicate error")
}
newCols := make([]query.ColMeta, 0, len(ctx.Cols())+1)
newColMap := make([]int, 0, len(ctx.Cols())+1)
oldColMap := ctx.ColMap()
for i, c := range ctx.Cols() {
newCols = append(newCols, c)
newColMap = append(newColMap, oldColMap[i])
if c.Label == m.Col {
newCols = append(newCols, duplicate(c, m.As))
newColMap = append(newColMap, oldColMap[i])
}
}
ctx.TableColumns = newCols
ctx.ColIdxMap = newColMap
return nil
}
func duplicate(col query.ColMeta, dupName string) query.ColMeta {
return query.ColMeta{
Type: col.Type,
Label: dupName,
}
}
// TODO: determine pushdown rules // TODO: determine pushdown rules
/* /*
func (s *SchemaMutationProcedureSpec) PushDownRules() []plan.PushDownRule { func (s *SchemaMutationProcedureSpec) PushDownRules() []plan.PushDownRule {

View File

@ -0,0 +1,3 @@
from(db: "test")
|> range(start:2018-05-22T19:53:26Z)
|> duplicate(column: "host", as: "host_new")

View File

@ -0,0 +1,16 @@
#datatype,string,long,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,double,string,string,string,string
#group,false,false,false,false,false,false,true,true,true,true
#default,_result,,,,,,,,,
,result,table,_start,_stop,_time,_value,_field,_measurement,cpu,host
,,0,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:26Z,0,usage_guest,cpu,cpu-total,host.local
,,0,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:36Z,0,usage_guest,cpu,cpu-total,host.local
,,0,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:46Z,0,usage_guest,cpu,cpu-total,host.local
,,0,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:56Z,0,usage_guest,cpu,cpu-total,host.local
,,0,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:54:06Z,0,usage_guest,cpu,cpu-total,host.local
,,0,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:54:16Z,0,usage_guest,cpu,cpu-total,host.local
,,1,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:26Z,0,usage_guest_nice,cpu,cpu-total,host.local
,,1,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:36Z,0,usage_guest_nice,cpu,cpu-total,host.local
,,1,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:46Z,0,usage_guest_nice,cpu,cpu-total,host.local
,,1,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:56Z,0,usage_guest_nice,cpu,cpu-total,host.local
,,1,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:54:06Z,0,usage_guest_nice,cpu,cpu-total,host.local
,,1,2018-05-22T19:53:24.421470485Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:54:16Z,0,usage_guest_nice,cpu,cpu-total,host.local
1 #datatype string long dateTime:RFC3339 dateTime:RFC3339 dateTime:RFC3339 double string string string string
2 #group false false false false false false true true true true
3 #default _result
4 result table _start _stop _time _value _field _measurement cpu host
5 0 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:26Z 0 usage_guest cpu cpu-total host.local
6 0 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:36Z 0 usage_guest cpu cpu-total host.local
7 0 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:46Z 0 usage_guest cpu cpu-total host.local
8 0 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:56Z 0 usage_guest cpu cpu-total host.local
9 0 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:54:06Z 0 usage_guest cpu cpu-total host.local
10 0 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:54:16Z 0 usage_guest cpu cpu-total host.local
11 1 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:26Z 0 usage_guest_nice cpu cpu-total host.local
12 1 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:36Z 0 usage_guest_nice cpu cpu-total host.local
13 1 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:46Z 0 usage_guest_nice cpu cpu-total host.local
14 1 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:56Z 0 usage_guest_nice cpu cpu-total host.local
15 1 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:54:06Z 0 usage_guest_nice cpu cpu-total host.local
16 1 2018-05-22T19:53:24.421470485Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:54:16Z 0 usage_guest_nice cpu cpu-total host.local

View File

@ -0,0 +1,17 @@
#datatype,string,long,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,double,string,string,string,string,string
#group,false,false,false,false,false,false,true,true,true,true,false
#default,_result,,,,,,,,,,
,result,table,_start,_stop,_time,_value,_field,_measurement,cpu,host,host_new
,,0,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:26Z,0,usage_guest,cpu,cpu-total,host.local,host.local
,,0,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:36Z,0,usage_guest,cpu,cpu-total,host.local,host.local
,,0,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:46Z,0,usage_guest,cpu,cpu-total,host.local,host.local
,,0,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:56Z,0,usage_guest,cpu,cpu-total,host.local,host.local
,,0,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:54:06Z,0,usage_guest,cpu,cpu-total,host.local,host.local
,,0,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:54:16Z,0,usage_guest,cpu,cpu-total,host.local,host.local
,,1,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:26Z,0,usage_guest_nice,cpu,cpu-total,host.local,host.local
,,1,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:36Z,0,usage_guest_nice,cpu,cpu-total,host.local,host.local
,,1,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:46Z,0,usage_guest_nice,cpu,cpu-total,host.local,host.local
,,1,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:53:56Z,0,usage_guest_nice,cpu,cpu-total,host.local,host.local
,,1,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:54:06Z,0,usage_guest_nice,cpu,cpu-total,host.local,host.local
,,1,2018-05-22T19:53:26Z,2018-05-22T19:54:24.421470485Z,2018-05-22T19:54:16Z,0,usage_guest_nice,cpu,cpu-total,host.local,host.local
1 #datatype string long dateTime:RFC3339 dateTime:RFC3339 dateTime:RFC3339 double string string string string string
2 #group false false false false false false true true true true false
3 #default _result
4 result table _start _stop _time _value _field _measurement cpu host host_new
5 0 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:26Z 0 usage_guest cpu cpu-total host.local host.local
6 0 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:36Z 0 usage_guest cpu cpu-total host.local host.local
7 0 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:46Z 0 usage_guest cpu cpu-total host.local host.local
8 0 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:56Z 0 usage_guest cpu cpu-total host.local host.local
9 0 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:54:06Z 0 usage_guest cpu cpu-total host.local host.local
10 0 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:54:16Z 0 usage_guest cpu cpu-total host.local host.local
11 1 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:26Z 0 usage_guest_nice cpu cpu-total host.local host.local
12 1 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:36Z 0 usage_guest_nice cpu cpu-total host.local host.local
13 1 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:46Z 0 usage_guest_nice cpu cpu-total host.local host.local
14 1 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:53:56Z 0 usage_guest_nice cpu cpu-total host.local host.local
15 1 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:54:06Z 0 usage_guest_nice cpu cpu-total host.local host.local
16 1 2018-05-22T19:53:26Z 2018-05-22T19:54:24.421470485Z 2018-05-22T19:54:16Z 0 usage_guest_nice cpu cpu-total host.local host.local