Update subqueries so groupings are propagated to inner queries

Previously, only time expressions got propagated inwards. The reason for this was simple. If the outer query was going to filter to a specific time range, then it would be unnecessary for the inner query to output points within that time frame. It started as an optimization, but became a feature because there was no reason to have the user repeat the same time clause for the inner query as the outer query. So we allowed an aggregate query with an interval to pass validation in the subquery if the outer query had a time range. But `GROUP BY` clauses were not propagated because that same logic didn't apply to them. It's not an optimization there. So while grouping by a tag in the outer query without grouping by it in the inner query was useless, there wasn't any particular reason to care. Then a bug was found where wildcards would propagate the dimensions correctly, but the outer query containing a group by with the inner query omitting it wouldn't correctly filter out the outer group by. We could fix that filtering, but on further review, I had been seeing people make that same mistake a lot. People seem to just believe that the grouping should be propagated inwards. Instead of trying to fight what the user wanted and explicitly erase groupings that weren't propagated manually, we might as well just propagate them for the user to make their lives easier. There is no useful situation where you would want to group into buckets that can't physically exist so we might as well do _something_ useful. This will also now propagate time intervals to inner queries since the same applies there. But, while the interval propagates, the following query will not pass validation since it is still not possible to use a grouping interval with a raw query (even if the inner query is an aggregate): SELECT * FROM (SELECT mean(value) FROM cpu) WHERE time > now() - 5m GROUP BY time(1m) This also means wildcards will behave a bit differently. They will retrieve dimensions from the sources in the inner query rather than just using the dimensions in the group by. Fixing top() and bottom() to return the correct auxiliary fields. Unfortunately, we were not copying the buffer with the auxiliary fields so those values would be overwritten by a later point.
2017-01-17 13:48:20 -06:00 · 2017-01-17 13:48:20 -06:00 · 3d4d9062a0
parent a67b5457f5
commit 3d4d9062a0
7 changed files with 144 additions and 27 deletions
--- a/cmd/influxd/run/server_test.go
+++ b/cmd/influxd/run/server_test.go
@ -4705,6 +4705,11 @@ func TestServer_Query_Subqueries(t *testing.T) {
 			command: `SELECT min(value) FROM (SELECT max(usage_user), usage_user - usage_system AS value FROM cpu GROUP BY host) WHERE time >= '2000-01-01T00:00:00Z' AND time < '2000-01-01T00:00:30Z'`,
 			exp:     `{"results":[{"statement_id":0,"series":[{"name":"cpu","columns":["time","min"],"values":[["2000-01-01T00:00:10Z",-44]]}]}]}`,
 		},
+		&Query{
+			params:  url.Values{"db": []string{"db0"}},
+			command: `SELECT min(value) FROM (SELECT top(usage_user, 2), usage_user - usage_system AS value FROM cpu) WHERE time >= '2000-01-01T00:00:00Z' AND time < '2000-01-01T00:00:30Z' GROUP BY host`,
+			exp:     `{"results":[{"statement_id":0,"series":[{"name":"cpu","tags":{"host":"server01"},"columns":["time","min"],"values":[["2000-01-01T00:00:10Z",-10]]},{"name":"cpu","tags":{"host":"server02"},"columns":["time","min"],"values":[["2000-01-01T00:00:10Z",-44]]}]}]}`,
+		},
 	}...)

 	for i, query := range test.queries {
--- a/influxql/ast.go
+++ b/influxql/ast.go
@ -1922,7 +1922,7 @@ func (s *SelectStatement) validateAggregates(tr targetRequirement) error {
 					return fmt.Errorf("invalid group interval: %v", err)
 				}

-				if c, ok := expr.Args[0].(*Call); ok && groupByInterval == 0 {
+				if c, ok := expr.Args[0].(*Call); ok && groupByInterval == 0 && tr != targetSubquery {
 					return fmt.Errorf("%s aggregate requires a GROUP BY interval", expr.Name)
 				} else if !ok && groupByInterval > 0 {
 					return fmt.Errorf("aggregate function required inside the call to %s", expr.Name)
@ -1983,7 +1983,7 @@ func (s *SelectStatement) validateAggregates(tr targetRequirement) error {
 					return fmt.Errorf("invalid group interval: %v", err)
 				}

-				if _, ok := expr.Args[0].(*Call); ok && groupByInterval == 0 {
+				if _, ok := expr.Args[0].(*Call); ok && groupByInterval == 0 && tr != targetSubquery {
 					return fmt.Errorf("%s aggregate requires a GROUP BY interval", expr.Name)
 				} else if !ok {
 					return fmt.Errorf("must use aggregate function with %s", expr.Name)
@ -2043,6 +2043,11 @@ func (s *SelectStatement) validateAggregates(tr targetRequirement) error {

 	// If we have an aggregate function with a group by time without a where clause, it's an invalid statement
 	if tr == targetNotRequired { // ignore create continuous query statements
+		if err := s.validateTimeExpression(); err != nil {
+			return err
+		}
+	}
+	if tr != targetSubquery {
 		if err := s.validateGroupByInterval(); err != nil {
 			return err
 		}
@ -2050,10 +2055,10 @@ func (s *SelectStatement) validateAggregates(tr targetRequirement) error {
 	return nil
 }

-// validateGroupByInterval ensures that any select statements that have a group
+// validateTimeExpression ensures that any select statements that have a group
 // by interval either have a time expression limiting the time range or have a
 // parent query that does that.
-func (s *SelectStatement) validateGroupByInterval() error {
+func (s *SelectStatement) validateTimeExpression() error {
 	// If we have a time expression, we and all subqueries are fine.
 	if HasTimeExpr(s.Condition) {
 		return nil
@ -2072,6 +2077,44 @@ func (s *SelectStatement) validateGroupByInterval() error {
 	// statement, we don't need to do this because parent time ranges propagate
 	// to children. So we only execute this when there is no time condition in
 	// the parent.
+	for _, source := range s.Sources {
+		switch source := source.(type) {
+		case *SubQuery:
+			if err := source.Statement.validateTimeExpression(); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// validateGroupByInterval ensures that a select statement is grouped by an
+// interval if it contains certain functions.
+func (s *SelectStatement) validateGroupByInterval() error {
+	interval, err := s.GroupByInterval()
+	if err != nil {
+		return err
+	} else if interval > 0 {
+		// If we have an interval here, that means the interval will propagate
+		// into any subqueries and we can just stop looking.
+		return nil
+	}
+
+	// Check inside of the fields for any of the specific functions that ned a group by interval.
+	for _, f := range s.Fields {
+		switch expr := f.Expr.(type) {
+		case *Call:
+			switch expr.Name {
+			case "derivative", "non_negative_derivative", "difference", "moving_average", "cumulative_sum", "elapsed", "holt_winters", "holt_winters_with_fit":
+				// If the first argument is a call, we needed a group by interval and we don't have one.
+				if _, ok := expr.Args[0].(*Call); ok {
+					return fmt.Errorf("%s aggregate requires a GROUP BY interval", expr.Name)
+				}
+			}
+		}
+	}
+
+	// Validate the subqueries.
 	for _, source := range s.Sources {
 		switch source := source.(type) {
 		case *SubQuery:
@ -4503,11 +4546,14 @@ func FieldDimensions(sources Sources, m FieldMapper) (fields map[string]DataType
 					fields[k] = typ
 				}
 			}
-			for _, d := range src.Statement.Dimensions {
-				switch d := d.Expr.(type) {
-				case *VarRef:
-					dimensions[d.Val] = struct{}{}
-				}
+
+			_, d, err := FieldDimensions(src.Statement.Sources, m)
+			if err != nil {
+				return nil, nil, err
+			}
+
+			for k := range d {
+				dimensions[k] = struct{}{}
 			}
 		}
 	}
--- a/influxql/ast_test.go
+++ b/influxql/ast_test.go
@ -411,7 +411,7 @@ func TestSelectStatement_RewriteFields(t *testing.T) {
 		// Rewrite subquery
 		{
 			stmt:    `SELECT * FROM (SELECT mean(value1) FROM cpu GROUP BY host) GROUP BY *`,
-			rewrite: `SELECT mean::float FROM (SELECT mean(value1::float) FROM cpu GROUP BY host) GROUP BY host`,
+			rewrite: `SELECT mean::float FROM (SELECT mean(value1::float) FROM cpu GROUP BY host) GROUP BY host, region`,
 		},
 	}

--- a/influxql/functions.gen.go
+++ b/influxql/functions.gen.go
@ -96,7 +96,7 @@ func NewFloatSliceFuncReducer(fn FloatReduceSliceFunc) *FloatSliceFuncReducer {
 // AggregateFloat copies the FloatPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *FloatSliceFuncReducer) AggregateFloat(p *FloatPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateFloatBulk performs a bulk copy of FloatPoints into the internal slice.
@ -166,7 +166,7 @@ func NewFloatSliceFuncIntegerReducer(fn FloatReduceIntegerSliceFunc) *FloatSlice
 // AggregateFloat copies the FloatPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *FloatSliceFuncIntegerReducer) AggregateFloat(p *FloatPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateFloatBulk performs a bulk copy of FloatPoints into the internal slice.
@ -236,7 +236,7 @@ func NewFloatSliceFuncStringReducer(fn FloatReduceStringSliceFunc) *FloatSliceFu
 // AggregateFloat copies the FloatPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *FloatSliceFuncStringReducer) AggregateFloat(p *FloatPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateFloatBulk performs a bulk copy of FloatPoints into the internal slice.
@ -306,7 +306,7 @@ func NewFloatSliceFuncBooleanReducer(fn FloatReduceBooleanSliceFunc) *FloatSlice
 // AggregateFloat copies the FloatPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *FloatSliceFuncBooleanReducer) AggregateFloat(p *FloatPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateFloatBulk performs a bulk copy of FloatPoints into the internal slice.
@ -510,7 +510,7 @@ func NewIntegerSliceFuncFloatReducer(fn IntegerReduceFloatSliceFunc) *IntegerSli
 // AggregateInteger copies the IntegerPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *IntegerSliceFuncFloatReducer) AggregateInteger(p *IntegerPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateIntegerBulk performs a bulk copy of IntegerPoints into the internal slice.
@ -580,7 +580,7 @@ func NewIntegerSliceFuncReducer(fn IntegerReduceSliceFunc) *IntegerSliceFuncRedu
 // AggregateInteger copies the IntegerPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *IntegerSliceFuncReducer) AggregateInteger(p *IntegerPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateIntegerBulk performs a bulk copy of IntegerPoints into the internal slice.
@ -650,7 +650,7 @@ func NewIntegerSliceFuncStringReducer(fn IntegerReduceStringSliceFunc) *IntegerS
 // AggregateInteger copies the IntegerPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *IntegerSliceFuncStringReducer) AggregateInteger(p *IntegerPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateIntegerBulk performs a bulk copy of IntegerPoints into the internal slice.
@ -720,7 +720,7 @@ func NewIntegerSliceFuncBooleanReducer(fn IntegerReduceBooleanSliceFunc) *Intege
 // AggregateInteger copies the IntegerPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *IntegerSliceFuncBooleanReducer) AggregateInteger(p *IntegerPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateIntegerBulk performs a bulk copy of IntegerPoints into the internal slice.
@ -924,7 +924,7 @@ func NewStringSliceFuncFloatReducer(fn StringReduceFloatSliceFunc) *StringSliceF
 // AggregateString copies the StringPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *StringSliceFuncFloatReducer) AggregateString(p *StringPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateStringBulk performs a bulk copy of StringPoints into the internal slice.
@ -994,7 +994,7 @@ func NewStringSliceFuncIntegerReducer(fn StringReduceIntegerSliceFunc) *StringSl
 // AggregateString copies the StringPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *StringSliceFuncIntegerReducer) AggregateString(p *StringPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateStringBulk performs a bulk copy of StringPoints into the internal slice.
@ -1064,7 +1064,7 @@ func NewStringSliceFuncReducer(fn StringReduceSliceFunc) *StringSliceFuncReducer
 // AggregateString copies the StringPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *StringSliceFuncReducer) AggregateString(p *StringPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateStringBulk performs a bulk copy of StringPoints into the internal slice.
@ -1134,7 +1134,7 @@ func NewStringSliceFuncBooleanReducer(fn StringReduceBooleanSliceFunc) *StringSl
 // AggregateString copies the StringPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *StringSliceFuncBooleanReducer) AggregateString(p *StringPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateStringBulk performs a bulk copy of StringPoints into the internal slice.
@ -1338,7 +1338,7 @@ func NewBooleanSliceFuncFloatReducer(fn BooleanReduceFloatSliceFunc) *BooleanSli
 // AggregateBoolean copies the BooleanPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *BooleanSliceFuncFloatReducer) AggregateBoolean(p *BooleanPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateBooleanBulk performs a bulk copy of BooleanPoints into the internal slice.
@ -1408,7 +1408,7 @@ func NewBooleanSliceFuncIntegerReducer(fn BooleanReduceIntegerSliceFunc) *Boolea
 // AggregateBoolean copies the BooleanPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *BooleanSliceFuncIntegerReducer) AggregateBoolean(p *BooleanPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateBooleanBulk performs a bulk copy of BooleanPoints into the internal slice.
@ -1478,7 +1478,7 @@ func NewBooleanSliceFuncStringReducer(fn BooleanReduceStringSliceFunc) *BooleanS
 // AggregateBoolean copies the BooleanPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *BooleanSliceFuncStringReducer) AggregateBoolean(p *BooleanPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateBooleanBulk performs a bulk copy of BooleanPoints into the internal slice.
@ -1548,7 +1548,7 @@ func NewBooleanSliceFuncReducer(fn BooleanReduceSliceFunc) *BooleanSliceFuncRedu
 // AggregateBoolean copies the BooleanPoint into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *BooleanSliceFuncReducer) AggregateBoolean(p *BooleanPoint) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // AggregateBooleanBulk performs a bulk copy of BooleanPoints into the internal slice.
--- a/influxql/functions.gen.go.tmpl
+++ b/influxql/functions.gen.go.tmpl
@ -94,7 +94,7 @@ func New{{$k.Name}}SliceFunc{{if ne $k.Name $v.Name}}{{$v.Name}}{{end}}Reducer(f
 // Aggregate{{$k.Name}} copies the {{$k.Name}}Point into the internal slice to be passed
 // to the reduce function when Emit is called.
 func (r *{{$k.Name}}SliceFunc{{if ne $k.Name $v.Name}}{{$v.Name}}{{end}}Reducer) Aggregate{{$k.Name}}(p *{{$k.Name}}Point) {
-	r.points = append(r.points, *p)
+	r.points = append(r.points, *p.Clone())
 }

 // Aggregate{{$k.Name}}Bulk performs a bulk copy of {{$k.Name}}Points into the internal slice.
--- a/influxql/iterator.go
+++ b/influxql/iterator.go
@ -755,7 +755,11 @@ func newIteratorOptionsSubstatement(stmt *SelectStatement, opt IteratorOptions)
 	if subOpt.EndTime > opt.EndTime {
 		subOpt.EndTime = opt.EndTime
 	}
+	// Propagate the dimensions to the inner subquery.
 	subOpt.Dimensions = opt.Dimensions
+	for d := range opt.GroupBy {
+		subOpt.GroupBy[d] = struct{}{}
+	}
 	subOpt.InterruptCh = opt.InterruptCh

 	// Propagate the SLIMIT and SOFFSET from the outer query.
@ -778,6 +782,12 @@ func newIteratorOptionsSubstatement(stmt *SelectStatement, opt IteratorOptions)
 		return IteratorOptions{}, err
 	}
 	subOpt.Ordered = opt.Ordered && (interval == 0 && stmt.HasSelector())
+
+	// If there is no interval for this subquery, but the outer query has an
+	// interval, inherit the parent interval.
+	if interval == 0 {
+		subOpt.Interval = opt.Interval
+	}
 	return subOpt, nil
 }

--- a/influxql/parser_test.go
+++ b/influxql/parser_test.go
@ -1248,6 +1248,61 @@ func TestParser_ParseStatement(t *testing.T) {
 			},
 		},

+		{
+			s: `SELECT sum(derivative) FROM (SELECT derivative(mean(value)) FROM cpu GROUP BY host) WHERE time >= now() - 1d GROUP BY time(1h)`,
+			stmt: &influxql.SelectStatement{
+				Fields: []*influxql.Field{{
+					Expr: &influxql.Call{
+						Name: "sum",
+						Args: []influxql.Expr{
+							&influxql.VarRef{Val: "derivative"},
+						}},
+				}},
+				Dimensions: []*influxql.Dimension{{
+					Expr: &influxql.Call{
+						Name: "time",
+						Args: []influxql.Expr{
+							&influxql.DurationLiteral{Val: time.Hour},
+						},
+					},
+				}},
+				Sources: []influxql.Source{
+					&influxql.SubQuery{
+						Statement: &influxql.SelectStatement{
+							Fields: []*influxql.Field{{
+								Expr: &influxql.Call{
+									Name: "derivative",
+									Args: []influxql.Expr{
+										&influxql.Call{
+											Name: "mean",
+											Args: []influxql.Expr{
+												&influxql.VarRef{Val: "value"},
+											},
+										},
+									},
+								},
+							}},
+							Dimensions: []*influxql.Dimension{{
+								Expr: &influxql.VarRef{Val: "host"},
+							}},
+							Sources: []influxql.Source{
+								&influxql.Measurement{Name: "cpu"},
+							},
+						},
+					},
+				},
+				Condition: &influxql.BinaryExpr{
+					Op:  influxql.GTE,
+					LHS: &influxql.VarRef{Val: "time"},
+					RHS: &influxql.BinaryExpr{
+						Op:  influxql.SUB,
+						LHS: &influxql.Call{Name: "now"},
+						RHS: &influxql.DurationLiteral{Val: 24 * time.Hour},
+					},
+				},
+			},
+		},
+
 		// See issues https://github.com/influxdata/influxdb/issues/1647
 		// and https://github.com/influxdata/influxdb/issues/4404
 		// DELETE statement
@ -2434,6 +2489,7 @@ func TestParser_ParseStatement(t *testing.T) {
 		{s: `SELECT derivative(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`},
 		{s: `SELECT derivative(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`},
 		{s: `SELECT derivative(mean(value), 1h) FROM myseries where time < now() and time > now() - 1d`, err: `derivative aggregate requires a GROUP BY interval`},
+		{s: `SELECT min(derivative) FROM (SELECT derivative(mean(value), 1h) FROM myseries) where time < now() and time > now() - 1d`, err: `derivative aggregate requires a GROUP BY interval`},
 		{s: `SELECT non_negative_derivative(), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`},
 		{s: `select non_negative_derivative() from myseries`, err: `invalid number of arguments for non_negative_derivative, expected at least 1 but no more than 2, got 0`},
 		{s: `select non_negative_derivative(mean(value), 1h, 3) from myseries`, err: `invalid number of arguments for non_negative_derivative, expected at least 1 but no more than 2, got 3`},