diff --git a/influxql/ast.go b/influxql/ast.go index d7d2c182e6..66eef1b951 100644 --- a/influxql/ast.go +++ b/influxql/ast.go @@ -1789,538 +1789,6 @@ func (s *SelectStatement) HasDimensionWildcard() bool { return false } -func (s *SelectStatement) validate(tr targetRequirement) error { - if err := s.validateFields(); err != nil { - return err - } - - if err := s.validateDimensions(); err != nil { - return err - } - - if err := s.validateDistinct(); err != nil { - return err - } - - if err := s.validateTopBottom(); err != nil { - return err - } - - if err := s.validateAggregates(tr); err != nil { - return err - } - - if err := s.validateFill(); err != nil { - return err - } - - return nil -} - -func (s *SelectStatement) validateFields() error { - ns := s.NamesInSelect() - if len(ns) == 1 && ns[0] == "time" { - return fmt.Errorf("at least 1 non-time field must be queried") - } - - for _, f := range s.Fields { - switch expr := f.Expr.(type) { - case *BinaryExpr: - if err := expr.validate(); err != nil { - return err - } - } - } - return nil -} - -func (s *SelectStatement) validateDimensions() error { - var dur time.Duration - for _, dim := range s.Dimensions { - switch expr := dim.Expr.(type) { - case *Call: - // Ensure the call is time() and it has one or two duration arguments. - // If we already have a duration - if expr.Name != "time" { - return errors.New("only time() calls allowed in dimensions") - } else if got := len(expr.Args); got < 1 || got > 2 { - return errors.New("time dimension expected 1 or 2 arguments") - } else if lit, ok := expr.Args[0].(*DurationLiteral); !ok { - return errors.New("time dimension must have duration argument") - } else if dur != 0 { - return errors.New("multiple time dimensions not allowed") - } else { - dur = lit.Val - if len(expr.Args) == 2 { - switch lit := expr.Args[1].(type) { - case *DurationLiteral: - // noop - case *Call: - if lit.Name != "now" { - return errors.New("time dimension offset function must be now()") - } else if len(lit.Args) != 0 { - return errors.New("time dimension offset now() function requires no arguments") - } - default: - return errors.New("time dimension offset must be duration or now()") - } - } - } - case *VarRef: - if strings.ToLower(expr.Val) == "time" { - return errors.New("time() is a function and expects at least one argument") - } - case *Wildcard: - case *RegexLiteral: - default: - return errors.New("only time and tag dimensions allowed") - } - } - return nil -} - -// validSelectWithAggregate determines if a SELECT statement has the correct -// combination of aggregate functions combined with selected fields and tags -// Currently we don't have support for all aggregates, but aggregates that -// can be combined with fields/tags are: -// TOP, BOTTOM, MAX, MIN, FIRST, LAST -func (s *SelectStatement) validSelectWithAggregate() error { - calls := map[string]struct{}{} - numAggregates := 0 - for _, f := range s.Fields { - fieldCalls := walkFunctionCalls(f.Expr) - for _, c := range fieldCalls { - calls[c.Name] = struct{}{} - } - if len(fieldCalls) != 0 { - numAggregates++ - } - } - // For TOP, BOTTOM, MAX, MIN, FIRST, LAST, PERCENTILE (selector functions) it is ok to ask for fields and tags - // but only if one function is specified. 
Combining multiple functions and fields and tags is not currently supported - onlySelectors := true - for k := range calls { - switch k { - case "top", "bottom", "max", "min", "first", "last", "percentile", "sample": - default: - onlySelectors = false - break - } - } - if onlySelectors { - // If they only have one selector, they can have as many fields or tags as they want - if numAggregates == 1 { - return nil - } - // If they have multiple selectors, they are not allowed to have any other fields or tags specified - if numAggregates > 1 && len(s.Fields) != numAggregates { - return fmt.Errorf("mixing multiple selector functions with tags or fields is not supported") - } - } - - if numAggregates != 0 && numAggregates != len(s.Fields) { - return fmt.Errorf("mixing aggregate and non-aggregate queries is not supported") - } - return nil -} - -// validTopBottomAggr determines if TOP or BOTTOM aggregates have valid arguments. -func (s *SelectStatement) validTopBottomAggr(expr *Call) error { - if exp, got := 2, len(expr.Args); got < exp { - return fmt.Errorf("invalid number of arguments for %s, expected at least %d, got %d", expr.Name, exp, got) - } - if len(expr.Args) > 1 { - callLimit, ok := expr.Args[len(expr.Args)-1].(*IntegerLiteral) - if !ok { - return fmt.Errorf("expected integer as last argument in %s(), found %s", expr.Name, expr.Args[len(expr.Args)-1]) - } - // Check if they asked for a limit smaller than what they passed into the call - if int64(callLimit.Val) > int64(s.Limit) && s.Limit != 0 { - return fmt.Errorf("limit (%d) in %s function can not be larger than the LIMIT (%d) in the select statement", int64(callLimit.Val), expr.Name, int64(s.Limit)) - } - - for _, v := range expr.Args[:len(expr.Args)-1] { - if _, ok := v.(*VarRef); !ok { - return fmt.Errorf("only fields or tags are allowed in %s(), found %s", expr.Name, v) - } - } - } - return nil -} - -// validPercentileAggr determines if the call to PERCENTILE has valid arguments. -func (s *SelectStatement) validPercentileAggr(expr *Call) error { - if err := s.validSelectWithAggregate(); err != nil { - return err - } - if exp, got := 2, len(expr.Args); got != exp { - return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", expr.Name, exp, got) - } - - switch expr.Args[0].(type) { - case *VarRef, *RegexLiteral, *Wildcard: - // do nothing - default: - return fmt.Errorf("expected field argument in percentile()") - } - - switch expr.Args[1].(type) { - case *IntegerLiteral, *NumberLiteral: - return nil - default: - return fmt.Errorf("expected float argument in percentile()") - } -} - -// validPercentileAggr determines if the call to SAMPLE has valid arguments. 
-func (s *SelectStatement) validSampleAggr(expr *Call) error { - if err := s.validSelectWithAggregate(); err != nil { - return err - } - if exp, got := 2, len(expr.Args); got != exp { - return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", expr.Name, exp, got) - } - - switch expr.Args[0].(type) { - case *VarRef, *RegexLiteral, *Wildcard: - // do nothing - default: - return fmt.Errorf("expected field argument in sample()") - } - - switch expr.Args[1].(type) { - case *IntegerLiteral: - return nil - default: - return fmt.Errorf("expected integer argument in sample()") - } -} - -func (s *SelectStatement) validateAggregates(tr targetRequirement) error { - for _, f := range s.Fields { - for _, expr := range walkFunctionCalls(f.Expr) { - switch expr.Name { - case "derivative", "non_negative_derivative", "difference", "non_negative_difference", "moving_average", "cumulative_sum", "elapsed": - if err := s.validSelectWithAggregate(); err != nil { - return err - } - switch expr.Name { - case "derivative", "non_negative_derivative", "elapsed": - if min, max, got := 1, 2, len(expr.Args); got > max || got < min { - return fmt.Errorf("invalid number of arguments for %s, expected at least %d but no more than %d, got %d", expr.Name, min, max, got) - } - // If a duration arg is passed, make sure it's a duration - if len(expr.Args) == 2 { - // Second must be a duration .e.g (1h) - if _, ok := expr.Args[1].(*DurationLiteral); !ok { - return fmt.Errorf("second argument to %s must be a duration, got %T", expr.Name, expr.Args[1]) - } - } - case "difference", "non_negative_difference", "cumulative_sum": - if got := len(expr.Args); got != 1 { - return fmt.Errorf("invalid number of arguments for %s, expected 1, got %d", expr.Name, got) - } - case "moving_average": - if got := len(expr.Args); got != 2 { - return fmt.Errorf("invalid number of arguments for moving_average, expected 2, got %d", got) - } - - if lit, ok := expr.Args[1].(*IntegerLiteral); !ok { - return fmt.Errorf("second argument for moving_average must be an integer, got %T", expr.Args[1]) - } else if lit.Val <= 1 { - return fmt.Errorf("moving_average window must be greater than 1, got %d", lit.Val) - } else if int64(int(lit.Val)) != lit.Val { - return fmt.Errorf("moving_average window too large, got %d", lit.Val) - } - } - // Validate that if they have grouping by time, they need a sub-call like min/max, etc. 
- groupByInterval, err := s.GroupByInterval() - if err != nil { - return fmt.Errorf("invalid group interval: %v", err) - } - - if c, ok := expr.Args[0].(*Call); ok && groupByInterval == 0 && tr != targetSubquery { - return fmt.Errorf("%s aggregate requires a GROUP BY interval", expr.Name) - } else if !ok && groupByInterval > 0 { - return fmt.Errorf("aggregate function required inside the call to %s", expr.Name) - } else if ok { - switch c.Name { - case "top", "bottom": - if err := s.validTopBottomAggr(c); err != nil { - return err - } - case "percentile": - if err := s.validPercentileAggr(c); err != nil { - return err - } - default: - if exp, got := 1, len(c.Args); got != exp { - return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", c.Name, exp, got) - } - - switch fc := c.Args[0].(type) { - case *VarRef, *Wildcard, *RegexLiteral: - // do nothing - case *Call: - if fc.Name != "distinct" || expr.Name != "count" { - return fmt.Errorf("expected field argument in %s()", c.Name) - } else if exp, got := 1, len(fc.Args); got != exp { - return fmt.Errorf("count(distinct %s) can only have %d argument(s), got %d", fc.Name, exp, got) - } else if _, ok := fc.Args[0].(*VarRef); !ok { - return fmt.Errorf("expected field argument in distinct()") - } - case *Distinct: - if expr.Name != "count" { - return fmt.Errorf("expected field argument in %s()", c.Name) - } - default: - return fmt.Errorf("expected field argument in %s()", c.Name) - } - } - } - case "top", "bottom": - if err := s.validTopBottomAggr(expr); err != nil { - return err - } - case "percentile": - if err := s.validPercentileAggr(expr); err != nil { - return err - } - case "sample": - if err := s.validSampleAggr(expr); err != nil { - return err - } - case "integral": - if err := s.validSelectWithAggregate(); err != nil { - return err - } - if min, max, got := 1, 2, len(expr.Args); got > max || got < min { - return fmt.Errorf("invalid number of arguments for %s, expected at least %d but no more than %d, got %d", expr.Name, min, max, got) - } - // If a duration arg is passed, make sure it's a duration - if len(expr.Args) == 2 { - // Second must be a duration .e.g (1h) - if _, ok := expr.Args[1].(*DurationLiteral); !ok { - return errors.New("second argument must be a duration") - } - } - case "holt_winters", "holt_winters_with_fit": - if exp, got := 3, len(expr.Args); got != exp { - return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", expr.Name, exp, got) - } - // Validate that if they have grouping by time, they need a sub-call like min/max, etc. - groupByInterval, err := s.GroupByInterval() - if err != nil { - return fmt.Errorf("invalid group interval: %v", err) - } - - if _, ok := expr.Args[0].(*Call); ok && groupByInterval == 0 && tr != targetSubquery { - return fmt.Errorf("%s aggregate requires a GROUP BY interval", expr.Name) - } else if !ok { - return fmt.Errorf("must use aggregate function with %s", expr.Name) - } - if arg, ok := expr.Args[1].(*IntegerLiteral); !ok { - return fmt.Errorf("expected integer argument as second arg in %s", expr.Name) - } else if arg.Val <= 0 { - return fmt.Errorf("second arg to %s must be greater than 0, got %d", expr.Name, arg.Val) - } - if _, ok := expr.Args[2].(*IntegerLiteral); !ok { - return fmt.Errorf("expected integer argument as third arg in %s", expr.Name) - } - default: - if err := s.validSelectWithAggregate(); err != nil { - return err - } - if exp, got := 1, len(expr.Args); got != exp { - // Special error message if distinct was used as the argument. 
- if expr.Name == "count" && got >= 1 { - if _, ok := expr.Args[0].(*Distinct); ok { - return fmt.Errorf("count(distinct ) can only have one argument") - } - } - return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", expr.Name, exp, got) - } - switch fc := expr.Args[0].(type) { - case *VarRef, *Wildcard, *RegexLiteral: - // do nothing - case *Call: - if fc.Name != "distinct" || expr.Name != "count" { - return fmt.Errorf("expected field argument in %s()", expr.Name) - } else if exp, got := 1, len(fc.Args); got != exp { - return fmt.Errorf("count(distinct ) can only have one argument") - } else if _, ok := fc.Args[0].(*VarRef); !ok { - return fmt.Errorf("expected field argument in distinct()") - } - case *Distinct: - if expr.Name != "count" { - return fmt.Errorf("expected field argument in %s()", expr.Name) - } - default: - return fmt.Errorf("expected field argument in %s()", expr.Name) - } - } - } - } - - // Check that we have valid duration and where clauses for aggregates - - // fetch the group by duration - groupByDuration, _ := s.GroupByInterval() - - // If we have a group by interval, but no aggregate function, it's an invalid statement - if s.IsRawQuery && groupByDuration > 0 { - return fmt.Errorf("GROUP BY requires at least one aggregate function") - } - - // If we have an aggregate function with a group by time without a where clause, it's an invalid statement - if tr == targetNotRequired { // ignore create continuous query statements - if err := s.validateTimeExpression(); err != nil { - return err - } - } - if tr != targetSubquery { - if err := s.validateGroupByInterval(); err != nil { - return err - } - } - return nil -} - -// validateFill ensures that the fill option matches the query type. -func (s *SelectStatement) validateFill() error { - info := newSelectInfo(s) - if len(info.calls) == 0 { - switch s.Fill { - case NoFill: - return errors.New("fill(none) must be used with a function") - case LinearFill: - return errors.New("fill(linear) must be used with a function") - } - } - return nil -} - -// validateTimeExpression ensures that any select statements that have a group -// by interval either have a time expression limiting the time range or have a -// parent query that does that. -func (s *SelectStatement) validateTimeExpression() error { - // If we have a time expression, we and all subqueries are fine. - if HasTimeExpr(s.Condition) { - return nil - } - - // Check if this is not a raw query and if the group by duration exists. - // If these are true, then we have an error. - interval, err := s.GroupByInterval() - if err != nil { - return err - } else if !s.IsRawQuery && interval > 0 { - return fmt.Errorf("aggregate functions with GROUP BY time require a WHERE time clause") - } - - // Validate the subqueries. If we have a time expression in this select - // statement, we don't need to do this because parent time ranges propagate - // to children. So we only execute this when there is no time condition in - // the parent. - for _, source := range s.Sources { - switch source := source.(type) { - case *SubQuery: - if err := source.Statement.validateTimeExpression(); err != nil { - return err - } - } - } - return nil -} - -// validateGroupByInterval ensures that a select statement is grouped by an -// interval if it contains certain functions. 
-func (s *SelectStatement) validateGroupByInterval() error { - interval, err := s.GroupByInterval() - if err != nil { - return err - } else if interval > 0 { - // If we have an interval here, that means the interval will propagate - // into any subqueries and we can just stop looking. - return nil - } - - // Check inside of the fields for any of the specific functions that ned a group by interval. - for _, f := range s.Fields { - switch expr := f.Expr.(type) { - case *Call: - switch expr.Name { - case "derivative", "non_negative_derivative", "difference", "non_negative_difference", "moving_average", "cumulative_sum", "elapsed", "holt_winters", "holt_winters_with_fit": - // If the first argument is a call, we needed a group by interval and we don't have one. - if _, ok := expr.Args[0].(*Call); ok { - return fmt.Errorf("%s aggregate requires a GROUP BY interval", expr.Name) - } - } - } - } - - // Validate the subqueries. - for _, source := range s.Sources { - switch source := source.(type) { - case *SubQuery: - if err := source.Statement.validateGroupByInterval(); err != nil { - return err - } - } - } - return nil -} - -// HasDistinct checks if a select statement contains a call to DISTINCT. -func (s *SelectStatement) HasDistinct() bool { - for _, f := range s.Fields { - switch c := f.Expr.(type) { - case *Call: - if c.Name == "distinct" { - return true - } - case *Distinct: - return true - } - } - return false -} - -func (s *SelectStatement) validateDistinct() error { - if !s.HasDistinct() { - return nil - } - - if len(s.Fields) > 1 { - return fmt.Errorf("aggregate function distinct() cannot be combined with other functions or fields") - } - - switch c := s.Fields[0].Expr.(type) { - case *Call: - if len(c.Args) == 0 { - return fmt.Errorf("distinct function requires at least one argument") - } - - if len(c.Args) != 1 { - return fmt.Errorf("distinct function can only have one argument") - } - } - return nil -} - -func (s *SelectStatement) validateTopBottom() error { - // Ensure there are not multiple calls if top/bottom is present. - info := newSelectInfo(s) - if len(info.calls) > 1 { - for call := range info.calls { - if call.Name == "top" || call.Name == "bottom" { - return fmt.Errorf("selector function %s() cannot be combined with other functions", call.Name) - } - } - } - return nil -} - // GroupByInterval extracts the time interval, if specified. 
func (s *SelectStatement) GroupByInterval() (time.Duration, error) { // return if we've already pulled it out @@ -4129,52 +3597,6 @@ func (e *BinaryExpr) String() string { return fmt.Sprintf("%s %s %s", e.LHS.String(), e.Op.String(), e.RHS.String()) } -func (e *BinaryExpr) validate() error { - v := binaryExprValidator{} - Walk(&v, e) - if v.err != nil { - return v.err - } else if v.calls && v.refs { - return errors.New("binary expressions cannot mix aggregates and raw fields") - } - return nil -} - -type binaryExprValidator struct { - calls bool - refs bool - err error -} - -func (v *binaryExprValidator) Visit(n Node) Visitor { - if v.err != nil { - return nil - } - - switch n := n.(type) { - case *Call: - v.calls = true - - if n.Name == "top" || n.Name == "bottom" { - v.err = fmt.Errorf("cannot use %s() inside of a binary expression", n.Name) - return nil - } - - for _, expr := range n.Args { - switch e := expr.(type) { - case *BinaryExpr: - v.err = e.validate() - return nil - } - } - return nil - case *VarRef: - v.refs = true - return nil - } - return v -} - // BinaryExprName returns the name of a binary expression by concatenating // the variables in the binary expression with underscores. func BinaryExprName(expr *BinaryExpr) string { diff --git a/influxql/ast_test.go b/influxql/ast_test.go index 3fb5fa6833..18e1e8a700 100644 --- a/influxql/ast_test.go +++ b/influxql/ast_test.go @@ -1666,29 +1666,6 @@ func TestSources_HasSystemSource(t *testing.T) { } } -// Parse statements that might appear valid but should return an error. -// If allowed to execute, at least some of these statements would result in a panic. -func TestParse_Errors(t *testing.T) { - for _, tt := range []struct { - tmpl string - good string - bad string - }{ - // Second argument to derivative must be duration - {tmpl: `SELECT derivative(f, %s) FROM m`, good: "1h", bad: "true"}, - } { - good := fmt.Sprintf(tt.tmpl, tt.good) - if _, err := influxql.ParseStatement(good); err != nil { - t.Fatalf("statement %q should have parsed correctly but returned error: %s", good, err) - } - - bad := fmt.Sprintf(tt.tmpl, tt.bad) - if _, err := influxql.ParseStatement(bad); err == nil { - t.Fatalf("statement %q should have resulted in a parse error but did not", bad) - } - } -} - // This test checks to ensure that we have given thought to the database // context required for security checks. If a new statement is added, this // test will fail until it is categorized into the correct bucket below. 
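The hunks below drop the parser-time stmt.validate(tr) call and the corresponding parser error tests; the equivalent semantic checks move into query.Compile (see the query/compile.go changes further down). As a rough illustration only — this sketch is not part of the patch, the zero-value query.CompileOptions and the exact error text are assumptions, and the mixed-aggregate rejection is assumed to be reimplemented by the compiler's validateFields — a statement that previously failed at parse time is now expected to parse cleanly and fail at compile time:

package main

import (
	"fmt"

	"github.com/influxdata/influxdb/influxql"
	"github.com/influxdata/influxdb/query"
)

func main() {
	// After this change the parser accepts the statement; before it,
	// ParseStatement returned "mixing aggregate and non-aggregate queries is not supported".
	stmt, err := influxql.ParseStatement(`SELECT count(value), value FROM cpu`)
	if err != nil {
		fmt.Println("parse error:", err)
		return
	}

	// Semantic validation is now expected to surface from Compile instead
	// (CompileOptions{} assumed usable here; Compile stamps Now itself when unset).
	if _, err := query.Compile(stmt.(*influxql.SelectStatement), query.CompileOptions{}); err != nil {
		fmt.Println("compile error:", err)
	}
}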
diff --git a/influxql/parser.go b/influxql/parser.go index 36a7670137..f4f4f2ca9c 100644 --- a/influxql/parser.go +++ b/influxql/parser.go @@ -791,10 +791,6 @@ func (p *Parser) parseSelectStatement(tr targetRequirement) (*SelectStatement, e } }) - if err := stmt.validate(tr); err != nil { - return nil, err - } - return stmt, nil } diff --git a/influxql/parser_test.go b/influxql/parser_test.go index 58de3c1e07..a3b310c99d 100644 --- a/influxql/parser_test.go +++ b/influxql/parser_test.go @@ -142,7 +142,7 @@ func TestParser_ParseStatement(t *testing.T) { // SELECT statement { - s: fmt.Sprintf(`SELECT mean(field1), sum(field2) ,count(field3) AS field_x FROM myseries WHERE host = 'hosta.influxdb.org' and time > '%s' GROUP BY time(10h) ORDER BY DESC LIMIT 20 OFFSET 10;`, now.UTC().Format(time.RFC3339Nano)), + s: fmt.Sprintf(`SELECT mean(field1), sum(field2), count(field3) AS field_x FROM myseries WHERE host = 'hosta.influxdb.org' and time > '%s' GROUP BY time(10h) ORDER BY DESC LIMIT 20 OFFSET 10;`, now.UTC().Format(time.RFC3339Nano)), stmt: &influxql.SelectStatement{ IsRawQuery: false, Fields: []*influxql.Field{ @@ -2795,47 +2795,12 @@ func TestParser_ParseStatement(t *testing.T) { // Errors {s: ``, err: `found EOF, expected SELECT, DELETE, SHOW, CREATE, DROP, GRANT, REVOKE, ALTER, SET, KILL at line 1, char 1`}, {s: `SELECT`, err: `found EOF, expected identifier, string, number, bool at line 1, char 8`}, - {s: `SELECT time FROM myseries`, err: `at least 1 non-time field must be queried`}, {s: `blah blah`, err: `found blah, expected SELECT, DELETE, SHOW, CREATE, DROP, GRANT, REVOKE, ALTER, SET, KILL at line 1, char 1`}, {s: `SELECT field1 X`, err: `found X, expected FROM at line 1, char 15`}, {s: `SELECT field1 FROM "series" WHERE X +;`, err: `found ;, expected identifier, string, number, bool at line 1, char 38`}, {s: `SELECT field1 FROM myseries GROUP`, err: `found EOF, expected BY at line 1, char 35`}, {s: `SELECT field1 FROM myseries LIMIT`, err: `found EOF, expected integer at line 1, char 35`}, {s: `SELECT field1 FROM myseries LIMIT 10.5`, err: `found 10.5, expected integer at line 1, char 35`}, - {s: `SELECT count(max(value)) FROM myseries`, err: `expected field argument in count()`}, - {s: `SELECT count(distinct('value')) FROM myseries`, err: `expected field argument in distinct()`}, - {s: `SELECT distinct('value') FROM myseries`, err: `expected field argument in distinct()`}, - {s: `SELECT min(max(value)) FROM myseries`, err: `expected field argument in min()`}, - {s: `SELECT min(distinct(value)) FROM myseries`, err: `expected field argument in min()`}, - {s: `SELECT max(max(value)) FROM myseries`, err: `expected field argument in max()`}, - {s: `SELECT sum(max(value)) FROM myseries`, err: `expected field argument in sum()`}, - {s: `SELECT first(max(value)) FROM myseries`, err: `expected field argument in first()`}, - {s: `SELECT last(max(value)) FROM myseries`, err: `expected field argument in last()`}, - {s: `SELECT mean(max(value)) FROM myseries`, err: `expected field argument in mean()`}, - {s: `SELECT median(max(value)) FROM myseries`, err: `expected field argument in median()`}, - {s: `SELECT mode(max(value)) FROM myseries`, err: `expected field argument in mode()`}, - {s: `SELECT stddev(max(value)) FROM myseries`, err: `expected field argument in stddev()`}, - {s: `SELECT spread(max(value)) FROM myseries`, err: `expected field argument in spread()`}, - {s: `SELECT top() FROM myseries`, err: `invalid number of arguments for top, expected at least 2, got 0`}, - {s: `SELECT 
top(field1) FROM myseries`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT top(field1,foo) FROM myseries`, err: `expected integer as last argument in top(), found foo`}, - {s: `SELECT top(field1,host,'server',foo) FROM myseries`, err: `expected integer as last argument in top(), found foo`}, - {s: `SELECT top(field1,5,'server',2) FROM myseries`, err: `only fields or tags are allowed in top(), found 5`}, - {s: `SELECT top(field1,max(foo),'server',2) FROM myseries`, err: `only fields or tags are allowed in top(), found max(foo)`}, - {s: `SELECT top(value, 10) + count(value) FROM myseries`, err: `cannot use top() inside of a binary expression`}, - {s: `SELECT top(max(value), 10) FROM myseries`, err: `only fields or tags are allowed in top(), found max(value)`}, - {s: `SELECT bottom() FROM myseries`, err: `invalid number of arguments for bottom, expected at least 2, got 0`}, - {s: `SELECT bottom(field1) FROM myseries`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT bottom(field1,foo) FROM myseries`, err: `expected integer as last argument in bottom(), found foo`}, - {s: `SELECT bottom(field1,host,'server',foo) FROM myseries`, err: `expected integer as last argument in bottom(), found foo`}, - {s: `SELECT bottom(field1,5,'server',2) FROM myseries`, err: `only fields or tags are allowed in bottom(), found 5`}, - {s: `SELECT bottom(field1,max(foo),'server',2) FROM myseries`, err: `only fields or tags are allowed in bottom(), found max(foo)`}, - {s: `SELECT bottom(value, 10) + count(value) FROM myseries`, err: `cannot use bottom() inside of a binary expression`}, - {s: `SELECT bottom(max(value), 10) FROM myseries`, err: `only fields or tags are allowed in bottom(), found max(value)`}, - {s: `SELECT percentile() FROM myseries`, err: `invalid number of arguments for percentile, expected 2, got 0`}, - {s: `SELECT percentile(field1) FROM myseries`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT percentile(field1, foo) FROM myseries`, err: `expected float argument in percentile()`}, - {s: `SELECT percentile(max(field1), 75) FROM myseries`, err: `expected field argument in percentile()`}, {s: `SELECT field1 FROM myseries OFFSET`, err: `found EOF, expected integer at line 1, char 36`}, {s: `SELECT field1 FROM myseries OFFSET 10.5`, err: `found 10.5, expected integer at line 1, char 36`}, {s: `SELECT field1 FROM myseries ORDER`, err: `found EOF, expected BY at line 1, char 35`}, @@ -2845,99 +2810,16 @@ func TestParser_ParseStatement(t *testing.T) { {s: `SELECT field1 FROM myseries ORDER BY time ASC,`, err: `found EOF, expected identifier at line 1, char 47`}, {s: `SELECT field1 FROM myseries ORDER BY time, field1`, err: `only ORDER BY time supported at this time`}, {s: `SELECT field1 AS`, err: `found EOF, expected identifier at line 1, char 18`}, - {s: `SELECT field1 FROM foo group by time(1s)`, err: `GROUP BY requires at least one aggregate function`}, - {s: `SELECT field1 FROM foo fill(none)`, err: `fill(none) must be used with a function`}, - {s: `SELECT field1 FROM foo fill(linear)`, err: `fill(linear) must be used with a function`}, - {s: `SELECT count(value), value FROM foo`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `SELECT count(value)/10, value FROM foo`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `SELECT count(value) FROM foo group by time(1s)`, err: `aggregate functions with GROUP BY time require a WHERE time 
clause`}, - {s: `SELECT count(value) FROM foo group by time(500ms)`, err: `aggregate functions with GROUP BY time require a WHERE time clause`}, - {s: `SELECT count(value) FROM foo group by time(1s) where host = 'hosta.influxdb.org'`, err: `aggregate functions with GROUP BY time require a WHERE time clause`}, - {s: `SELECT count(value) FROM foo group by time`, err: `time() is a function and expects at least one argument`}, - {s: `SELECT count(value) FROM foo group by 'time'`, err: `only time and tag dimensions allowed`}, - {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time()`, err: `time dimension expected 1 or 2 arguments`}, - {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(b)`, err: `time dimension must have duration argument`}, - {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(1s), time(2s)`, err: `multiple time dimensions not allowed`}, - {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(1s, b)`, err: `time dimension offset must be duration or now()`}, {s: `SELECT field1 FROM 12`, err: `found 12, expected identifier at line 1, char 20`}, {s: `SELECT 1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 FROM myseries`, err: `unable to parse integer at line 1, char 8`}, {s: `SELECT 10.5h FROM myseries`, err: `found h, expected FROM at line 1, char 12`}, - {s: `SELECT distinct(field1), sum(field1) FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, - {s: `SELECT distinct(field1), field2 FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, - {s: `SELECT distinct(field1, field2) FROM myseries`, err: `distinct function can only have one argument`}, - {s: `SELECT distinct() FROM myseries`, err: `distinct function requires at least one argument`}, {s: `SELECT distinct FROM myseries`, err: `found FROM, expected identifier at line 1, char 17`}, - {s: `SELECT distinct field1, field2 FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, {s: `SELECT count(distinct) FROM myseries`, err: `found ), expected (, identifier at line 1, char 22`}, - {s: `SELECT count(distinct field1, field2) FROM myseries`, err: `count(distinct ) can only have one argument`}, - {s: `select count(distinct(too, many, arguments)) from myseries`, err: `count(distinct ) can only have one argument`}, - {s: `select count() from myseries`, err: `invalid number of arguments for count, expected 1, got 0`}, - {s: `SELECT derivative(), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `select derivative() from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 0`}, - {s: `select derivative(mean(value), 1h, 3) from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 3`}, - {s: `SELECT derivative(value) FROM myseries group by time(1h)`, err: 
`aggregate function required inside the call to derivative`}, - {s: `SELECT derivative(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT derivative(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT derivative(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, - {s: `SELECT derivative(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT derivative(mean(value), 1h) FROM myseries where time < now() and time > now() - 1d`, err: `derivative aggregate requires a GROUP BY interval`}, - {s: `SELECT min(derivative) FROM (SELECT derivative(mean(value), 1h) FROM myseries) where time < now() and time > now() - 1d`, err: `derivative aggregate requires a GROUP BY interval`}, - {s: `SELECT non_negative_derivative(), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `select non_negative_derivative() from myseries`, err: `invalid number of arguments for non_negative_derivative, expected at least 1 but no more than 2, got 0`}, - {s: `select non_negative_derivative(mean(value), 1h, 3) from myseries`, err: `invalid number of arguments for non_negative_derivative, expected at least 1 but no more than 2, got 3`}, - {s: `SELECT non_negative_derivative(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to non_negative_derivative`}, - {s: `SELECT non_negative_derivative(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT non_negative_derivative(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT non_negative_derivative(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, - {s: `SELECT non_negative_derivative(mean(value), 1h) FROM myseries where time < now() and time > now() - 1d`, err: `non_negative_derivative aggregate requires a GROUP BY interval`}, - {s: `SELECT non_negative_derivative(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT difference(), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `SELECT difference() from myseries`, err: `invalid number of arguments for difference, expected 1, got 0`}, - {s: `SELECT difference(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to difference`}, - {s: `SELECT difference(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT difference(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT difference(max()) FROM myseries 
where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, - {s: `SELECT difference(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT difference(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `difference aggregate requires a GROUP BY interval`}, - {s: `SELECT moving_average(), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `SELECT moving_average() from myseries`, err: `invalid number of arguments for moving_average, expected 2, got 0`}, - {s: `SELECT moving_average(value) FROM myseries`, err: `invalid number of arguments for moving_average, expected 2, got 1`}, - {s: `SELECT moving_average(value, 2) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to moving_average`}, - {s: `SELECT moving_average(top(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT moving_average(bottom(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT moving_average(max(), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, - {s: `SELECT moving_average(percentile(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT moving_average(mean(value), 2) FROM myseries where time < now() and time > now() - 1d`, err: `moving_average aggregate requires a GROUP BY interval`}, - {s: `SELECT cumulative_sum(), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `SELECT cumulative_sum() from myseries`, err: `invalid number of arguments for cumulative_sum, expected 1, got 0`}, - {s: `SELECT cumulative_sum(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to cumulative_sum`}, - {s: `SELECT cumulative_sum(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT cumulative_sum(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT cumulative_sum(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, - {s: `SELECT cumulative_sum(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT cumulative_sum(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `cumulative_sum aggregate requires a GROUP BY interval`}, - {s: `SELECT holt_winters(value) FROM myseries where time < now() and time > now() - 1d`, err: `invalid number of arguments for holt_winters, expected 3, got 1`}, - {s: `SELECT holt_winters(value, 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `must use aggregate function with holt_winters`}, - {s: `SELECT 
holt_winters(min(value), 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `holt_winters aggregate requires a GROUP BY interval`}, - {s: `SELECT holt_winters(min(value), 0, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `second arg to holt_winters must be greater than 0, got 0`}, - {s: `SELECT holt_winters(min(value), false, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as second arg in holt_winters`}, - {s: `SELECT holt_winters(min(value), 10, 'string') FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as third arg in holt_winters`}, {s: `SELECT field1 from myseries WHERE host =~ 'asd' LIMIT 1`, err: `found asd, expected regex at line 1, char 42`}, {s: `SELECT value > 2 FROM cpu`, err: `invalid operator > in SELECT clause at line 1, char 8; operator is intended for WHERE clause`}, {s: `SELECT value = 2 FROM cpu`, err: `invalid operator = in SELECT clause at line 1, char 8; operator is intended for WHERE clause`}, {s: `SELECT s =~ /foo/ FROM cpu`, err: `invalid operator =~ in SELECT clause at line 1, char 8; operator is intended for WHERE clause`}, - {s: `SELECT mean(value) + value FROM cpu WHERE time < now() and time > now() - 1h GROUP BY time(10m)`, err: `binary expressions cannot mix aggregates and raw fields`}, - // TODO: Remove this restriction in the future: https://github.com/influxdata/influxdb/issues/5968 - {s: `SELECT mean(cpu_total - cpu_idle) FROM cpu`, err: `expected field argument in mean()`}, - {s: `SELECT derivative(mean(cpu_total - cpu_idle), 1s) FROM cpu WHERE time < now() AND time > now() - 1d GROUP BY time(1h)`, err: `expected field argument in mean()`}, - // TODO: The error message will change when math is allowed inside an aggregate: https://github.com/influxdata/influxdb/pull/5990#issuecomment-195565870 - {s: `SELECT count(foo + sum(bar)) FROM cpu`, err: `expected field argument in count()`}, - {s: `SELECT (count(foo + sum(bar))) FROM cpu`, err: `expected field argument in count()`}, - {s: `SELECT sum(value) + count(foo + sum(bar)) FROM cpu`, err: `binary expressions cannot mix aggregates and raw fields`}, {s: `SELECT mean(value) FROM cpu FILL + value`, err: `fill must be a function call`}, - {s: `SELECT sum(mean) FROM (SELECT mean(value) FROM cpu GROUP BY time(1h))`, err: `aggregate functions with GROUP BY time require a WHERE time clause`}, - {s: `SELECT top(value, 2), max(value) FROM cpu`, err: `selector function top() cannot be combined with other functions`}, - {s: `SELECT bottom(value, 2), max(value) FROM cpu`, err: `selector function bottom() cannot be combined with other functions`}, // See issues https://github.com/influxdata/influxdb/issues/1647 // and https://github.com/influxdata/influxdb/issues/4404 //{s: `DELETE`, err: `found EOF, expected FROM at line 1, char 8`}, @@ -3113,16 +2995,8 @@ func TestParser_ParseStatement(t *testing.T) { } stmt, err := p.ParseStatement() - // We are memoizing a field so for testing we need to... 
- if s, ok := tt.stmt.(*influxql.SelectStatement); ok { - s.GroupByInterval() - for _, source := range s.Sources { - switch source := source.(type) { - case *influxql.SubQuery: - source.Statement.GroupByInterval() - } - } - } else if st, ok := stmt.(*influxql.CreateContinuousQueryStatement); ok { // if it's a CQ, there is a non-exported field that gets memoized during parsing that needs to be set + // if it's a CQ, there is a non-exported field that gets memoized during parsing that needs to be set + if st, ok := stmt.(*influxql.CreateContinuousQueryStatement); ok { if st != nil && st.Source != nil { tt.stmt.(*influxql.CreateContinuousQueryStatement).Source.GroupByInterval() } diff --git a/query/compile.go b/query/compile.go index b4d83e401d..d1ea88a985 100644 --- a/query/compile.go +++ b/query/compile.go @@ -1,7 +1,9 @@ package query import ( + "errors" "fmt" + "strings" "time" "github.com/influxdata/influxdb/influxql" @@ -19,49 +21,771 @@ type Statement interface { Prepare(shardMapper ShardMapper, opt SelectOptions) (PreparedStatement, error) } -func Compile(stmt *influxql.SelectStatement, opt CompileOptions) (Statement, error) { - // It is important to "stamp" this time so that everywhere we evaluate `now()` in the statement is EXACTLY the same `now` - now := opt.Now - if now.IsZero() { - now = time.Now().UTC() - } - - // Evaluate the now() condition immediately so we do not have to deal with this. - nowValuer := influxql.NowValuer{Now: now, Location: stmt.Location} - stmt = stmt.Reduce(&nowValuer) - - // Convert DISTINCT into a call. - stmt.RewriteDistinct() - - // Remove "time" from fields list. - stmt.RewriteTimeFields() - - // Rewrite time condition. - if err := stmt.RewriteTimeCondition(now); err != nil { - return nil, err - } - - // Rewrite any regex conditions that could make use of the index. - stmt.RewriteRegexConditions() - return &compiledStatement{stmt: stmt}, nil -} - // compiledStatement represents a select statement that has undergone some initial processing to // determine if it is valid and to have some initial modifications done on the AST. type compiledStatement struct { + // Condition is the condition used for accessing data. + Condition influxql.Expr + + // TimeRange is the TimeRange for selecting data. + TimeRange influxql.TimeRange + + // Interval holds the time grouping interval. + Interval Interval + + // InheritedInterval marks if the interval was inherited by a parent. + // If this is set, then an interval that was inherited will not cause + // a query that shouldn't have an interval to fail. + InheritedInterval bool + + // FunctionCalls holds a reference to the call expression of every function + // call that has been encountered. + FunctionCalls []*influxql.Call + + // OnlySelectors is set to true when there are no aggregate functions. + OnlySelectors bool + + // HasDistinct is set when the distinct() function is encountered. + HasDistinct bool + + // FillOption contains the fill option for aggregates. + FillOption influxql.FillOption + + // TopBottomFunction is set to top or bottom when one of those functions are + // used in the statement. + TopBottomFunction string + + // HasAuxiliaryFields is true when the function requires auxiliary fields. + HasAuxiliaryFields bool + + // Fields holds all of the fields that will be used. + Fields []*compiledField + + // TimeFieldName stores the name of the time field's column. + // The column names generated by the compiler will not conflict with + // this name. 
+ TimeFieldName string + + // Limit is the number of rows per series this query should be limited to. + Limit int + + // HasTarget is true if this query is being written into a target. + HasTarget bool + + // Options holds the configured compiler options. + Options CompileOptions + stmt *influxql.SelectStatement } -func (c *compiledStatement) Prepare(shardMapper ShardMapper, sopt SelectOptions) (PreparedStatement, error) { - // Determine the time range spanned by the condition so we can map shards. - nowValuer := influxql.NowValuer{Location: c.stmt.Location} - _, timeRange, err := influxql.ConditionExpr(c.stmt.Condition, &nowValuer) - if err != nil { +func newCompiler(opt CompileOptions) *compiledStatement { + if opt.Now.IsZero() { + opt.Now = time.Now().UTC() + } + return &compiledStatement{ + OnlySelectors: true, + TimeFieldName: "time", + Options: opt, + } +} + +func Compile(stmt *influxql.SelectStatement, opt CompileOptions) (Statement, error) { + c := newCompiler(opt) + if err := c.preprocess(stmt); err != nil { return nil, err } + if err := c.compile(stmt); err != nil { + return nil, err + } + c.stmt = stmt.Clone() + c.stmt.TimeAlias = c.TimeFieldName + c.stmt.Condition = c.Condition + // Convert DISTINCT into a call. + c.stmt.RewriteDistinct() + + // Remove "time" from fields list. + c.stmt.RewriteTimeFields() + + // Rewrite any regex conditions that could make use of the index. + c.stmt.RewriteRegexConditions() + return c, nil +} + +// preprocess retrieves and records the global attributes of the current statement. +func (c *compiledStatement) preprocess(stmt *influxql.SelectStatement) error { + c.Limit = stmt.Limit + c.HasTarget = stmt.Target != nil + + valuer := influxql.NowValuer{Now: c.Options.Now, Location: stmt.Location} + if cond, t, err := influxql.ConditionExpr(stmt.Condition, &valuer); err != nil { + return err + } else { + c.Condition = cond + c.TimeRange = t + } + + // Read the dimensions of the query, validate them, and retrieve the interval + // if it exists. + if err := c.compileDimensions(stmt); err != nil { + return err + } + + // Retrieve the fill option for the statement. + c.FillOption = stmt.Fill + + // Resolve the min and max times now that we know if there is an interval or not. + if c.TimeRange.Min.IsZero() { + c.TimeRange.Min = time.Unix(0, influxql.MinTime).UTC() + } + if c.TimeRange.Max.IsZero() { + // If the interval is non-zero, then we have an aggregate query and + // need to limit the maximum time to now() for backwards compatibility + // and usability. + if !c.Interval.IsZero() { + c.TimeRange.Max = c.Options.Now + } else { + c.TimeRange.Max = time.Unix(0, influxql.MaxTime).UTC() + } + } + return nil +} + +func (c *compiledStatement) compile(stmt *influxql.SelectStatement) error { + if err := c.compileFields(stmt); err != nil { + return err + } + if err := c.validateFields(); err != nil { + return err + } + if err := c.validateDimensions(); err != nil { + return err + } + + // Look through the sources and compile each of the subqueries (if they exist). + // We do this after compiling the outside because subqueries may require + // inherited state. 
+ for _, source := range stmt.Sources { + switch source := source.(type) { + case *influxql.SubQuery: + if err := c.subquery(source.Statement); err != nil { + return err + } + } + } + return nil +} + +func (c *compiledStatement) compileFields(stmt *influxql.SelectStatement) error { + c.Fields = make([]*compiledField, 0, len(stmt.Fields)) + for _, f := range stmt.Fields { + // Remove any time selection (it is automatically selected by default) + // and set the time column name to the alias of the time field if it exists. + // Such as SELECT time, max(value) FROM cpu will be SELECT max(value) FROM cpu + // and SELECT time AS timestamp, max(value) FROM cpu will return "timestamp" + // as the column name for the time. + if ref, ok := f.Expr.(*influxql.VarRef); ok && ref.Val == "time" { + if f.Alias != "" { + c.TimeFieldName = f.Alias + } + continue + } + + // Append this field to the list of processed fields and compile it. + field := &compiledField{ + global: c, + Field: f, + AllowWildcard: true, + } + c.Fields = append(c.Fields, field) + if err := field.compileExpr(f.Expr); err != nil { + return err + } + } + return nil +} + +type compiledField struct { + // This holds the global state from the compiled statement. + global *compiledStatement + + // Field is the top level field that is being compiled. + Field *influxql.Field + + // AllowWildcard is set to true if a wildcard or regular expression is allowed. + AllowWildcard bool +} + +// compileExpr creates the node that executes the expression and connects that +// node to the WriteEdge as the output. +func (c *compiledField) compileExpr(expr influxql.Expr) error { + switch expr := expr.(type) { + case *influxql.VarRef: + // A bare variable reference will require auxiliary fields. + c.global.HasAuxiliaryFields = true + return nil + case *influxql.Wildcard: + // Wildcards use auxiliary fields. We assume there will be at least one + // expansion. + c.global.HasAuxiliaryFields = true + if !c.AllowWildcard { + return errors.New("unable to use wildcard in a binary expression") + } + return nil + case *influxql.RegexLiteral: + if !c.AllowWildcard { + return errors.New("unable to use regex in a binary expression") + } + c.global.HasAuxiliaryFields = true + return nil + case *influxql.Call: + // Register the function call in the list of function calls. 
+ c.global.FunctionCalls = append(c.global.FunctionCalls, expr) + + switch expr.Name { + case "percentile": + return c.compilePercentile(expr.Args) + case "sample": + return c.compileSample(expr.Args) + case "distinct": + return c.compileDistinct(expr.Args) + case "top", "bottom": + return c.compileTopBottom(expr) + case "derivative", "non_negative_derivative": + isNonNegative := expr.Name == "non_negative_derivative" + return c.compileDerivative(expr.Args, isNonNegative) + case "difference", "non_negative_difference": + isNonNegative := expr.Name == "non_negative_difference" + return c.compileDifference(expr.Args, isNonNegative) + case "cumulative_sum": + return c.compileCumulativeSum(expr.Args) + case "moving_average": + return c.compileMovingAverage(expr.Args) + case "elapsed": + return c.compileElapsed(expr.Args) + case "integral": + return c.compileIntegral(expr.Args) + case "holt_winters", "holt_winters_with_fit": + withFit := expr.Name == "holt_winters_with_fit" + return c.compileHoltWinters(expr.Args, withFit) + default: + return c.compileFunction(expr) + } + case *influxql.Distinct: + call := expr.NewCall() + c.global.FunctionCalls = append(c.global.FunctionCalls, call) + return c.compileDistinct(call.Args) + case *influxql.BinaryExpr: + // Disallow wildcards in binary expressions. RewriteFields, which expands + // wildcards, is too complicated if we allow wildcards inside of expressions. + c.AllowWildcard = false + + // Check if either side is a literal so we only compile one side if it is. + if _, ok := expr.LHS.(influxql.Literal); ok { + if _, ok := expr.RHS.(influxql.Literal); ok { + return errors.New("cannot perform a binary expression on two literals") + } + return c.compileExpr(expr.RHS) + } else if _, ok := expr.RHS.(influxql.Literal); ok { + return c.compileExpr(expr.LHS) + } else { + // Validate both sides of the expression. + if err := c.compileExpr(expr.LHS); err != nil { + return err + } + if err := c.compileExpr(expr.RHS); err != nil { + return err + } + return nil + } + case *influxql.ParenExpr: + return c.compileExpr(expr.Expr) + } + return errors.New("unimplemented") +} + +func (c *compiledField) compileSymbol(name string, field influxql.Expr) error { + // Must be a variable reference, wildcard, or regexp. + switch field.(type) { + case *influxql.VarRef: + return nil + case *influxql.Wildcard: + if !c.AllowWildcard { + return fmt.Errorf("unsupported expression with wildcard: %s()", name) + } + c.global.OnlySelectors = false + return nil + case *influxql.RegexLiteral: + if !c.AllowWildcard { + return fmt.Errorf("unsupported expression with regex field: %s()", name) + } + c.global.OnlySelectors = false + return nil + default: + return fmt.Errorf("expected field argument in %s()", name) + } +} + +func (c *compiledField) compileFunction(expr *influxql.Call) error { + // Validate the function call and mark down some meta properties + // related to the function for query validation. + switch expr.Name { + case "max", "min", "first", "last": + // top/bottom are not included here since they are not typical functions. + case "count", "sum", "mean", "median", "mode", "stddev", "spread": + // These functions are not considered selectors. 
+ c.global.OnlySelectors = false + default: + return fmt.Errorf("undefined function %s()", expr.Name) + } + + if exp, got := 1, len(expr.Args); exp != got { + return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", expr.Name, exp, got) + } + + // If this is a call to count(), allow distinct() to be used as the function argument. + if expr.Name == "count" { + // If we have count(), the argument may be a distinct() call. + if arg0, ok := expr.Args[0].(*influxql.Call); ok && arg0.Name == "distinct" { + return c.compileDistinct(arg0.Args) + } else if arg0, ok := expr.Args[0].(*influxql.Distinct); ok { + call := arg0.NewCall() + return c.compileDistinct(call.Args) + } + } + return c.compileSymbol(expr.Name, expr.Args[0]) +} + +func (c *compiledField) compilePercentile(args []influxql.Expr) error { + if exp, got := 2, len(args); got != exp { + return fmt.Errorf("invalid number of arguments for percentile, expected %d, got %d", exp, got) + } + + switch args[1].(type) { + case *influxql.IntegerLiteral: + case *influxql.NumberLiteral: + default: + return fmt.Errorf("expected float argument in percentile()") + } + return c.compileSymbol("percentile", args[0]) +} + +func (c *compiledField) compileSample(args []influxql.Expr) error { + if exp, got := 2, len(args); got != exp { + return fmt.Errorf("invalid number of arguments for sample, expected %d, got %d", exp, got) + } + + switch arg1 := args[1].(type) { + case *influxql.IntegerLiteral: + if arg1.Val <= 0 { + return fmt.Errorf("sample window must be greater than 1, got %d", arg1.Val) + } + default: + return fmt.Errorf("expected integer argument in sample()") + } + return c.compileSymbol("sample", args[0]) +} + +func (c *compiledField) compileDerivative(args []influxql.Expr, isNonNegative bool) error { + name := "derivative" + if isNonNegative { + name = "non_negative_derivative" + } + + if min, max, got := 1, 2, len(args); got > max || got < min { + return fmt.Errorf("invalid number of arguments for %s, expected at least %d but no more than %d, got %d", name, min, max, got) + } + + // Retrieve the duration from the derivative() call, if specified. + if len(args) == 2 { + switch arg1 := args[1].(type) { + case *influxql.DurationLiteral: + if arg1.Val <= 0 { + return fmt.Errorf("duration argument must be positive, got %s", influxql.FormatDuration(arg1.Val)) + } + default: + return fmt.Errorf("second argument to %s must be a duration, got %T", name, args[1]) + } + } + c.global.OnlySelectors = false + + // Must be a variable reference, function, wildcard, or regexp. + switch arg0 := args[0].(type) { + case *influxql.Call: + if c.global.Interval.IsZero() { + return fmt.Errorf("%s aggregate requires a GROUP BY interval", name) + } + return c.compileExpr(arg0) + default: + if !c.global.Interval.IsZero() { + return fmt.Errorf("aggregate function required inside the call to %s", name) + } + return c.compileSymbol(name, arg0) + } +} + +func (c *compiledField) compileElapsed(args []influxql.Expr) error { + if min, max, got := 1, 2, len(args); got > max || got < min { + return fmt.Errorf("invalid number of arguments for elapsed, expected at least %d but no more than %d, got %d", min, max, got) + } + + // Retrieve the duration from the elapsed() call, if specified. 
+ if len(args) == 2 { + switch arg1 := args[1].(type) { + case *influxql.DurationLiteral: + if arg1.Val <= 0 { + return fmt.Errorf("duration argument must be positive, got %s", influxql.FormatDuration(arg1.Val)) + } + default: + return fmt.Errorf("second argument to elapsed must be a duration, got %T", args[1]) + } + } + c.global.OnlySelectors = false + + // Must be a variable reference, function, wildcard, or regexp. + switch arg0 := args[0].(type) { + case *influxql.Call: + if c.global.Interval.IsZero() { + return fmt.Errorf("elapsed aggregate requires a GROUP BY interval") + } + return c.compileExpr(arg0) + default: + if !c.global.Interval.IsZero() { + return fmt.Errorf("aggregate function required inside the call to elapsed") + } + return c.compileSymbol("elapsed", arg0) + } +} + +func (c *compiledField) compileDifference(args []influxql.Expr, isNonNegative bool) error { + name := "difference" + if isNonNegative { + name = "non_negative_difference" + } + + if got := len(args); got != 1 { + return fmt.Errorf("invalid number of arguments for %s, expected 1, got %d", name, got) + } + c.global.OnlySelectors = false + + // Must be a variable reference, function, wildcard, or regexp. + switch arg0 := args[0].(type) { + case *influxql.Call: + if c.global.Interval.IsZero() { + return fmt.Errorf("%s aggregate requires a GROUP BY interval", name) + } + return c.compileExpr(arg0) + default: + if !c.global.Interval.IsZero() { + return fmt.Errorf("aggregate function required inside the call to %s", name) + } + return c.compileSymbol(name, arg0) + } +} + +func (c *compiledField) compileCumulativeSum(args []influxql.Expr) error { + if got := len(args); got != 1 { + return fmt.Errorf("invalid number of arguments for cumulative_sum, expected 1, got %d", got) + } + c.global.OnlySelectors = false + + // Must be a variable reference, function, wildcard, or regexp. + switch arg0 := args[0].(type) { + case *influxql.Call: + if c.global.Interval.IsZero() { + return fmt.Errorf("cumulative_sum aggregate requires a GROUP BY interval") + } + return c.compileExpr(arg0) + default: + if !c.global.Interval.IsZero() { + return fmt.Errorf("aggregate function required inside the call to cumulative_sum") + } + return c.compileSymbol("cumulative_sum", arg0) + } +} + +func (c *compiledField) compileMovingAverage(args []influxql.Expr) error { + if got := len(args); got != 2 { + return fmt.Errorf("invalid number of arguments for moving_average, expected 2, got %d", got) + } + + switch arg1 := args[1].(type) { + case *influxql.IntegerLiteral: + if arg1.Val <= 1 { + return fmt.Errorf("moving_average window must be greater than 1, got %d", arg1.Val) + } + default: + return fmt.Errorf("second argument for moving_average must be an integer, got %T", args[1]) + } + c.global.OnlySelectors = false + + // Must be a variable reference, function, wildcard, or regexp. 
+ switch arg0 := args[0].(type) { + case *influxql.Call: + if c.global.Interval.IsZero() { + return fmt.Errorf("moving_average aggregate requires a GROUP BY interval") + } + return c.compileExpr(arg0) + default: + if !c.global.Interval.IsZero() { + return fmt.Errorf("aggregate function required inside the call to moving_average") + } + return c.compileSymbol("moving_average", arg0) + } +} + +func (c *compiledField) compileIntegral(args []influxql.Expr) error { + if min, max, got := 1, 2, len(args); got > max || got < min { + return fmt.Errorf("invalid number of arguments for integral, expected at least %d but no more than %d, got %d", min, max, got) + } + + if len(args) == 2 { + switch arg1 := args[1].(type) { + case *influxql.DurationLiteral: + if arg1.Val <= 0 { + return fmt.Errorf("duration argument must be positive, got %s", influxql.FormatDuration(arg1.Val)) + } + default: + return errors.New("second argument must be a duration") + } + } + c.global.OnlySelectors = false + + // Must be a variable reference, wildcard, or regexp. + return c.compileSymbol("integral", args[0]) +} + +func (c *compiledField) compileHoltWinters(args []influxql.Expr, withFit bool) error { + name := "holt_winters" + if withFit { + name = "holt_winters_with_fit" + } + + if exp, got := 3, len(args); got != exp { + return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", name, exp, got) + } + + n, ok := args[1].(*influxql.IntegerLiteral) + if !ok { + return fmt.Errorf("expected integer argument as second arg in %s", name) + } else if n.Val <= 0 { + return fmt.Errorf("second arg to %s must be greater than 0, got %d", name, n.Val) + } + + s, ok := args[2].(*influxql.IntegerLiteral) + if !ok { + return fmt.Errorf("expected integer argument as third arg in %s", name) + } else if s.Val < 0 { + return fmt.Errorf("third arg to %s cannot be negative, got %d", name, s.Val) + } + c.global.OnlySelectors = false + + call, ok := args[0].(*influxql.Call) + if !ok { + return fmt.Errorf("must use aggregate function with %s", name) + } else if c.global.Interval.IsZero() { + return fmt.Errorf("%s aggregate requires a GROUP BY interval", name) + } + return c.compileExpr(call) +} + +func (c *compiledField) compileDistinct(args []influxql.Expr) error { + if len(args) == 0 { + return errors.New("distinct function requires at least one argument") + } else if len(args) != 1 { + return errors.New("distinct function can only have one argument") + } + + if _, ok := args[0].(*influxql.VarRef); !ok { + return errors.New("expected field argument in distinct()") + } + c.global.HasDistinct = true + c.global.OnlySelectors = false + return nil +} + +func (c *compiledField) compileTopBottom(call *influxql.Call) error { + if c.global.TopBottomFunction != "" { + return fmt.Errorf("selector function %s() cannot be combined with other functions", c.global.TopBottomFunction) + } + + if exp, got := 2, len(call.Args); got < exp { + return fmt.Errorf("invalid number of arguments for %s, expected at least %d, got %d", call.Name, exp, got) + } + + limit, ok := call.Args[len(call.Args)-1].(*influxql.IntegerLiteral) + if !ok { + return fmt.Errorf("expected integer as last argument in %s(), found %s", call.Name, call.Args[len(call.Args)-1]) + } else if limit.Val <= 0 { + return fmt.Errorf("limit (%d) in %s function must be at least 1", limit.Val, call.Name) + } else if c.global.Limit > 0 && int(limit.Val) > c.global.Limit { + return fmt.Errorf("limit (%d) in %s function can not be larger than the LIMIT (%d) in the select statement", 
limit.Val, call.Name, c.global.Limit) + } + + if _, ok := call.Args[0].(*influxql.VarRef); !ok { + return fmt.Errorf("expected first argument to be a field in %s(), found %s", call.Name, call.Args[0]) + } + + if len(call.Args) > 2 { + for _, v := range call.Args[1 : len(call.Args)-1] { + ref, ok := v.(*influxql.VarRef) + if !ok { + return fmt.Errorf("only fields or tags are allowed in %s(), found %s", call.Name, v) + } + + // Add a field for each of the listed dimensions when not writing the results. + if !c.global.HasTarget { + field := &compiledField{ + global: c.global, + Field: &influxql.Field{Expr: ref}, + } + c.global.Fields = append(c.global.Fields, field) + if err := field.compileExpr(ref); err != nil { + return err + } + } + } + } + c.global.TopBottomFunction = call.Name + return nil +} + +func (c *compiledStatement) compileDimensions(stmt *influxql.SelectStatement) error { + for _, d := range stmt.Dimensions { + switch expr := d.Expr.(type) { + case *influxql.VarRef: + if strings.ToLower(expr.Val) == "time" { + return errors.New("time() is a function and expects at least one argument") + } + case *influxql.Call: + // Ensure the call is time() and it has one or two duration arguments. + // If we already have a duration + if expr.Name != "time" { + return errors.New("only time() calls allowed in dimensions") + } else if got := len(expr.Args); got < 1 || got > 2 { + return errors.New("time dimension expected 1 or 2 arguments") + } else if lit, ok := expr.Args[0].(*influxql.DurationLiteral); !ok { + return errors.New("time dimension must have duration argument") + } else if c.Interval.Duration != 0 { + return errors.New("multiple time dimensions not allowed") + } else { + c.Interval.Duration = lit.Val + if len(expr.Args) == 2 { + switch lit := expr.Args[1].(type) { + case *influxql.DurationLiteral: + c.Interval.Offset = lit.Val % c.Interval.Duration + case *influxql.TimeLiteral: + c.Interval.Offset = lit.Val.Sub(lit.Val.Truncate(c.Interval.Duration)) + case *influxql.Call: + if lit.Name != "now" { + return errors.New("time dimension offset function must be now()") + } else if len(lit.Args) != 0 { + return errors.New("time dimension offset now() function requires no arguments") + } + now := c.Options.Now + c.Interval.Offset = now.Sub(now.Truncate(c.Interval.Duration)) + case *influxql.StringLiteral: + // If literal looks like a date time then parse it as a time literal. + if lit.IsTimeLiteral() { + t, err := lit.ToTimeLiteral(stmt.Location) + if err != nil { + return err + } + c.Interval.Offset = t.Val.Sub(t.Val.Truncate(c.Interval.Duration)) + } else { + return errors.New("time dimension offset must be duration or now()") + } + default: + return errors.New("time dimension offset must be duration or now()") + } + } + } + case *influxql.Wildcard: + case *influxql.RegexLiteral: + default: + return errors.New("only time and tag dimensions allowed") + } + } + return nil +} + +// validateFields validates that the fields are mutually compatible with each other. +// This runs at the end of compilation but before linking. +func (c *compiledStatement) validateFields() error { + // Validate that at least one field has been selected. + if len(c.Fields) == 0 { + return errors.New("at least 1 non-time field must be queried") + } + // Ensure there are not multiple calls if top/bottom is present. 
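+	// Illustrative note (not in the original patch): for example,
+	// `SELECT top(value, 10), max(value) FROM cpu` is rejected with
+	// "selector function top() cannot be combined with other functions" (see the failure tests below).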
+ if len(c.FunctionCalls) > 1 && c.TopBottomFunction != "" { + return fmt.Errorf("selector function %s() cannot be combined with other functions", c.TopBottomFunction) + } else if len(c.FunctionCalls) == 0 { + switch c.FillOption { + case influxql.NoFill: + return errors.New("fill(none) must be used with a function") + case influxql.LinearFill: + return errors.New("fill(linear) must be used with a function") + } + if !c.Interval.IsZero() && !c.InheritedInterval { + return errors.New("GROUP BY requires at least one aggregate function") + } + } + // If a distinct() call is present, ensure there is exactly one function. + if c.HasDistinct && (len(c.FunctionCalls) != 1 || c.HasAuxiliaryFields) { + return errors.New("aggregate function distinct() cannot be combined with other functions or fields") + } + // Validate we are using a selector or raw query if auxiliary fields are required. + if c.HasAuxiliaryFields { + if !c.OnlySelectors { + return fmt.Errorf("mixing aggregate and non-aggregate queries is not supported") + } else if len(c.FunctionCalls) > 1 { + return fmt.Errorf("mixing multiple selector functions with tags or fields is not supported") + } + } + return nil +} + +// validateDimensions validates that the dimensions are appropriate for this type of query. +func (c *compiledStatement) validateDimensions() error { + if !c.Interval.IsZero() && !c.InheritedInterval { + // There must be a lower limit that wasn't implicitly set. + if c.TimeRange.Min.UnixNano() == influxql.MinTime { + return errors.New("aggregate functions with GROUP BY time require a WHERE time clause with a lower limit") + } + } + return nil +} + +// subquery compiles and validates a compiled statement for the subquery using +// this compiledStatement as the parent. +func (c *compiledStatement) subquery(stmt *influxql.SelectStatement) error { + subquery := newCompiler(c.Options) + if err := subquery.preprocess(stmt); err != nil { + return err + } + + // Find the intersection between this time range and the parent. + // If the subquery doesn't have a time range, this causes it to + // inherit the parent's time range. + subquery.TimeRange = subquery.TimeRange.Intersect(c.TimeRange) + + // If the fill option is null, set it to none so we don't waste time on + // null values with a redundant fill iterator. + if !subquery.Interval.IsZero() && subquery.FillOption == influxql.NullFill { + subquery.FillOption = influxql.NoFill + } + + // Inherit the grouping interval if the subquery has none. + if !c.Interval.IsZero() && subquery.Interval.IsZero() { + subquery.Interval = c.Interval + subquery.InheritedInterval = true + } + return subquery.compile(stmt) +} + +func (c *compiledStatement) Prepare(shardMapper ShardMapper, sopt SelectOptions) (PreparedStatement, error) { // Create an iterator creator based on the shards in the cluster. 
- shards, err := shardMapper.MapShards(c.stmt.Sources, timeRange, sopt) + shards, err := shardMapper.MapShards(c.stmt.Sources, c.TimeRange, sopt) if err != nil { return nil, err } @@ -78,6 +802,7 @@ func (c *compiledStatement) Prepare(shardMapper ShardMapper, sopt SelectOptions) if err != nil { return nil, err } + opt.StartTime, opt.EndTime = c.TimeRange.MinTime(), c.TimeRange.MaxTime() if sopt.MaxBucketsN > 0 && !stmt.IsRawQuery { interval, err := stmt.GroupByInterval() diff --git a/query/compile_test.go b/query/compile_test.go new file mode 100644 index 0000000000..a7a4a756ff --- /dev/null +++ b/query/compile_test.go @@ -0,0 +1,338 @@ +package query_test + +import ( + "testing" + + "github.com/influxdata/influxdb/influxql" + "github.com/influxdata/influxdb/query" +) + +func TestCompile_Success(t *testing.T) { + for _, tt := range []string{ + `SELECT time, value FROM cpu`, + `SELECT value FROM cpu`, + `SELECT value, host FROM cpu`, + `SELECT * FROM cpu`, + `SELECT time, * FROM cpu`, + `SELECT value, * FROM cpu`, + `SELECT max(value) FROM cpu`, + `SELECT max(value), host FROM cpu`, + `SELECT max(value), * FROM cpu`, + `SELECT max(*) FROM cpu`, + `SELECT max(/val/) FROM cpu`, + `SELECT min(value) FROM cpu`, + `SELECT min(value), host FROM cpu`, + `SELECT min(value), * FROM cpu`, + `SELECT min(*) FROM cpu`, + `SELECT min(/val/) FROM cpu`, + `SELECT first(value) FROM cpu`, + `SELECT first(value), host FROM cpu`, + `SELECT first(value), * FROM cpu`, + `SELECT first(*) FROM cpu`, + `SELECT first(/val/) FROM cpu`, + `SELECT last(value) FROM cpu`, + `SELECT last(value), host FROM cpu`, + `SELECT last(value), * FROM cpu`, + `SELECT last(*) FROM cpu`, + `SELECT last(/val/) FROM cpu`, + `SELECT count(value) FROM cpu`, + `SELECT count(distinct(value)) FROM cpu`, + `SELECT count(distinct value) FROM cpu`, + `SELECT count(*) FROM cpu`, + `SELECT count(/val/) FROM cpu`, + `SELECT mean(value) FROM cpu`, + `SELECT mean(*) FROM cpu`, + `SELECT mean(/val/) FROM cpu`, + `SELECT min(value), max(value) FROM cpu`, + `SELECT min(*), max(*) FROM cpu`, + `SELECT min(/val/), max(/val/) FROM cpu`, + `SELECT first(value), last(value) FROM cpu`, + `SELECT first(*), last(*) FROM cpu`, + `SELECT first(/val/), last(/val/) FROM cpu`, + `SELECT count(value) FROM cpu WHERE time >= now() - 1h GROUP BY time(10m)`, + `SELECT distinct value FROM cpu`, + `SELECT distinct(value) FROM cpu`, + `SELECT value / total FROM cpu`, + `SELECT min(value) / total FROM cpu`, + `SELECT max(value) / total FROM cpu`, + `SELECT top(value, 1) FROM cpu`, + `SELECT top(value, host, 1) FROM cpu`, + `SELECT top(value, 1), host FROM cpu`, + `SELECT min(top) FROM (SELECT top(value, host, 1) FROM cpu) GROUP BY region`, + `SELECT bottom(value, 1) FROM cpu`, + `SELECT bottom(value, host, 1) FROM cpu`, + `SELECT bottom(value, 1), host FROM cpu`, + `SELECT max(bottom) FROM (SELECT bottom(value, host, 1) FROM cpu) GROUP BY region`, + `SELECT percentile(value, 75) FROM cpu`, + `SELECT percentile(value, 75.0) FROM cpu`, + `SELECT sample(value, 2) FROM cpu`, + `SELECT sample(*, 2) FROM cpu`, + `SELECT sample(/val/, 2) FROM cpu`, + `SELECT elapsed(value) FROM cpu`, + `SELECT elapsed(value, 10s) FROM cpu`, + `SELECT integral(value) FROM cpu`, + `SELECT integral(value, 10s) FROM cpu`, + `SELECT max(value) FROM cpu WHERE time >= now() - 1m GROUP BY time(10s, 5s)`, + `SELECT max(value) FROM cpu WHERE time >= now() - 1m GROUP BY time(10s, '2000-01-01T00:00:05Z')`, + `SELECT max(value) FROM cpu WHERE time >= now() - 1m GROUP BY time(10s, now())`, + `SELECT max(mean) 
FROM (SELECT mean(value) FROM cpu GROUP BY host)`, + `SELECT max(derivative) FROM (SELECT derivative(mean(value)) FROM cpu) WHERE time >= now() - 1m GROUP BY time(10s)`, + `SELECT max(value) FROM (SELECT value + total FROM cpu) WHERE time >= now() - 1m GROUP BY time(10s)`, + `SELECT value FROM cpu WHERE time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T01:00:00Z'`, + } { + t.Run(tt, func(t *testing.T) { + stmt, err := influxql.ParseStatement(tt) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + s := stmt.(*influxql.SelectStatement) + + opt := query.CompileOptions{} + if _, err := query.Compile(s, opt); err != nil { + t.Errorf("unexpected error: %s", err) + } + }) + } +} + +func TestCompile_Failures(t *testing.T) { + for _, tt := range []struct { + s string + err string + }{ + {s: `SELECT time FROM cpu`, err: `at least 1 non-time field must be queried`}, + {s: `SELECT value, mean(value) FROM cpu`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT value, max(value), min(value) FROM cpu`, err: `mixing multiple selector functions with tags or fields is not supported`}, + {s: `SELECT top(value, 10), max(value) FROM cpu`, err: `selector function top() cannot be combined with other functions`}, + {s: `SELECT bottom(value, 10), max(value) FROM cpu`, err: `selector function bottom() cannot be combined with other functions`}, + {s: `SELECT count() FROM cpu`, err: `invalid number of arguments for count, expected 1, got 0`}, + {s: `SELECT count(value, host) FROM cpu`, err: `invalid number of arguments for count, expected 1, got 2`}, + {s: `SELECT min() FROM cpu`, err: `invalid number of arguments for min, expected 1, got 0`}, + {s: `SELECT min(value, host) FROM cpu`, err: `invalid number of arguments for min, expected 1, got 2`}, + {s: `SELECT max() FROM cpu`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT max(value, host) FROM cpu`, err: `invalid number of arguments for max, expected 1, got 2`}, + {s: `SELECT sum() FROM cpu`, err: `invalid number of arguments for sum, expected 1, got 0`}, + {s: `SELECT sum(value, host) FROM cpu`, err: `invalid number of arguments for sum, expected 1, got 2`}, + {s: `SELECT first() FROM cpu`, err: `invalid number of arguments for first, expected 1, got 0`}, + {s: `SELECT first(value, host) FROM cpu`, err: `invalid number of arguments for first, expected 1, got 2`}, + {s: `SELECT last() FROM cpu`, err: `invalid number of arguments for last, expected 1, got 0`}, + {s: `SELECT last(value, host) FROM cpu`, err: `invalid number of arguments for last, expected 1, got 2`}, + {s: `SELECT mean() FROM cpu`, err: `invalid number of arguments for mean, expected 1, got 0`}, + {s: `SELECT mean(value, host) FROM cpu`, err: `invalid number of arguments for mean, expected 1, got 2`}, + {s: `SELECT distinct(value), max(value) FROM cpu`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, + {s: `SELECT count(distinct(value)), max(value) FROM cpu`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, + {s: `SELECT count(distinct()) FROM cpu`, err: `distinct function requires at least one argument`}, + {s: `SELECT count(distinct(value, host)) FROM cpu`, err: `distinct function can only have one argument`}, + {s: `SELECT count(distinct(2)) FROM cpu`, err: `expected field argument in distinct()`}, + {s: `SELECT value FROM cpu GROUP BY now()`, err: `only time() calls allowed in dimensions`}, + {s: `SELECT value FROM cpu GROUP BY time()`, 
err: `time dimension expected 1 or 2 arguments`}, + {s: `SELECT value FROM cpu GROUP BY time(5m, 30s, 1ms)`, err: `time dimension expected 1 or 2 arguments`}, + {s: `SELECT value FROM cpu GROUP BY time('unexpected')`, err: `time dimension must have duration argument`}, + {s: `SELECT value FROM cpu GROUP BY time(5m), time(1m)`, err: `multiple time dimensions not allowed`}, + {s: `SELECT value FROM cpu GROUP BY time(5m, unexpected())`, err: `time dimension offset function must be now()`}, + {s: `SELECT value FROM cpu GROUP BY time(5m, now(1m))`, err: `time dimension offset now() function requires no arguments`}, + {s: `SELECT value FROM cpu GROUP BY time(5m, 'unexpected')`, err: `time dimension offset must be duration or now()`}, + {s: `SELECT value FROM cpu GROUP BY 'unexpected'`, err: `only time and tag dimensions allowed`}, + {s: `SELECT top(value) FROM cpu`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT top('unexpected', 5) FROM cpu`, err: `expected first argument to be a field in top(), found 'unexpected'`}, + {s: `SELECT top(value, 'unexpected', 5) FROM cpu`, err: `only fields or tags are allowed in top(), found 'unexpected'`}, + {s: `SELECT top(value, 2.5) FROM cpu`, err: `expected integer as last argument in top(), found 2.500`}, + {s: `SELECT top(value, -1) FROM cpu`, err: `limit (-1) in top function must be at least 1`}, + {s: `SELECT top(value, 3) FROM cpu LIMIT 2`, err: `limit (3) in top function can not be larger than the LIMIT (2) in the select statement`}, + {s: `SELECT bottom(value) FROM cpu`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT bottom('unexpected', 5) FROM cpu`, err: `expected first argument to be a field in bottom(), found 'unexpected'`}, + {s: `SELECT bottom(value, 'unexpected', 5) FROM cpu`, err: `only fields or tags are allowed in bottom(), found 'unexpected'`}, + {s: `SELECT bottom(value, 2.5) FROM cpu`, err: `expected integer as last argument in bottom(), found 2.500`}, + {s: `SELECT bottom(value, -1) FROM cpu`, err: `limit (-1) in bottom function must be at least 1`}, + {s: `SELECT bottom(value, 3) FROM cpu LIMIT 2`, err: `limit (3) in bottom function can not be larger than the LIMIT (2) in the select statement`}, + {s: `SELECT value FROM cpu WHERE time >= now() - 10m OR time < now() - 5m`, err: `cannot use OR with time conditions`}, + {s: `SELECT value FROM cpu WHERE value`, err: `invalid condition expression: value`}, + {s: `SELECT count(value), * FROM cpu`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT max(*), host FROM cpu`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT count(value), /ho/ FROM cpu`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT max(/val/), * FROM cpu`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT a(value) FROM cpu`, err: `undefined function a()`}, + {s: `SELECT count(max(value)) FROM myseries`, err: `expected field argument in count()`}, + {s: `SELECT count(distinct('value')) FROM myseries`, err: `expected field argument in distinct()`}, + {s: `SELECT distinct('value') FROM myseries`, err: `expected field argument in distinct()`}, + {s: `SELECT min(max(value)) FROM myseries`, err: `expected field argument in min()`}, + {s: `SELECT min(distinct(value)) FROM myseries`, err: `expected field argument in min()`}, + {s: `SELECT max(max(value)) FROM myseries`, err: `expected field argument in max()`}, + {s: `SELECT 
sum(max(value)) FROM myseries`, err: `expected field argument in sum()`}, + {s: `SELECT first(max(value)) FROM myseries`, err: `expected field argument in first()`}, + {s: `SELECT last(max(value)) FROM myseries`, err: `expected field argument in last()`}, + {s: `SELECT mean(max(value)) FROM myseries`, err: `expected field argument in mean()`}, + {s: `SELECT median(max(value)) FROM myseries`, err: `expected field argument in median()`}, + {s: `SELECT mode(max(value)) FROM myseries`, err: `expected field argument in mode()`}, + {s: `SELECT stddev(max(value)) FROM myseries`, err: `expected field argument in stddev()`}, + {s: `SELECT spread(max(value)) FROM myseries`, err: `expected field argument in spread()`}, + {s: `SELECT top() FROM myseries`, err: `invalid number of arguments for top, expected at least 2, got 0`}, + {s: `SELECT top(field1) FROM myseries`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT top(field1,foo) FROM myseries`, err: `expected integer as last argument in top(), found foo`}, + {s: `SELECT top(field1,host,'server',foo) FROM myseries`, err: `expected integer as last argument in top(), found foo`}, + {s: `SELECT top(field1,5,'server',2) FROM myseries`, err: `only fields or tags are allowed in top(), found 5`}, + {s: `SELECT top(field1,max(foo),'server',2) FROM myseries`, err: `only fields or tags are allowed in top(), found max(foo)`}, + {s: `SELECT top(value, 10) + count(value) FROM myseries`, err: `selector function top() cannot be combined with other functions`}, + {s: `SELECT top(max(value), 10) FROM myseries`, err: `expected first argument to be a field in top(), found max(value)`}, + {s: `SELECT bottom() FROM myseries`, err: `invalid number of arguments for bottom, expected at least 2, got 0`}, + {s: `SELECT bottom(field1) FROM myseries`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT bottom(field1,foo) FROM myseries`, err: `expected integer as last argument in bottom(), found foo`}, + {s: `SELECT bottom(field1,host,'server',foo) FROM myseries`, err: `expected integer as last argument in bottom(), found foo`}, + {s: `SELECT bottom(field1,5,'server',2) FROM myseries`, err: `only fields or tags are allowed in bottom(), found 5`}, + {s: `SELECT bottom(field1,max(foo),'server',2) FROM myseries`, err: `only fields or tags are allowed in bottom(), found max(foo)`}, + {s: `SELECT bottom(value, 10) + count(value) FROM myseries`, err: `selector function bottom() cannot be combined with other functions`}, + {s: `SELECT bottom(max(value), 10) FROM myseries`, err: `expected first argument to be a field in bottom(), found max(value)`}, + {s: `SELECT top(value, 10), bottom(value, 10) FROM cpu`, err: `selector function top() cannot be combined with other functions`}, + {s: `SELECT bottom(value, 10), top(value, 10) FROM cpu`, err: `selector function bottom() cannot be combined with other functions`}, + {s: `SELECT sample(value) FROM myseries`, err: `invalid number of arguments for sample, expected 2, got 1`}, + {s: `SELECT sample(value, 2, 3) FROM myseries`, err: `invalid number of arguments for sample, expected 2, got 3`}, + {s: `SELECT sample(value, 0) FROM myseries`, err: `sample window must be greater than 1, got 0`}, + {s: `SELECT sample(value, 2.5) FROM myseries`, err: `expected integer argument in sample()`}, + {s: `SELECT percentile() FROM myseries`, err: `invalid number of arguments for percentile, expected 2, got 0`}, + {s: `SELECT percentile(field1) FROM myseries`, err: `invalid number of 
arguments for percentile, expected 2, got 1`}, + {s: `SELECT percentile(field1, foo) FROM myseries`, err: `expected float argument in percentile()`}, + {s: `SELECT percentile(max(field1), 75) FROM myseries`, err: `expected field argument in percentile()`}, + {s: `SELECT field1 FROM foo group by time(1s)`, err: `GROUP BY requires at least one aggregate function`}, + {s: `SELECT field1 FROM foo fill(none)`, err: `fill(none) must be used with a function`}, + {s: `SELECT field1 FROM foo fill(linear)`, err: `fill(linear) must be used with a function`}, + {s: `SELECT count(value), value FROM foo`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT count(value) FROM foo group by time(1s)`, err: `aggregate functions with GROUP BY time require a WHERE time clause with a lower limit`}, + {s: `SELECT count(value) FROM foo group by time(500ms)`, err: `aggregate functions with GROUP BY time require a WHERE time clause with a lower limit`}, + {s: `SELECT count(value) FROM foo group by time(1s) where host = 'hosta.influxdb.org'`, err: `aggregate functions with GROUP BY time require a WHERE time clause with a lower limit`}, + {s: `SELECT count(value) FROM foo group by time(1s) where time < now()`, err: `aggregate functions with GROUP BY time require a WHERE time clause with a lower limit`}, + {s: `SELECT count(value) FROM foo group by time`, err: `time() is a function and expects at least one argument`}, + {s: `SELECT count(value) FROM foo group by 'time'`, err: `only time and tag dimensions allowed`}, + {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time()`, err: `time dimension expected 1 or 2 arguments`}, + {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(b)`, err: `time dimension must have duration argument`}, + {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(1s), time(2s)`, err: `multiple time dimensions not allowed`}, + {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(1s, b)`, err: `time dimension offset must be duration or now()`}, + {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(1s, '5s')`, err: `time dimension offset must be duration or now()`}, + {s: `SELECT distinct(field1), sum(field1) FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, + {s: `SELECT distinct(field1), field2 FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, + {s: `SELECT distinct(field1, field2) FROM myseries`, err: `distinct function can only have one argument`}, + {s: `SELECT distinct() FROM myseries`, err: `distinct function requires at least one argument`}, + {s: `SELECT distinct field1, field2 FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, + {s: `SELECT count(distinct field1, field2) FROM myseries`, err: `invalid number of arguments for count, expected 1, got 2`}, + {s: `select count(distinct(too, many, arguments)) from myseries`, err: `distinct function can only have one argument`}, + {s: `select count() from myseries`, err: `invalid number of arguments for count, expected 1, got 0`}, + {s: `SELECT derivative(field1), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `select derivative() from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 0`}, + {s: 
`select derivative(mean(value), 1h, 3) from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 3`}, + {s: `SELECT derivative(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to derivative`}, + {s: `SELECT derivative(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT derivative(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT derivative(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT derivative(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT derivative(mean(value), 1h) FROM myseries where time < now() and time > now() - 1d`, err: `derivative aggregate requires a GROUP BY interval`}, + {s: `SELECT derivative(value, -2h) FROM myseries`, err: `duration argument must be positive, got -2h`}, + {s: `SELECT derivative(value, 10) FROM myseries`, err: `second argument to derivative must be a duration, got *influxql.IntegerLiteral`}, + {s: `SELECT derivative(f, true) FROM myseries`, err: `second argument to derivative must be a duration, got *influxql.BooleanLiteral`}, + {s: `SELECT non_negative_derivative(field1), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `select non_negative_derivative() from myseries`, err: `invalid number of arguments for non_negative_derivative, expected at least 1 but no more than 2, got 0`}, + {s: `select non_negative_derivative(mean(value), 1h, 3) from myseries`, err: `invalid number of arguments for non_negative_derivative, expected at least 1 but no more than 2, got 3`}, + {s: `SELECT non_negative_derivative(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to non_negative_derivative`}, + {s: `SELECT non_negative_derivative(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT non_negative_derivative(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT non_negative_derivative(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT non_negative_derivative(mean(value), 1h) FROM myseries where time < now() and time > now() - 1d`, err: `non_negative_derivative aggregate requires a GROUP BY interval`}, + {s: `SELECT non_negative_derivative(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT non_negative_derivative(value, -2h) FROM myseries`, err: `duration argument must be positive, got -2h`}, + {s: `SELECT non_negative_derivative(value, 10) FROM myseries`, err: `second argument to non_negative_derivative must be a duration, got *influxql.IntegerLiteral`}, + {s: `SELECT difference(field1), field1 FROM myseries`, err: `mixing 
aggregate and non-aggregate queries is not supported`}, + {s: `SELECT difference() from myseries`, err: `invalid number of arguments for difference, expected 1, got 0`}, + {s: `SELECT difference(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to difference`}, + {s: `SELECT difference(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT difference(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT difference(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT difference(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT difference(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `difference aggregate requires a GROUP BY interval`}, + {s: `SELECT non_negative_difference(field1), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT non_negative_difference() from myseries`, err: `invalid number of arguments for non_negative_difference, expected 1, got 0`}, + {s: `SELECT non_negative_difference(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to non_negative_difference`}, + {s: `SELECT non_negative_difference(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT non_negative_difference(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT non_negative_difference(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT non_negative_difference(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT non_negative_difference(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `non_negative_difference aggregate requires a GROUP BY interval`}, + {s: `SELECT elapsed() FROM myseries`, err: `invalid number of arguments for elapsed, expected at least 1 but no more than 2, got 0`}, + {s: `SELECT elapsed(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to elapsed`}, + {s: `SELECT elapsed(value, 1s, host) FROM myseries`, err: `invalid number of arguments for elapsed, expected at least 1 but no more than 2, got 3`}, + {s: `SELECT elapsed(value, 0s) FROM myseries`, err: `duration argument must be positive, got 0s`}, + {s: `SELECT elapsed(value, -10s) FROM myseries`, err: `duration argument must be positive, got -10s`}, + {s: `SELECT elapsed(value, 10) FROM myseries`, err: `second argument to elapsed must be a duration, got *influxql.IntegerLiteral`}, + {s: `SELECT elapsed(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT 
elapsed(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT elapsed(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT elapsed(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT elapsed(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `elapsed aggregate requires a GROUP BY interval`}, + {s: `SELECT moving_average(field1, 2), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT moving_average(field1, 1), field1 FROM myseries`, err: `moving_average window must be greater than 1, got 1`}, + {s: `SELECT moving_average(field1, 0), field1 FROM myseries`, err: `moving_average window must be greater than 1, got 0`}, + {s: `SELECT moving_average(field1, -1), field1 FROM myseries`, err: `moving_average window must be greater than 1, got -1`}, + {s: `SELECT moving_average(field1, 2.0), field1 FROM myseries`, err: `second argument for moving_average must be an integer, got *influxql.NumberLiteral`}, + {s: `SELECT moving_average() from myseries`, err: `invalid number of arguments for moving_average, expected 2, got 0`}, + {s: `SELECT moving_average(value) FROM myseries`, err: `invalid number of arguments for moving_average, expected 2, got 1`}, + {s: `SELECT moving_average(value, 2) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to moving_average`}, + {s: `SELECT moving_average(top(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT moving_average(bottom(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT moving_average(max(), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT moving_average(percentile(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT moving_average(mean(value), 2) FROM myseries where time < now() and time > now() - 1d`, err: `moving_average aggregate requires a GROUP BY interval`}, + {s: `SELECT cumulative_sum(field1), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT cumulative_sum() from myseries`, err: `invalid number of arguments for cumulative_sum, expected 1, got 0`}, + {s: `SELECT cumulative_sum(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to cumulative_sum`}, + {s: `SELECT cumulative_sum(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT cumulative_sum(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT cumulative_sum(max()) FROM myseries where time < now() and time > now() - 1d group by 
time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT cumulative_sum(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT cumulative_sum(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `cumulative_sum aggregate requires a GROUP BY interval`}, + {s: `SELECT integral() FROM myseries`, err: `invalid number of arguments for integral, expected at least 1 but no more than 2, got 0`}, + {s: `SELECT integral(value, 10s, host) FROM myseries`, err: `invalid number of arguments for integral, expected at least 1 but no more than 2, got 3`}, + {s: `SELECT integral(value, -10s) FROM myseries`, err: `duration argument must be positive, got -10s`}, + {s: `SELECT integral(value, 10) FROM myseries`, err: `second argument must be a duration`}, + {s: `SELECT holt_winters(value) FROM myseries where time < now() and time > now() - 1d`, err: `invalid number of arguments for holt_winters, expected 3, got 1`}, + {s: `SELECT holt_winters(value, 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `must use aggregate function with holt_winters`}, + {s: `SELECT holt_winters(min(value), 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `holt_winters aggregate requires a GROUP BY interval`}, + {s: `SELECT holt_winters(min(value), 0, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `second arg to holt_winters must be greater than 0, got 0`}, + {s: `SELECT holt_winters(min(value), false, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as second arg in holt_winters`}, + {s: `SELECT holt_winters(min(value), 10, 'string') FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as third arg in holt_winters`}, + {s: `SELECT holt_winters(min(value), 10, -1) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `third arg to holt_winters cannot be negative, got -1`}, + {s: `SELECT holt_winters_with_fit(value) FROM myseries where time < now() and time > now() - 1d`, err: `invalid number of arguments for holt_winters_with_fit, expected 3, got 1`}, + {s: `SELECT holt_winters_with_fit(value, 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `must use aggregate function with holt_winters_with_fit`}, + {s: `SELECT holt_winters_with_fit(min(value), 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `holt_winters_with_fit aggregate requires a GROUP BY interval`}, + {s: `SELECT holt_winters_with_fit(min(value), 0, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `second arg to holt_winters_with_fit must be greater than 0, got 0`}, + {s: `SELECT holt_winters_with_fit(min(value), false, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as second arg in holt_winters_with_fit`}, + {s: `SELECT holt_winters_with_fit(min(value), 10, 'string') FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as third arg in holt_winters_with_fit`}, + {s: `SELECT holt_winters_with_fit(min(value), 10, -1) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `third arg to holt_winters_with_fit cannot be negative, got -1`}, + {s: `SELECT mean(value) + value FROM 
cpu WHERE time < now() and time > now() - 1h GROUP BY time(10m)`, err: `mixing aggregate and non-aggregate queries is not supported`}, + // TODO: Remove this restriction in the future: https://github.com/influxdata/influxdb/issues/5968 + {s: `SELECT mean(cpu_total - cpu_idle) FROM cpu`, err: `expected field argument in mean()`}, + {s: `SELECT derivative(mean(cpu_total - cpu_idle), 1s) FROM cpu WHERE time < now() AND time > now() - 1d GROUP BY time(1h)`, err: `expected field argument in mean()`}, + // TODO: The error message will change when math is allowed inside an aggregate: https://github.com/influxdata/influxdb/pull/5990#issuecomment-195565870 + {s: `SELECT count(foo + sum(bar)) FROM cpu`, err: `expected field argument in count()`}, + {s: `SELECT (count(foo + sum(bar))) FROM cpu`, err: `expected field argument in count()`}, + {s: `SELECT sum(value) + count(foo + sum(bar)) FROM cpu`, err: `expected field argument in count()`}, + {s: `SELECT sum(mean) FROM (SELECT mean(value) FROM cpu GROUP BY time(1h))`, err: `aggregate functions with GROUP BY time require a WHERE time clause with a lower limit`}, + {s: `SELECT top(value, 2), max(value) FROM cpu`, err: `selector function top() cannot be combined with other functions`}, + {s: `SELECT bottom(value, 2), max(value) FROM cpu`, err: `selector function bottom() cannot be combined with other functions`}, + {s: `SELECT min(derivative) FROM (SELECT derivative(mean(value), 1h) FROM myseries) where time < now() and time > now() - 1d`, err: `derivative aggregate requires a GROUP BY interval`}, + {s: `SELECT min(mean) FROM (SELECT mean(value) FROM myseries GROUP BY time)`, err: `time() is a function and expects at least one argument`}, + {s: `SELECT value FROM myseries WHERE value OR time >= now() - 1m`, err: `invalid condition expression: value`}, + {s: `SELECT value FROM myseries WHERE time >= now() - 1m OR value`, err: `invalid condition expression: value`}, + } { + t.Run(tt.s, func(t *testing.T) { + stmt, err := influxql.ParseStatement(tt.s) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + s := stmt.(*influxql.SelectStatement) + + opt := query.CompileOptions{} + if _, err := query.Compile(s, opt); err == nil { + t.Error("expected error") + } else if have, want := err.Error(), tt.err; have != want { + t.Errorf("unexpected error: %s != %s", have, want) + } + }) + } +} diff --git a/query/select_test.go b/query/select_test.go index fdddeef088..da08f800ed 100644 --- a/query/select_test.go +++ b/query/select_test.go @@ -2766,49 +2766,6 @@ func TestSelect_BinaryExpr_NilValues(t *testing.T) { } } -func TestSelect_InvalidQueries(t *testing.T) { - shardMapper := ShardMapper{ - MapShardsFn: func(sources influxql.Sources, _ influxql.TimeRange) query.ShardGroup { - return &ShardGroup{ - CreateIteratorFn: func(m *influxql.Measurement, opt query.IteratorOptions) (query.Iterator, error) { - return &FloatIterator{}, nil - }, - } - }, - } - - tests := []struct { - name string - q string - err string - }{ - { - name: "UnsupportedCall", - q: `SELECT foobar(value) FROM cpu`, - err: `unsupported call: foobar`, - }, - { - name: "InvalidStringExpression", - q: `SELECT 'value' FROM cpu`, - err: `invalid expression type: *influxql.StringLiteral`, - }, - { - name: "InvalidStringExpressionWithValidExpression", - q: `SELECT 'value', value FROM cpu`, - err: `invalid expression type: *influxql.StringLiteral`, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - _, _, err := query.Select(MustParseSelectStatement(tt.q), &shardMapper, 
query.SelectOptions{}) - if err == nil || err.Error() != tt.err { - t.Errorf("expected error '%s', got '%s'", tt.err, err) - } - }) - } -} - type ShardMapper struct { MapShardsFn func(sources influxql.Sources, t influxql.TimeRange) query.ShardGroup } diff --git a/tests/server_test.go b/tests/server_test.go index 42116669cb..39c472c647 100644 --- a/tests/server_test.go +++ b/tests/server_test.go @@ -1097,7 +1097,7 @@ func TestServer_Query_Count(t *testing.T) { &Query{ name: "selecting count(2) should error", command: `SELECT count(2) FROM db0.rp0.cpu`, - exp: `{"error":"error parsing query: expected field argument in count()"}`, + exp: `{"results":[{"statement_id":0,"error":"expected field argument in count()"}]}`, }, }...) @@ -4362,13 +4362,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "count - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, count(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","count"],"values":[["2000-01-01T00:00:00Z",3],["2000-01-01T00:00:30Z",3],["2000-01-01T00:01:00Z",3]]}]}]}`, }, &Query{ name: "count - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, count(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and non-aggregate queries is not supported"}]}`, }, &Query{ name: "distinct - baseline 30s", @@ -4380,13 +4380,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "distinct - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, distinct(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: aggregate function distinct() cannot be combined with other functions or fields"}`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","distinct"],"values":[["2000-01-01T00:00:00Z",10],["2000-01-01T00:00:00Z",40],["2000-01-01T00:00:30Z",40],["2000-01-01T00:00:30Z",50],["2000-01-01T00:01:00Z",70],["2000-01-01T00:01:00Z",90],["2000-01-01T00:01:00Z",5]]}]}]}`, }, &Query{ name: "distinct - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, distinct(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: aggregate function distinct() cannot be combined with other functions or fields"}`, + exp: `{"results":[{"statement_id":0,"error":"aggregate function distinct() cannot be combined with other functions or fields"}]}`, }, &Query{ name: "mean - baseline 30s", @@ -4398,13 +4398,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "mean - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, mean(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: 
`{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","mean"],"values":[["2000-01-01T00:00:00Z",30],["2000-01-01T00:00:30Z",46.666666666666664],["2000-01-01T00:01:00Z",55]]}]}]}`, }, &Query{ name: "mean - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, mean(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and non-aggregate queries is not supported"}]}`, }, &Query{ name: "median - baseline 30s", @@ -4416,13 +4416,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "median - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, median(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","median"],"values":[["2000-01-01T00:00:00Z",40],["2000-01-01T00:00:30Z",50],["2000-01-01T00:01:00Z",70]]}]}]}`, }, &Query{ name: "median - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, median(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and non-aggregate queries is not supported"}]}`, }, &Query{ name: "mode - baseline 30s", @@ -4434,31 +4434,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "mode - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, mode(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, - }, - &Query{ - name: "mode - tx", - params: url.Values{"db": []string{"db0"}}, - command: `SELECT tx, mode(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, - }, - &Query{ - name: "mode - baseline 30s", - params: url.Values{"db": []string{"db0"}}, - command: `SELECT mode(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","mode"],"values":[["2000-01-01T00:00:00Z",40],["2000-01-01T00:00:30Z",50],["2000-01-01T00:01:00Z",5]]}]}]}`, }, - &Query{ - name: "mode - time", - params: url.Values{"db": []string{"db0"}}, - command: `SELECT time, mode(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, - }, &Query{ name: "mode - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, mode(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and 
non-aggregate queries is not supported"}]}`, }, &Query{ name: "spread - baseline 30s", @@ -4470,13 +4452,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "spread - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, spread(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","spread"],"values":[["2000-01-01T00:00:00Z",30],["2000-01-01T00:00:30Z",10],["2000-01-01T00:01:00Z",85]]}]}]}`, }, &Query{ name: "spread - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, spread(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and non-aggregate queries is not supported"}]}`, }, &Query{ name: "stddev - baseline 30s", @@ -4488,13 +4470,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "stddev - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, stddev(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","stddev"],"values":[["2000-01-01T00:00:00Z",17.320508075688775],["2000-01-01T00:00:30Z",5.773502691896258],["2000-01-01T00:01:00Z",44.44097208657794]]}]}]}`, }, &Query{ name: "stddev - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, stddev(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and non-aggregate queries is not supported"}]}`, }, &Query{ name: "percentile - baseline 30s", @@ -4766,13 +4748,13 @@ func TestServer_Query_TopBottomInt(t *testing.T) { name: "top - cpu - 3 values with limit 2", params: url.Values{"db": []string{"db0"}}, command: `SELECT TOP(value, 3) FROM cpu limit 2`, - exp: `{"error":"error parsing query: limit (3) in top function can not be larger than the LIMIT (2) in the select statement"}`, + exp: `{"results":[{"statement_id":0,"error":"limit (3) in top function can not be larger than the LIMIT (2) in the select statement"}]}`, }, &Query{ name: "bottom - cpu - 3 values with limit 2", params: url.Values{"db": []string{"db0"}}, command: `SELECT BOTTOM(value, 3) FROM cpu limit 2`, - exp: `{"error":"error parsing query: limit (3) in bottom function can not be larger than the LIMIT (2) in the select statement"}`, + exp: `{"results":[{"statement_id":0,"error":"limit (3) in bottom function can not be larger than the LIMIT (2) in the select statement"}]}`, }, &Query{ name: "top - cpu - hourly",