From 905e7fe05ed2a45513497b0790e682f22c253e4a Mon Sep 17 00:00:00 2001 From: "Jonathan A. Sternberg" Date: Thu, 24 Aug 2017 16:23:00 -0500 Subject: [PATCH] Refactor validation code and move it to the compiler This refactors the validation code so it is more flexible and performs a small bit of work to make preparing and executing the query easier. The general idea is that compilation will eventually do more heavy lifting in creating the initial plan and prepare will construct an actual plan rather than just doing some basic field rewriting. This change at least sets us up for that change in the future and moves the validation code to the query execution instead of in the parser. This also frees up the parser to parse the complete AST without worrying if the query itself is valid. That could be useful for client code that wants to compile a partial query to an AST and then perform modifications on the AST for some reason. --- influxql/ast.go | 578 ----------------------------- influxql/ast_test.go | 23 -- influxql/parser.go | 4 - influxql/parser_test.go | 132 +------ query/compile.go | 791 ++++++++++++++++++++++++++++++++++++++-- query/compile_test.go | 338 +++++++++++++++++ query/select_test.go | 43 --- tests/server_test.go | 50 +-- 8 files changed, 1115 insertions(+), 844 deletions(-) create mode 100644 query/compile_test.go diff --git a/influxql/ast.go b/influxql/ast.go index d7d2c182e6..66eef1b951 100644 --- a/influxql/ast.go +++ b/influxql/ast.go @@ -1789,538 +1789,6 @@ func (s *SelectStatement) HasDimensionWildcard() bool { return false } -func (s *SelectStatement) validate(tr targetRequirement) error { - if err := s.validateFields(); err != nil { - return err - } - - if err := s.validateDimensions(); err != nil { - return err - } - - if err := s.validateDistinct(); err != nil { - return err - } - - if err := s.validateTopBottom(); err != nil { - return err - } - - if err := s.validateAggregates(tr); err != nil { - return err - } - - if err := s.validateFill(); err != nil { - return err - } - - return nil -} - -func (s *SelectStatement) validateFields() error { - ns := s.NamesInSelect() - if len(ns) == 1 && ns[0] == "time" { - return fmt.Errorf("at least 1 non-time field must be queried") - } - - for _, f := range s.Fields { - switch expr := f.Expr.(type) { - case *BinaryExpr: - if err := expr.validate(); err != nil { - return err - } - } - } - return nil -} - -func (s *SelectStatement) validateDimensions() error { - var dur time.Duration - for _, dim := range s.Dimensions { - switch expr := dim.Expr.(type) { - case *Call: - // Ensure the call is time() and it has one or two duration arguments. - // If we already have a duration - if expr.Name != "time" { - return errors.New("only time() calls allowed in dimensions") - } else if got := len(expr.Args); got < 1 || got > 2 { - return errors.New("time dimension expected 1 or 2 arguments") - } else if lit, ok := expr.Args[0].(*DurationLiteral); !ok { - return errors.New("time dimension must have duration argument") - } else if dur != 0 { - return errors.New("multiple time dimensions not allowed") - } else { - dur = lit.Val - if len(expr.Args) == 2 { - switch lit := expr.Args[1].(type) { - case *DurationLiteral: - // noop - case *Call: - if lit.Name != "now" { - return errors.New("time dimension offset function must be now()") - } else if len(lit.Args) != 0 { - return errors.New("time dimension offset now() function requires no arguments") - } - default: - return errors.New("time dimension offset must be duration or now()") - } - } - } - case *VarRef: - if strings.ToLower(expr.Val) == "time" { - return errors.New("time() is a function and expects at least one argument") - } - case *Wildcard: - case *RegexLiteral: - default: - return errors.New("only time and tag dimensions allowed") - } - } - return nil -} - -// validSelectWithAggregate determines if a SELECT statement has the correct -// combination of aggregate functions combined with selected fields and tags -// Currently we don't have support for all aggregates, but aggregates that -// can be combined with fields/tags are: -// TOP, BOTTOM, MAX, MIN, FIRST, LAST -func (s *SelectStatement) validSelectWithAggregate() error { - calls := map[string]struct{}{} - numAggregates := 0 - for _, f := range s.Fields { - fieldCalls := walkFunctionCalls(f.Expr) - for _, c := range fieldCalls { - calls[c.Name] = struct{}{} - } - if len(fieldCalls) != 0 { - numAggregates++ - } - } - // For TOP, BOTTOM, MAX, MIN, FIRST, LAST, PERCENTILE (selector functions) it is ok to ask for fields and tags - // but only if one function is specified. Combining multiple functions and fields and tags is not currently supported - onlySelectors := true - for k := range calls { - switch k { - case "top", "bottom", "max", "min", "first", "last", "percentile", "sample": - default: - onlySelectors = false - break - } - } - if onlySelectors { - // If they only have one selector, they can have as many fields or tags as they want - if numAggregates == 1 { - return nil - } - // If they have multiple selectors, they are not allowed to have any other fields or tags specified - if numAggregates > 1 && len(s.Fields) != numAggregates { - return fmt.Errorf("mixing multiple selector functions with tags or fields is not supported") - } - } - - if numAggregates != 0 && numAggregates != len(s.Fields) { - return fmt.Errorf("mixing aggregate and non-aggregate queries is not supported") - } - return nil -} - -// validTopBottomAggr determines if TOP or BOTTOM aggregates have valid arguments. -func (s *SelectStatement) validTopBottomAggr(expr *Call) error { - if exp, got := 2, len(expr.Args); got < exp { - return fmt.Errorf("invalid number of arguments for %s, expected at least %d, got %d", expr.Name, exp, got) - } - if len(expr.Args) > 1 { - callLimit, ok := expr.Args[len(expr.Args)-1].(*IntegerLiteral) - if !ok { - return fmt.Errorf("expected integer as last argument in %s(), found %s", expr.Name, expr.Args[len(expr.Args)-1]) - } - // Check if they asked for a limit smaller than what they passed into the call - if int64(callLimit.Val) > int64(s.Limit) && s.Limit != 0 { - return fmt.Errorf("limit (%d) in %s function can not be larger than the LIMIT (%d) in the select statement", int64(callLimit.Val), expr.Name, int64(s.Limit)) - } - - for _, v := range expr.Args[:len(expr.Args)-1] { - if _, ok := v.(*VarRef); !ok { - return fmt.Errorf("only fields or tags are allowed in %s(), found %s", expr.Name, v) - } - } - } - return nil -} - -// validPercentileAggr determines if the call to PERCENTILE has valid arguments. -func (s *SelectStatement) validPercentileAggr(expr *Call) error { - if err := s.validSelectWithAggregate(); err != nil { - return err - } - if exp, got := 2, len(expr.Args); got != exp { - return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", expr.Name, exp, got) - } - - switch expr.Args[0].(type) { - case *VarRef, *RegexLiteral, *Wildcard: - // do nothing - default: - return fmt.Errorf("expected field argument in percentile()") - } - - switch expr.Args[1].(type) { - case *IntegerLiteral, *NumberLiteral: - return nil - default: - return fmt.Errorf("expected float argument in percentile()") - } -} - -// validPercentileAggr determines if the call to SAMPLE has valid arguments. -func (s *SelectStatement) validSampleAggr(expr *Call) error { - if err := s.validSelectWithAggregate(); err != nil { - return err - } - if exp, got := 2, len(expr.Args); got != exp { - return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", expr.Name, exp, got) - } - - switch expr.Args[0].(type) { - case *VarRef, *RegexLiteral, *Wildcard: - // do nothing - default: - return fmt.Errorf("expected field argument in sample()") - } - - switch expr.Args[1].(type) { - case *IntegerLiteral: - return nil - default: - return fmt.Errorf("expected integer argument in sample()") - } -} - -func (s *SelectStatement) validateAggregates(tr targetRequirement) error { - for _, f := range s.Fields { - for _, expr := range walkFunctionCalls(f.Expr) { - switch expr.Name { - case "derivative", "non_negative_derivative", "difference", "non_negative_difference", "moving_average", "cumulative_sum", "elapsed": - if err := s.validSelectWithAggregate(); err != nil { - return err - } - switch expr.Name { - case "derivative", "non_negative_derivative", "elapsed": - if min, max, got := 1, 2, len(expr.Args); got > max || got < min { - return fmt.Errorf("invalid number of arguments for %s, expected at least %d but no more than %d, got %d", expr.Name, min, max, got) - } - // If a duration arg is passed, make sure it's a duration - if len(expr.Args) == 2 { - // Second must be a duration .e.g (1h) - if _, ok := expr.Args[1].(*DurationLiteral); !ok { - return fmt.Errorf("second argument to %s must be a duration, got %T", expr.Name, expr.Args[1]) - } - } - case "difference", "non_negative_difference", "cumulative_sum": - if got := len(expr.Args); got != 1 { - return fmt.Errorf("invalid number of arguments for %s, expected 1, got %d", expr.Name, got) - } - case "moving_average": - if got := len(expr.Args); got != 2 { - return fmt.Errorf("invalid number of arguments for moving_average, expected 2, got %d", got) - } - - if lit, ok := expr.Args[1].(*IntegerLiteral); !ok { - return fmt.Errorf("second argument for moving_average must be an integer, got %T", expr.Args[1]) - } else if lit.Val <= 1 { - return fmt.Errorf("moving_average window must be greater than 1, got %d", lit.Val) - } else if int64(int(lit.Val)) != lit.Val { - return fmt.Errorf("moving_average window too large, got %d", lit.Val) - } - } - // Validate that if they have grouping by time, they need a sub-call like min/max, etc. - groupByInterval, err := s.GroupByInterval() - if err != nil { - return fmt.Errorf("invalid group interval: %v", err) - } - - if c, ok := expr.Args[0].(*Call); ok && groupByInterval == 0 && tr != targetSubquery { - return fmt.Errorf("%s aggregate requires a GROUP BY interval", expr.Name) - } else if !ok && groupByInterval > 0 { - return fmt.Errorf("aggregate function required inside the call to %s", expr.Name) - } else if ok { - switch c.Name { - case "top", "bottom": - if err := s.validTopBottomAggr(c); err != nil { - return err - } - case "percentile": - if err := s.validPercentileAggr(c); err != nil { - return err - } - default: - if exp, got := 1, len(c.Args); got != exp { - return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", c.Name, exp, got) - } - - switch fc := c.Args[0].(type) { - case *VarRef, *Wildcard, *RegexLiteral: - // do nothing - case *Call: - if fc.Name != "distinct" || expr.Name != "count" { - return fmt.Errorf("expected field argument in %s()", c.Name) - } else if exp, got := 1, len(fc.Args); got != exp { - return fmt.Errorf("count(distinct %s) can only have %d argument(s), got %d", fc.Name, exp, got) - } else if _, ok := fc.Args[0].(*VarRef); !ok { - return fmt.Errorf("expected field argument in distinct()") - } - case *Distinct: - if expr.Name != "count" { - return fmt.Errorf("expected field argument in %s()", c.Name) - } - default: - return fmt.Errorf("expected field argument in %s()", c.Name) - } - } - } - case "top", "bottom": - if err := s.validTopBottomAggr(expr); err != nil { - return err - } - case "percentile": - if err := s.validPercentileAggr(expr); err != nil { - return err - } - case "sample": - if err := s.validSampleAggr(expr); err != nil { - return err - } - case "integral": - if err := s.validSelectWithAggregate(); err != nil { - return err - } - if min, max, got := 1, 2, len(expr.Args); got > max || got < min { - return fmt.Errorf("invalid number of arguments for %s, expected at least %d but no more than %d, got %d", expr.Name, min, max, got) - } - // If a duration arg is passed, make sure it's a duration - if len(expr.Args) == 2 { - // Second must be a duration .e.g (1h) - if _, ok := expr.Args[1].(*DurationLiteral); !ok { - return errors.New("second argument must be a duration") - } - } - case "holt_winters", "holt_winters_with_fit": - if exp, got := 3, len(expr.Args); got != exp { - return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", expr.Name, exp, got) - } - // Validate that if they have grouping by time, they need a sub-call like min/max, etc. - groupByInterval, err := s.GroupByInterval() - if err != nil { - return fmt.Errorf("invalid group interval: %v", err) - } - - if _, ok := expr.Args[0].(*Call); ok && groupByInterval == 0 && tr != targetSubquery { - return fmt.Errorf("%s aggregate requires a GROUP BY interval", expr.Name) - } else if !ok { - return fmt.Errorf("must use aggregate function with %s", expr.Name) - } - if arg, ok := expr.Args[1].(*IntegerLiteral); !ok { - return fmt.Errorf("expected integer argument as second arg in %s", expr.Name) - } else if arg.Val <= 0 { - return fmt.Errorf("second arg to %s must be greater than 0, got %d", expr.Name, arg.Val) - } - if _, ok := expr.Args[2].(*IntegerLiteral); !ok { - return fmt.Errorf("expected integer argument as third arg in %s", expr.Name) - } - default: - if err := s.validSelectWithAggregate(); err != nil { - return err - } - if exp, got := 1, len(expr.Args); got != exp { - // Special error message if distinct was used as the argument. - if expr.Name == "count" && got >= 1 { - if _, ok := expr.Args[0].(*Distinct); ok { - return fmt.Errorf("count(distinct ) can only have one argument") - } - } - return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", expr.Name, exp, got) - } - switch fc := expr.Args[0].(type) { - case *VarRef, *Wildcard, *RegexLiteral: - // do nothing - case *Call: - if fc.Name != "distinct" || expr.Name != "count" { - return fmt.Errorf("expected field argument in %s()", expr.Name) - } else if exp, got := 1, len(fc.Args); got != exp { - return fmt.Errorf("count(distinct ) can only have one argument") - } else if _, ok := fc.Args[0].(*VarRef); !ok { - return fmt.Errorf("expected field argument in distinct()") - } - case *Distinct: - if expr.Name != "count" { - return fmt.Errorf("expected field argument in %s()", expr.Name) - } - default: - return fmt.Errorf("expected field argument in %s()", expr.Name) - } - } - } - } - - // Check that we have valid duration and where clauses for aggregates - - // fetch the group by duration - groupByDuration, _ := s.GroupByInterval() - - // If we have a group by interval, but no aggregate function, it's an invalid statement - if s.IsRawQuery && groupByDuration > 0 { - return fmt.Errorf("GROUP BY requires at least one aggregate function") - } - - // If we have an aggregate function with a group by time without a where clause, it's an invalid statement - if tr == targetNotRequired { // ignore create continuous query statements - if err := s.validateTimeExpression(); err != nil { - return err - } - } - if tr != targetSubquery { - if err := s.validateGroupByInterval(); err != nil { - return err - } - } - return nil -} - -// validateFill ensures that the fill option matches the query type. -func (s *SelectStatement) validateFill() error { - info := newSelectInfo(s) - if len(info.calls) == 0 { - switch s.Fill { - case NoFill: - return errors.New("fill(none) must be used with a function") - case LinearFill: - return errors.New("fill(linear) must be used with a function") - } - } - return nil -} - -// validateTimeExpression ensures that any select statements that have a group -// by interval either have a time expression limiting the time range or have a -// parent query that does that. -func (s *SelectStatement) validateTimeExpression() error { - // If we have a time expression, we and all subqueries are fine. - if HasTimeExpr(s.Condition) { - return nil - } - - // Check if this is not a raw query and if the group by duration exists. - // If these are true, then we have an error. - interval, err := s.GroupByInterval() - if err != nil { - return err - } else if !s.IsRawQuery && interval > 0 { - return fmt.Errorf("aggregate functions with GROUP BY time require a WHERE time clause") - } - - // Validate the subqueries. If we have a time expression in this select - // statement, we don't need to do this because parent time ranges propagate - // to children. So we only execute this when there is no time condition in - // the parent. - for _, source := range s.Sources { - switch source := source.(type) { - case *SubQuery: - if err := source.Statement.validateTimeExpression(); err != nil { - return err - } - } - } - return nil -} - -// validateGroupByInterval ensures that a select statement is grouped by an -// interval if it contains certain functions. -func (s *SelectStatement) validateGroupByInterval() error { - interval, err := s.GroupByInterval() - if err != nil { - return err - } else if interval > 0 { - // If we have an interval here, that means the interval will propagate - // into any subqueries and we can just stop looking. - return nil - } - - // Check inside of the fields for any of the specific functions that ned a group by interval. - for _, f := range s.Fields { - switch expr := f.Expr.(type) { - case *Call: - switch expr.Name { - case "derivative", "non_negative_derivative", "difference", "non_negative_difference", "moving_average", "cumulative_sum", "elapsed", "holt_winters", "holt_winters_with_fit": - // If the first argument is a call, we needed a group by interval and we don't have one. - if _, ok := expr.Args[0].(*Call); ok { - return fmt.Errorf("%s aggregate requires a GROUP BY interval", expr.Name) - } - } - } - } - - // Validate the subqueries. - for _, source := range s.Sources { - switch source := source.(type) { - case *SubQuery: - if err := source.Statement.validateGroupByInterval(); err != nil { - return err - } - } - } - return nil -} - -// HasDistinct checks if a select statement contains a call to DISTINCT. -func (s *SelectStatement) HasDistinct() bool { - for _, f := range s.Fields { - switch c := f.Expr.(type) { - case *Call: - if c.Name == "distinct" { - return true - } - case *Distinct: - return true - } - } - return false -} - -func (s *SelectStatement) validateDistinct() error { - if !s.HasDistinct() { - return nil - } - - if len(s.Fields) > 1 { - return fmt.Errorf("aggregate function distinct() cannot be combined with other functions or fields") - } - - switch c := s.Fields[0].Expr.(type) { - case *Call: - if len(c.Args) == 0 { - return fmt.Errorf("distinct function requires at least one argument") - } - - if len(c.Args) != 1 { - return fmt.Errorf("distinct function can only have one argument") - } - } - return nil -} - -func (s *SelectStatement) validateTopBottom() error { - // Ensure there are not multiple calls if top/bottom is present. - info := newSelectInfo(s) - if len(info.calls) > 1 { - for call := range info.calls { - if call.Name == "top" || call.Name == "bottom" { - return fmt.Errorf("selector function %s() cannot be combined with other functions", call.Name) - } - } - } - return nil -} - // GroupByInterval extracts the time interval, if specified. func (s *SelectStatement) GroupByInterval() (time.Duration, error) { // return if we've already pulled it out @@ -4129,52 +3597,6 @@ func (e *BinaryExpr) String() string { return fmt.Sprintf("%s %s %s", e.LHS.String(), e.Op.String(), e.RHS.String()) } -func (e *BinaryExpr) validate() error { - v := binaryExprValidator{} - Walk(&v, e) - if v.err != nil { - return v.err - } else if v.calls && v.refs { - return errors.New("binary expressions cannot mix aggregates and raw fields") - } - return nil -} - -type binaryExprValidator struct { - calls bool - refs bool - err error -} - -func (v *binaryExprValidator) Visit(n Node) Visitor { - if v.err != nil { - return nil - } - - switch n := n.(type) { - case *Call: - v.calls = true - - if n.Name == "top" || n.Name == "bottom" { - v.err = fmt.Errorf("cannot use %s() inside of a binary expression", n.Name) - return nil - } - - for _, expr := range n.Args { - switch e := expr.(type) { - case *BinaryExpr: - v.err = e.validate() - return nil - } - } - return nil - case *VarRef: - v.refs = true - return nil - } - return v -} - // BinaryExprName returns the name of a binary expression by concatenating // the variables in the binary expression with underscores. func BinaryExprName(expr *BinaryExpr) string { diff --git a/influxql/ast_test.go b/influxql/ast_test.go index 3fb5fa6833..18e1e8a700 100644 --- a/influxql/ast_test.go +++ b/influxql/ast_test.go @@ -1666,29 +1666,6 @@ func TestSources_HasSystemSource(t *testing.T) { } } -// Parse statements that might appear valid but should return an error. -// If allowed to execute, at least some of these statements would result in a panic. -func TestParse_Errors(t *testing.T) { - for _, tt := range []struct { - tmpl string - good string - bad string - }{ - // Second argument to derivative must be duration - {tmpl: `SELECT derivative(f, %s) FROM m`, good: "1h", bad: "true"}, - } { - good := fmt.Sprintf(tt.tmpl, tt.good) - if _, err := influxql.ParseStatement(good); err != nil { - t.Fatalf("statement %q should have parsed correctly but returned error: %s", good, err) - } - - bad := fmt.Sprintf(tt.tmpl, tt.bad) - if _, err := influxql.ParseStatement(bad); err == nil { - t.Fatalf("statement %q should have resulted in a parse error but did not", bad) - } - } -} - // This test checks to ensure that we have given thought to the database // context required for security checks. If a new statement is added, this // test will fail until it is categorized into the correct bucket below. diff --git a/influxql/parser.go b/influxql/parser.go index 36a7670137..f4f4f2ca9c 100644 --- a/influxql/parser.go +++ b/influxql/parser.go @@ -791,10 +791,6 @@ func (p *Parser) parseSelectStatement(tr targetRequirement) (*SelectStatement, e } }) - if err := stmt.validate(tr); err != nil { - return nil, err - } - return stmt, nil } diff --git a/influxql/parser_test.go b/influxql/parser_test.go index 58de3c1e07..a3b310c99d 100644 --- a/influxql/parser_test.go +++ b/influxql/parser_test.go @@ -142,7 +142,7 @@ func TestParser_ParseStatement(t *testing.T) { // SELECT statement { - s: fmt.Sprintf(`SELECT mean(field1), sum(field2) ,count(field3) AS field_x FROM myseries WHERE host = 'hosta.influxdb.org' and time > '%s' GROUP BY time(10h) ORDER BY DESC LIMIT 20 OFFSET 10;`, now.UTC().Format(time.RFC3339Nano)), + s: fmt.Sprintf(`SELECT mean(field1), sum(field2), count(field3) AS field_x FROM myseries WHERE host = 'hosta.influxdb.org' and time > '%s' GROUP BY time(10h) ORDER BY DESC LIMIT 20 OFFSET 10;`, now.UTC().Format(time.RFC3339Nano)), stmt: &influxql.SelectStatement{ IsRawQuery: false, Fields: []*influxql.Field{ @@ -2795,47 +2795,12 @@ func TestParser_ParseStatement(t *testing.T) { // Errors {s: ``, err: `found EOF, expected SELECT, DELETE, SHOW, CREATE, DROP, GRANT, REVOKE, ALTER, SET, KILL at line 1, char 1`}, {s: `SELECT`, err: `found EOF, expected identifier, string, number, bool at line 1, char 8`}, - {s: `SELECT time FROM myseries`, err: `at least 1 non-time field must be queried`}, {s: `blah blah`, err: `found blah, expected SELECT, DELETE, SHOW, CREATE, DROP, GRANT, REVOKE, ALTER, SET, KILL at line 1, char 1`}, {s: `SELECT field1 X`, err: `found X, expected FROM at line 1, char 15`}, {s: `SELECT field1 FROM "series" WHERE X +;`, err: `found ;, expected identifier, string, number, bool at line 1, char 38`}, {s: `SELECT field1 FROM myseries GROUP`, err: `found EOF, expected BY at line 1, char 35`}, {s: `SELECT field1 FROM myseries LIMIT`, err: `found EOF, expected integer at line 1, char 35`}, {s: `SELECT field1 FROM myseries LIMIT 10.5`, err: `found 10.5, expected integer at line 1, char 35`}, - {s: `SELECT count(max(value)) FROM myseries`, err: `expected field argument in count()`}, - {s: `SELECT count(distinct('value')) FROM myseries`, err: `expected field argument in distinct()`}, - {s: `SELECT distinct('value') FROM myseries`, err: `expected field argument in distinct()`}, - {s: `SELECT min(max(value)) FROM myseries`, err: `expected field argument in min()`}, - {s: `SELECT min(distinct(value)) FROM myseries`, err: `expected field argument in min()`}, - {s: `SELECT max(max(value)) FROM myseries`, err: `expected field argument in max()`}, - {s: `SELECT sum(max(value)) FROM myseries`, err: `expected field argument in sum()`}, - {s: `SELECT first(max(value)) FROM myseries`, err: `expected field argument in first()`}, - {s: `SELECT last(max(value)) FROM myseries`, err: `expected field argument in last()`}, - {s: `SELECT mean(max(value)) FROM myseries`, err: `expected field argument in mean()`}, - {s: `SELECT median(max(value)) FROM myseries`, err: `expected field argument in median()`}, - {s: `SELECT mode(max(value)) FROM myseries`, err: `expected field argument in mode()`}, - {s: `SELECT stddev(max(value)) FROM myseries`, err: `expected field argument in stddev()`}, - {s: `SELECT spread(max(value)) FROM myseries`, err: `expected field argument in spread()`}, - {s: `SELECT top() FROM myseries`, err: `invalid number of arguments for top, expected at least 2, got 0`}, - {s: `SELECT top(field1) FROM myseries`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT top(field1,foo) FROM myseries`, err: `expected integer as last argument in top(), found foo`}, - {s: `SELECT top(field1,host,'server',foo) FROM myseries`, err: `expected integer as last argument in top(), found foo`}, - {s: `SELECT top(field1,5,'server',2) FROM myseries`, err: `only fields or tags are allowed in top(), found 5`}, - {s: `SELECT top(field1,max(foo),'server',2) FROM myseries`, err: `only fields or tags are allowed in top(), found max(foo)`}, - {s: `SELECT top(value, 10) + count(value) FROM myseries`, err: `cannot use top() inside of a binary expression`}, - {s: `SELECT top(max(value), 10) FROM myseries`, err: `only fields or tags are allowed in top(), found max(value)`}, - {s: `SELECT bottom() FROM myseries`, err: `invalid number of arguments for bottom, expected at least 2, got 0`}, - {s: `SELECT bottom(field1) FROM myseries`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT bottom(field1,foo) FROM myseries`, err: `expected integer as last argument in bottom(), found foo`}, - {s: `SELECT bottom(field1,host,'server',foo) FROM myseries`, err: `expected integer as last argument in bottom(), found foo`}, - {s: `SELECT bottom(field1,5,'server',2) FROM myseries`, err: `only fields or tags are allowed in bottom(), found 5`}, - {s: `SELECT bottom(field1,max(foo),'server',2) FROM myseries`, err: `only fields or tags are allowed in bottom(), found max(foo)`}, - {s: `SELECT bottom(value, 10) + count(value) FROM myseries`, err: `cannot use bottom() inside of a binary expression`}, - {s: `SELECT bottom(max(value), 10) FROM myseries`, err: `only fields or tags are allowed in bottom(), found max(value)`}, - {s: `SELECT percentile() FROM myseries`, err: `invalid number of arguments for percentile, expected 2, got 0`}, - {s: `SELECT percentile(field1) FROM myseries`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT percentile(field1, foo) FROM myseries`, err: `expected float argument in percentile()`}, - {s: `SELECT percentile(max(field1), 75) FROM myseries`, err: `expected field argument in percentile()`}, {s: `SELECT field1 FROM myseries OFFSET`, err: `found EOF, expected integer at line 1, char 36`}, {s: `SELECT field1 FROM myseries OFFSET 10.5`, err: `found 10.5, expected integer at line 1, char 36`}, {s: `SELECT field1 FROM myseries ORDER`, err: `found EOF, expected BY at line 1, char 35`}, @@ -2845,99 +2810,16 @@ func TestParser_ParseStatement(t *testing.T) { {s: `SELECT field1 FROM myseries ORDER BY time ASC,`, err: `found EOF, expected identifier at line 1, char 47`}, {s: `SELECT field1 FROM myseries ORDER BY time, field1`, err: `only ORDER BY time supported at this time`}, {s: `SELECT field1 AS`, err: `found EOF, expected identifier at line 1, char 18`}, - {s: `SELECT field1 FROM foo group by time(1s)`, err: `GROUP BY requires at least one aggregate function`}, - {s: `SELECT field1 FROM foo fill(none)`, err: `fill(none) must be used with a function`}, - {s: `SELECT field1 FROM foo fill(linear)`, err: `fill(linear) must be used with a function`}, - {s: `SELECT count(value), value FROM foo`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `SELECT count(value)/10, value FROM foo`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `SELECT count(value) FROM foo group by time(1s)`, err: `aggregate functions with GROUP BY time require a WHERE time clause`}, - {s: `SELECT count(value) FROM foo group by time(500ms)`, err: `aggregate functions with GROUP BY time require a WHERE time clause`}, - {s: `SELECT count(value) FROM foo group by time(1s) where host = 'hosta.influxdb.org'`, err: `aggregate functions with GROUP BY time require a WHERE time clause`}, - {s: `SELECT count(value) FROM foo group by time`, err: `time() is a function and expects at least one argument`}, - {s: `SELECT count(value) FROM foo group by 'time'`, err: `only time and tag dimensions allowed`}, - {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time()`, err: `time dimension expected 1 or 2 arguments`}, - {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(b)`, err: `time dimension must have duration argument`}, - {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(1s), time(2s)`, err: `multiple time dimensions not allowed`}, - {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(1s, b)`, err: `time dimension offset must be duration or now()`}, {s: `SELECT field1 FROM 12`, err: `found 12, expected identifier at line 1, char 20`}, {s: `SELECT 1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 FROM myseries`, err: `unable to parse integer at line 1, char 8`}, {s: `SELECT 10.5h FROM myseries`, err: `found h, expected FROM at line 1, char 12`}, - {s: `SELECT distinct(field1), sum(field1) FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, - {s: `SELECT distinct(field1), field2 FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, - {s: `SELECT distinct(field1, field2) FROM myseries`, err: `distinct function can only have one argument`}, - {s: `SELECT distinct() FROM myseries`, err: `distinct function requires at least one argument`}, {s: `SELECT distinct FROM myseries`, err: `found FROM, expected identifier at line 1, char 17`}, - {s: `SELECT distinct field1, field2 FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, {s: `SELECT count(distinct) FROM myseries`, err: `found ), expected (, identifier at line 1, char 22`}, - {s: `SELECT count(distinct field1, field2) FROM myseries`, err: `count(distinct ) can only have one argument`}, - {s: `select count(distinct(too, many, arguments)) from myseries`, err: `count(distinct ) can only have one argument`}, - {s: `select count() from myseries`, err: `invalid number of arguments for count, expected 1, got 0`}, - {s: `SELECT derivative(), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `select derivative() from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 0`}, - {s: `select derivative(mean(value), 1h, 3) from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 3`}, - {s: `SELECT derivative(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to derivative`}, - {s: `SELECT derivative(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT derivative(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT derivative(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, - {s: `SELECT derivative(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT derivative(mean(value), 1h) FROM myseries where time < now() and time > now() - 1d`, err: `derivative aggregate requires a GROUP BY interval`}, - {s: `SELECT min(derivative) FROM (SELECT derivative(mean(value), 1h) FROM myseries) where time < now() and time > now() - 1d`, err: `derivative aggregate requires a GROUP BY interval`}, - {s: `SELECT non_negative_derivative(), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `select non_negative_derivative() from myseries`, err: `invalid number of arguments for non_negative_derivative, expected at least 1 but no more than 2, got 0`}, - {s: `select non_negative_derivative(mean(value), 1h, 3) from myseries`, err: `invalid number of arguments for non_negative_derivative, expected at least 1 but no more than 2, got 3`}, - {s: `SELECT non_negative_derivative(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to non_negative_derivative`}, - {s: `SELECT non_negative_derivative(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT non_negative_derivative(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT non_negative_derivative(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, - {s: `SELECT non_negative_derivative(mean(value), 1h) FROM myseries where time < now() and time > now() - 1d`, err: `non_negative_derivative aggregate requires a GROUP BY interval`}, - {s: `SELECT non_negative_derivative(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT difference(), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `SELECT difference() from myseries`, err: `invalid number of arguments for difference, expected 1, got 0`}, - {s: `SELECT difference(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to difference`}, - {s: `SELECT difference(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT difference(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT difference(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, - {s: `SELECT difference(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT difference(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `difference aggregate requires a GROUP BY interval`}, - {s: `SELECT moving_average(), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `SELECT moving_average() from myseries`, err: `invalid number of arguments for moving_average, expected 2, got 0`}, - {s: `SELECT moving_average(value) FROM myseries`, err: `invalid number of arguments for moving_average, expected 2, got 1`}, - {s: `SELECT moving_average(value, 2) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to moving_average`}, - {s: `SELECT moving_average(top(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT moving_average(bottom(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT moving_average(max(), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, - {s: `SELECT moving_average(percentile(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT moving_average(mean(value), 2) FROM myseries where time < now() and time > now() - 1d`, err: `moving_average aggregate requires a GROUP BY interval`}, - {s: `SELECT cumulative_sum(), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, - {s: `SELECT cumulative_sum() from myseries`, err: `invalid number of arguments for cumulative_sum, expected 1, got 0`}, - {s: `SELECT cumulative_sum(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to cumulative_sum`}, - {s: `SELECT cumulative_sum(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, - {s: `SELECT cumulative_sum(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, - {s: `SELECT cumulative_sum(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, - {s: `SELECT cumulative_sum(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, - {s: `SELECT cumulative_sum(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `cumulative_sum aggregate requires a GROUP BY interval`}, - {s: `SELECT holt_winters(value) FROM myseries where time < now() and time > now() - 1d`, err: `invalid number of arguments for holt_winters, expected 3, got 1`}, - {s: `SELECT holt_winters(value, 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `must use aggregate function with holt_winters`}, - {s: `SELECT holt_winters(min(value), 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `holt_winters aggregate requires a GROUP BY interval`}, - {s: `SELECT holt_winters(min(value), 0, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `second arg to holt_winters must be greater than 0, got 0`}, - {s: `SELECT holt_winters(min(value), false, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as second arg in holt_winters`}, - {s: `SELECT holt_winters(min(value), 10, 'string') FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as third arg in holt_winters`}, {s: `SELECT field1 from myseries WHERE host =~ 'asd' LIMIT 1`, err: `found asd, expected regex at line 1, char 42`}, {s: `SELECT value > 2 FROM cpu`, err: `invalid operator > in SELECT clause at line 1, char 8; operator is intended for WHERE clause`}, {s: `SELECT value = 2 FROM cpu`, err: `invalid operator = in SELECT clause at line 1, char 8; operator is intended for WHERE clause`}, {s: `SELECT s =~ /foo/ FROM cpu`, err: `invalid operator =~ in SELECT clause at line 1, char 8; operator is intended for WHERE clause`}, - {s: `SELECT mean(value) + value FROM cpu WHERE time < now() and time > now() - 1h GROUP BY time(10m)`, err: `binary expressions cannot mix aggregates and raw fields`}, - // TODO: Remove this restriction in the future: https://github.com/influxdata/influxdb/issues/5968 - {s: `SELECT mean(cpu_total - cpu_idle) FROM cpu`, err: `expected field argument in mean()`}, - {s: `SELECT derivative(mean(cpu_total - cpu_idle), 1s) FROM cpu WHERE time < now() AND time > now() - 1d GROUP BY time(1h)`, err: `expected field argument in mean()`}, - // TODO: The error message will change when math is allowed inside an aggregate: https://github.com/influxdata/influxdb/pull/5990#issuecomment-195565870 - {s: `SELECT count(foo + sum(bar)) FROM cpu`, err: `expected field argument in count()`}, - {s: `SELECT (count(foo + sum(bar))) FROM cpu`, err: `expected field argument in count()`}, - {s: `SELECT sum(value) + count(foo + sum(bar)) FROM cpu`, err: `binary expressions cannot mix aggregates and raw fields`}, {s: `SELECT mean(value) FROM cpu FILL + value`, err: `fill must be a function call`}, - {s: `SELECT sum(mean) FROM (SELECT mean(value) FROM cpu GROUP BY time(1h))`, err: `aggregate functions with GROUP BY time require a WHERE time clause`}, - {s: `SELECT top(value, 2), max(value) FROM cpu`, err: `selector function top() cannot be combined with other functions`}, - {s: `SELECT bottom(value, 2), max(value) FROM cpu`, err: `selector function bottom() cannot be combined with other functions`}, // See issues https://github.com/influxdata/influxdb/issues/1647 // and https://github.com/influxdata/influxdb/issues/4404 //{s: `DELETE`, err: `found EOF, expected FROM at line 1, char 8`}, @@ -3113,16 +2995,8 @@ func TestParser_ParseStatement(t *testing.T) { } stmt, err := p.ParseStatement() - // We are memoizing a field so for testing we need to... - if s, ok := tt.stmt.(*influxql.SelectStatement); ok { - s.GroupByInterval() - for _, source := range s.Sources { - switch source := source.(type) { - case *influxql.SubQuery: - source.Statement.GroupByInterval() - } - } - } else if st, ok := stmt.(*influxql.CreateContinuousQueryStatement); ok { // if it's a CQ, there is a non-exported field that gets memoized during parsing that needs to be set + // if it's a CQ, there is a non-exported field that gets memoized during parsing that needs to be set + if st, ok := stmt.(*influxql.CreateContinuousQueryStatement); ok { if st != nil && st.Source != nil { tt.stmt.(*influxql.CreateContinuousQueryStatement).Source.GroupByInterval() } diff --git a/query/compile.go b/query/compile.go index b4d83e401d..d1ea88a985 100644 --- a/query/compile.go +++ b/query/compile.go @@ -1,7 +1,9 @@ package query import ( + "errors" "fmt" + "strings" "time" "github.com/influxdata/influxdb/influxql" @@ -19,49 +21,771 @@ type Statement interface { Prepare(shardMapper ShardMapper, opt SelectOptions) (PreparedStatement, error) } -func Compile(stmt *influxql.SelectStatement, opt CompileOptions) (Statement, error) { - // It is important to "stamp" this time so that everywhere we evaluate `now()` in the statement is EXACTLY the same `now` - now := opt.Now - if now.IsZero() { - now = time.Now().UTC() - } - - // Evaluate the now() condition immediately so we do not have to deal with this. - nowValuer := influxql.NowValuer{Now: now, Location: stmt.Location} - stmt = stmt.Reduce(&nowValuer) - - // Convert DISTINCT into a call. - stmt.RewriteDistinct() - - // Remove "time" from fields list. - stmt.RewriteTimeFields() - - // Rewrite time condition. - if err := stmt.RewriteTimeCondition(now); err != nil { - return nil, err - } - - // Rewrite any regex conditions that could make use of the index. - stmt.RewriteRegexConditions() - return &compiledStatement{stmt: stmt}, nil -} - // compiledStatement represents a select statement that has undergone some initial processing to // determine if it is valid and to have some initial modifications done on the AST. type compiledStatement struct { + // Condition is the condition used for accessing data. + Condition influxql.Expr + + // TimeRange is the TimeRange for selecting data. + TimeRange influxql.TimeRange + + // Interval holds the time grouping interval. + Interval Interval + + // InheritedInterval marks if the interval was inherited by a parent. + // If this is set, then an interval that was inherited will not cause + // a query that shouldn't have an interval to fail. + InheritedInterval bool + + // FunctionCalls holds a reference to the call expression of every function + // call that has been encountered. + FunctionCalls []*influxql.Call + + // OnlySelectors is set to true when there are no aggregate functions. + OnlySelectors bool + + // HasDistinct is set when the distinct() function is encountered. + HasDistinct bool + + // FillOption contains the fill option for aggregates. + FillOption influxql.FillOption + + // TopBottomFunction is set to top or bottom when one of those functions are + // used in the statement. + TopBottomFunction string + + // HasAuxiliaryFields is true when the function requires auxiliary fields. + HasAuxiliaryFields bool + + // Fields holds all of the fields that will be used. + Fields []*compiledField + + // TimeFieldName stores the name of the time field's column. + // The column names generated by the compiler will not conflict with + // this name. + TimeFieldName string + + // Limit is the number of rows per series this query should be limited to. + Limit int + + // HasTarget is true if this query is being written into a target. + HasTarget bool + + // Options holds the configured compiler options. + Options CompileOptions + stmt *influxql.SelectStatement } -func (c *compiledStatement) Prepare(shardMapper ShardMapper, sopt SelectOptions) (PreparedStatement, error) { - // Determine the time range spanned by the condition so we can map shards. - nowValuer := influxql.NowValuer{Location: c.stmt.Location} - _, timeRange, err := influxql.ConditionExpr(c.stmt.Condition, &nowValuer) - if err != nil { +func newCompiler(opt CompileOptions) *compiledStatement { + if opt.Now.IsZero() { + opt.Now = time.Now().UTC() + } + return &compiledStatement{ + OnlySelectors: true, + TimeFieldName: "time", + Options: opt, + } +} + +func Compile(stmt *influxql.SelectStatement, opt CompileOptions) (Statement, error) { + c := newCompiler(opt) + if err := c.preprocess(stmt); err != nil { return nil, err } + if err := c.compile(stmt); err != nil { + return nil, err + } + c.stmt = stmt.Clone() + c.stmt.TimeAlias = c.TimeFieldName + c.stmt.Condition = c.Condition + // Convert DISTINCT into a call. + c.stmt.RewriteDistinct() + + // Remove "time" from fields list. + c.stmt.RewriteTimeFields() + + // Rewrite any regex conditions that could make use of the index. + c.stmt.RewriteRegexConditions() + return c, nil +} + +// preprocess retrieves and records the global attributes of the current statement. +func (c *compiledStatement) preprocess(stmt *influxql.SelectStatement) error { + c.Limit = stmt.Limit + c.HasTarget = stmt.Target != nil + + valuer := influxql.NowValuer{Now: c.Options.Now, Location: stmt.Location} + if cond, t, err := influxql.ConditionExpr(stmt.Condition, &valuer); err != nil { + return err + } else { + c.Condition = cond + c.TimeRange = t + } + + // Read the dimensions of the query, validate them, and retrieve the interval + // if it exists. + if err := c.compileDimensions(stmt); err != nil { + return err + } + + // Retrieve the fill option for the statement. + c.FillOption = stmt.Fill + + // Resolve the min and max times now that we know if there is an interval or not. + if c.TimeRange.Min.IsZero() { + c.TimeRange.Min = time.Unix(0, influxql.MinTime).UTC() + } + if c.TimeRange.Max.IsZero() { + // If the interval is non-zero, then we have an aggregate query and + // need to limit the maximum time to now() for backwards compatibility + // and usability. + if !c.Interval.IsZero() { + c.TimeRange.Max = c.Options.Now + } else { + c.TimeRange.Max = time.Unix(0, influxql.MaxTime).UTC() + } + } + return nil +} + +func (c *compiledStatement) compile(stmt *influxql.SelectStatement) error { + if err := c.compileFields(stmt); err != nil { + return err + } + if err := c.validateFields(); err != nil { + return err + } + if err := c.validateDimensions(); err != nil { + return err + } + + // Look through the sources and compile each of the subqueries (if they exist). + // We do this after compiling the outside because subqueries may require + // inherited state. + for _, source := range stmt.Sources { + switch source := source.(type) { + case *influxql.SubQuery: + if err := c.subquery(source.Statement); err != nil { + return err + } + } + } + return nil +} + +func (c *compiledStatement) compileFields(stmt *influxql.SelectStatement) error { + c.Fields = make([]*compiledField, 0, len(stmt.Fields)) + for _, f := range stmt.Fields { + // Remove any time selection (it is automatically selected by default) + // and set the time column name to the alias of the time field if it exists. + // Such as SELECT time, max(value) FROM cpu will be SELECT max(value) FROM cpu + // and SELECT time AS timestamp, max(value) FROM cpu will return "timestamp" + // as the column name for the time. + if ref, ok := f.Expr.(*influxql.VarRef); ok && ref.Val == "time" { + if f.Alias != "" { + c.TimeFieldName = f.Alias + } + continue + } + + // Append this field to the list of processed fields and compile it. + field := &compiledField{ + global: c, + Field: f, + AllowWildcard: true, + } + c.Fields = append(c.Fields, field) + if err := field.compileExpr(f.Expr); err != nil { + return err + } + } + return nil +} + +type compiledField struct { + // This holds the global state from the compiled statement. + global *compiledStatement + + // Field is the top level field that is being compiled. + Field *influxql.Field + + // AllowWildcard is set to true if a wildcard or regular expression is allowed. + AllowWildcard bool +} + +// compileExpr creates the node that executes the expression and connects that +// node to the WriteEdge as the output. +func (c *compiledField) compileExpr(expr influxql.Expr) error { + switch expr := expr.(type) { + case *influxql.VarRef: + // A bare variable reference will require auxiliary fields. + c.global.HasAuxiliaryFields = true + return nil + case *influxql.Wildcard: + // Wildcards use auxiliary fields. We assume there will be at least one + // expansion. + c.global.HasAuxiliaryFields = true + if !c.AllowWildcard { + return errors.New("unable to use wildcard in a binary expression") + } + return nil + case *influxql.RegexLiteral: + if !c.AllowWildcard { + return errors.New("unable to use regex in a binary expression") + } + c.global.HasAuxiliaryFields = true + return nil + case *influxql.Call: + // Register the function call in the list of function calls. + c.global.FunctionCalls = append(c.global.FunctionCalls, expr) + + switch expr.Name { + case "percentile": + return c.compilePercentile(expr.Args) + case "sample": + return c.compileSample(expr.Args) + case "distinct": + return c.compileDistinct(expr.Args) + case "top", "bottom": + return c.compileTopBottom(expr) + case "derivative", "non_negative_derivative": + isNonNegative := expr.Name == "non_negative_derivative" + return c.compileDerivative(expr.Args, isNonNegative) + case "difference", "non_negative_difference": + isNonNegative := expr.Name == "non_negative_difference" + return c.compileDifference(expr.Args, isNonNegative) + case "cumulative_sum": + return c.compileCumulativeSum(expr.Args) + case "moving_average": + return c.compileMovingAverage(expr.Args) + case "elapsed": + return c.compileElapsed(expr.Args) + case "integral": + return c.compileIntegral(expr.Args) + case "holt_winters", "holt_winters_with_fit": + withFit := expr.Name == "holt_winters_with_fit" + return c.compileHoltWinters(expr.Args, withFit) + default: + return c.compileFunction(expr) + } + case *influxql.Distinct: + call := expr.NewCall() + c.global.FunctionCalls = append(c.global.FunctionCalls, call) + return c.compileDistinct(call.Args) + case *influxql.BinaryExpr: + // Disallow wildcards in binary expressions. RewriteFields, which expands + // wildcards, is too complicated if we allow wildcards inside of expressions. + c.AllowWildcard = false + + // Check if either side is a literal so we only compile one side if it is. + if _, ok := expr.LHS.(influxql.Literal); ok { + if _, ok := expr.RHS.(influxql.Literal); ok { + return errors.New("cannot perform a binary expression on two literals") + } + return c.compileExpr(expr.RHS) + } else if _, ok := expr.RHS.(influxql.Literal); ok { + return c.compileExpr(expr.LHS) + } else { + // Validate both sides of the expression. + if err := c.compileExpr(expr.LHS); err != nil { + return err + } + if err := c.compileExpr(expr.RHS); err != nil { + return err + } + return nil + } + case *influxql.ParenExpr: + return c.compileExpr(expr.Expr) + } + return errors.New("unimplemented") +} + +func (c *compiledField) compileSymbol(name string, field influxql.Expr) error { + // Must be a variable reference, wildcard, or regexp. + switch field.(type) { + case *influxql.VarRef: + return nil + case *influxql.Wildcard: + if !c.AllowWildcard { + return fmt.Errorf("unsupported expression with wildcard: %s()", name) + } + c.global.OnlySelectors = false + return nil + case *influxql.RegexLiteral: + if !c.AllowWildcard { + return fmt.Errorf("unsupported expression with regex field: %s()", name) + } + c.global.OnlySelectors = false + return nil + default: + return fmt.Errorf("expected field argument in %s()", name) + } +} + +func (c *compiledField) compileFunction(expr *influxql.Call) error { + // Validate the function call and mark down some meta properties + // related to the function for query validation. + switch expr.Name { + case "max", "min", "first", "last": + // top/bottom are not included here since they are not typical functions. + case "count", "sum", "mean", "median", "mode", "stddev", "spread": + // These functions are not considered selectors. + c.global.OnlySelectors = false + default: + return fmt.Errorf("undefined function %s()", expr.Name) + } + + if exp, got := 1, len(expr.Args); exp != got { + return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", expr.Name, exp, got) + } + + // If this is a call to count(), allow distinct() to be used as the function argument. + if expr.Name == "count" { + // If we have count(), the argument may be a distinct() call. + if arg0, ok := expr.Args[0].(*influxql.Call); ok && arg0.Name == "distinct" { + return c.compileDistinct(arg0.Args) + } else if arg0, ok := expr.Args[0].(*influxql.Distinct); ok { + call := arg0.NewCall() + return c.compileDistinct(call.Args) + } + } + return c.compileSymbol(expr.Name, expr.Args[0]) +} + +func (c *compiledField) compilePercentile(args []influxql.Expr) error { + if exp, got := 2, len(args); got != exp { + return fmt.Errorf("invalid number of arguments for percentile, expected %d, got %d", exp, got) + } + + switch args[1].(type) { + case *influxql.IntegerLiteral: + case *influxql.NumberLiteral: + default: + return fmt.Errorf("expected float argument in percentile()") + } + return c.compileSymbol("percentile", args[0]) +} + +func (c *compiledField) compileSample(args []influxql.Expr) error { + if exp, got := 2, len(args); got != exp { + return fmt.Errorf("invalid number of arguments for sample, expected %d, got %d", exp, got) + } + + switch arg1 := args[1].(type) { + case *influxql.IntegerLiteral: + if arg1.Val <= 0 { + return fmt.Errorf("sample window must be greater than 1, got %d", arg1.Val) + } + default: + return fmt.Errorf("expected integer argument in sample()") + } + return c.compileSymbol("sample", args[0]) +} + +func (c *compiledField) compileDerivative(args []influxql.Expr, isNonNegative bool) error { + name := "derivative" + if isNonNegative { + name = "non_negative_derivative" + } + + if min, max, got := 1, 2, len(args); got > max || got < min { + return fmt.Errorf("invalid number of arguments for %s, expected at least %d but no more than %d, got %d", name, min, max, got) + } + + // Retrieve the duration from the derivative() call, if specified. + if len(args) == 2 { + switch arg1 := args[1].(type) { + case *influxql.DurationLiteral: + if arg1.Val <= 0 { + return fmt.Errorf("duration argument must be positive, got %s", influxql.FormatDuration(arg1.Val)) + } + default: + return fmt.Errorf("second argument to %s must be a duration, got %T", name, args[1]) + } + } + c.global.OnlySelectors = false + + // Must be a variable reference, function, wildcard, or regexp. + switch arg0 := args[0].(type) { + case *influxql.Call: + if c.global.Interval.IsZero() { + return fmt.Errorf("%s aggregate requires a GROUP BY interval", name) + } + return c.compileExpr(arg0) + default: + if !c.global.Interval.IsZero() { + return fmt.Errorf("aggregate function required inside the call to %s", name) + } + return c.compileSymbol(name, arg0) + } +} + +func (c *compiledField) compileElapsed(args []influxql.Expr) error { + if min, max, got := 1, 2, len(args); got > max || got < min { + return fmt.Errorf("invalid number of arguments for elapsed, expected at least %d but no more than %d, got %d", min, max, got) + } + + // Retrieve the duration from the elapsed() call, if specified. + if len(args) == 2 { + switch arg1 := args[1].(type) { + case *influxql.DurationLiteral: + if arg1.Val <= 0 { + return fmt.Errorf("duration argument must be positive, got %s", influxql.FormatDuration(arg1.Val)) + } + default: + return fmt.Errorf("second argument to elapsed must be a duration, got %T", args[1]) + } + } + c.global.OnlySelectors = false + + // Must be a variable reference, function, wildcard, or regexp. + switch arg0 := args[0].(type) { + case *influxql.Call: + if c.global.Interval.IsZero() { + return fmt.Errorf("elapsed aggregate requires a GROUP BY interval") + } + return c.compileExpr(arg0) + default: + if !c.global.Interval.IsZero() { + return fmt.Errorf("aggregate function required inside the call to elapsed") + } + return c.compileSymbol("elapsed", arg0) + } +} + +func (c *compiledField) compileDifference(args []influxql.Expr, isNonNegative bool) error { + name := "difference" + if isNonNegative { + name = "non_negative_difference" + } + + if got := len(args); got != 1 { + return fmt.Errorf("invalid number of arguments for %s, expected 1, got %d", name, got) + } + c.global.OnlySelectors = false + + // Must be a variable reference, function, wildcard, or regexp. + switch arg0 := args[0].(type) { + case *influxql.Call: + if c.global.Interval.IsZero() { + return fmt.Errorf("%s aggregate requires a GROUP BY interval", name) + } + return c.compileExpr(arg0) + default: + if !c.global.Interval.IsZero() { + return fmt.Errorf("aggregate function required inside the call to %s", name) + } + return c.compileSymbol(name, arg0) + } +} + +func (c *compiledField) compileCumulativeSum(args []influxql.Expr) error { + if got := len(args); got != 1 { + return fmt.Errorf("invalid number of arguments for cumulative_sum, expected 1, got %d", got) + } + c.global.OnlySelectors = false + + // Must be a variable reference, function, wildcard, or regexp. + switch arg0 := args[0].(type) { + case *influxql.Call: + if c.global.Interval.IsZero() { + return fmt.Errorf("cumulative_sum aggregate requires a GROUP BY interval") + } + return c.compileExpr(arg0) + default: + if !c.global.Interval.IsZero() { + return fmt.Errorf("aggregate function required inside the call to cumulative_sum") + } + return c.compileSymbol("cumulative_sum", arg0) + } +} + +func (c *compiledField) compileMovingAverage(args []influxql.Expr) error { + if got := len(args); got != 2 { + return fmt.Errorf("invalid number of arguments for moving_average, expected 2, got %d", got) + } + + switch arg1 := args[1].(type) { + case *influxql.IntegerLiteral: + if arg1.Val <= 1 { + return fmt.Errorf("moving_average window must be greater than 1, got %d", arg1.Val) + } + default: + return fmt.Errorf("second argument for moving_average must be an integer, got %T", args[1]) + } + c.global.OnlySelectors = false + + // Must be a variable reference, function, wildcard, or regexp. + switch arg0 := args[0].(type) { + case *influxql.Call: + if c.global.Interval.IsZero() { + return fmt.Errorf("moving_average aggregate requires a GROUP BY interval") + } + return c.compileExpr(arg0) + default: + if !c.global.Interval.IsZero() { + return fmt.Errorf("aggregate function required inside the call to moving_average") + } + return c.compileSymbol("moving_average", arg0) + } +} + +func (c *compiledField) compileIntegral(args []influxql.Expr) error { + if min, max, got := 1, 2, len(args); got > max || got < min { + return fmt.Errorf("invalid number of arguments for integral, expected at least %d but no more than %d, got %d", min, max, got) + } + + if len(args) == 2 { + switch arg1 := args[1].(type) { + case *influxql.DurationLiteral: + if arg1.Val <= 0 { + return fmt.Errorf("duration argument must be positive, got %s", influxql.FormatDuration(arg1.Val)) + } + default: + return errors.New("second argument must be a duration") + } + } + c.global.OnlySelectors = false + + // Must be a variable reference, wildcard, or regexp. + return c.compileSymbol("integral", args[0]) +} + +func (c *compiledField) compileHoltWinters(args []influxql.Expr, withFit bool) error { + name := "holt_winters" + if withFit { + name = "holt_winters_with_fit" + } + + if exp, got := 3, len(args); got != exp { + return fmt.Errorf("invalid number of arguments for %s, expected %d, got %d", name, exp, got) + } + + n, ok := args[1].(*influxql.IntegerLiteral) + if !ok { + return fmt.Errorf("expected integer argument as second arg in %s", name) + } else if n.Val <= 0 { + return fmt.Errorf("second arg to %s must be greater than 0, got %d", name, n.Val) + } + + s, ok := args[2].(*influxql.IntegerLiteral) + if !ok { + return fmt.Errorf("expected integer argument as third arg in %s", name) + } else if s.Val < 0 { + return fmt.Errorf("third arg to %s cannot be negative, got %d", name, s.Val) + } + c.global.OnlySelectors = false + + call, ok := args[0].(*influxql.Call) + if !ok { + return fmt.Errorf("must use aggregate function with %s", name) + } else if c.global.Interval.IsZero() { + return fmt.Errorf("%s aggregate requires a GROUP BY interval", name) + } + return c.compileExpr(call) +} + +func (c *compiledField) compileDistinct(args []influxql.Expr) error { + if len(args) == 0 { + return errors.New("distinct function requires at least one argument") + } else if len(args) != 1 { + return errors.New("distinct function can only have one argument") + } + + if _, ok := args[0].(*influxql.VarRef); !ok { + return errors.New("expected field argument in distinct()") + } + c.global.HasDistinct = true + c.global.OnlySelectors = false + return nil +} + +func (c *compiledField) compileTopBottom(call *influxql.Call) error { + if c.global.TopBottomFunction != "" { + return fmt.Errorf("selector function %s() cannot be combined with other functions", c.global.TopBottomFunction) + } + + if exp, got := 2, len(call.Args); got < exp { + return fmt.Errorf("invalid number of arguments for %s, expected at least %d, got %d", call.Name, exp, got) + } + + limit, ok := call.Args[len(call.Args)-1].(*influxql.IntegerLiteral) + if !ok { + return fmt.Errorf("expected integer as last argument in %s(), found %s", call.Name, call.Args[len(call.Args)-1]) + } else if limit.Val <= 0 { + return fmt.Errorf("limit (%d) in %s function must be at least 1", limit.Val, call.Name) + } else if c.global.Limit > 0 && int(limit.Val) > c.global.Limit { + return fmt.Errorf("limit (%d) in %s function can not be larger than the LIMIT (%d) in the select statement", limit.Val, call.Name, c.global.Limit) + } + + if _, ok := call.Args[0].(*influxql.VarRef); !ok { + return fmt.Errorf("expected first argument to be a field in %s(), found %s", call.Name, call.Args[0]) + } + + if len(call.Args) > 2 { + for _, v := range call.Args[1 : len(call.Args)-1] { + ref, ok := v.(*influxql.VarRef) + if !ok { + return fmt.Errorf("only fields or tags are allowed in %s(), found %s", call.Name, v) + } + + // Add a field for each of the listed dimensions when not writing the results. + if !c.global.HasTarget { + field := &compiledField{ + global: c.global, + Field: &influxql.Field{Expr: ref}, + } + c.global.Fields = append(c.global.Fields, field) + if err := field.compileExpr(ref); err != nil { + return err + } + } + } + } + c.global.TopBottomFunction = call.Name + return nil +} + +func (c *compiledStatement) compileDimensions(stmt *influxql.SelectStatement) error { + for _, d := range stmt.Dimensions { + switch expr := d.Expr.(type) { + case *influxql.VarRef: + if strings.ToLower(expr.Val) == "time" { + return errors.New("time() is a function and expects at least one argument") + } + case *influxql.Call: + // Ensure the call is time() and it has one or two duration arguments. + // If we already have a duration + if expr.Name != "time" { + return errors.New("only time() calls allowed in dimensions") + } else if got := len(expr.Args); got < 1 || got > 2 { + return errors.New("time dimension expected 1 or 2 arguments") + } else if lit, ok := expr.Args[0].(*influxql.DurationLiteral); !ok { + return errors.New("time dimension must have duration argument") + } else if c.Interval.Duration != 0 { + return errors.New("multiple time dimensions not allowed") + } else { + c.Interval.Duration = lit.Val + if len(expr.Args) == 2 { + switch lit := expr.Args[1].(type) { + case *influxql.DurationLiteral: + c.Interval.Offset = lit.Val % c.Interval.Duration + case *influxql.TimeLiteral: + c.Interval.Offset = lit.Val.Sub(lit.Val.Truncate(c.Interval.Duration)) + case *influxql.Call: + if lit.Name != "now" { + return errors.New("time dimension offset function must be now()") + } else if len(lit.Args) != 0 { + return errors.New("time dimension offset now() function requires no arguments") + } + now := c.Options.Now + c.Interval.Offset = now.Sub(now.Truncate(c.Interval.Duration)) + case *influxql.StringLiteral: + // If literal looks like a date time then parse it as a time literal. + if lit.IsTimeLiteral() { + t, err := lit.ToTimeLiteral(stmt.Location) + if err != nil { + return err + } + c.Interval.Offset = t.Val.Sub(t.Val.Truncate(c.Interval.Duration)) + } else { + return errors.New("time dimension offset must be duration or now()") + } + default: + return errors.New("time dimension offset must be duration or now()") + } + } + } + case *influxql.Wildcard: + case *influxql.RegexLiteral: + default: + return errors.New("only time and tag dimensions allowed") + } + } + return nil +} + +// validateFields validates that the fields are mutually compatible with each other. +// This runs at the end of compilation but before linking. +func (c *compiledStatement) validateFields() error { + // Validate that at least one field has been selected. + if len(c.Fields) == 0 { + return errors.New("at least 1 non-time field must be queried") + } + // Ensure there are not multiple calls if top/bottom is present. + if len(c.FunctionCalls) > 1 && c.TopBottomFunction != "" { + return fmt.Errorf("selector function %s() cannot be combined with other functions", c.TopBottomFunction) + } else if len(c.FunctionCalls) == 0 { + switch c.FillOption { + case influxql.NoFill: + return errors.New("fill(none) must be used with a function") + case influxql.LinearFill: + return errors.New("fill(linear) must be used with a function") + } + if !c.Interval.IsZero() && !c.InheritedInterval { + return errors.New("GROUP BY requires at least one aggregate function") + } + } + // If a distinct() call is present, ensure there is exactly one function. + if c.HasDistinct && (len(c.FunctionCalls) != 1 || c.HasAuxiliaryFields) { + return errors.New("aggregate function distinct() cannot be combined with other functions or fields") + } + // Validate we are using a selector or raw query if auxiliary fields are required. + if c.HasAuxiliaryFields { + if !c.OnlySelectors { + return fmt.Errorf("mixing aggregate and non-aggregate queries is not supported") + } else if len(c.FunctionCalls) > 1 { + return fmt.Errorf("mixing multiple selector functions with tags or fields is not supported") + } + } + return nil +} + +// validateDimensions validates that the dimensions are appropriate for this type of query. +func (c *compiledStatement) validateDimensions() error { + if !c.Interval.IsZero() && !c.InheritedInterval { + // There must be a lower limit that wasn't implicitly set. + if c.TimeRange.Min.UnixNano() == influxql.MinTime { + return errors.New("aggregate functions with GROUP BY time require a WHERE time clause with a lower limit") + } + } + return nil +} + +// subquery compiles and validates a compiled statement for the subquery using +// this compiledStatement as the parent. +func (c *compiledStatement) subquery(stmt *influxql.SelectStatement) error { + subquery := newCompiler(c.Options) + if err := subquery.preprocess(stmt); err != nil { + return err + } + + // Find the intersection between this time range and the parent. + // If the subquery doesn't have a time range, this causes it to + // inherit the parent's time range. + subquery.TimeRange = subquery.TimeRange.Intersect(c.TimeRange) + + // If the fill option is null, set it to none so we don't waste time on + // null values with a redundant fill iterator. + if !subquery.Interval.IsZero() && subquery.FillOption == influxql.NullFill { + subquery.FillOption = influxql.NoFill + } + + // Inherit the grouping interval if the subquery has none. + if !c.Interval.IsZero() && subquery.Interval.IsZero() { + subquery.Interval = c.Interval + subquery.InheritedInterval = true + } + return subquery.compile(stmt) +} + +func (c *compiledStatement) Prepare(shardMapper ShardMapper, sopt SelectOptions) (PreparedStatement, error) { // Create an iterator creator based on the shards in the cluster. - shards, err := shardMapper.MapShards(c.stmt.Sources, timeRange, sopt) + shards, err := shardMapper.MapShards(c.stmt.Sources, c.TimeRange, sopt) if err != nil { return nil, err } @@ -78,6 +802,7 @@ func (c *compiledStatement) Prepare(shardMapper ShardMapper, sopt SelectOptions) if err != nil { return nil, err } + opt.StartTime, opt.EndTime = c.TimeRange.MinTime(), c.TimeRange.MaxTime() if sopt.MaxBucketsN > 0 && !stmt.IsRawQuery { interval, err := stmt.GroupByInterval() diff --git a/query/compile_test.go b/query/compile_test.go new file mode 100644 index 0000000000..a7a4a756ff --- /dev/null +++ b/query/compile_test.go @@ -0,0 +1,338 @@ +package query_test + +import ( + "testing" + + "github.com/influxdata/influxdb/influxql" + "github.com/influxdata/influxdb/query" +) + +func TestCompile_Success(t *testing.T) { + for _, tt := range []string{ + `SELECT time, value FROM cpu`, + `SELECT value FROM cpu`, + `SELECT value, host FROM cpu`, + `SELECT * FROM cpu`, + `SELECT time, * FROM cpu`, + `SELECT value, * FROM cpu`, + `SELECT max(value) FROM cpu`, + `SELECT max(value), host FROM cpu`, + `SELECT max(value), * FROM cpu`, + `SELECT max(*) FROM cpu`, + `SELECT max(/val/) FROM cpu`, + `SELECT min(value) FROM cpu`, + `SELECT min(value), host FROM cpu`, + `SELECT min(value), * FROM cpu`, + `SELECT min(*) FROM cpu`, + `SELECT min(/val/) FROM cpu`, + `SELECT first(value) FROM cpu`, + `SELECT first(value), host FROM cpu`, + `SELECT first(value), * FROM cpu`, + `SELECT first(*) FROM cpu`, + `SELECT first(/val/) FROM cpu`, + `SELECT last(value) FROM cpu`, + `SELECT last(value), host FROM cpu`, + `SELECT last(value), * FROM cpu`, + `SELECT last(*) FROM cpu`, + `SELECT last(/val/) FROM cpu`, + `SELECT count(value) FROM cpu`, + `SELECT count(distinct(value)) FROM cpu`, + `SELECT count(distinct value) FROM cpu`, + `SELECT count(*) FROM cpu`, + `SELECT count(/val/) FROM cpu`, + `SELECT mean(value) FROM cpu`, + `SELECT mean(*) FROM cpu`, + `SELECT mean(/val/) FROM cpu`, + `SELECT min(value), max(value) FROM cpu`, + `SELECT min(*), max(*) FROM cpu`, + `SELECT min(/val/), max(/val/) FROM cpu`, + `SELECT first(value), last(value) FROM cpu`, + `SELECT first(*), last(*) FROM cpu`, + `SELECT first(/val/), last(/val/) FROM cpu`, + `SELECT count(value) FROM cpu WHERE time >= now() - 1h GROUP BY time(10m)`, + `SELECT distinct value FROM cpu`, + `SELECT distinct(value) FROM cpu`, + `SELECT value / total FROM cpu`, + `SELECT min(value) / total FROM cpu`, + `SELECT max(value) / total FROM cpu`, + `SELECT top(value, 1) FROM cpu`, + `SELECT top(value, host, 1) FROM cpu`, + `SELECT top(value, 1), host FROM cpu`, + `SELECT min(top) FROM (SELECT top(value, host, 1) FROM cpu) GROUP BY region`, + `SELECT bottom(value, 1) FROM cpu`, + `SELECT bottom(value, host, 1) FROM cpu`, + `SELECT bottom(value, 1), host FROM cpu`, + `SELECT max(bottom) FROM (SELECT bottom(value, host, 1) FROM cpu) GROUP BY region`, + `SELECT percentile(value, 75) FROM cpu`, + `SELECT percentile(value, 75.0) FROM cpu`, + `SELECT sample(value, 2) FROM cpu`, + `SELECT sample(*, 2) FROM cpu`, + `SELECT sample(/val/, 2) FROM cpu`, + `SELECT elapsed(value) FROM cpu`, + `SELECT elapsed(value, 10s) FROM cpu`, + `SELECT integral(value) FROM cpu`, + `SELECT integral(value, 10s) FROM cpu`, + `SELECT max(value) FROM cpu WHERE time >= now() - 1m GROUP BY time(10s, 5s)`, + `SELECT max(value) FROM cpu WHERE time >= now() - 1m GROUP BY time(10s, '2000-01-01T00:00:05Z')`, + `SELECT max(value) FROM cpu WHERE time >= now() - 1m GROUP BY time(10s, now())`, + `SELECT max(mean) FROM (SELECT mean(value) FROM cpu GROUP BY host)`, + `SELECT max(derivative) FROM (SELECT derivative(mean(value)) FROM cpu) WHERE time >= now() - 1m GROUP BY time(10s)`, + `SELECT max(value) FROM (SELECT value + total FROM cpu) WHERE time >= now() - 1m GROUP BY time(10s)`, + `SELECT value FROM cpu WHERE time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T01:00:00Z'`, + } { + t.Run(tt, func(t *testing.T) { + stmt, err := influxql.ParseStatement(tt) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + s := stmt.(*influxql.SelectStatement) + + opt := query.CompileOptions{} + if _, err := query.Compile(s, opt); err != nil { + t.Errorf("unexpected error: %s", err) + } + }) + } +} + +func TestCompile_Failures(t *testing.T) { + for _, tt := range []struct { + s string + err string + }{ + {s: `SELECT time FROM cpu`, err: `at least 1 non-time field must be queried`}, + {s: `SELECT value, mean(value) FROM cpu`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT value, max(value), min(value) FROM cpu`, err: `mixing multiple selector functions with tags or fields is not supported`}, + {s: `SELECT top(value, 10), max(value) FROM cpu`, err: `selector function top() cannot be combined with other functions`}, + {s: `SELECT bottom(value, 10), max(value) FROM cpu`, err: `selector function bottom() cannot be combined with other functions`}, + {s: `SELECT count() FROM cpu`, err: `invalid number of arguments for count, expected 1, got 0`}, + {s: `SELECT count(value, host) FROM cpu`, err: `invalid number of arguments for count, expected 1, got 2`}, + {s: `SELECT min() FROM cpu`, err: `invalid number of arguments for min, expected 1, got 0`}, + {s: `SELECT min(value, host) FROM cpu`, err: `invalid number of arguments for min, expected 1, got 2`}, + {s: `SELECT max() FROM cpu`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT max(value, host) FROM cpu`, err: `invalid number of arguments for max, expected 1, got 2`}, + {s: `SELECT sum() FROM cpu`, err: `invalid number of arguments for sum, expected 1, got 0`}, + {s: `SELECT sum(value, host) FROM cpu`, err: `invalid number of arguments for sum, expected 1, got 2`}, + {s: `SELECT first() FROM cpu`, err: `invalid number of arguments for first, expected 1, got 0`}, + {s: `SELECT first(value, host) FROM cpu`, err: `invalid number of arguments for first, expected 1, got 2`}, + {s: `SELECT last() FROM cpu`, err: `invalid number of arguments for last, expected 1, got 0`}, + {s: `SELECT last(value, host) FROM cpu`, err: `invalid number of arguments for last, expected 1, got 2`}, + {s: `SELECT mean() FROM cpu`, err: `invalid number of arguments for mean, expected 1, got 0`}, + {s: `SELECT mean(value, host) FROM cpu`, err: `invalid number of arguments for mean, expected 1, got 2`}, + {s: `SELECT distinct(value), max(value) FROM cpu`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, + {s: `SELECT count(distinct(value)), max(value) FROM cpu`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, + {s: `SELECT count(distinct()) FROM cpu`, err: `distinct function requires at least one argument`}, + {s: `SELECT count(distinct(value, host)) FROM cpu`, err: `distinct function can only have one argument`}, + {s: `SELECT count(distinct(2)) FROM cpu`, err: `expected field argument in distinct()`}, + {s: `SELECT value FROM cpu GROUP BY now()`, err: `only time() calls allowed in dimensions`}, + {s: `SELECT value FROM cpu GROUP BY time()`, err: `time dimension expected 1 or 2 arguments`}, + {s: `SELECT value FROM cpu GROUP BY time(5m, 30s, 1ms)`, err: `time dimension expected 1 or 2 arguments`}, + {s: `SELECT value FROM cpu GROUP BY time('unexpected')`, err: `time dimension must have duration argument`}, + {s: `SELECT value FROM cpu GROUP BY time(5m), time(1m)`, err: `multiple time dimensions not allowed`}, + {s: `SELECT value FROM cpu GROUP BY time(5m, unexpected())`, err: `time dimension offset function must be now()`}, + {s: `SELECT value FROM cpu GROUP BY time(5m, now(1m))`, err: `time dimension offset now() function requires no arguments`}, + {s: `SELECT value FROM cpu GROUP BY time(5m, 'unexpected')`, err: `time dimension offset must be duration or now()`}, + {s: `SELECT value FROM cpu GROUP BY 'unexpected'`, err: `only time and tag dimensions allowed`}, + {s: `SELECT top(value) FROM cpu`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT top('unexpected', 5) FROM cpu`, err: `expected first argument to be a field in top(), found 'unexpected'`}, + {s: `SELECT top(value, 'unexpected', 5) FROM cpu`, err: `only fields or tags are allowed in top(), found 'unexpected'`}, + {s: `SELECT top(value, 2.5) FROM cpu`, err: `expected integer as last argument in top(), found 2.500`}, + {s: `SELECT top(value, -1) FROM cpu`, err: `limit (-1) in top function must be at least 1`}, + {s: `SELECT top(value, 3) FROM cpu LIMIT 2`, err: `limit (3) in top function can not be larger than the LIMIT (2) in the select statement`}, + {s: `SELECT bottom(value) FROM cpu`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT bottom('unexpected', 5) FROM cpu`, err: `expected first argument to be a field in bottom(), found 'unexpected'`}, + {s: `SELECT bottom(value, 'unexpected', 5) FROM cpu`, err: `only fields or tags are allowed in bottom(), found 'unexpected'`}, + {s: `SELECT bottom(value, 2.5) FROM cpu`, err: `expected integer as last argument in bottom(), found 2.500`}, + {s: `SELECT bottom(value, -1) FROM cpu`, err: `limit (-1) in bottom function must be at least 1`}, + {s: `SELECT bottom(value, 3) FROM cpu LIMIT 2`, err: `limit (3) in bottom function can not be larger than the LIMIT (2) in the select statement`}, + {s: `SELECT value FROM cpu WHERE time >= now() - 10m OR time < now() - 5m`, err: `cannot use OR with time conditions`}, + {s: `SELECT value FROM cpu WHERE value`, err: `invalid condition expression: value`}, + {s: `SELECT count(value), * FROM cpu`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT max(*), host FROM cpu`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT count(value), /ho/ FROM cpu`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT max(/val/), * FROM cpu`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT a(value) FROM cpu`, err: `undefined function a()`}, + {s: `SELECT count(max(value)) FROM myseries`, err: `expected field argument in count()`}, + {s: `SELECT count(distinct('value')) FROM myseries`, err: `expected field argument in distinct()`}, + {s: `SELECT distinct('value') FROM myseries`, err: `expected field argument in distinct()`}, + {s: `SELECT min(max(value)) FROM myseries`, err: `expected field argument in min()`}, + {s: `SELECT min(distinct(value)) FROM myseries`, err: `expected field argument in min()`}, + {s: `SELECT max(max(value)) FROM myseries`, err: `expected field argument in max()`}, + {s: `SELECT sum(max(value)) FROM myseries`, err: `expected field argument in sum()`}, + {s: `SELECT first(max(value)) FROM myseries`, err: `expected field argument in first()`}, + {s: `SELECT last(max(value)) FROM myseries`, err: `expected field argument in last()`}, + {s: `SELECT mean(max(value)) FROM myseries`, err: `expected field argument in mean()`}, + {s: `SELECT median(max(value)) FROM myseries`, err: `expected field argument in median()`}, + {s: `SELECT mode(max(value)) FROM myseries`, err: `expected field argument in mode()`}, + {s: `SELECT stddev(max(value)) FROM myseries`, err: `expected field argument in stddev()`}, + {s: `SELECT spread(max(value)) FROM myseries`, err: `expected field argument in spread()`}, + {s: `SELECT top() FROM myseries`, err: `invalid number of arguments for top, expected at least 2, got 0`}, + {s: `SELECT top(field1) FROM myseries`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT top(field1,foo) FROM myseries`, err: `expected integer as last argument in top(), found foo`}, + {s: `SELECT top(field1,host,'server',foo) FROM myseries`, err: `expected integer as last argument in top(), found foo`}, + {s: `SELECT top(field1,5,'server',2) FROM myseries`, err: `only fields or tags are allowed in top(), found 5`}, + {s: `SELECT top(field1,max(foo),'server',2) FROM myseries`, err: `only fields or tags are allowed in top(), found max(foo)`}, + {s: `SELECT top(value, 10) + count(value) FROM myseries`, err: `selector function top() cannot be combined with other functions`}, + {s: `SELECT top(max(value), 10) FROM myseries`, err: `expected first argument to be a field in top(), found max(value)`}, + {s: `SELECT bottom() FROM myseries`, err: `invalid number of arguments for bottom, expected at least 2, got 0`}, + {s: `SELECT bottom(field1) FROM myseries`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT bottom(field1,foo) FROM myseries`, err: `expected integer as last argument in bottom(), found foo`}, + {s: `SELECT bottom(field1,host,'server',foo) FROM myseries`, err: `expected integer as last argument in bottom(), found foo`}, + {s: `SELECT bottom(field1,5,'server',2) FROM myseries`, err: `only fields or tags are allowed in bottom(), found 5`}, + {s: `SELECT bottom(field1,max(foo),'server',2) FROM myseries`, err: `only fields or tags are allowed in bottom(), found max(foo)`}, + {s: `SELECT bottom(value, 10) + count(value) FROM myseries`, err: `selector function bottom() cannot be combined with other functions`}, + {s: `SELECT bottom(max(value), 10) FROM myseries`, err: `expected first argument to be a field in bottom(), found max(value)`}, + {s: `SELECT top(value, 10), bottom(value, 10) FROM cpu`, err: `selector function top() cannot be combined with other functions`}, + {s: `SELECT bottom(value, 10), top(value, 10) FROM cpu`, err: `selector function bottom() cannot be combined with other functions`}, + {s: `SELECT sample(value) FROM myseries`, err: `invalid number of arguments for sample, expected 2, got 1`}, + {s: `SELECT sample(value, 2, 3) FROM myseries`, err: `invalid number of arguments for sample, expected 2, got 3`}, + {s: `SELECT sample(value, 0) FROM myseries`, err: `sample window must be greater than 1, got 0`}, + {s: `SELECT sample(value, 2.5) FROM myseries`, err: `expected integer argument in sample()`}, + {s: `SELECT percentile() FROM myseries`, err: `invalid number of arguments for percentile, expected 2, got 0`}, + {s: `SELECT percentile(field1) FROM myseries`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT percentile(field1, foo) FROM myseries`, err: `expected float argument in percentile()`}, + {s: `SELECT percentile(max(field1), 75) FROM myseries`, err: `expected field argument in percentile()`}, + {s: `SELECT field1 FROM foo group by time(1s)`, err: `GROUP BY requires at least one aggregate function`}, + {s: `SELECT field1 FROM foo fill(none)`, err: `fill(none) must be used with a function`}, + {s: `SELECT field1 FROM foo fill(linear)`, err: `fill(linear) must be used with a function`}, + {s: `SELECT count(value), value FROM foo`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT count(value) FROM foo group by time(1s)`, err: `aggregate functions with GROUP BY time require a WHERE time clause with a lower limit`}, + {s: `SELECT count(value) FROM foo group by time(500ms)`, err: `aggregate functions with GROUP BY time require a WHERE time clause with a lower limit`}, + {s: `SELECT count(value) FROM foo group by time(1s) where host = 'hosta.influxdb.org'`, err: `aggregate functions with GROUP BY time require a WHERE time clause with a lower limit`}, + {s: `SELECT count(value) FROM foo group by time(1s) where time < now()`, err: `aggregate functions with GROUP BY time require a WHERE time clause with a lower limit`}, + {s: `SELECT count(value) FROM foo group by time`, err: `time() is a function and expects at least one argument`}, + {s: `SELECT count(value) FROM foo group by 'time'`, err: `only time and tag dimensions allowed`}, + {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time()`, err: `time dimension expected 1 or 2 arguments`}, + {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(b)`, err: `time dimension must have duration argument`}, + {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(1s), time(2s)`, err: `multiple time dimensions not allowed`}, + {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(1s, b)`, err: `time dimension offset must be duration or now()`}, + {s: `SELECT count(value) FROM foo where time > now() and time < now() group by time(1s, '5s')`, err: `time dimension offset must be duration or now()`}, + {s: `SELECT distinct(field1), sum(field1) FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, + {s: `SELECT distinct(field1), field2 FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, + {s: `SELECT distinct(field1, field2) FROM myseries`, err: `distinct function can only have one argument`}, + {s: `SELECT distinct() FROM myseries`, err: `distinct function requires at least one argument`}, + {s: `SELECT distinct field1, field2 FROM myseries`, err: `aggregate function distinct() cannot be combined with other functions or fields`}, + {s: `SELECT count(distinct field1, field2) FROM myseries`, err: `invalid number of arguments for count, expected 1, got 2`}, + {s: `select count(distinct(too, many, arguments)) from myseries`, err: `distinct function can only have one argument`}, + {s: `select count() from myseries`, err: `invalid number of arguments for count, expected 1, got 0`}, + {s: `SELECT derivative(field1), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `select derivative() from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 0`}, + {s: `select derivative(mean(value), 1h, 3) from myseries`, err: `invalid number of arguments for derivative, expected at least 1 but no more than 2, got 3`}, + {s: `SELECT derivative(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to derivative`}, + {s: `SELECT derivative(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT derivative(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT derivative(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT derivative(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT derivative(mean(value), 1h) FROM myseries where time < now() and time > now() - 1d`, err: `derivative aggregate requires a GROUP BY interval`}, + {s: `SELECT derivative(value, -2h) FROM myseries`, err: `duration argument must be positive, got -2h`}, + {s: `SELECT derivative(value, 10) FROM myseries`, err: `second argument to derivative must be a duration, got *influxql.IntegerLiteral`}, + {s: `SELECT derivative(f, true) FROM myseries`, err: `second argument to derivative must be a duration, got *influxql.BooleanLiteral`}, + {s: `SELECT non_negative_derivative(field1), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `select non_negative_derivative() from myseries`, err: `invalid number of arguments for non_negative_derivative, expected at least 1 but no more than 2, got 0`}, + {s: `select non_negative_derivative(mean(value), 1h, 3) from myseries`, err: `invalid number of arguments for non_negative_derivative, expected at least 1 but no more than 2, got 3`}, + {s: `SELECT non_negative_derivative(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to non_negative_derivative`}, + {s: `SELECT non_negative_derivative(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT non_negative_derivative(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT non_negative_derivative(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT non_negative_derivative(mean(value), 1h) FROM myseries where time < now() and time > now() - 1d`, err: `non_negative_derivative aggregate requires a GROUP BY interval`}, + {s: `SELECT non_negative_derivative(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT non_negative_derivative(value, -2h) FROM myseries`, err: `duration argument must be positive, got -2h`}, + {s: `SELECT non_negative_derivative(value, 10) FROM myseries`, err: `second argument to non_negative_derivative must be a duration, got *influxql.IntegerLiteral`}, + {s: `SELECT difference(field1), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT difference() from myseries`, err: `invalid number of arguments for difference, expected 1, got 0`}, + {s: `SELECT difference(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to difference`}, + {s: `SELECT difference(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT difference(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT difference(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT difference(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT difference(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `difference aggregate requires a GROUP BY interval`}, + {s: `SELECT non_negative_difference(field1), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT non_negative_difference() from myseries`, err: `invalid number of arguments for non_negative_difference, expected 1, got 0`}, + {s: `SELECT non_negative_difference(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to non_negative_difference`}, + {s: `SELECT non_negative_difference(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT non_negative_difference(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT non_negative_difference(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT non_negative_difference(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT non_negative_difference(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `non_negative_difference aggregate requires a GROUP BY interval`}, + {s: `SELECT elapsed() FROM myseries`, err: `invalid number of arguments for elapsed, expected at least 1 but no more than 2, got 0`}, + {s: `SELECT elapsed(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to elapsed`}, + {s: `SELECT elapsed(value, 1s, host) FROM myseries`, err: `invalid number of arguments for elapsed, expected at least 1 but no more than 2, got 3`}, + {s: `SELECT elapsed(value, 0s) FROM myseries`, err: `duration argument must be positive, got 0s`}, + {s: `SELECT elapsed(value, -10s) FROM myseries`, err: `duration argument must be positive, got -10s`}, + {s: `SELECT elapsed(value, 10) FROM myseries`, err: `second argument to elapsed must be a duration, got *influxql.IntegerLiteral`}, + {s: `SELECT elapsed(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT elapsed(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT elapsed(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT elapsed(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT elapsed(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `elapsed aggregate requires a GROUP BY interval`}, + {s: `SELECT moving_average(field1, 2), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT moving_average(field1, 1), field1 FROM myseries`, err: `moving_average window must be greater than 1, got 1`}, + {s: `SELECT moving_average(field1, 0), field1 FROM myseries`, err: `moving_average window must be greater than 1, got 0`}, + {s: `SELECT moving_average(field1, -1), field1 FROM myseries`, err: `moving_average window must be greater than 1, got -1`}, + {s: `SELECT moving_average(field1, 2.0), field1 FROM myseries`, err: `second argument for moving_average must be an integer, got *influxql.NumberLiteral`}, + {s: `SELECT moving_average() from myseries`, err: `invalid number of arguments for moving_average, expected 2, got 0`}, + {s: `SELECT moving_average(value) FROM myseries`, err: `invalid number of arguments for moving_average, expected 2, got 1`}, + {s: `SELECT moving_average(value, 2) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to moving_average`}, + {s: `SELECT moving_average(top(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT moving_average(bottom(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT moving_average(max(), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT moving_average(percentile(value), 2) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT moving_average(mean(value), 2) FROM myseries where time < now() and time > now() - 1d`, err: `moving_average aggregate requires a GROUP BY interval`}, + {s: `SELECT cumulative_sum(field1), field1 FROM myseries`, err: `mixing aggregate and non-aggregate queries is not supported`}, + {s: `SELECT cumulative_sum() from myseries`, err: `invalid number of arguments for cumulative_sum, expected 1, got 0`}, + {s: `SELECT cumulative_sum(value) FROM myseries group by time(1h)`, err: `aggregate function required inside the call to cumulative_sum`}, + {s: `SELECT cumulative_sum(top(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for top, expected at least 2, got 1`}, + {s: `SELECT cumulative_sum(bottom(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for bottom, expected at least 2, got 1`}, + {s: `SELECT cumulative_sum(max()) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for max, expected 1, got 0`}, + {s: `SELECT cumulative_sum(percentile(value)) FROM myseries where time < now() and time > now() - 1d group by time(1h)`, err: `invalid number of arguments for percentile, expected 2, got 1`}, + {s: `SELECT cumulative_sum(mean(value)) FROM myseries where time < now() and time > now() - 1d`, err: `cumulative_sum aggregate requires a GROUP BY interval`}, + {s: `SELECT integral() FROM myseries`, err: `invalid number of arguments for integral, expected at least 1 but no more than 2, got 0`}, + {s: `SELECT integral(value, 10s, host) FROM myseries`, err: `invalid number of arguments for integral, expected at least 1 but no more than 2, got 3`}, + {s: `SELECT integral(value, -10s) FROM myseries`, err: `duration argument must be positive, got -10s`}, + {s: `SELECT integral(value, 10) FROM myseries`, err: `second argument must be a duration`}, + {s: `SELECT holt_winters(value) FROM myseries where time < now() and time > now() - 1d`, err: `invalid number of arguments for holt_winters, expected 3, got 1`}, + {s: `SELECT holt_winters(value, 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `must use aggregate function with holt_winters`}, + {s: `SELECT holt_winters(min(value), 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `holt_winters aggregate requires a GROUP BY interval`}, + {s: `SELECT holt_winters(min(value), 0, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `second arg to holt_winters must be greater than 0, got 0`}, + {s: `SELECT holt_winters(min(value), false, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as second arg in holt_winters`}, + {s: `SELECT holt_winters(min(value), 10, 'string') FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as third arg in holt_winters`}, + {s: `SELECT holt_winters(min(value), 10, -1) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `third arg to holt_winters cannot be negative, got -1`}, + {s: `SELECT holt_winters_with_fit(value) FROM myseries where time < now() and time > now() - 1d`, err: `invalid number of arguments for holt_winters_with_fit, expected 3, got 1`}, + {s: `SELECT holt_winters_with_fit(value, 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `must use aggregate function with holt_winters_with_fit`}, + {s: `SELECT holt_winters_with_fit(min(value), 10, 2) FROM myseries where time < now() and time > now() - 1d`, err: `holt_winters_with_fit aggregate requires a GROUP BY interval`}, + {s: `SELECT holt_winters_with_fit(min(value), 0, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `second arg to holt_winters_with_fit must be greater than 0, got 0`}, + {s: `SELECT holt_winters_with_fit(min(value), false, 2) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as second arg in holt_winters_with_fit`}, + {s: `SELECT holt_winters_with_fit(min(value), 10, 'string') FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `expected integer argument as third arg in holt_winters_with_fit`}, + {s: `SELECT holt_winters_with_fit(min(value), 10, -1) FROM myseries where time < now() and time > now() - 1d GROUP BY time(1d)`, err: `third arg to holt_winters_with_fit cannot be negative, got -1`}, + {s: `SELECT mean(value) + value FROM cpu WHERE time < now() and time > now() - 1h GROUP BY time(10m)`, err: `mixing aggregate and non-aggregate queries is not supported`}, + // TODO: Remove this restriction in the future: https://github.com/influxdata/influxdb/issues/5968 + {s: `SELECT mean(cpu_total - cpu_idle) FROM cpu`, err: `expected field argument in mean()`}, + {s: `SELECT derivative(mean(cpu_total - cpu_idle), 1s) FROM cpu WHERE time < now() AND time > now() - 1d GROUP BY time(1h)`, err: `expected field argument in mean()`}, + // TODO: The error message will change when math is allowed inside an aggregate: https://github.com/influxdata/influxdb/pull/5990#issuecomment-195565870 + {s: `SELECT count(foo + sum(bar)) FROM cpu`, err: `expected field argument in count()`}, + {s: `SELECT (count(foo + sum(bar))) FROM cpu`, err: `expected field argument in count()`}, + {s: `SELECT sum(value) + count(foo + sum(bar)) FROM cpu`, err: `expected field argument in count()`}, + {s: `SELECT sum(mean) FROM (SELECT mean(value) FROM cpu GROUP BY time(1h))`, err: `aggregate functions with GROUP BY time require a WHERE time clause with a lower limit`}, + {s: `SELECT top(value, 2), max(value) FROM cpu`, err: `selector function top() cannot be combined with other functions`}, + {s: `SELECT bottom(value, 2), max(value) FROM cpu`, err: `selector function bottom() cannot be combined with other functions`}, + {s: `SELECT min(derivative) FROM (SELECT derivative(mean(value), 1h) FROM myseries) where time < now() and time > now() - 1d`, err: `derivative aggregate requires a GROUP BY interval`}, + {s: `SELECT min(mean) FROM (SELECT mean(value) FROM myseries GROUP BY time)`, err: `time() is a function and expects at least one argument`}, + {s: `SELECT value FROM myseries WHERE value OR time >= now() - 1m`, err: `invalid condition expression: value`}, + {s: `SELECT value FROM myseries WHERE time >= now() - 1m OR value`, err: `invalid condition expression: value`}, + } { + t.Run(tt.s, func(t *testing.T) { + stmt, err := influxql.ParseStatement(tt.s) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + s := stmt.(*influxql.SelectStatement) + + opt := query.CompileOptions{} + if _, err := query.Compile(s, opt); err == nil { + t.Error("expected error") + } else if have, want := err.Error(), tt.err; have != want { + t.Errorf("unexpected error: %s != %s", have, want) + } + }) + } +} diff --git a/query/select_test.go b/query/select_test.go index fdddeef088..da08f800ed 100644 --- a/query/select_test.go +++ b/query/select_test.go @@ -2766,49 +2766,6 @@ func TestSelect_BinaryExpr_NilValues(t *testing.T) { } } -func TestSelect_InvalidQueries(t *testing.T) { - shardMapper := ShardMapper{ - MapShardsFn: func(sources influxql.Sources, _ influxql.TimeRange) query.ShardGroup { - return &ShardGroup{ - CreateIteratorFn: func(m *influxql.Measurement, opt query.IteratorOptions) (query.Iterator, error) { - return &FloatIterator{}, nil - }, - } - }, - } - - tests := []struct { - name string - q string - err string - }{ - { - name: "UnsupportedCall", - q: `SELECT foobar(value) FROM cpu`, - err: `unsupported call: foobar`, - }, - { - name: "InvalidStringExpression", - q: `SELECT 'value' FROM cpu`, - err: `invalid expression type: *influxql.StringLiteral`, - }, - { - name: "InvalidStringExpressionWithValidExpression", - q: `SELECT 'value', value FROM cpu`, - err: `invalid expression type: *influxql.StringLiteral`, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - _, _, err := query.Select(MustParseSelectStatement(tt.q), &shardMapper, query.SelectOptions{}) - if err == nil || err.Error() != tt.err { - t.Errorf("expected error '%s', got '%s'", tt.err, err) - } - }) - } -} - type ShardMapper struct { MapShardsFn func(sources influxql.Sources, t influxql.TimeRange) query.ShardGroup } diff --git a/tests/server_test.go b/tests/server_test.go index 42116669cb..39c472c647 100644 --- a/tests/server_test.go +++ b/tests/server_test.go @@ -1097,7 +1097,7 @@ func TestServer_Query_Count(t *testing.T) { &Query{ name: "selecting count(2) should error", command: `SELECT count(2) FROM db0.rp0.cpu`, - exp: `{"error":"error parsing query: expected field argument in count()"}`, + exp: `{"results":[{"statement_id":0,"error":"expected field argument in count()"}]}`, }, }...) @@ -4362,13 +4362,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "count - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, count(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","count"],"values":[["2000-01-01T00:00:00Z",3],["2000-01-01T00:00:30Z",3],["2000-01-01T00:01:00Z",3]]}]}]}`, }, &Query{ name: "count - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, count(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and non-aggregate queries is not supported"}]}`, }, &Query{ name: "distinct - baseline 30s", @@ -4380,13 +4380,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "distinct - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, distinct(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: aggregate function distinct() cannot be combined with other functions or fields"}`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","distinct"],"values":[["2000-01-01T00:00:00Z",10],["2000-01-01T00:00:00Z",40],["2000-01-01T00:00:30Z",40],["2000-01-01T00:00:30Z",50],["2000-01-01T00:01:00Z",70],["2000-01-01T00:01:00Z",90],["2000-01-01T00:01:00Z",5]]}]}]}`, }, &Query{ name: "distinct - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, distinct(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: aggregate function distinct() cannot be combined with other functions or fields"}`, + exp: `{"results":[{"statement_id":0,"error":"aggregate function distinct() cannot be combined with other functions or fields"}]}`, }, &Query{ name: "mean - baseline 30s", @@ -4398,13 +4398,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "mean - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, mean(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","mean"],"values":[["2000-01-01T00:00:00Z",30],["2000-01-01T00:00:30Z",46.666666666666664],["2000-01-01T00:01:00Z",55]]}]}]}`, }, &Query{ name: "mean - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, mean(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and non-aggregate queries is not supported"}]}`, }, &Query{ name: "median - baseline 30s", @@ -4416,13 +4416,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "median - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, median(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","median"],"values":[["2000-01-01T00:00:00Z",40],["2000-01-01T00:00:30Z",50],["2000-01-01T00:01:00Z",70]]}]}]}`, }, &Query{ name: "median - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, median(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and non-aggregate queries is not supported"}]}`, }, &Query{ name: "mode - baseline 30s", @@ -4434,31 +4434,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "mode - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, mode(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, - }, - &Query{ - name: "mode - tx", - params: url.Values{"db": []string{"db0"}}, - command: `SELECT tx, mode(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, - }, - &Query{ - name: "mode - baseline 30s", - params: url.Values{"db": []string{"db0"}}, - command: `SELECT mode(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","mode"],"values":[["2000-01-01T00:00:00Z",40],["2000-01-01T00:00:30Z",50],["2000-01-01T00:01:00Z",5]]}]}]}`, }, - &Query{ - name: "mode - time", - params: url.Values{"db": []string{"db0"}}, - command: `SELECT time, mode(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, - }, &Query{ name: "mode - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, mode(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and non-aggregate queries is not supported"}]}`, }, &Query{ name: "spread - baseline 30s", @@ -4470,13 +4452,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "spread - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, spread(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","spread"],"values":[["2000-01-01T00:00:00Z",30],["2000-01-01T00:00:30Z",10],["2000-01-01T00:01:00Z",85]]}]}]}`, }, &Query{ name: "spread - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, spread(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and non-aggregate queries is not supported"}]}`, }, &Query{ name: "stddev - baseline 30s", @@ -4488,13 +4470,13 @@ func TestServer_Query_AggregateSelectors(t *testing.T) { name: "stddev - time", params: url.Values{"db": []string{"db0"}}, command: `SELECT time, stddev(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"series":[{"name":"network","columns":["time","stddev"],"values":[["2000-01-01T00:00:00Z",17.320508075688775],["2000-01-01T00:00:30Z",5.773502691896258],["2000-01-01T00:01:00Z",44.44097208657794]]}]}]}`, }, &Query{ name: "stddev - tx", params: url.Values{"db": []string{"db0"}}, command: `SELECT tx, stddev(rx) FROM network where time >= '2000-01-01T00:00:00Z' AND time <= '2000-01-01T00:01:29Z' group by time(30s)`, - exp: `{"error":"error parsing query: mixing aggregate and non-aggregate queries is not supported"}`, + exp: `{"results":[{"statement_id":0,"error":"mixing aggregate and non-aggregate queries is not supported"}]}`, }, &Query{ name: "percentile - baseline 30s", @@ -4766,13 +4748,13 @@ func TestServer_Query_TopBottomInt(t *testing.T) { name: "top - cpu - 3 values with limit 2", params: url.Values{"db": []string{"db0"}}, command: `SELECT TOP(value, 3) FROM cpu limit 2`, - exp: `{"error":"error parsing query: limit (3) in top function can not be larger than the LIMIT (2) in the select statement"}`, + exp: `{"results":[{"statement_id":0,"error":"limit (3) in top function can not be larger than the LIMIT (2) in the select statement"}]}`, }, &Query{ name: "bottom - cpu - 3 values with limit 2", params: url.Values{"db": []string{"db0"}}, command: `SELECT BOTTOM(value, 3) FROM cpu limit 2`, - exp: `{"error":"error parsing query: limit (3) in bottom function can not be larger than the LIMIT (2) in the select statement"}`, + exp: `{"results":[{"statement_id":0,"error":"limit (3) in bottom function can not be larger than the LIMIT (2) in the select statement"}]}`, }, &Query{ name: "top - cpu - hourly",