From 08ef689d2196c79cdd94680f51460929c2f406be Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Thu, 23 Mar 2023 12:13:15 +1100 Subject: [PATCH] feat: Teach InfluxQL how to plan an aggregate query (#7230) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Display failed query Allows a user to immediately identify the failed query. * feat: API improvements to InfluxQL parser * feat: Extend `SchemaProvider` trait to query for UDFs * fix: We don't want the parser to panic on overflows * fix: ensure `map_type` maps the timestamp data type * feat: API to map a InfluxQL duration expression to a DataFusion interval * chore: Copied APIs from DataFusion SQL planner These APIs are private but useful for InfluxQL planning. * feat: Initial aggregate query support * feat: Add an API to fetch a field by name * chore: Fixes to handling NULLs in aggregates * chore: Add ability to test expected failures for InfluxQL * chore: appease rustfmt and clippy 😬 * chore: produce same error as InfluxQL * chore: appease clippy * chore: Improve docs * chore: Simplify aggregate and raw planning * feat: Add support for GROUP BY TIME(stride, offset) * chore: Update docs * chore: remove redundant `is_empty` check Co-authored-by: Christopher M. Wolff * chore: PR feedback to clarify purpose of function * chore: The series_sort can't be empty, as `time` is always added This was originally intended as an optimisation when executing an aggregate query that did not group by time or tags, as it will produce N rows, where N is the number of measurements queried. * chore: update comment for clarity --------- Co-authored-by: Christopher M. Wolff --- influxdb_influxql_parser/src/literal.rs | 31 +- influxdb_influxql_parser/src/select.rs | 83 +- ...rser__visit__test__select_statement-6.snap | 18 +- ...__visit_mut__test__select_statement-6.snap | 18 +- influxdb_influxql_parser/src/visit.rs | 57 +- influxdb_influxql_parser/src/visit_mut.rs | 60 +- .../query_tests2/cases/in/issue_6112.influxql | 49 + .../cases/in/issue_6112.influxql.expected | 153 +- iox_query_influxql/src/frontend/planner.rs | 18 +- iox_query_influxql/src/plan/field_mapper.rs | 14 +- iox_query_influxql/src/plan/mod.rs | 1 + iox_query_influxql/src/plan/planner.rs | 1425 ++++++++++------- iox_query_influxql/src/plan/planner/select.rs | 141 ++ .../src/plan/planner_time_range_expression.rs | 73 + iox_query_influxql/src/plan/rewriter.rs | 8 +- iox_query_influxql/src/plan/test_utils.rs | 10 +- iox_query_influxql/src/plan/util_copy.rs | 337 ++++ schema/src/lib.rs | 6 + test_helpers_end_to_end/src/client.rs | 7 +- .../src/snapshot_comparison.rs | 15 +- 20 files changed, 1903 insertions(+), 621 deletions(-) create mode 100644 iox_query_influxql/src/plan/planner/select.rs create mode 100644 iox_query_influxql/src/plan/util_copy.rs diff --git a/influxdb_influxql_parser/src/literal.rs b/influxdb_influxql_parser/src/literal.rs index 24f91a5e94..4de0305251 100644 --- a/influxdb_influxql_parser/src/literal.rs +++ b/influxdb_influxql_parser/src/literal.rs @@ -289,7 +289,8 @@ impl Display for Duration { fn single_duration(i: &str) -> ParseResult<&str, i64> { use DurationUnit::*; - map( + map_fail( + "overflow", pair( integer, alt(( @@ -304,15 +305,18 @@ fn single_duration(i: &str) -> ParseResult<&str, i64> { value(Week, tag("w")), // weeks )), ), - |(v, unit)| match unit { - Nanosecond => v, - Microsecond => v * NANOS_PER_MICRO, - Millisecond => v * NANOS_PER_MILLI, - Second => v * NANOS_PER_SEC, - Minute => v * NANOS_PER_MIN, - Hour => v 
* NANOS_PER_HOUR, - Day => v * NANOS_PER_DAY, - Week => v * NANOS_PER_WEEK, + |(v, unit)| { + (match unit { + Nanosecond => Some(v), + Microsecond => v.checked_mul(NANOS_PER_MICRO), + Millisecond => v.checked_mul(NANOS_PER_MILLI), + Second => v.checked_mul(NANOS_PER_SEC), + Minute => v.checked_mul(NANOS_PER_MIN), + Hour => v.checked_mul(NANOS_PER_HOUR), + Day => v.checked_mul(NANOS_PER_DAY), + Week => v.checked_mul(NANOS_PER_WEEK), + }) + .ok_or("integer overflow") }, )(i) } @@ -407,6 +411,8 @@ mod test { // Fallible cases integer("hello").unwrap_err(); + + integer("9223372036854775808").expect_err("expected overflow"); } #[test] @@ -487,6 +493,11 @@ mod test { let (_, got) = single_duration("5w").unwrap(); assert_eq!(got, 5 * NANOS_PER_WEEK); + + // Fallible + + // Handle overflow + single_duration("16000w").expect_err("expected overflow"); } #[test] diff --git a/influxdb_influxql_parser/src/select.rs b/influxdb_influxql_parser/src/select.rs index 57e51fc769..c47861798e 100644 --- a/influxdb_influxql_parser/src/select.rs +++ b/influxdb_influxql_parser/src/select.rs @@ -70,6 +70,15 @@ pub struct SelectStatement { pub timezone: Option, } +impl SelectStatement { + /// Return the `FILL` behaviour for the `SELECT` statement. + /// + /// The default when no `FILL` clause present is `FILL(null)`. + pub fn fill(&self) -> FillClause { + self.fill.unwrap_or_default() + } +} + impl Display for SelectStatement { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "SELECT {} {}", self.fields, self.from)?; @@ -242,6 +251,24 @@ impl Display for GroupByClause { } } +impl GroupByClause { + /// Returns the time dimension for the `GROUP BY` clause. + pub fn time_dimension(&self) -> Option<&TimeDimension> { + self.contents.iter().find_map(|dim| match dim { + Dimension::Time(t) => Some(t), + _ => None, + }) + } + + /// Returns an iterator of all the tag dimensions for the `GROUP BY` clause. + pub fn tags(&self) -> impl Iterator + '_ { + self.contents.iter().filter_map(|dim| match dim { + Dimension::Tag(i) => Some(i), + _ => None, + }) + } +} + /// Used to parse the interval argument of the TIME function struct TimeCallIntervalArgument; @@ -290,16 +317,30 @@ impl ArithmeticParsers for TimeCallOffsetArgument { } } +/// Represents a `TIME` dimension in a `GROUP BY` clause. +#[derive(Clone, Debug, PartialEq)] +pub struct TimeDimension { + /// The first argument of the `TIME` call. + pub interval: Expr, + /// An optional second argument to specify the offset applied to the `TIME` call. + pub offset: Option, +} + +impl Display for TimeDimension { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "TIME({}", self.interval)?; + if let Some(offset) = &self.offset { + write!(f, ", {offset}")?; + } + write!(f, ")") + } +} + /// Represents a dimension of a `GROUP BY` clause. #[derive(Clone, Debug, PartialEq)] pub enum Dimension { /// Represents a `TIME` call in a `GROUP BY` clause. - Time { - /// The first argument of the `TIME` call. - interval: Expr, - /// An optional second argument to specify the offset applied to the `TIME` call. - offset: Option, - }, + Time(TimeDimension), /// Represents a literal tag reference in a `GROUP BY` clause. Tag(Identifier), @@ -314,11 +355,7 @@ pub enum Dimension { impl Display for Dimension { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { - Self::Time { - interval, - offset: Some(offset), - } => write!(f, "TIME({interval}, {offset})"), - Self::Time { interval, .. 
} => write!(f, "TIME({interval})"), + Self::Time(v) => Display::fmt(v, f), Self::Tag(v) => Display::fmt(v, f), Self::Regex(v) => Display::fmt(v, f), Self::Wildcard => f.write_char('*'), @@ -366,7 +403,7 @@ fn time_call_expression(i: &str) -> ParseResult<&str, Dimension> { expect("invalid TIME call, expected ')'", preceded(ws0, char(')'))), ), ), - |(interval, offset)| Dimension::Time { interval, offset }, + |(interval, offset)| Dimension::Time(TimeDimension { interval, offset }), )(i) } @@ -390,9 +427,12 @@ fn group_by_clause(i: &str) -> ParseResult<&str, GroupByClause> { } /// Represents a `FILL` clause, and specifies all possible cases of the argument to the `FILL` clause. -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Default, Clone, Copy, PartialEq)] pub enum FillClause { /// Empty aggregate windows will contain null values and is specified as `fill(null)` + /// + /// This is the default behavior of a `SELECT` statement, when the `FILL` clause is omitted. + #[default] Null, /// Empty aggregate windows will be discarded and is specified as `fill(none)`. @@ -704,6 +744,8 @@ mod test { fn test_select_statement() { let (_, got) = select_statement("SELECT value FROM foo").unwrap(); assert_eq!(got.to_string(), "SELECT value FROM foo"); + // Assert default behaviour when `FILL` is omitted + assert_eq!(got.fill(), FillClause::Null); let (_, got) = select_statement(r#"SELECT f1,/f2/, f3 AS "a field" FROM foo WHERE host =~ /c1/"#) @@ -740,6 +782,7 @@ mod test { got.to_string(), r#"SELECT sum(value) FROM foo GROUP BY TIME(5m), host FILL(PREVIOUS)"# ); + assert_eq!(got.fill(), FillClause::Previous); let (_, got) = select_statement("SELECT value FROM foo ORDER BY DESC").unwrap(); assert_eq!( @@ -1141,6 +1184,20 @@ mod test { ); } + #[test] + fn test_group_by_clause_tags_time_dimension() { + let (_, got) = group_by_clause("GROUP BY *, /foo/, TIME(5m), tag1, tag2").unwrap(); + assert!(got.time_dimension().is_some()); + assert_eq!( + got.tags().cloned().collect::>(), + vec!["tag1".into(), "tag2".into()] + ); + + let (_, got) = group_by_clause("GROUP BY *, /foo/").unwrap(); + assert!(got.time_dimension().is_none()); + assert_eq!(got.tags().count(), 0); + } + #[test] fn test_time_call_expression() { let (got, _) = time_call_expression("TIME(5m)").unwrap(); diff --git a/influxdb_influxql_parser/src/snapshots/influxdb_influxql_parser__visit__test__select_statement-6.snap b/influxdb_influxql_parser/src/snapshots/influxdb_influxql_parser__visit__test__select_statement-6.snap index 14e6e9497e..df91bcffcc 100644 --- a/influxdb_influxql_parser/src/snapshots/influxdb_influxql_parser__visit__test__select_statement-6.snap +++ b/influxdb_influxql_parser/src/snapshots/influxdb_influxql_parser__visit__test__select_statement-6.snap @@ -2,8 +2,8 @@ source: influxdb_influxql_parser/src/visit.rs expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE host = \"node1\")\n WHERE region =~ /west/ AND value > 5\n GROUP BY TIME(5m), host\n FILL(previous)\n ORDER BY TIME DESC\n LIMIT 1 OFFSET 2\n SLIMIT 3 SOFFSET 4\n TZ('Australia/Hobart')\n \"#)" --- -- "pre_visit_statement: Select(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: 
None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) })" -- "pre_visit_select_statement: SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) }" +- "pre_visit_statement: Select(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: 
Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) })" +- "pre_visit_select_statement: SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) }" - "pre_visit_select_field_list: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }" - "pre_visit_select_field: Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }" - "pre_visit_expr: VarRef { name: Identifier(\"value\"), data_type: None }" @@ -66,14 +66,16 @@ expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE - "post_visit_conditional_expression: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) }" - "post_visit_conditional_expression: Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } }" - "post_visit_where_clause: WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })" -- "pre_visit_group_by_clause: ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }" -- "pre_visit_select_dimension: Time { interval: 
Literal(Duration(Duration(300000000000))), offset: None }" +- "pre_visit_group_by_clause: ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }" +- "pre_visit_select_dimension: Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None })" +- "pre_visit_select_time_dimension: TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }" - "pre_visit_expr: Literal(Duration(Duration(300000000000)))" - "post_visit_expr: Literal(Duration(Duration(300000000000)))" -- "post_visit_select_dimension: Time { interval: Literal(Duration(Duration(300000000000))), offset: None }" +- "post_visit_select_time_dimension: TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }" +- "post_visit_select_dimension: Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None })" - "pre_visit_select_dimension: Tag(Identifier(\"host\"))" - "post_visit_select_dimension: Tag(Identifier(\"host\"))" -- "post_visit_group_by_clause: ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }" +- "post_visit_group_by_clause: ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }" - "pre_visit_fill_clause: Previous" - "post_visit_fill_clause: Previous" - "pre_visit_order_by_clause: Descending" @@ -88,6 +90,6 @@ expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE - "post_visit_soffset_clause: SOffsetClause(4)" - "pre_visit_timezone_clause: TimeZoneClause(Australia/Hobart)" - "post_visit_timezone_clause: TimeZoneClause(Australia/Hobart)" -- "post_visit_select_statement: SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) }" -- "post_visit_statement: Select(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: 
ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) })" +- "post_visit_select_statement: SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) }" +- "post_visit_statement: Select(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, 
series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) })" diff --git a/influxdb_influxql_parser/src/snapshots/influxdb_influxql_parser__visit_mut__test__select_statement-6.snap b/influxdb_influxql_parser/src/snapshots/influxdb_influxql_parser__visit_mut__test__select_statement-6.snap index 8776da84f7..554061f195 100644 --- a/influxdb_influxql_parser/src/snapshots/influxdb_influxql_parser__visit_mut__test__select_statement-6.snap +++ b/influxdb_influxql_parser/src/snapshots/influxdb_influxql_parser__visit_mut__test__select_statement-6.snap @@ -2,8 +2,8 @@ source: influxdb_influxql_parser/src/visit_mut.rs expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE host = \"node1\")\n WHERE region =~ /west/ AND value > 5\n GROUP BY TIME(5m), host\n FILL(previous)\n ORDER BY TIME DESC\n LIMIT 1 OFFSET 2\n SLIMIT 3 SOFFSET 4\n TZ('Australia/Hobart')\n \"#)" --- -- "pre_visit_statement: Select(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) })" -- "pre_visit_select_statement: SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, 
retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) }" +- "pre_visit_statement: Select(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) })" +- "pre_visit_select_statement: SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: 
Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) }" - "pre_visit_select_field_list: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }" - "pre_visit_select_field: Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }" - "pre_visit_expr: VarRef { name: Identifier(\"value\"), data_type: None }" @@ -66,14 +66,16 @@ expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE - "post_visit_conditional_expression: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) }" - "post_visit_conditional_expression: Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } }" - "post_visit_where_clause: WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })" -- "pre_visit_group_by_clause: ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }" -- "pre_visit_select_dimension: Time { interval: Literal(Duration(Duration(300000000000))), offset: None }" +- "pre_visit_group_by_clause: ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }" +- "pre_visit_select_dimension: Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None })" +- "pre_visit_select_time_dimension: TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }" - "pre_visit_expr: Literal(Duration(Duration(300000000000)))" - "post_visit_expr: Literal(Duration(Duration(300000000000)))" -- "post_visit_select_dimension: Time { interval: Literal(Duration(Duration(300000000000))), offset: None }" +- "post_visit_select_time_dimension: TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }" +- "post_visit_select_dimension: Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None })" - "pre_visit_select_dimension: Tag(Identifier(\"host\"))" - "post_visit_select_dimension: Tag(Identifier(\"host\"))" -- "post_visit_group_by_clause: ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }" +- "post_visit_group_by_clause: ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }" - "pre_visit_fill_clause: Previous" - "post_visit_fill_clause: Previous" - "pre_visit_order_by_clause: 
Descending" @@ -88,6 +90,6 @@ expression: "visit_statement!(r#\"SELECT value FROM (SELECT usage FROM cpu WHERE - "post_visit_soffset_clause: SOffsetClause(4)" - "pre_visit_timezone_clause: TimeZoneClause(Australia/Hobart)" - "post_visit_timezone_clause: TimeZoneClause(Australia/Hobart)" -- "post_visit_select_statement: SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) }" -- "post_visit_statement: Select(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time { interval: Literal(Duration(Duration(300000000000))), offset: None }, Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) })" +- "post_visit_select_statement: SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore 
{ contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) }" +- "post_visit_statement: Select(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"value\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Subquery(SelectStatement { fields: ZeroOrMore { contents: [Field { expr: VarRef { name: Identifier(\"usage\"), data_type: None }, alias: None }] }, from: ZeroOrMore { contents: [Name(QualifiedMeasurementName { database: None, retention_policy: None, name: Name(Identifier(\"cpu\")) })] }, condition: Some(WhereClause(Binary { lhs: Expr(VarRef { name: Identifier(\"host\"), data_type: None }), op: Eq, rhs: Expr(VarRef { name: Identifier(\"node1\"), data_type: None }) })), group_by: None, fill: None, order_by: None, limit: None, offset: None, series_limit: None, series_offset: None, timezone: None })] }, condition: Some(WhereClause(Binary { lhs: Binary { lhs: Expr(VarRef { name: Identifier(\"region\"), data_type: None }), op: EqRegex, rhs: Expr(Literal(Regex(Regex(\"west\")))) }, op: And, rhs: Binary { lhs: Expr(VarRef { name: Identifier(\"value\"), data_type: None }), op: Gt, rhs: Expr(Literal(Integer(5))) } })), group_by: Some(ZeroOrMore { contents: [Time(TimeDimension { interval: Literal(Duration(Duration(300000000000))), offset: None }), Tag(Identifier(\"host\"))] }), fill: Some(Previous), order_by: Some(Descending), limit: Some(LimitClause(1)), offset: Some(OffsetClause(2)), series_limit: Some(SLimitClause(3)), series_offset: Some(SOffsetClause(4)), timezone: Some(TimeZoneClause(Australia/Hobart)) })" diff --git a/influxdb_influxql_parser/src/visit.rs b/influxdb_influxql_parser/src/visit.rs index c1e5fe391e..ba6e93d973 100644 --- a/influxdb_influxql_parser/src/visit.rs +++ b/influxdb_influxql_parser/src/visit.rs @@ -36,7 +36,8 @@ use crate::expression::arithmetic::Expr; use crate::expression::conditional::ConditionalExpression; use crate::select::{ Dimension, Field, FieldList, FillClause, FromMeasurementClause, GroupByClause, - MeasurementSelection, SLimitClause, SOffsetClause, SelectStatement, TimeZoneClause, + MeasurementSelection, SLimitClause, SOffsetClause, SelectStatement, TimeDimension, + TimeZoneClause, }; use crate::show::{OnClause, ShowDatabasesStatement}; use crate::show_field_keys::ShowFieldKeysStatement; @@ -367,6 +368,19 @@ 
pub trait Visitor: Sized { Ok(self) } + /// Invoked before `TIME` dimension clause is visited. + fn pre_visit_select_time_dimension( + self, + _n: &TimeDimension, + ) -> Result, Self::Error> { + Ok(Continue(self)) + } + + /// Invoked after `TIME` dimension clause is visited. + fn post_visit_select_time_dimension(self, _n: &TimeDimension) -> Result { + Ok(self) + } + /// Invoked before any children of the `WHERE` clause are visited. fn pre_visit_where_clause(self, _n: &WhereClause) -> Result, Self::Error> { Ok(Continue(self)) @@ -1108,14 +1122,7 @@ impl Visitable for Dimension { }; let visitor = match self { - Self::Time { interval, offset } => { - let visitor = interval.accept(visitor)?; - if let Some(offset) = offset { - offset.accept(visitor) - } else { - Ok(visitor) - } - } + Self::Time(v) => v.accept(visitor), Self::Tag(_) | Self::Regex(_) | Self::Wildcard => Ok(visitor), }?; @@ -1123,6 +1130,24 @@ impl Visitable for Dimension { } } +impl Visitable for TimeDimension { + fn accept(&self, visitor: V) -> Result { + let visitor = match visitor.pre_visit_select_time_dimension(self)? { + Continue(visitor) => visitor, + Stop(visitor) => return Ok(visitor), + }; + + let visitor = self.interval.accept(visitor)?; + let visitor = if let Some(offset) = &self.offset { + offset.accept(visitor)? + } else { + visitor + }; + + visitor.post_visit_select_time_dimension(self) + } +} + impl Visitable for WithKeyClause { fn accept(&self, visitor: V) -> Result { let visitor = match visitor.pre_visit_with_key_clause(self)? { @@ -1218,7 +1243,8 @@ mod test { use crate::expression::conditional::ConditionalExpression; use crate::select::{ Dimension, Field, FieldList, FillClause, FromMeasurementClause, GroupByClause, - MeasurementSelection, SLimitClause, SOffsetClause, SelectStatement, TimeZoneClause, + MeasurementSelection, SLimitClause, SOffsetClause, SelectStatement, TimeDimension, + TimeZoneClause, }; use crate::show::{OnClause, ShowDatabasesStatement}; use crate::show_field_keys::ShowFieldKeysStatement; @@ -1506,6 +1532,17 @@ mod test { Ok(self.push_post("select_dimension", n)) } + fn pre_visit_select_time_dimension( + self, + n: &TimeDimension, + ) -> Result, Self::Error> { + Ok(Continue(self.push_pre("select_time_dimension", n))) + } + + fn post_visit_select_time_dimension(self, n: &TimeDimension) -> Result { + Ok(self.push_post("select_time_dimension", n)) + } + fn pre_visit_where_clause(self, n: &WhereClause) -> Result, Self::Error> { Ok(Continue(self.push_pre("where_clause", n))) } diff --git a/influxdb_influxql_parser/src/visit_mut.rs b/influxdb_influxql_parser/src/visit_mut.rs index ca5626a8c4..ccbdb550da 100644 --- a/influxdb_influxql_parser/src/visit_mut.rs +++ b/influxdb_influxql_parser/src/visit_mut.rs @@ -36,7 +36,8 @@ use crate::expression::arithmetic::Expr; use crate::expression::conditional::ConditionalExpression; use crate::select::{ Dimension, Field, FieldList, FillClause, FromMeasurementClause, GroupByClause, - MeasurementSelection, SLimitClause, SOffsetClause, SelectStatement, TimeZoneClause, + MeasurementSelection, SLimitClause, SOffsetClause, SelectStatement, TimeDimension, + TimeZoneClause, }; use crate::show::{OnClause, ShowDatabasesStatement}; use crate::show_field_keys::ShowFieldKeysStatement; @@ -380,6 +381,22 @@ pub trait VisitorMut: Sized { Ok(()) } + /// Invoked before `TIME` dimension clause is visited. + fn pre_visit_select_time_dimension( + &mut self, + _n: &mut TimeDimension, + ) -> Result { + Ok(Continue) + } + + /// Invoked after `TIME` dimension clause is visited. 
+ fn post_visit_select_time_dimension( + &mut self, + _n: &mut TimeDimension, + ) -> Result<(), Self::Error> { + Ok(()) + } + /// Invoked before any children of the `WHERE` clause are visited. fn pre_visit_where_clause(&mut self, _n: &mut WhereClause) -> Result { Ok(Continue) @@ -1052,12 +1069,7 @@ impl VisitableMut for Dimension { }; match self { - Self::Time { interval, offset } => { - interval.accept(visitor)?; - if let Some(offset) = offset { - offset.accept(visitor)?; - } - } + Self::Time(v) => v.accept(visitor)?, Self::Tag(_) | Self::Regex(_) | Self::Wildcard => {} }; @@ -1065,6 +1077,21 @@ impl VisitableMut for Dimension { } } +impl VisitableMut for TimeDimension { + fn accept(&mut self, visitor: &mut V) -> Result<(), V::Error> { + if let Stop = visitor.pre_visit_select_time_dimension(self)? { + return Ok(()); + }; + + self.interval.accept(visitor)?; + if let Some(offset) = &mut self.offset { + offset.accept(visitor)?; + } + + visitor.post_visit_select_time_dimension(self) + } +} + impl VisitableMut for WithKeyClause { fn accept(&mut self, visitor: &mut V) -> Result<(), V::Error> { if let Stop = visitor.pre_visit_with_key_clause(self)? { @@ -1156,7 +1183,8 @@ mod test { use crate::parse_statements; use crate::select::{ Dimension, Field, FieldList, FillClause, FromMeasurementClause, GroupByClause, - MeasurementSelection, SLimitClause, SOffsetClause, SelectStatement, TimeZoneClause, + MeasurementSelection, SLimitClause, SOffsetClause, SelectStatement, TimeDimension, + TimeZoneClause, }; use crate::show::{OnClause, ShowDatabasesStatement}; use crate::show_field_keys::ShowFieldKeysStatement; @@ -1498,6 +1526,22 @@ mod test { Ok(()) } + fn pre_visit_select_time_dimension( + &mut self, + n: &mut TimeDimension, + ) -> Result { + self.push_pre("select_time_dimension", n); + Ok(Continue) + } + + fn post_visit_select_time_dimension( + &mut self, + n: &mut TimeDimension, + ) -> Result<(), Self::Error> { + self.push_post("select_time_dimension", n); + Ok(()) + } + fn pre_visit_where_clause( &mut self, n: &mut WhereClause, diff --git a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql index 8bb474f614..c3bc395ff3 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql +++ b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql @@ -271,9 +271,58 @@ SELECT cpu, usage_idle FROM cpu; SELECT usage_idle FROM cpu GROUP BY cpu; SELECT usage_idle, cpu FROM cpu GROUP BY cpu; +-- group by a non-existent tag +SELECT usage_idle FROM cpu GROUP BY cpu, non_existent; +-- group by and project a non-existent tag +SELECT usage_idle, non_existent FROM cpu GROUP BY cpu, non_existent; + -- multiple measurements and tags in the group by SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY cpu; +SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY cpu, non_existent; SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY cpu, device; SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY device, cpu; SELECT usage_idle, bytes_free, device, cpu FROM cpu, disk GROUP BY device, cpu; + +-- +-- Aggregate queries +-- + +SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0 GROUP BY tag0; +SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0 GROUP BY tag0, non_existent; +SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0 GROUP BY non_existent; +SELECT COUNT(f64), COUNT(f64) + COUNT(f64), COUNT(f64) * 3 FROM m0; +-- non-existent columns in an aggregate should evaluate to NULL +SELECT COUNT(f64) as the_count, SUM(non_existent) as foo FROM m0; 
+-- non-existent columns in an aggregate expression should evaluate to NULL +SELECT COUNT(f64) as the_count, SUM(f64) + SUM(non_existent) as foo FROM m0; + +SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s) FILL(none); +-- supports offset parameter +SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s, 1s) FILL(none); + +SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk; +SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY TIME(1s) FILL(none); +SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY cpu; +SELECT COUNT(usage_idle) as count_usage_idle, COUNT(bytes_free) as count_bytes_free FROM cpu, disk WHERE cpu = 'cpu0' OR device = 'disk1s1' GROUP BY cpu; + +-- measurements without any matching fields are omitted from the result set +SELECT SUM(usage_idle) FROM cpu, disk WHERE cpu = 'cpu0' GROUP BY cpu; +SELECT SUM(usage_idle) FROM cpu, disk GROUP BY cpu; + +-- Fallible cases + +-- Mixing aggregate and non-aggregate columns +SELECT COUNT(usage_idle) + usage_idle FROM cpu; +SELECT COUNT(usage_idle), usage_idle FROM cpu; + +-- Unimplemented cases + +-- TODO(sgc): No gap filling +-- Default FILL(null) when FILL is omitted +SELECT COUNT(usage_idle) FROM cpu GROUP BY TIME(30s); +SELECT COUNT(usage_idle) FROM cpu GROUP BY TIME(30s) FILL(previous); + +-- LIMIT and OFFSET aren't supported with aggregates and groups +SELECT COUNT(usage_idle) FROM cpu GROUP BY cpu LIMIT 1; +SELECT COUNT(usage_idle) FROM cpu GROUP BY cpu OFFSET 1; \ No newline at end of file diff --git a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected index c14a6e8574..ee9c18344e 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected +++ b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected @@ -455,6 +455,28 @@ | cpu | 2022-10-31T02:00:00Z | 1.98 | cpu1 | | cpu | 2022-10-31T02:00:10Z | 1.99 | cpu1 | +------------------+----------------------+------------+-----------+ +-- InfluxQL: SELECT usage_idle FROM cpu GROUP BY cpu, non_existent; ++------------------+----------------------+-----------+--------------+------------+ +| iox::measurement | time | cpu | non_existent | usage_idle | ++------------------+----------------------+-----------+--------------+------------+ +| cpu | 2022-10-31T02:00:00Z | cpu-total | | 2.98 | +| cpu | 2022-10-31T02:00:10Z | cpu-total | | 2.99 | +| cpu | 2022-10-31T02:00:00Z | cpu0 | | 0.98 | +| cpu | 2022-10-31T02:00:10Z | cpu0 | | 0.99 | +| cpu | 2022-10-31T02:00:00Z | cpu1 | | 1.98 | +| cpu | 2022-10-31T02:00:10Z | cpu1 | | 1.99 | ++------------------+----------------------+-----------+--------------+------------+ +-- InfluxQL: SELECT usage_idle, non_existent FROM cpu GROUP BY cpu, non_existent; ++------------------+----------------------+-----------+------------+--------------+ +| iox::measurement | time | cpu | usage_idle | non_existent | ++------------------+----------------------+-----------+------------+--------------+ +| cpu | 2022-10-31T02:00:00Z | cpu-total | 2.98 | | +| cpu | 2022-10-31T02:00:10Z | cpu-total | 2.99 | | +| cpu | 2022-10-31T02:00:00Z | cpu0 | 0.98 | | +| cpu | 2022-10-31T02:00:10Z | cpu0 | 0.99 | | +| cpu | 2022-10-31T02:00:00Z | cpu1 | 1.98 | | +| cpu | 2022-10-31T02:00:10Z | cpu1 | 1.99 | | ++------------------+----------------------+-----------+------------+--------------+ -- InfluxQL: SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY cpu; 
+------------------+----------------------+-----------+------------+------------+ | iox::measurement | time | cpu | usage_idle | bytes_free | @@ -472,6 +494,23 @@ | disk | 2022-10-31T02:00:10Z | | | 2239.0 | | disk | 2022-10-31T02:00:10Z | | | 3239.0 | +------------------+----------------------+-----------+------------+------------+ +-- InfluxQL: SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY cpu, non_existent; ++------------------+----------------------+-----------+--------------+------------+------------+ +| iox::measurement | time | cpu | non_existent | usage_idle | bytes_free | ++------------------+----------------------+-----------+--------------+------------+------------+ +| cpu | 2022-10-31T02:00:00Z | cpu-total | | 2.98 | | +| cpu | 2022-10-31T02:00:10Z | cpu-total | | 2.99 | | +| cpu | 2022-10-31T02:00:00Z | cpu0 | | 0.98 | | +| cpu | 2022-10-31T02:00:10Z | cpu0 | | 0.99 | | +| cpu | 2022-10-31T02:00:00Z | cpu1 | | 1.98 | | +| cpu | 2022-10-31T02:00:10Z | cpu1 | | 1.99 | | +| disk | 2022-10-31T02:00:00Z | | | | 1234.0 | +| disk | 2022-10-31T02:00:00Z | | | | 2234.0 | +| disk | 2022-10-31T02:00:00Z | | | | 3234.0 | +| disk | 2022-10-31T02:00:10Z | | | | 1239.0 | +| disk | 2022-10-31T02:00:10Z | | | | 2239.0 | +| disk | 2022-10-31T02:00:10Z | | | | 3239.0 | ++------------------+----------------------+-----------+--------------+------------+------------+ -- InfluxQL: SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY cpu, device; +------------------+----------------------+-----------+---------+------------+------------+ | iox::measurement | time | cpu | device | usage_idle | bytes_free | @@ -522,4 +561,116 @@ | disk | 2022-10-31T02:00:10Z | | 2239.0 | disk1s2 | | | disk | 2022-10-31T02:00:00Z | | 3234.0 | disk1s5 | | | disk | 2022-10-31T02:00:10Z | | 3239.0 | disk1s5 | | -+------------------+----------------------+------------+------------+---------+-----------+ \ No newline at end of file ++------------------+----------------------+------------+------------+---------+-----------+ +-- InfluxQL: SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0 GROUP BY tag0; ++------------------+----------------------+-------+-------+------+-------------------+ +| iox::measurement | time | tag0 | count | sum | stddev | ++------------------+----------------------+-------+-------+------+-------------------+ +| m0 | 1970-01-01T00:00:00Z | val00 | 5 | 80.6 | 5.085961069453836 | +| m0 | 1970-01-01T00:00:00Z | val01 | 1 | 11.3 | | +| m0 | 1970-01-01T00:00:00Z | val02 | 1 | 10.4 | | ++------------------+----------------------+-------+-------+------+-------------------+ +-- InfluxQL: SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0 GROUP BY tag0, non_existent; ++------------------+----------------------+--------------+-------+-------+------+-------------------+ +| iox::measurement | time | non_existent | tag0 | count | sum | stddev | ++------------------+----------------------+--------------+-------+-------+------+-------------------+ +| m0 | 1970-01-01T00:00:00Z | | val00 | 5 | 80.6 | 5.085961069453836 | +| m0 | 1970-01-01T00:00:00Z | | val01 | 1 | 11.3 | | +| m0 | 1970-01-01T00:00:00Z | | val02 | 1 | 10.4 | | ++------------------+----------------------+--------------+-------+-------+------+-------------------+ +-- InfluxQL: SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0 GROUP BY non_existent; ++------------------+----------------------+--------------+-------+--------------------+--------------------+ +| iox::measurement | time | non_existent | count | sum | stddev | 
++------------------+----------------------+--------------+-------+--------------------+--------------------+ +| m0 | 1970-01-01T00:00:00Z | | 7 | 102.30000000000001 | 4.8912945019454614 | ++------------------+----------------------+--------------+-------+--------------------+--------------------+ +-- InfluxQL: SELECT COUNT(f64), COUNT(f64) + COUNT(f64), COUNT(f64) * 3 FROM m0; ++------------------+----------------------+-------+---------------------+-----------+ +| iox::measurement | time | count | count_f64_count_f64 | count_f64 | ++------------------+----------------------+-------+---------------------+-----------+ +| m0 | 1970-01-01T00:00:00Z | 7 | 14 | 21 | ++------------------+----------------------+-------+---------------------+-----------+ +-- InfluxQL: SELECT COUNT(f64) as the_count, SUM(non_existent) as foo FROM m0; ++------------------+----------------------+-----------+-----+ +| iox::measurement | time | the_count | foo | ++------------------+----------------------+-----------+-----+ +| m0 | 1970-01-01T00:00:00Z | 7 | | ++------------------+----------------------+-----------+-----+ +-- InfluxQL: SELECT COUNT(f64) as the_count, SUM(f64) + SUM(non_existent) as foo FROM m0; ++------------------+----------------------+-----------+-----+ +| iox::measurement | time | the_count | foo | ++------------------+----------------------+-----------+-----+ +| m0 | 1970-01-01T00:00:00Z | 7 | | ++------------------+----------------------+-----------+-----+ +-- InfluxQL: SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s) FILL(none); ++------------------+----------------------+-------+------+ +| iox::measurement | time | count | sum | ++------------------+----------------------+-------+------+ +| m0 | 2022-10-31T02:00:00Z | 6 | 83.1 | +| m0 | 2022-10-31T02:00:30Z | 1 | 19.2 | ++------------------+----------------------+-------+------+ +-- InfluxQL: SELECT COUNT(f64), SUM(f64) FROM m0 GROUP BY TIME(30s, 1s) FILL(none); ++------------------+----------------------+-------+--------------------+ +| iox::measurement | time | count | sum | ++------------------+----------------------+-------+--------------------+ +| m0 | 2022-10-31T01:59:31Z | 3 | 31.799999999999997 | +| m0 | 2022-10-31T02:00:01Z | 4 | 70.5 | ++------------------+----------------------+-------+--------------------+ +-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk; ++------------------+----------------------+-------+---------+ +| iox::measurement | time | count | count_1 | ++------------------+----------------------+-------+---------+ +| cpu | 1970-01-01T00:00:00Z | 6 | | +| disk | 1970-01-01T00:00:00Z | | 6 | ++------------------+----------------------+-------+---------+ +-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY TIME(1s) FILL(none); ++------------------+----------------------+-------+---------+ +| iox::measurement | time | count | count_1 | ++------------------+----------------------+-------+---------+ +| cpu | 2022-10-31T02:00:00Z | 3 | | +| cpu | 2022-10-31T02:00:10Z | 3 | | +| disk | 2022-10-31T02:00:00Z | | 3 | +| disk | 2022-10-31T02:00:10Z | | 3 | ++------------------+----------------------+-------+---------+ +-- InfluxQL: SELECT COUNT(usage_idle), COUNT(bytes_free) FROM cpu, disk GROUP BY cpu; ++------------------+----------------------+-----------+-------+---------+ +| iox::measurement | time | cpu | count | count_1 | ++------------------+----------------------+-----------+-------+---------+ +| cpu | 1970-01-01T00:00:00Z | cpu-total | 2 | | +| cpu | 
1970-01-01T00:00:00Z | cpu0 | 2 | | +| cpu | 1970-01-01T00:00:00Z | cpu1 | 2 | | +| disk | 1970-01-01T00:00:00Z | | | 6 | ++------------------+----------------------+-----------+-------+---------+ +-- InfluxQL: SELECT COUNT(usage_idle) as count_usage_idle, COUNT(bytes_free) as count_bytes_free FROM cpu, disk WHERE cpu = 'cpu0' OR device = 'disk1s1' GROUP BY cpu; ++------------------+----------------------+------+------------------+------------------+ +| iox::measurement | time | cpu | count_usage_idle | count_bytes_free | ++------------------+----------------------+------+------------------+------------------+ +| cpu | 1970-01-01T00:00:00Z | cpu0 | 2 | | +| disk | 1970-01-01T00:00:00Z | | | 2 | ++------------------+----------------------+------+------------------+------------------+ +-- InfluxQL: SELECT SUM(usage_idle) FROM cpu, disk WHERE cpu = 'cpu0' GROUP BY cpu; ++------------------+----------------------+------+------+ +| iox::measurement | time | cpu | sum | ++------------------+----------------------+------+------+ +| cpu | 1970-01-01T00:00:00Z | cpu0 | 1.97 | ++------------------+----------------------+------+------+ +-- InfluxQL: SELECT SUM(usage_idle) FROM cpu, disk GROUP BY cpu; ++------------------+----------------------+-----------+--------------------+ +| iox::measurement | time | cpu | sum | ++------------------+----------------------+-----------+--------------------+ +| cpu | 1970-01-01T00:00:00Z | cpu-total | 5.970000000000001 | +| cpu | 1970-01-01T00:00:00Z | cpu0 | 1.97 | +| cpu | 1970-01-01T00:00:00Z | cpu1 | 3.9699999999999998 | ++------------------+----------------------+-----------+--------------------+ +-- InfluxQL: SELECT COUNT(usage_idle) + usage_idle FROM cpu; +Error while planning query: Error during planning: mixing aggregate and non-aggregate columns is not supported +-- InfluxQL: SELECT COUNT(usage_idle), usage_idle FROM cpu; +Error while planning query: Error during planning: mixing aggregate and non-aggregate columns is not supported +-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu GROUP BY TIME(30s); +Error while planning query: This feature is not implemented: FILL(NULL) +-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu GROUP BY TIME(30s) FILL(previous); +Error while planning query: This feature is not implemented: FILL(PREVIOUS) +-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu GROUP BY cpu LIMIT 1; +Error while planning query: This feature is not implemented: GROUP BY combined with LIMIT or OFFSET clause +-- InfluxQL: SELECT COUNT(usage_idle) FROM cpu GROUP BY cpu OFFSET 1; +Error while planning query: This feature is not implemented: GROUP BY combined with LIMIT or OFFSET clause \ No newline at end of file diff --git a/iox_query_influxql/src/frontend/planner.rs b/iox_query_influxql/src/frontend/planner.rs index a586a29011..27a778e377 100644 --- a/iox_query_influxql/src/frontend/planner.rs +++ b/iox_query_influxql/src/frontend/planner.rs @@ -9,8 +9,8 @@ use std::sync::Arc; use crate::plan::{parse_regex, InfluxQLToLogicalPlan, SchemaProvider}; use datafusion::common::Statistics; use datafusion::datasource::provider_as_source; -use datafusion::execution::context::TaskContext; -use datafusion::logical_expr::{LogicalPlan, TableSource}; +use datafusion::execution::context::{SessionState, TaskContext}; +use datafusion::logical_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource}; use datafusion::physical_expr::PhysicalSortExpr; use datafusion::physical_plan::{Partitioning, SendableRecordBatchStream}; use datafusion::{ @@ -25,11 +25,12 @@ use 
iox_query::exec::IOxSessionContext; use observability_deps::tracing::debug; use schema::Schema; -struct ContextSchemaProvider { +struct ContextSchemaProvider<'a> { + state: &'a SessionState, tables: HashMap, Schema)>, } -impl SchemaProvider for ContextSchemaProvider { +impl<'a> SchemaProvider for ContextSchemaProvider<'a> { fn get_table_provider(&self, name: &str) -> Result> { self.tables .get(name) @@ -37,6 +38,14 @@ impl SchemaProvider for ContextSchemaProvider { .ok_or_else(|| DataFusionError::Plan(format!("measurement does not exist: {name}"))) } + fn get_function_meta(&self, name: &str) -> Option> { + self.state.scalar_functions().get(name).cloned() + } + + fn get_aggregate_meta(&self, name: &str) -> Option> { + self.state.aggregate_functions().get(name).cloned() + } + fn table_names(&self) -> Vec<&'_ str> { self.tables.keys().map(|k| k.as_str()).collect::>() } @@ -171,6 +180,7 @@ impl InfluxQLQueryPlanner { let query_tables = find_all_measurements(&statement, &names)?; let mut sp = ContextSchemaProvider { + state: &ctx.inner().state(), tables: HashMap::with_capacity(query_tables.len()), }; diff --git a/iox_query_influxql/src/plan/field_mapper.rs b/iox_query_influxql/src/plan/field_mapper.rs index 751b315b4c..3fdc8398e9 100644 --- a/iox_query_influxql/src/plan/field_mapper.rs +++ b/iox_query_influxql/src/plan/field_mapper.rs @@ -36,12 +36,10 @@ pub(crate) fn map_type( field: &str, ) -> Result> { match s.table_schema(measurement_name) { - Some(iox) => Ok(match iox.find_index_of(field) { - Some(i) => match iox.field(i).0 { - InfluxColumnType::Field(ft) => Some(field_type_to_var_ref_data_type(ft)), - InfluxColumnType::Tag => Some(VarRefDataType::Tag), - InfluxColumnType::Timestamp => None, - }, + Some(iox) => Ok(match iox.field_by_name(field) { + Some((InfluxColumnType::Field(ft), _)) => Some(field_type_to_var_ref_data_type(ft)), + Some((InfluxColumnType::Tag, _)) => Some(VarRefDataType::Tag), + Some((InfluxColumnType::Timestamp, _)) => Some(VarRefDataType::Timestamp), None => None, }), None => Ok(None), @@ -87,6 +85,10 @@ mod test { map_type(&namespace, "cpu", "host").unwrap(), Some(VarRefDataType::Tag) ); + assert_matches!( + map_type(&namespace, "cpu", "time").unwrap(), + Some(VarRefDataType::Timestamp) + ); // Returns None for nonexistent field assert!(map_type(&namespace, "cpu", "nonexistent") .unwrap() diff --git a/iox_query_influxql/src/plan/mod.rs b/iox_query_influxql/src/plan/mod.rs index d40197a594..abe2c9879c 100644 --- a/iox_query_influxql/src/plan/mod.rs +++ b/iox_query_influxql/src/plan/mod.rs @@ -8,6 +8,7 @@ mod rewriter; mod test_utils; mod timestamp; mod util; +mod util_copy; mod var_ref; pub use planner::InfluxQLToLogicalPlan; diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index 54a6ed8e6f..e54137369a 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -1,29 +1,39 @@ +mod select; + +use crate::plan::planner::select::{ + check_exprs_satisfy_columns, make_tag_key_column_meta, plan_with_sort, +}; use crate::plan::planner_rewrite_expression::{rewrite_conditional, rewrite_expr}; -use crate::plan::planner_time_range_expression::time_range_to_df_expr; +use crate::plan::planner_time_range_expression::{ + duration_expr_to_nanoseconds, expr_to_df_interval_dt, time_range_to_df_expr, +}; use crate::plan::rewriter::rewrite_statement; use crate::plan::util::{binary_operator_to_df_operator, Schemas}; +use crate::plan::util_copy::rebase_expr; use 
crate::plan::var_ref::{column_type_to_var_ref_data_type, var_ref_data_type_to_data_type}; use arrow::datatypes::DataType; +use chrono_tz::Tz; use datafusion::catalog::TableReference; use datafusion::common::{DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, ToDFSchema}; use datafusion::logical_expr::expr_rewriter::{normalize_col, ExprRewritable, ExprRewriter}; use datafusion::logical_expr::logical_plan::builder::project; use datafusion::logical_expr::logical_plan::Analyze; +use datafusion::logical_expr::utils::{expr_as_column_expr, find_aggregate_exprs}; use datafusion::logical_expr::{ - binary_expr, lit, BinaryExpr, BuiltinScalarFunction, Explain, Expr, ExprSchemable, LogicalPlan, - LogicalPlanBuilder, Operator, PlanType, Projection, TableSource, ToStringifiedPlan, + binary_expr, date_bin, expr, lit, lit_timestamp_nano, AggregateFunction, AggregateUDF, + BinaryExpr, BuiltinScalarFunction, Explain, Expr, ExprSchemable, LogicalPlan, + LogicalPlanBuilder, Operator, PlanType, ScalarUDF, TableSource, ToStringifiedPlan, }; use datafusion_util::{lit_dict, AsExpr}; -use generated_types::influxdata::iox::querier::v1::{ - influx_ql_metadata::TagKeyColumn, InfluxQlMetadata, -}; -use influxdb_influxql_parser::common::OrderByClause; +use generated_types::influxdata::iox::querier::v1::InfluxQlMetadata; use influxdb_influxql_parser::explain::{ExplainOption, ExplainStatement}; use influxdb_influxql_parser::expression::walk::walk_expr; use influxdb_influxql_parser::expression::{ BinaryOperator, ConditionalExpression, ConditionalOperator, VarRefDataType, }; -use influxdb_influxql_parser::select::{Dimension, SLimitClause, SOffsetClause}; +use influxdb_influxql_parser::select::{ + FillClause, GroupByClause, SLimitClause, SOffsetClause, TimeZoneClause, +}; use influxdb_influxql_parser::{ common::{LimitClause, MeasurementName, OffsetClause, WhereClause}, expression::Expr as IQLExpr, @@ -39,9 +49,8 @@ use schema::{ InfluxColumnType, InfluxFieldType, Schema, INFLUXQL_MEASUREMENT_COLUMN_NAME, INFLUXQL_METADATA_KEY, }; -use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::{HashSet, VecDeque}; use std::fmt::Debug; -use std::iter; use std::ops::{ControlFlow, Deref}; use std::str::FromStr; use std::sync::Arc; @@ -55,6 +64,12 @@ pub trait SchemaProvider { /// Getter for a datasource fn get_table_provider(&self, name: &str) -> Result>; + /// Getter for a UDF description + fn get_function_meta(&self, name: &str) -> Option>; + + /// Getter for a UDAF description + fn get_aggregate_meta(&self, name: &str) -> Option>; + /// The collection of tables for this schema. fn table_names(&self) -> Vec<&'_ str>; @@ -72,16 +87,64 @@ pub trait SchemaProvider { /// /// Specifically, the scope of available functions is narrowed to mathematical scalar functions /// when processing the `WHERE` clause. -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Default, Clone, Copy, PartialEq)] enum ExprScope { /// Signals that expressions should be transformed in the context of /// the `WHERE` clause. + #[default] Where, /// Signals that expressions should be transformed in the context of /// the `SELECT` projection list. Projection, } +/// State used to inform the planner. +#[allow(dead_code)] +#[derive(Debug, Default, Clone)] +struct Context<'a> { + /// `true` if this is a subquery `SELECT` statement. 
+ is_subquery: bool, + scope: ExprScope, + tz: Option, + + // + is_aggregate: bool, + + // GROUP BY information + group_by: Option<&'a GroupByClause>, + fill: Option, +} + +impl<'a> Context<'a> { + fn new() -> Self { + Default::default() + } + + fn with_scope(&self, scope: ExprScope) -> Self { + Self { scope, ..*self } + } + + fn with_timezone(&self, timezone: Option) -> Self { + let tz = timezone.as_deref().cloned(); + Self { tz, ..*self } + } + + fn with_group_by_fill(&self, select: &'a SelectStatement) -> Self { + Self { + group_by: select.group_by.as_ref(), + fill: select.fill, + ..*self + } + } + + fn with_is_aggregate(&self, is_aggregate: bool) -> Self { + Self { + is_aggregate, + ..*self + } + } +} + #[allow(missing_debug_implementations)] /// InfluxQL query planner pub struct InfluxQLToLogicalPlan<'a> { @@ -103,9 +166,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> { Err(DataFusionError::NotImplemented("DROP MEASUREMENT".into())) } Statement::Explain(explain) => self.explain_statement_to_plan(*explain), - Statement::Select(select) => { - self.select_statement_to_plan(&self.rewrite_select_statement(*select)?) - } + Statement::Select(select) => self.select_statement_to_plan( + &Context::new(), + &self.rewrite_select_statement(*select)?, + ), Statement::ShowDatabases(_) => { Err(DataFusionError::NotImplemented("SHOW DATABASES".into())) } @@ -128,8 +192,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> { } fn explain_statement_to_plan(&self, explain: ExplainStatement) -> Result { - let plan = - self.select_statement_to_plan(&self.rewrite_select_statement(*explain.select)?)?; + let plan = self.select_statement_to_plan( + &Context::new(), + &self.rewrite_select_statement(*explain.select)?, + )?; let plan = Arc::new(plan); let schema = LogicalPlan::explain_schema(); let schema = schema.to_dfschema_ref()?; @@ -164,200 +230,134 @@ impl<'a> InfluxQLToLogicalPlan<'a> { } /// Create a [`LogicalPlan`] from the specified InfluxQL `SELECT` statement. - fn select_statement_to_plan(&self, select: &SelectStatement) -> Result { + fn select_statement_to_plan( + &self, + ctx: &Context<'_>, + select: &SelectStatement, + ) -> Result { let mut plans = self.plan_from_tables(&select.from)?; - // Aggregate functions are currently not supported. 
- // - // See: https://github.com/influxdata/influxdb_iox/issues/6919 - if has_aggregate_exprs(&select.fields) { - return Err(DataFusionError::NotImplemented( - "aggregate functions".to_owned(), - )); - } - - let mut meta = InfluxQlMetadata { - measurement_column_index: MEASUREMENT_COLUMN_INDEX, - tag_key_columns: Vec::new(), - }; + let ctx = ctx + .with_timezone(select.timezone) + .with_group_by_fill(select) + .with_is_aggregate( + has_aggregate_exprs(&select.fields) + || (select.group_by.is_some() + && select.group_by.as_ref().unwrap().time_dimension().is_some()), + ); // The `time` column is always present in the result set - let mut fields = if !has_time_column(&select.fields) { + let mut fields = if find_time_column_index(&select.fields).is_none() { vec![Field { expr: IQLExpr::VarRef { name: "time".into(), data_type: Some(VarRefDataType::Timestamp), }, - alias: None, + alias: Some("time".into()), }] } else { vec![] }; - let (group_by_tag_set, projection_tag_set) = if let Some(group_by) = &select.group_by { - let mut tag_columns = find_tag_columns::>(&select.fields); + // group_by_tag_set : a list of tag columns specified in the GROUP BY clause + // projection_tag_set : a list of tag columns specified exclusively in the SELECT projection + // is_projected : a list of booleans indicating whether matching elements in the + // group_by_tag_set are also projected in the query + let (group_by_tag_set, projection_tag_set, is_projected) = + if let Some(group_by) = &select.group_by { + let mut tag_columns = + find_tag_and_unknown_columns(&select.fields).collect::>(); - // Contains the list of tag keys specified in the `GROUP BY` clause - let (tag_set, is_projected): (Vec<_>, Vec<_>) = group_by - .iter() - .map(|dimension| match dimension { - Dimension::Tag(t) => { - Ok((t.deref().as_str(), tag_columns.contains(t.deref().as_str()))) - } - // TODO(sgc): https://github.com/influxdata/influxdb_iox/issues/6915 - Dimension::Time { .. } => { - Err(DataFusionError::NotImplemented("GROUP BY time".to_owned())) - } - // Inconsistent state, as these variants should have been expanded by `rewrite_select_statement` - Dimension::Regex(_) | Dimension::Wildcard => Err(DataFusionError::Internal( - "unexpected regular expression or wildcard found in GROUP BY".into(), - )), - }) - .collect::>>()? - .into_iter() - // We sort the tag set, to ensure correct ordering of the results. The tag columns - // referenced in the `tag_set` variable are added to the sort operator in - // lexicographically ascending order. - .sorted_by(|a, b| a.0.cmp(b.0)) - .unzip(); + // Find the list of tag keys specified in the `GROUP BY` clause, and + // whether any of the tag keys are also projected in the SELECT list. + let (tag_set, is_projected): (Vec<_>, Vec<_>) = group_by + .tags() + .map(|t| t.deref().as_str()) + .map(|s| (s, tag_columns.contains(s))) + // We sort the tag set, to ensure correct ordering of the results. The tag columns + // referenced in the `tag_set` variable are added to the sort operator in + // lexicographically ascending order. + .sorted_by(|a, b| a.0.cmp(b.0)) + .unzip(); - // Tags specified in the `GROUP BY` clause that are not already added to the - // projection must be projected, so they key be used in the group key. - // - // At the end of the loop, the `tag_columns` set will contain the tag columns that - // exist in the projection and not in the `GROUP BY`. 
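// Worked example (illustrative, not part of the original change; values taken from the
// planner tests later in this diff): for
//   SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY cpu, device
// the planner derives group_by_tag_set = ["cpu", "device"] (sorted lexicographically),
// projection_tag_set = [] and is_projected = [false, false]; adding `cpu` to the projection,
//   SELECT usage_idle, bytes_free, cpu FROM cpu, disk GROUP BY cpu, device
// flips is_projected to [true, false], which is what
// assert_tag_keys!(md, ("cpu", 5, true), ("device", 2, false)) verifies.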
- for col in &tag_set { - if tag_columns.remove(*col) { - continue; - } + // Tags specified in the `GROUP BY` clause that are not already added to the + // projection must be projected, so they can be used in the group key. + // + // At the end of the loop, the `tag_columns` set will contain the tag columns that + // exist in the projection and not in the `GROUP BY`. + fields.extend( + tag_set + .iter() + .filter_map(|col| match tag_columns.remove(*col) { + true => None, + false => Some(Field { + expr: IQLExpr::VarRef { + name: (*col).into(), + data_type: Some(VarRefDataType::Tag), + }, + alias: Some((*col).into()), + }), + }), + ); - fields.push(Field { - expr: IQLExpr::VarRef { - name: (*col).into(), - data_type: Some(VarRefDataType::Tag), - }, - alias: Some((*col).into()), - }); - } - - // Add the remaining columns to be projected - fields.extend(select.fields.iter().cloned()); - - /// There is always a [INFLUXQL_MEASUREMENT_COLUMN_NAME] column projected in the LogicalPlan, - /// therefore the start index is 1 for determining the offsets of the - /// tag key columns in the column projection list. - const START_INDEX: usize = 1; - - // Create a map of tag key columns to their respective index in the projection - let index_map = fields - .iter() - .enumerate() - .filter_map(|(index, f)| match &f.expr { - IQLExpr::VarRef { - name, - data_type: Some(VarRefDataType::Tag), - } => Some((name.deref().as_str(), index + START_INDEX)), - _ => None, - }) - .collect::>(); - - // tag_set was previously sorted, so tag_key_columns will be in the correct order - meta.tag_key_columns = tag_set - .iter() - .zip(is_projected) - .map(|(tag_key, is_projected)| TagKeyColumn { - tag_key: (*tag_key).to_owned(), - column_index: *index_map.get(*tag_key).unwrap() as u32, + ( + tag_set, + tag_columns.into_iter().sorted().collect::>(), is_projected, - }) - .collect(); + ) + } else { + let tag_columns = find_tag_and_unknown_columns(&select.fields) + .sorted() + .collect::>(); + (vec![], tag_columns, vec![]) + }; - ( - tag_set, - tag_columns.into_iter().sorted().collect::>(), - ) - } else { - let mut tag_columns = find_tag_columns::>(&select.fields); - tag_columns.sort(); - // Add the remaining columns to be projected - fields.extend(select.fields.iter().cloned()); - (vec![], tag_columns) - }; + fields.extend(select.fields.iter().cloned()); - let Some(plan) = plans.pop_front() else { return LogicalPlanBuilder::empty(false).build(); }; - let plan = self.project_select(plan, select, &fields)?; - - // If there are multiple measurements, we need to sort by the measurement column - // NOTE: Ideally DataFusion would maintain the order of the UNION ALL, which would eliminate - // the need to sort by measurement. - // See: https://github.com/influxdata/influxdb_iox/issues/7062 - let mut series_sort = if !plans.is_empty() { - vec![Expr::sort( - INFLUXQL_MEASUREMENT_COLUMN_NAME.as_expr(), - true, - false, - )] - } else { - vec![] + // Build the first non-empty plan + let plan = { + loop { + match plans.pop_front() { + Some((plan, proj)) => match self.project_select( + &ctx, + plan, + proj, + select, + &fields, + &group_by_tag_set, + )? 
{ + LogicalPlan::EmptyRelation(_) => continue, + plan => break plan, + }, + None => return LogicalPlanBuilder::empty(false).build(), + } + } }; // UNION the remaining plans - let plan = plans.into_iter().try_fold(plan, |prev, next| { - let next = self.project_select(next, select, &fields)?; - LogicalPlanBuilder::from(prev).union(next)?.build() + let plan = plans.into_iter().try_fold(plan, |prev, (next, proj)| { + let next = self.project_select(&ctx, next, proj, select, &fields, &group_by_tag_set)?; + if let LogicalPlan::EmptyRelation(_) = next { + // No sense union-ing an empty plan, so drop it + Ok(prev) + } else { + LogicalPlanBuilder::from(prev).union(next)?.build() + } })?; - let plan = plan_with_metadata(plan, &meta)?; + let plan = plan_with_metadata( + plan, + &InfluxQlMetadata { + measurement_column_index: MEASUREMENT_COLUMN_INDEX, + tag_key_columns: make_tag_key_column_meta( + &fields, + &group_by_tag_set, + &is_projected, + ), + }, + )?; - // Construct the sort logical operator - // - // The ordering of the results is as follows: - // - // iox::measurement, [group by tag 0, .., group by tag n], time, [projection tag 0, .., projection tag n] - // - // NOTE: - // - // Sort expressions referring to tag keys are always specified in lexicographically ascending order. - let plan = { - if !group_by_tag_set.is_empty() { - // Adding `LIMIT` or `OFFSET` with a `GROUP BY tag, ...` clause is not supported - // - // See: https://github.com/influxdata/influxdb_iox/issues/6920 - if !group_by_tag_set.is_empty() - && (select.offset.is_some() || select.limit.is_some()) - { - return Err(DataFusionError::NotImplemented( - "GROUP BY combined with LIMIT or OFFSET clause".to_owned(), - )); - } - - series_sort.extend( - group_by_tag_set - .into_iter() - .map(|f| Expr::sort(f.as_expr(), true, false)), - ); - }; - - series_sort.push(Expr::sort( - "time".as_expr(), - match select.order_by { - // Default behaviour is to sort by time in ascending order if there is no ORDER BY - None | Some(OrderByClause::Ascending) => true, - Some(OrderByClause::Descending) => false, - }, - false, - )); - - if !projection_tag_set.is_empty() { - series_sort.extend( - projection_tag_set - .into_iter() - .map(|f| Expr::sort(f.as_expr(), true, false)), - ); - } - - LogicalPlanBuilder::from(plan).sort(series_sort)?.build() - }?; + let plan = plan_with_sort(plan, select, &group_by_tag_set, &projection_tag_set)?; let plan = self.limit(plan, select.offset, select.limit)?; @@ -368,28 +368,154 @@ impl<'a> InfluxQLToLogicalPlan<'a> { fn project_select( &self, - plan: LogicalPlan, + ctx: &Context<'_>, + input: LogicalPlan, + proj: Vec, select: &SelectStatement, fields: &[Field], + group_by_tag_set: &[&str], ) -> Result { - let (proj, plan) = match plan { - LogicalPlan::Projection(Projection { expr, input, .. }) => { - (expr, input.deref().clone()) - } - // TODO: Review when we support subqueries, as this shouldn't be the case - _ => (vec![], plan), - }; + let schemas = Schemas::new(input.schema())?; - let schemas = Schemas::new(plan.schema())?; + // To be consistent with InfluxQL, exclude measurements + // when there are no matching fields. + if !fields.iter().any(|f| { + // Walk the expression tree for the field + // looking for a reference to one column that + // is a field + walk_expr(&f.expr, &mut |e| match e { + IQLExpr::VarRef { name, .. 
} => { + match schemas.iox_schema.field_by_name(name.deref().as_str()) { + Some((InfluxColumnType::Field(_), _)) => ControlFlow::Break(()), + _ => ControlFlow::Continue(()), + } + } + _ => ControlFlow::Continue(()), + }) + .is_break() + }) { + return LogicalPlanBuilder::empty(false).build(); + } - let tz = select.timezone.as_deref().cloned(); - let plan = self.plan_where_clause(&select.condition, plan, &schemas, tz)?; + let plan = self.plan_where_clause(ctx, &select.condition, input, &schemas)?; - // Process and validate the field expressions in the SELECT projection list - let select_exprs = self.field_list_to_exprs(&plan, fields, &schemas)?; + // Transform InfluxQL AST field expressions to a list of DataFusion expressions. + let select_exprs = self.field_list_to_exprs(ctx, &plan, fields, &schemas)?; + + let (plan, select_exprs_post_aggr) = self.select_aggregate( + plan, + fields, + select_exprs, + select.group_by.as_ref(), + group_by_tag_set, + &schemas, + )?; // Wrap the plan in a `LogicalPlan::Projection` from the select expressions - project(plan, proj.into_iter().chain(select_exprs.into_iter())) + project( + plan, + // proj includes the `iox::measurement` column + proj.into_iter().chain(select_exprs_post_aggr.into_iter()), + ) + } + + fn select_aggregate( + &self, + input: LogicalPlan, + fields: &[Field], + select_exprs: Vec, + group_by: Option<&GroupByClause>, + group_by_tag_set: &[&str], + schemas: &Schemas, + ) -> Result<(LogicalPlan, Vec)> { + // Find a list of unique aggregate expressions from the projection. + // + // For example, a projection such as: + // + // SELECT SUM(foo), SUM(foo) / COUNT(foo) .. + // + // will produce two aggregate expressions: + // + // [SUM(foo), COUNT(foo)] + let aggr_exprs = find_aggregate_exprs(&select_exprs); + if aggr_exprs.is_empty() { + return Ok((input, select_exprs)); + } + + let aggr_group_by_exprs = if let Some(group_by) = group_by { + let mut group_by_exprs = Vec::new(); + + if group_by.time_dimension().is_some() { + // Include the GROUP BY TIME(..) expression + if let Some(index) = find_time_column_index(fields) { + group_by_exprs.push(select_exprs[index].clone()); + } + } + + // Exclude tags that do not exist in the current table schema. + group_by_exprs.extend(group_by_tag_set.iter().filter_map(|name| { + if schemas + .iox_schema + .field_by_name(name) + .map_or(false, |(dt, _)| dt == InfluxColumnType::Tag) + { + Some(name.as_expr()) + } else { + None + } + })); + + group_by_exprs + } else { + vec![] + }; + + let plan = LogicalPlanBuilder::from(input) + .aggregate(aggr_group_by_exprs.clone(), aggr_exprs.clone())? + .build()?; + + // Combine the aggregate columns and group by expressions, which represents + // the final projection from the aggregate operator. + let aggr_projection_exprs = [aggr_group_by_exprs, aggr_exprs].concat(); + + // Replace any expressions that are not a column with a column referencing + // an output column from the aggregate schema. + let column_exprs_post_aggr = aggr_projection_exprs + .iter() + .map(|expr| expr_as_column_expr(expr, &plan)) + .collect::>>()?; + + // Rewrite the aggregate columns from the projection, so that the expressions + // refer to the columns from the aggregate projection + let select_exprs_post_aggr = select_exprs + .iter() + .map(|expr| rebase_expr(expr, &aggr_projection_exprs, &plan)) + .collect::>>()?; + + // Strip the NULL columns, which are tags that do not exist in the aggregate + // table schema. The NULL columns are projected as scalar values in the final + // projection. 
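// Worked example (illustrative, not part of the original change): in
//   SELECT COUNT(f64), SUM(f64), stddev(f64) FROM m0 GROUP BY tag0, non_existent
// the `non_existent` tag is not in the table schema, so it reaches this point as a
// NULL literal. Stripping it here lets the satisfiability check below pass, while the
// final projection still emits it as an empty column, matching the expected output
// earlier in this diff.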
+ let select_exprs_post_aggr_no_nulls = select_exprs_post_aggr + .iter() + .filter(|expr| match expr { + Expr::Alias(expr, _) => !matches!(**expr, Expr::Literal(ScalarValue::Null)), + _ => true, + }) + .cloned() + .collect::>(); + + // Finally, we ensure that the re-written projection can be resolved + // from the aggregate output columns and that there are no + // column references that are not aggregates. + // + // This will identify issues such as: + // + // SELECT COUNT(field), field FROM foo + // + // where the field without the aggregate is not valid. + check_exprs_satisfy_columns(&column_exprs_post_aggr, &select_exprs_post_aggr_no_nulls)?; + + Ok((plan, select_exprs_post_aggr)) } /// Optionally wrap the input logical plan in a [`LogicalPlan::Limit`] node using the specified @@ -434,13 +560,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> { /// Map the InfluxQL `SELECT` projection list into a list of DataFusion expressions. fn field_list_to_exprs( &self, + ctx: &Context<'_>, plan: &LogicalPlan, fields: &[Field], schemas: &Schemas, ) -> Result> { fields .iter() - .map(|field| self.field_to_df_expr(field, plan, schemas)) + .map(|field| self.field_to_df_expr(ctx, field, plan, schemas)) .collect() } @@ -449,11 +576,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> { /// A [`Field`] is analogous to a column in a SQL `SELECT` projection. fn field_to_df_expr( &self, + ctx: &Context<'_>, field: &Field, plan: &LogicalPlan, schemas: &Schemas, ) -> Result { - let expr = self.expr_to_df_expr(ExprScope::Projection, &field.expr, schemas)?; + let expr = + self.expr_to_df_expr(&ctx.with_scope(ExprScope::Projection), &field.expr, schemas)?; let expr = rewrite_field_expr(expr, schemas)?; normalize_col( if let Some(alias) = &field.alias { @@ -468,29 +597,27 @@ impl<'a> InfluxQLToLogicalPlan<'a> { /// Map an InfluxQL [`ConditionalExpression`] to a DataFusion [`Expr`]. fn conditional_to_df_expr( &self, + ctx: &Context<'_>, iql: &ConditionalExpression, schemas: &Schemas, - tz: Option, ) -> Result { match iql { - ConditionalExpression::Expr(expr) => { - self.expr_to_df_expr(ExprScope::Where, expr, schemas) - } + ConditionalExpression::Expr(expr) => self.expr_to_df_expr(ctx, expr, schemas), ConditionalExpression::Binary { lhs, op, rhs } => { - self.binary_conditional_to_df_expr(lhs, *op, rhs, schemas, tz) + self.binary_conditional_to_df_expr(ctx, lhs, *op, rhs, schemas) } - ConditionalExpression::Grouped(e) => self.conditional_to_df_expr(e, schemas, tz), + ConditionalExpression::Grouped(e) => self.conditional_to_df_expr(ctx, e, schemas), } } /// Map an InfluxQL binary conditional expression to a DataFusion [`Expr`]. 
fn binary_conditional_to_df_expr( &self, + ctx: &Context<'_>, lhs: &ConditionalExpression, op: ConditionalOperator, rhs: &ConditionalExpression, schemas: &Schemas, - tz: Option, ) -> Result { let op = conditional_op_to_operator(op)?; @@ -509,19 +636,19 @@ impl<'a> InfluxQLToLogicalPlan<'a> { { if lhs_time { ( - self.conditional_to_df_expr(lhs, schemas, tz)?, - time_range_to_df_expr(find_expr(rhs)?, tz)?, + self.conditional_to_df_expr(ctx, lhs, schemas)?, + time_range_to_df_expr(find_expr(rhs)?, ctx.tz)?, ) } else { ( - time_range_to_df_expr(find_expr(lhs)?, tz)?, - self.conditional_to_df_expr(rhs, schemas, tz)?, + time_range_to_df_expr(find_expr(lhs)?, ctx.tz)?, + self.conditional_to_df_expr(ctx, rhs, schemas)?, ) } } else { ( - self.conditional_to_df_expr(lhs, schemas, tz)?, - self.conditional_to_df_expr(rhs, schemas, tz)?, + self.conditional_to_df_expr(ctx, lhs, schemas)?, + self.conditional_to_df_expr(ctx, rhs, schemas)?, ) }; @@ -529,7 +656,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> { } /// Map an InfluxQL [`IQLExpr`] to a DataFusion [`Expr`]. - fn expr_to_df_expr(&self, scope: ExprScope, iql: &IQLExpr, schemas: &Schemas) -> Result { + fn expr_to_df_expr(&self, ctx: &Context<'_>, iql: &IQLExpr, schemas: &Schemas) -> Result { let iox_schema = &schemas.iox_schema; match iql { // rewriter is expected to expand wildcard expressions @@ -542,19 +669,50 @@ impl<'a> InfluxQLToLogicalPlan<'a> { } => { let name = normalize_identifier(name); Ok( - // Per the Go implementation, the time column is case-insensitive in the - // `WHERE` clause and disregards any postfix type cast operator. - // - // See: https://github.com/influxdata/influxql/blob/1ba470371ec093d57a726b143fe6ccbacf1b452b/ast.go#L5751-L5753 - if scope == ExprScope::Where && name.eq_ignore_ascii_case("time") { + if ctx.scope == ExprScope::Where && name.eq_ignore_ascii_case("time") { + // Per the Go implementation, the time column is case-insensitive in the + // `WHERE` clause and disregards any postfix type cast operator. + // + // See: https://github.com/influxdata/influxql/blob/1ba470371ec093d57a726b143fe6ccbacf1b452b/ast.go#L5751-L5753 "time".as_expr() + } else if ctx.scope == ExprScope::Projection && name == "time" { + if ctx.is_aggregate { + // In the projection, determine whether the query is projecting the time column + // or binning the time. + if let Some(group_by) = ctx.group_by { + if let Some(dim) = group_by.time_dimension() { + // Not supported until date_bin_gapfill is complete + let fill = ctx.fill.unwrap_or_default(); + if fill != FillClause::None { + return Err(DataFusionError::NotImplemented(format!( + "{fill}" + ))); + } + + let stride = expr_to_df_interval_dt(&dim.interval)?; + let offset = if let Some(offset) = &dim.offset { + duration_expr_to_nanoseconds(offset)? 
+ } else { + 0 + }; + + return Ok(date_bin( + stride, + "time".as_expr(), + lit(ScalarValue::TimestampNanosecond(Some(offset), None)), + )); + } + } + lit_timestamp_nano(0) + } else { + "time".as_expr() + } } else { - match iox_schema.find_index_of(&name) { - Some(idx) => { + match iox_schema.field_by_name(&name) { + Some((col_type, _)) => { let column = name.as_expr(); match opt_dst_type { Some(dst_type) => { - let (col_type, _) = iox_schema.field(idx); let src_type = column_type_to_var_ref_data_type(col_type); if src_type == *dst_type { column @@ -596,7 +754,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> { Literal::Duration(_) => { Err(DataFusionError::NotImplemented("duration literal".into())) } - Literal::Regex(re) => match scope { + Literal::Regex(re) => match ctx.scope { // a regular expression in a projection list is unexpected, // as it should have been expanded by the rewriter. ExprScope::Projection => Err(DataFusionError::Internal( @@ -606,46 +764,111 @@ impl<'a> InfluxQLToLogicalPlan<'a> { }, }, IQLExpr::Distinct(_) => Err(DataFusionError::NotImplemented("DISTINCT".into())), - IQLExpr::Call { name, args } => self.call_to_df_expr(scope, name, args, schemas), + IQLExpr::Call { name, args } => self.call_to_df_expr(ctx, name, args, schemas), IQLExpr::Binary { lhs, op, rhs } => { - self.arithmetic_expr_to_df_expr(scope, lhs, *op, rhs, schemas) + self.arithmetic_expr_to_df_expr(ctx, lhs, *op, rhs, schemas) } - IQLExpr::Nested(e) => self.expr_to_df_expr(scope, e, schemas), + IQLExpr::Nested(e) => self.expr_to_df_expr(ctx, e, schemas), } } /// Map an InfluxQL function call to a DataFusion expression. + /// + /// A full list of supported functions available via the [InfluxQL documentation][docs]. + /// + /// > **Note** + /// > + /// > These are not necessarily implemented, and are tracked by the following + /// > issues: + /// > + /// > * + /// > * + /// > * + /// > * + /// > * + /// + /// [docs]: https://docs.influxdata.com/influxdb/v1.8/query_language/functions/ fn call_to_df_expr( &self, - scope: ExprScope, + ctx: &Context<'_>, name: &str, args: &[IQLExpr], schemas: &Schemas, ) -> Result { if is_scalar_math_function(name) { - self.scalar_math_func_to_df_expr(scope, name, args, schemas) - } else { - match scope { - ExprScope::Projection => Err(DataFusionError::NotImplemented( - "aggregate and selector functions in projection list".into(), - )), - ExprScope::Where => { - if name.eq_ignore_ascii_case("now") { - Err(DataFusionError::NotImplemented("now".into())) - } else { - Err(DataFusionError::External( - format!("invalid function call in condition: {name}").into(), - )) - } + return self.scalar_math_func_to_df_expr(ctx, name, args, schemas); + } + + match ctx.scope { + ExprScope::Where => { + if name.eq_ignore_ascii_case("now") { + Err(DataFusionError::NotImplemented("now".into())) + } else { + Err(DataFusionError::External( + format!("invalid function call in condition: {name}").into(), + )) } } + ExprScope::Projection => self.function_to_df_expr(ctx, name, args, schemas), + } + } + + fn function_to_df_expr( + &self, + ctx: &Context<'_>, + name: &str, + args: &[IQLExpr], + schemas: &Schemas, + ) -> Result { + fn check_arg_count(name: &str, args: &[IQLExpr], count: usize) -> Result<()> { + let got = args.len(); + if got != count { + Err(DataFusionError::Plan(format!( + "invalid number of arguments for {name}: expected {count}, got {got}" + ))) + } else { + Ok(()) + } + } + + match name { + "count" => { + // TODO(sgc): Handle `COUNT DISTINCT` variants + let distinct = false; + + 
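// Illustrative note (not part of the original change): the
// `Expr::Literal(ScalarValue::Null)` arms below (here and in the
// "sum" | "stddev" | "mean" | "median" branch) pass a NULL argument straight through
// rather than wrapping it in an aggregate. This is how
//   SELECT COUNT(f64) as the_count, SUM(non_existent) as foo FROM m0
// in the expected output above yields a NULL `foo` column while `the_count` is 7.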
check_arg_count("count", args, 1)?; + let expr = self.expr_to_df_expr(ctx, &args[0], schemas)?; + match &expr { + Expr::Literal(ScalarValue::Null) => Ok(expr), + _ => Ok(Expr::AggregateFunction(expr::AggregateFunction::new( + AggregateFunction::Count, + vec![expr], + distinct, + None, + ))), + } + } + "sum" | "stddev" | "mean" | "median" => { + check_arg_count(name, args, 1)?; + let expr = self.expr_to_df_expr(ctx, &args[0], schemas)?; + match &expr { + Expr::Literal(ScalarValue::Null) => Ok(expr), + _ => Ok(Expr::AggregateFunction(expr::AggregateFunction::new( + AggregateFunction::from_str(name)?, + vec![expr], + false, + None, + ))), + } + } + _ => Err(DataFusionError::Plan(format!("Invalid function '{name}'"))), } } /// Map the InfluxQL scalar function call to a DataFusion scalar function expression. fn scalar_math_func_to_df_expr( &self, - scope: ExprScope, + ctx: &Context<'_>, name: &str, args: &[IQLExpr], schemas: &Schemas, @@ -653,7 +876,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> { let fun = BuiltinScalarFunction::from_str(name)?; let args = args .iter() - .map(|e| self.expr_to_df_expr(scope, e, schemas)) + .map(|e| self.expr_to_df_expr(ctx, e, schemas)) .collect::>>()?; Ok(Expr::ScalarFunction { fun, args }) } @@ -661,16 +884,16 @@ impl<'a> InfluxQLToLogicalPlan<'a> { /// Map an InfluxQL arithmetic expression to a DataFusion [`Expr`]. fn arithmetic_expr_to_df_expr( &self, - scope: ExprScope, + ctx: &Context<'_>, lhs: &IQLExpr, op: BinaryOperator, rhs: &IQLExpr, schemas: &Schemas, ) -> Result { Ok(binary_expr( - self.expr_to_df_expr(scope, lhs, schemas)?, + self.expr_to_df_expr(ctx, lhs, schemas)?, binary_operator_to_df_operator(op), - self.expr_to_df_expr(scope, rhs, schemas)?, + self.expr_to_df_expr(ctx, rhs, schemas)?, )) } @@ -678,14 +901,18 @@ impl<'a> InfluxQLToLogicalPlan<'a> { /// optional InfluxQL conditional expression. fn plan_where_clause( &self, + ctx: &Context<'_>, condition: &Option, plan: LogicalPlan, schemas: &Schemas, - tz: Option, ) -> Result { match condition { Some(where_clause) => { - let filter_expr = self.conditional_to_df_expr(where_clause, schemas, tz)?; + let filter_expr = self.conditional_to_df_expr( + &ctx.with_scope(ExprScope::Where), + where_clause, + schemas, + )?; let filter_expr = rewrite_conditional_expr(filter_expr, schemas)?; let plan = LogicalPlanBuilder::from(plan) .filter(filter_expr)? @@ -698,10 +925,14 @@ impl<'a> InfluxQLToLogicalPlan<'a> { /// Generate a list of logical plans for each of the tables references in the `FROM` /// clause. - fn plan_from_tables(&self, from: &FromMeasurementClause) -> Result> { - let mut plans = VecDeque::new(); + fn plan_from_tables( + &self, + from: &FromMeasurementClause, + ) -> Result)>> { + // A list of scans and their initial projections + let mut table_projs = VecDeque::new(); for ms in from.iter() { - let Some(plan) = match ms { + let Some(table_proj) = match ms { MeasurementSelection::Name(qn) => match qn.name { MeasurementName::Name(ref ident) => { self.create_table_ref(normalize_identifier(ident)) @@ -715,22 +946,22 @@ impl<'a> InfluxQLToLogicalPlan<'a> { "subquery in FROM clause".into(), )), }? else { continue }; - plans.push_back(plan); + table_projs.push_back(table_proj); } - Ok(plans) + Ok(table_projs) } /// Create a [LogicalPlan] that refers to the specified `table_name`. /// /// Normally, this functions will not return a `None`, as tables have been matched] /// by the [`rewrite_statement`] function. 
- fn create_table_ref(&self, table_name: String) -> Result> { + fn create_table_ref(&self, table_name: String) -> Result)>> { Ok(if let Ok(source) = self.s.get_table_provider(&table_name) { let table_ref = TableReference::bare(table_name.to_string()); - Some(project( + Some(( LogicalPlanBuilder::scan(table_ref, source, None)?.build()?, - iter::once(lit_dict(&table_name).alias(INFLUXQL_MEASUREMENT_COLUMN_NAME)), - )?) + vec![lit_dict(&table_name).alias(INFLUXQL_MEASUREMENT_COLUMN_NAME)], + )) } else { None }) @@ -768,6 +999,7 @@ fn plan_with_metadata(plan: LogicalPlan, metadata: &InfluxQlMetadata) -> Result< u.schema = make_schema(u.schema, metadata)?; LogicalPlan::Union(u) } + LogicalPlan::EmptyRelation(p) => LogicalPlan::EmptyRelation(p), _ => { return Err(DataFusionError::Internal( "unexpected LogicalPlan".to_owned(), @@ -787,22 +1019,16 @@ fn has_aggregate_exprs(fields: &FieldList) -> bool { }) } -/// Find all the tag columns projected in the `SELECT` from the field list. -fn find_tag_columns<'a, T: FromIterator<&'a str>>(fields: &'a FieldList) -> T { - fields - .iter() - .filter_map(|f| { - if let IQLExpr::VarRef { - name, - data_type: Some(VarRefDataType::Tag), - } = &f.expr - { - Some(name.deref().as_str()) - } else { - None - } - }) - .collect() +/// Find all the columns where the resolved data type +/// is a tag or is [`None`], which is unknown. +fn find_tag_and_unknown_columns(fields: &FieldList) -> impl Iterator { + fields.iter().filter_map(|f| match &f.expr { + IQLExpr::VarRef { + name, + data_type: Some(VarRefDataType::Tag) | None, + } => Some(name.deref().as_str()), + _ => None, + }) } /// Perform a series of passes to rewrite `expr` in compliance with InfluxQL behavior @@ -834,25 +1060,18 @@ impl<'a> ExprRewriter for FixRegularExpressions<'a> { right, }) => { if let Expr::Column(ref col) = *left { - if let Some(idx) = self.schemas.iox_schema.find_index_of(&col.name) { - let (col_type, _) = self.schemas.iox_schema.field(idx); - match col_type { - InfluxColumnType::Tag => { - // Regular expressions expect to be compared with a Utf8 - let left = Box::new( - left.cast_to(&DataType::Utf8, &self.schemas.df_schema)?, - ); - Ok(Expr::BinaryExpr(BinaryExpr { left, op, right })) - } - InfluxColumnType::Field(InfluxFieldType::String) => { - Ok(Expr::BinaryExpr(BinaryExpr { left, op, right })) - } - // Any other column type should evaluate to false - _ => Ok(lit(false)), + match self.schemas.iox_schema.field_by_name(&col.name) { + Some((InfluxColumnType::Tag, _)) => { + // Regular expressions expect to be compared with a Utf8 + let left = + Box::new(left.cast_to(&DataType::Utf8, &self.schemas.df_schema)?); + Ok(Expr::BinaryExpr(BinaryExpr { left, op, right })) } - } else { - // If the field does not exist, evaluate to false - Ok(lit(false)) + Some((InfluxColumnType::Field(InfluxFieldType::String), _)) => { + Ok(Expr::BinaryExpr(BinaryExpr { left, op, right })) + } + // Any other column type should evaluate to false + _ => Ok(lit(false)), } } else { // If this is not a simple column expression, evaluate to false, @@ -902,95 +1121,82 @@ fn normalize_identifier(ident: &Identifier) -> String { ident.deref().clone() } -/// Returns true if the field list contains a `time` column. +/// Find the index of the time column in the fields list. /// /// > **Note** /// > /// > To match InfluxQL, the `time` column must not exist as part of a /// > complex expression. 
-fn has_time_column(fields: &[Field]) -> bool { +pub(crate) fn find_time_column_index(fields: &[Field]) -> Option { fields .iter() - .any(|f| matches!(&f.expr, IQLExpr::VarRef { name, .. } if name.deref() == "time")) + .find_position( + |f| matches!(&f.expr, IQLExpr::VarRef { name, .. } if name.deref() == "time"), + ) + .map(|(i, _)| i) } -static SCALAR_MATH_FUNCTIONS: Lazy> = Lazy::new(|| { - HashSet::from([ - "abs", "sin", "cos", "tan", "asin", "acos", "atan", "atan2", "exp", "log", "ln", "log2", - "log10", "sqrt", "pow", "floor", "ceil", "round", - ]) -}); - /// Returns `true` if `name` is a mathematical scalar function /// supported by InfluxQL. fn is_scalar_math_function(name: &str) -> bool { - SCALAR_MATH_FUNCTIONS.contains(name) -} + static FUNCTIONS: Lazy> = Lazy::new(|| { + HashSet::from([ + "abs", "sin", "cos", "tan", "asin", "acos", "atan", "atan2", "exp", "log", "ln", + "log2", "log10", "sqrt", "pow", "floor", "ceil", "round", + ]) + }); -/// A list of valid aggregate and aggregate-like functions supported by InfluxQL. -/// -/// A full list is available via the [InfluxQL documentation][docs]. -/// -/// > **Note** -/// > -/// > These are not necessarily implemented, and are tracked by the following -/// > issues: -/// > -/// > * -/// > * -/// > * -/// > * -/// > * -/// -/// [docs]: https://docs.influxdata.com/influxdb/v1.8/query_language/functions/ -static AGGREGATE_FUNCTIONS: Lazy> = Lazy::new(|| { - HashSet::from([ - // Scalar-like functions - "cumulative_sum", - "derivative", - "difference", - "elapsed", - "moving_average", - "non_negative_derivative", - "non_negative_difference", - // Selector functions - "bottom", - "first", - "last", - "max", - "min", - "percentile", - "sample", - "top", - // Aggregate functions - "count", - "count", - "integral", - "mean", - "median", - "mode", - "spread", - "stddev", - "sum", - // Prediction functions - "holt_winters", - "holt_winters_with_fit", - // Technical analysis functions - "chande_momentum_oscillator", - "exponential_moving_average", - "double_exponential_moving_average", - "kaufmans_efficiency_ratio", - "kaufmans_adaptive_moving_average", - "triple_exponential_moving_average", - "triple_exponential_derivative", - "relative_strength_index", - ]) -}); + FUNCTIONS.contains(name) +} /// Returns `true` if `name` is an aggregate or aggregate function /// supported by InfluxQL. 
fn is_aggregate_function(name: &str) -> bool { - AGGREGATE_FUNCTIONS.contains(name) + static FUNCTIONS: Lazy> = Lazy::new(|| { + HashSet::from([ + // Scalar-like functions + "cumulative_sum", + "derivative", + "difference", + "elapsed", + "moving_average", + "non_negative_derivative", + "non_negative_difference", + // Selector functions + "bottom", + "first", + "last", + "max", + "min", + "percentile", + "sample", + "top", + // Aggregate functions + "count", + "count", + "integral", + "mean", + "median", + "mode", + "spread", + "stddev", + "sum", + // Prediction functions + "holt_winters", + "holt_winters_with_fit", + // Technical analysis functions + "chande_momentum_oscillator", + "exponential_moving_average", + "double_exponential_moving_average", + "kaufmans_efficiency_ratio", + "kaufmans_adaptive_moving_average", + "triple_exponential_moving_average", + "triple_exponential_derivative", + "relative_strength_index", + ]) + }); + + FUNCTIONS.contains(name) } /// Returns true if the conditional expression is a single node that @@ -1125,45 +1331,46 @@ mod test { } // validate metadata is empty when there is no group by - let md = metadata("SELECT free FROM disk").unwrap(); + let md = metadata("SELECT bytes_free FROM disk").unwrap(); assert_eq!(md.measurement_column_index, 0); assert!(md.tag_key_columns.is_empty()); - let md = metadata("SELECT free FROM disk, cpu").unwrap(); + let md = metadata("SELECT bytes_free FROM disk, cpu").unwrap(); assert_eq!(md.measurement_column_index, 0); assert!(md.tag_key_columns.is_empty()); - let md = metadata("SELECT free FROM disk GROUP BY device").unwrap(); + let md = metadata("SELECT bytes_free FROM disk GROUP BY device").unwrap(); assert_eq!(md.measurement_column_index, 0); assert_tag_keys!(md, ("device", 2, false)); // validate tag in projection is not included in metadata - let md = - metadata("SELECT cpu, usage_idle, free FROM cpu, disk GROUP BY device").unwrap(); + let md = metadata("SELECT cpu, usage_idle, bytes_free FROM cpu, disk GROUP BY device") + .unwrap(); assert_eq!(md.measurement_column_index, 0); assert_tag_keys!(md, ("device", 2, false)); // validate multiple tags from different measurements - let md = - metadata("SELECT usage_idle, free FROM cpu, disk GROUP BY cpu, device").unwrap(); + let md = metadata("SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY cpu, device") + .unwrap(); assert_eq!(md.measurement_column_index, 0); assert_tag_keys!(md, ("cpu", 2, false), ("device", 3, false)); // validate multiple tags from different measurements, and key order is maintained - let md = - metadata("SELECT usage_idle, free FROM cpu, disk GROUP BY device, cpu").unwrap(); + let md = metadata("SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY device, cpu") + .unwrap(); assert_eq!(md.measurement_column_index, 0); assert_tag_keys!(md, ("cpu", 2, false), ("device", 3, false)); // validate that with cpu tag explicitly listed in project, tag-key order is maintained and column index // is valid - let md = metadata("SELECT usage_idle, free, cpu FROM cpu, disk GROUP BY cpu, device") - .unwrap(); + let md = + metadata("SELECT usage_idle, bytes_free, cpu FROM cpu, disk GROUP BY cpu, device") + .unwrap(); assert_eq!(md.measurement_column_index, 0); assert_tag_keys!(md, ("cpu", 5, true), ("device", 2, false)); // validate region tag, shared by both measurements, is still correctly handled let md = metadata( - "SELECT region, usage_idle, free, cpu FROM cpu, disk GROUP BY region, cpu, device", + "SELECT region, usage_idle, bytes_free, cpu FROM cpu, disk 
GROUP BY region, cpu, device", ) .unwrap(); assert_eq!(md.measurement_column_index, 0); @@ -1179,29 +1386,29 @@ mod test { #[test] fn test_from_zero_to_many() { assert_snapshot!(plan("SELECT host, cpu, device, usage_idle, bytes_used FROM cpu, disk"), @r###" - Sort: iox::measurement ASC NULLS LAST, cpu.time ASC NULLS LAST, cpu ASC NULLS LAST, device ASC NULLS LAST, host ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_used:Int64;N] + Sort: iox::measurement ASC NULLS LAST, time ASC NULLS LAST, cpu ASC NULLS LAST, device ASC NULLS LAST, host ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_used:Int64;N] Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_used:Int64;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, cpu.host AS host, CAST(cpu.cpu AS Utf8) AS cpu, CAST(NULL AS Utf8) AS device, cpu.usage_idle AS usage_idle, CAST(NULL AS Int64) AS bytes_used [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_used:Int64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.host AS host, CAST(cpu.cpu AS Utf8) AS cpu, CAST(NULL AS Utf8) AS device, cpu.usage_idle AS usage_idle, CAST(NULL AS Int64) AS bytes_used [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_used:Int64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] - Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time, disk.host AS host, CAST(NULL AS Utf8) AS cpu, CAST(disk.device AS Utf8) AS device, CAST(NULL AS Float64) AS usage_idle, disk.bytes_used AS bytes_used [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_used:Int64;N] + Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time AS time, disk.host AS host, CAST(NULL AS Utf8) AS cpu, CAST(disk.device AS Utf8) AS device, CAST(NULL AS Float64) AS usage_idle, disk.bytes_used AS bytes_used [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_used:Int64;N] TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] "###); // nonexistent assert_snapshot!(plan("SELECT host, usage_idle FROM non_existent"), @"EmptyRelation []"); assert_snapshot!(plan("SELECT host, usage_idle FROM cpu, non_existent"), @r###" - Sort: cpu.time ASC NULLS LAST, host ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, cpu.host AS host, 
cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Sort: time ASC NULLS LAST, host ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.host AS host, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); // multiple of same measurement assert_snapshot!(plan("SELECT host, usage_idle FROM cpu, cpu"), @r###" - Sort: iox::measurement ASC NULLS LAST, cpu.time ASC NULLS LAST, host ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Sort: iox::measurement ASC NULLS LAST, time ASC NULLS LAST, host ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, cpu.host AS host, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.host AS host, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, cpu.host AS host, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.host AS host, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), host:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); } @@ -1210,16 +1417,16 @@ mod test { fn test_time_range_in_where() { assert_snapshot!( plan("SELECT foo, f64_field FROM data where time > now() - 10s"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, 
f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] Filter: data.time > now() - IntervalMonthDayNano("10000000000") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "### ); assert_snapshot!( plan("SELECT foo, f64_field FROM data where time > '2004-04-09T02:33:45Z'"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] Filter: data.time > TimestampNanosecond(1081478025000000000, None) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "### @@ -1231,8 +1438,8 @@ mod test { // time on the right-hand side assert_snapshot!( plan("SELECT foo, f64_field FROM data where now() - 10s < time"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, 
Utf8);N, f64_field:Float64;N] Filter: now() - IntervalMonthDayNano("10000000000") < data.time [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "### @@ -1241,16 +1448,16 @@ mod test { // Regular expression equality tests assert_snapshot!(plan("SELECT foo, f64_field FROM data where foo =~ /f/"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] Filter: CAST(data.foo AS Utf8) ~ Utf8("f") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); // regular expression for a numeric field is rewritten to `false` assert_snapshot!(plan("SELECT foo, f64_field FROM data where f64_field =~ /f/"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, 
foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); @@ -1258,8 +1465,8 @@ mod test { // regular expression for a non-existent field is rewritten to `false` assert_snapshot!( plan("SELECT foo, f64_field FROM data where non_existent =~ /f/"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "### @@ -1268,16 +1475,16 @@ mod test { // Regular expression inequality tests assert_snapshot!(plan("SELECT foo, f64_field FROM data where foo !~ /f/"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] Filter: CAST(data.foo AS Utf8) !~ Utf8("f") [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); // regular expression for a numeric field is rewritten to `false` assert_snapshot!(plan("SELECT foo, f64_field FROM data where f64_field !~ /f/"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), 
time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); @@ -1285,8 +1492,8 @@ mod test { // regular expression for a non-existent field is rewritten to `false` assert_snapshot!( plan("SELECT foo, f64_field FROM data where non_existent !~ /f/"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] Filter: Boolean(false) [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "### @@ -1297,49 +1504,41 @@ mod test { fn test_column_matching_rules() { // Cast between numeric types assert_snapshot!(plan("SELECT f64_field::integer FROM data"), @r###" - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, CAST(data.f64_field AS Int64) AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Int64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Int64;N] + Projection: Dictionary(Int32, 
Utf8("data")) AS iox::measurement, data.time AS time, CAST(data.f64_field AS Int64) AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT i64_field::float FROM data"), @r###" - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, CAST(data.i64_field AS Float64) AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Float64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, CAST(data.i64_field AS Float64) AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); // use field selector assert_snapshot!(plan("SELECT bool_field::field FROM data"), @r###" - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Boolean;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.bool_field AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Boolean;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Boolean;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.bool_field AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Boolean;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); - // invalid column reverence - assert_snapshot!(plan("SELECT not_exists::tag FROM data"), @r###" - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), not_exists:Null;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, NULL AS not_exists [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), not_exists:Null;N] - TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] - "###); - assert_snapshot!(plan("SELECT not_exists::field FROM data"), @r###" - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), not_exists:Null;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, NULL AS not_exists [iox::measurement:Dictionary(Int32, Utf8), 
time:Timestamp(Nanosecond, None), not_exists:Null;N] - TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] - "###); + // invalid column reference + assert_snapshot!(plan("SELECT not_exists::tag FROM data"), @"EmptyRelation []"); + assert_snapshot!(plan("SELECT not_exists::field FROM data"), @"EmptyRelation []"); // Returns NULL for invalid casts assert_snapshot!(plan("SELECT f64_field::string FROM data"), @r###" - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, NULL AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, NULL AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT f64_field::boolean FROM data"), @r###" - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, NULL AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, NULL AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT str_field::boolean FROM data"), @r###" - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, NULL AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, NULL AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); } @@ -1348,26 +1547,26 @@ mod test { fn test_explain() { assert_snapshot!(plan("EXPLAIN SELECT foo, 
f64_field FROM data"), @r###" Explain [plan_type:Utf8, plan:Utf8] - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("EXPLAIN VERBOSE SELECT foo, f64_field FROM data"), @r###" Explain [plan_type:Utf8, plan:Utf8] - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("EXPLAIN ANALYZE SELECT foo, f64_field FROM data"), @r###" Analyze [plan_type:Utf8, plan:Utf8] - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, 
foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("EXPLAIN ANALYZE VERBOSE SELECT foo, f64_field FROM data"), @r###" Analyze [plan_type:Utf8, plan:Utf8] - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); } @@ -1376,162 +1575,259 @@ mod test { fn test_select_cast_postfix_operator() { // Float casting assert_snapshot!(plan("SELECT f64_field::float FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, all_types.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, all_types.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT f64_field::unsigned FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:UInt64;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, CAST(all_types.f64_field AS UInt64) AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:UInt64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:UInt64;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, CAST(all_types.f64_field AS UInt64) AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:UInt64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); 
assert_snapshot!(plan("SELECT f64_field::integer FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Int64;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, CAST(all_types.f64_field AS Int64) AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Int64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Int64;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, CAST(all_types.f64_field AS Int64) AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Int64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT f64_field::string FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT f64_field::boolean FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); // Integer casting assert_snapshot!(plan("SELECT i64_field::float FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, CAST(all_types.i64_field AS Float64) AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Float64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Float64;N] + 
Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, CAST(all_types.i64_field AS Float64) AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Float64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT i64_field::unsigned FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:UInt64;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, CAST(all_types.i64_field AS UInt64) AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:UInt64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:UInt64;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, CAST(all_types.i64_field AS UInt64) AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:UInt64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT i64_field::integer FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Int64;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, all_types.i64_field AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Int64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Int64;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, all_types.i64_field AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Int64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT i64_field::string FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT i64_field::boolean FROM all_types"), @r###" - 
Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), i64_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); // Unsigned casting assert_snapshot!(plan("SELECT u64_field::float FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, CAST(all_types.u64_field AS Float64) AS u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Float64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, CAST(all_types.u64_field AS Float64) AS u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Float64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT u64_field::unsigned FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:UInt64;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, all_types.u64_field AS u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:UInt64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:UInt64;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, all_types.u64_field AS u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:UInt64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT u64_field::integer FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Int64;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, CAST(all_types.u64_field AS Int64) AS u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Int64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Int64;N] + Projection: Dictionary(Int32, 
Utf8("all_types")) AS iox::measurement, all_types.time AS time, CAST(all_types.u64_field AS Int64) AS u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Int64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT u64_field::string FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT u64_field::boolean FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), u64_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); // String casting assert_snapshot!(plan("SELECT str_field::float FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT str_field::unsigned FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), 
str_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT str_field::integer FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT str_field::string FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Utf8;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, all_types.str_field AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Utf8;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Utf8;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, all_types.str_field AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Utf8;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT str_field::boolean FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), str_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, 
i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); // Boolean casting assert_snapshot!(plan("SELECT bool_field::float FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT bool_field::unsigned FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT bool_field::integer FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT bool_field::string FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] + Sort: time ASC NULLS LAST 
[iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT bool_field::boolean FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Boolean;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, all_types.bool_field AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Boolean;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Boolean;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, all_types.bool_field AS bool_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), bool_field:Boolean;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); // Validate various projection expressions with casts assert_snapshot!(plan("SELECT f64_field::integer + i64_field + u64_field::integer FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field_i64_field_u64_field:Int64;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, CAST(all_types.f64_field AS Int64) + all_types.i64_field + CAST(all_types.u64_field AS Int64) AS f64_field_i64_field_u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field_i64_field_u64_field:Int64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field_i64_field_u64_field:Int64;N] + Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, CAST(all_types.f64_field AS Int64) + all_types.i64_field + CAST(all_types.u64_field AS Int64) AS f64_field_i64_field_u64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field_i64_field_u64_field:Int64;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); assert_snapshot!(plan("SELECT f64_field::integer + i64_field + str_field::integer FROM all_types"), @r###" - Sort: all_types.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field_i64_field_str_field:Null;N] - Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time, NULL AS f64_field_i64_field_str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field_i64_field_str_field:Null;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field_i64_field_str_field:Null;N] + 
Projection: Dictionary(Int32, Utf8("all_types")) AS iox::measurement, all_types.time AS time, NULL AS f64_field_i64_field_str_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field_i64_field_str_field:Null;N] TableScan: all_types [bool_field:Boolean;N, f64_field:Float64;N, i64_field:Int64;N, str_field:Utf8;N, tag0:Dictionary(Int32, Utf8);N, tag1:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), u64_field:UInt64;N] "###); } } - /// Tests to validate InfluxQL `SELECT` statements that utilise aggregate functions. + /// Tests to validate InfluxQL `SELECT` statements that project aggregate functions, such as `COUNT` or `SUM`. mod select_aggregate { use super::*; #[test] - fn test_aggregates_are_not_yet_supported() { - assert_snapshot!(plan("SELECT count(f64_field) FROM data"), @"This feature is not implemented: aggregate functions"); + fn test_single_measurement() { + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data"), @r###" + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] + Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] + TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] + "###); + + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY non_existent"), @r###" + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, NULL AS non_existent, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), non_existent:Null;N, count:Int64;N] + Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] + TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] + "###); + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo"), @r###" + Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, count:Int64;N] + Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] + TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] + "###); + + // The `COUNT(f64_field)` aggregate is only projected once in the Aggregate
and reused in the projection + assert_snapshot!(plan("SELECT COUNT(f64_field), COUNT(f64_field) + COUNT(f64_field), COUNT(f64_field) * 3 FROM data"), @r###" + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_f64_field_count_f64_field:Int64;N, count_f64_field:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count, COUNT(data.f64_field) + COUNT(data.f64_field) AS count_f64_field_count_f64_field, COUNT(data.f64_field) * Int64(3) AS count_f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N, count_f64_field_count_f64_field:Int64;N, count_f64_field:Int64;N] + Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] + TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] + "###); + + // non-existent tags are excluded from the Aggregate groupBy and Sort operators + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY foo, non_existent"), @r###" + Sort: foo ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, data.foo AS foo, NULL AS non_existent, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, non_existent:Null;N, count:Int64;N] + Aggregate: groupBy=[[data.foo]], aggr=[[COUNT(data.f64_field)]] [foo:Dictionary(Int32, Utf8);N, COUNT(data.f64_field):Int64;N] + TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] + "###); + + // Fallible + + // Cannot combine aggregate and non-aggregate columns in the projection + assert_snapshot!(plan("SELECT COUNT(f64_field), f64_field FROM data"), @"Error during planning: mixing aggregate and non-aggregate columns is not supported"); + assert_snapshot!(plan("SELECT COUNT(f64_field) + f64_field FROM data"), @"Error during planning: mixing aggregate and non-aggregate columns is not supported"); + } + + #[test] + fn test_single_measurement_group_by_time() { + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(none)"), @r###" + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Aggregate: groupBy=[[datebin(IntervalDayTime("10000"), data.time, TimestampNanosecond(0, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with 
space:Float64;N] + "###); + + // supports offset parameter + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s, 5s) FILL(none)"), @r###" + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None);N, count:Int64;N] + Aggregate: groupBy=[[datebin(IntervalDayTime("10000"), data.time, TimestampNanosecond(5000000000, None)) AS time]], aggr=[[COUNT(data.f64_field)]] [time:Timestamp(Nanosecond, None);N, COUNT(data.f64_field):Int64;N] + TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] + "###); + } + + /// These tests validate the planner returns an error when using features that + /// are not implemented. + mod not_implemented { + use super::*; + + #[test] + fn test_with_limit_or_offset() { + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data LIMIT 1"), @r###" + Limit: skip=0, fetch=1 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] + Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] + TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] + "###); + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data OFFSET 1"), @r###" + Limit: skip=1, fetch=None [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, TimestampNanosecond(0, None) AS time, COUNT(data.f64_field) AS count [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), count:Int64;N] + Aggregate: groupBy=[[]], aggr=[[COUNT(data.f64_field)]] [COUNT(data.f64_field):Int64;N] + TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] + "###); + } + + #[test] + fn test_group_by_time_precision() { + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10u) FILL(none)"), @"This feature is not implemented: interval limited to a precision of milliseconds. 
See https://github.com/influxdata/influxdb_iox/issues/7204"); + } + + #[test] + fn test_single_measurement_group_by_time_gapfill() { + // Default is FILL(null) + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s)"), @"This feature is not implemented: FILL(NULL)"); + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(null)"), @"This feature is not implemented: FILL(NULL)"); + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(linear)"), @"This feature is not implemented: FILL(LINEAR)"); + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(previous)"), @"This feature is not implemented: FILL(PREVIOUS)"); + assert_snapshot!(plan("SELECT COUNT(f64_field) FROM data GROUP BY TIME(10s) FILL(0)"), @"This feature is not implemented: FILL(0)"); + } } } @@ -1544,8 +1840,8 @@ mod test { #[test] fn test_single_measurement() { assert_snapshot!(plan("SELECT f64_field FROM data"), @r###" - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT time, f64_field FROM data"), @r###" @@ -1560,28 +1856,28 @@ mod test { TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT foo, f64_field FROM data"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT foo, f64_field, i64_field FROM 
data"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N, i64_field:Int64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field AS f64_field, data.i64_field AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N, i64_field:Int64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N, i64_field:Int64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field AS f64_field, data.i64_field AS i64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N, i64_field:Int64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT /^f/ FROM data"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.f64_field AS f64_field, data.foo AS foo [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.f64_field AS f64_field, data.foo AS foo [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT * FROM data"), @r###" - Sort: data.time ASC NULLS LAST, bar ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, with space:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.TIME AS TIME, data.bar AS bar, data.bool_field AS bool_field, data.f64_field AS f64_field, data.foo AS foo, data.i64_field AS i64_field, data.mixedCase AS mixedCase, data.str_field AS str_field, data.with space AS with space [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, with space:Float64;N] + Sort: time ASC NULLS LAST, bar ASC NULLS LAST, foo ASC NULLS LAST 
[iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, with space:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.TIME AS TIME, data.bar AS bar, data.bool_field AS bool_field, data.f64_field AS f64_field, data.foo AS foo, data.i64_field AS i64_field, data.mixedCase AS mixedCase, data.str_field AS str_field, data.with space AS with space [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, with space:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT TIME FROM data"), @r###" - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), TIME:Boolean;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.TIME AS TIME [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), TIME:Boolean;N] + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), TIME:Boolean;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.TIME AS TIME [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), TIME:Boolean;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); // TIME is a field } @@ -1590,23 +1886,23 @@ mod test { #[test] fn test_simple_arithmetic_in_projection() { assert_snapshot!(plan("SELECT foo, f64_field + f64_field FROM data"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field_f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field + data.f64_field AS f64_field_f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field_f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field_f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field + data.f64_field AS f64_field_f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field_f64_field:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT foo, sin(f64_field) FROM data"), 
@r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, sin:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, sin(data.f64_field) AS sin [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, sin:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, sin:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, sin(data.f64_field) AS sin [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, sin:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT foo, atan2(f64_field, 2) FROM data"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, atan2:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, atan2(data.f64_field, Int64(2)) AS atan2 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, atan2:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, atan2:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, atan2(data.f64_field, Int64(2)) AS atan2 [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, atan2:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT foo, f64_field + 0.5 FROM data"), @r###" - Sort: data.time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time, data.foo AS foo, data.f64_field + Float64(0.5) AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS time, data.foo AS foo, data.f64_field + Float64(0.5) AS f64_field [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] 
"###); } @@ -1615,36 +1911,57 @@ mod test { fn test_select_single_measurement_group_by() { // Sort should be cpu, time assert_snapshot!(plan("SELECT usage_idle FROM cpu GROUP BY cpu"), @r###" - Sort: cpu ASC NULLS LAST, cpu.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, cpu.cpu AS cpu, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Sort: cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.cpu AS cpu, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); // Sort should be cpu, time assert_snapshot!(plan("SELECT cpu, usage_idle FROM cpu GROUP BY cpu"), @r###" - Sort: cpu ASC NULLS LAST, cpu.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, cpu.cpu AS cpu, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Sort: cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.cpu AS cpu, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); // Sort should be cpu, region, time assert_snapshot!(plan("SELECT usage_idle FROM cpu GROUP BY cpu, region"), @r###" - Sort: cpu ASC NULLS LAST, region ASC NULLS LAST, cpu.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, cpu.cpu AS cpu, cpu.region AS region, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Sort: cpu ASC NULLS LAST, region ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.cpu AS cpu, cpu.region AS region, cpu.usage_idle AS usage_idle 
[iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); // Sort should be cpu, region, time assert_snapshot!(plan("SELECT usage_idle FROM cpu GROUP BY region, cpu"), @r###" - Sort: cpu ASC NULLS LAST, region ASC NULLS LAST, cpu.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, cpu.cpu AS cpu, cpu.region AS region, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Sort: cpu ASC NULLS LAST, region ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.cpu AS cpu, cpu.region AS region, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); // Sort should be cpu, time, region assert_snapshot!(plan("SELECT region, usage_idle FROM cpu GROUP BY cpu"), @r###" - Sort: cpu ASC NULLS LAST, cpu.time ASC NULLS LAST, region ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, cpu.cpu AS cpu, cpu.region AS region, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Sort: cpu ASC NULLS LAST, time ASC NULLS LAST, region ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.cpu AS cpu, cpu.region AS region, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, usage_idle:Float64;N] + TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] + "###); + + // If a tag specified in a GROUP BY does not exist in the measurement, it should be omitted from the sort + assert_snapshot!(plan("SELECT usage_idle FROM cpu GROUP BY cpu, non_existent"), @r###" + Sort: cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), 
time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, non_existent:Null;N, usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.cpu AS cpu, NULL AS non_existent, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, non_existent:Null;N, usage_idle:Float64;N] + TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] + "###); + + // If a tag specified in a projection does not exist in the measurement, it should be omitted from the sort + assert_snapshot!(plan("SELECT usage_idle, cpu, non_existent FROM cpu GROUP BY cpu"), @r###" + Sort: cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), usage_idle:Float64;N, cpu:Dictionary(Int32, Utf8);N, non_existent:Null;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.usage_idle AS usage_idle, cpu.cpu AS cpu, NULL AS non_existent [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), usage_idle:Float64;N, cpu:Dictionary(Int32, Utf8);N, non_existent:Null;N] + TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] + "###); + + // If a non-existent field is included in the GROUP BY and projection, it should not be duplicated + assert_snapshot!(plan("SELECT usage_idle, non_existent FROM cpu GROUP BY cpu, non_existent"), @r###" + Sort: cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, usage_idle:Float64;N, non_existent:Null;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.cpu AS cpu, cpu.usage_idle AS usage_idle, NULL AS non_existent [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Dictionary(Int32, Utf8);N, usage_idle:Float64;N, non_existent:Null;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] "###); } @@ -1652,52 +1969,72 @@ mod test { #[test] fn test_select_multiple_measurements_group_by() { // Sort should be iox::measurement, cpu, time - assert_snapshot!(plan("SELECT usage_idle, free FROM cpu, disk GROUP BY cpu"), @r###" - Sort: iox::measurement ASC NULLS LAST, cpu ASC NULLS LAST, cpu.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] - Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, CAST(cpu.cpu AS Utf8) AS cpu, cpu.usage_idle AS usage_idle, NULL AS free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] + assert_snapshot!(plan("SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY cpu"), @r###" + Sort: iox::measurement ASC NULLS LAST, cpu ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, 
Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, CAST(cpu.cpu AS Utf8) AS cpu, cpu.usage_idle AS usage_idle, CAST(NULL AS Int64) AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] - Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time, CAST(NULL AS Utf8) AS cpu, CAST(NULL AS Float64) AS usage_idle, NULL AS free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] + Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time AS time, CAST(NULL AS Utf8) AS cpu, CAST(NULL AS Float64) AS usage_idle, disk.bytes_free AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] "###); // Sort should be iox::measurement, cpu, device, time - assert_snapshot!(plan("SELECT usage_idle, free FROM cpu, disk GROUP BY device, cpu"), @r###" - Sort: iox::measurement ASC NULLS LAST, cpu ASC NULLS LAST, device ASC NULLS LAST, cpu.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, free:Null;N] - Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, free:Null;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, CAST(cpu.cpu AS Utf8) AS cpu, CAST(NULL AS Utf8) AS device, cpu.usage_idle AS usage_idle, NULL AS free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, free:Null;N] + assert_snapshot!(plan("SELECT usage_idle, bytes_free FROM cpu, disk GROUP BY device, cpu"), @r###" + Sort: iox::measurement ASC NULLS LAST, cpu ASC NULLS LAST, device ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, CAST(cpu.cpu AS Utf8) AS cpu, CAST(NULL AS Utf8) AS device, cpu.usage_idle AS usage_idle, CAST(NULL AS Int64) AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] - Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, 
disk.time, CAST(NULL AS Utf8) AS cpu, CAST(disk.device AS Utf8) AS device, CAST(NULL AS Float64) AS usage_idle, NULL AS free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, free:Null;N] + Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time AS time, CAST(NULL AS Utf8) AS cpu, CAST(disk.device AS Utf8) AS device, CAST(NULL AS Float64) AS usage_idle, disk.bytes_free AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] "###); // Sort should be iox::measurement, cpu, time, device - assert_snapshot!(plan("SELECT device, usage_idle, free FROM cpu, disk GROUP BY cpu"), @r###" - Sort: iox::measurement ASC NULLS LAST, cpu ASC NULLS LAST, cpu.time ASC NULLS LAST, device ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, free:Null;N] - Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, free:Null;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, CAST(cpu.cpu AS Utf8) AS cpu, CAST(NULL AS Utf8) AS device, cpu.usage_idle AS usage_idle, NULL AS free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, free:Null;N] + assert_snapshot!(plan("SELECT device, usage_idle, bytes_free FROM cpu, disk GROUP BY cpu"), @r###" + Sort: iox::measurement ASC NULLS LAST, cpu ASC NULLS LAST, time ASC NULLS LAST, device ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, CAST(cpu.cpu AS Utf8) AS cpu, CAST(NULL AS Utf8) AS device, cpu.usage_idle AS usage_idle, CAST(NULL AS Int64) AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] - Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time, CAST(NULL AS Utf8) AS cpu, CAST(disk.device AS Utf8) AS device, CAST(NULL AS Float64) AS usage_idle, NULL AS free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, free:Null;N] + Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time AS time, CAST(NULL AS Utf8) AS cpu, CAST(disk.device AS Utf8) AS device, CAST(NULL AS Float64) AS usage_idle, disk.bytes_free AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), cpu:Utf8;N, device:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, 
host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] "###); // Sort should be iox::measurement, cpu, device, time - assert_snapshot!(plan("SELECT cpu, usage_idle, free FROM cpu, disk GROUP BY cpu, device"), @r###" - Sort: iox::measurement ASC NULLS LAST, cpu ASC NULLS LAST, device ASC NULLS LAST, cpu.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] - Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time, CAST(NULL AS Utf8) AS device, CAST(cpu.cpu AS Utf8) AS cpu, cpu.usage_idle AS usage_idle, NULL AS free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] + assert_snapshot!(plan("SELECT cpu, usage_idle, bytes_free FROM cpu, disk GROUP BY cpu, device"), @r###" + Sort: iox::measurement ASC NULLS LAST, cpu ASC NULLS LAST, device ASC NULLS LAST, time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, CAST(NULL AS Utf8) AS device, CAST(cpu.cpu AS Utf8) AS cpu, cpu.usage_idle AS usage_idle, CAST(NULL AS Int64) AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] - Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time, CAST(disk.device AS Utf8) AS device, CAST(NULL AS Utf8) AS cpu, CAST(NULL AS Float64) AS usage_idle, NULL AS free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] + Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time AS time, CAST(disk.device AS Utf8) AS device, CAST(NULL AS Utf8) AS cpu, CAST(NULL AS Float64) AS usage_idle, disk.bytes_free AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] "###); // Sort should be iox::measurement, device, time, cpu - assert_snapshot!(plan("SELECT cpu, usage_idle, free FROM cpu, disk GROUP BY device"), @r###" - Sort: iox::measurement ASC NULLS LAST, device ASC NULLS LAST, cpu.time ASC NULLS LAST, cpu ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] - Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] - Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, 
cpu.time, CAST(NULL AS Utf8) AS device, CAST(cpu.cpu AS Utf8) AS cpu, cpu.usage_idle AS usage_idle, NULL AS free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] + assert_snapshot!(plan("SELECT cpu, usage_idle, bytes_free FROM cpu, disk GROUP BY device"), @r###" + Sort: iox::measurement ASC NULLS LAST, device ASC NULLS LAST, time ASC NULLS LAST, cpu ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, CAST(NULL AS Utf8) AS device, CAST(cpu.cpu AS Utf8) AS cpu, cpu.usage_idle AS usage_idle, CAST(NULL AS Int64) AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] - Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time, CAST(disk.device AS Utf8) AS device, CAST(NULL AS Utf8) AS cpu, CAST(NULL AS Float64) AS usage_idle, NULL AS free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, free:Null;N] + Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time AS time, CAST(disk.device AS Utf8) AS device, CAST(NULL AS Utf8) AS cpu, CAST(NULL AS Float64) AS usage_idle, disk.bytes_free AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] + "###); + + // If a tag specified in a GROUP BY does not exist across all measurements, it should be omitted from the sort + assert_snapshot!(plan("SELECT cpu, usage_idle, bytes_free FROM cpu, disk GROUP BY device, non_existent"), @r###" + Sort: iox::measurement ASC NULLS LAST, device ASC NULLS LAST, time ASC NULLS LAST, cpu ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, non_existent:Null;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, non_existent:Null;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, CAST(NULL AS Utf8) AS device, NULL AS non_existent, CAST(cpu.cpu AS Utf8) AS cpu, cpu.usage_idle AS usage_idle, CAST(NULL AS Int64) AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, non_existent:Null;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] + Projection: Dictionary(Int32, 
Utf8("disk")) AS iox::measurement, disk.time AS time, CAST(disk.device AS Utf8) AS device, NULL AS non_existent, CAST(NULL AS Utf8) AS cpu, CAST(NULL AS Float64) AS usage_idle, disk.bytes_free AS bytes_free [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, non_existent:Null;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N] + TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] + "###); + + // If a tag specified in a projection does not exist across all measurements, it should be omitted from the sort + assert_snapshot!(plan("SELECT cpu, usage_idle, bytes_free, non_existent FROM cpu, disk GROUP BY device"), @r###" + Sort: iox::measurement ASC NULLS LAST, device ASC NULLS LAST, time ASC NULLS LAST, cpu ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N, non_existent:Null;N] + Union [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N, non_existent:Null;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, CAST(NULL AS Utf8) AS device, CAST(cpu.cpu AS Utf8) AS cpu, cpu.usage_idle AS usage_idle, CAST(NULL AS Int64) AS bytes_free, NULL AS non_existent [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N, non_existent:Null;N] + TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] + Projection: Dictionary(Int32, Utf8("disk")) AS iox::measurement, disk.time AS time, CAST(disk.device AS Utf8) AS device, CAST(NULL AS Utf8) AS cpu, CAST(NULL AS Float64) AS usage_idle, disk.bytes_free AS bytes_free, NULL AS non_existent [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), device:Utf8;N, cpu:Utf8;N, usage_idle:Float64;N, bytes_free:Int64;N, non_existent:Null;N] TableScan: disk [bytes_free:Int64;N, bytes_used:Int64;N, device:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None)] "###); } diff --git a/iox_query_influxql/src/plan/planner/select.rs b/iox_query_influxql/src/plan/planner/select.rs new file mode 100644 index 0000000000..ceb846dec3 --- /dev/null +++ b/iox_query_influxql/src/plan/planner/select.rs @@ -0,0 +1,141 @@ +use arrow::datatypes::DataType; +use datafusion::common::{DFSchemaRef, DataFusionError, Result}; +use datafusion::logical_expr::utils::find_column_exprs; +use datafusion::logical_expr::{Expr, LogicalPlan, LogicalPlanBuilder}; +use datafusion_util::AsExpr; +use generated_types::influxdata::iox::querier::v1::influx_ql_metadata::TagKeyColumn; +use influxdb_influxql_parser::common::OrderByClause; +use influxdb_influxql_parser::expression::{Expr as IQLExpr, VarRefDataType}; +use influxdb_influxql_parser::select::{Field, SelectStatement}; +use schema::INFLUXQL_MEASUREMENT_COLUMN_NAME; +use std::collections::HashMap; +use std::ops::Deref; + +/// Determines that all [`Expr::Column`] references in `exprs` refer to a +/// column in `columns`. 
+pub(crate) fn check_exprs_satisfy_columns(columns: &[Expr], exprs: &[Expr]) -> Result<()> { +    if !columns.iter().all(|c| matches!(c, Expr::Column(_))) { +        return Err(DataFusionError::Internal( +            "expected Expr::Column".to_owned(), +        )); +    } +    let column_exprs = find_column_exprs(exprs); +    if column_exprs.iter().any(|expr| !columns.contains(expr)) { +        return Err(DataFusionError::Plan( +            "mixing aggregate and non-aggregate columns is not supported".to_owned(), +        )); +    } +    Ok(()) +} + +pub(super) fn make_tag_key_column_meta( +    fields: &[Field], +    tag_set: &[&str], +    is_projected: &[bool], +) -> Vec<TagKeyColumn> { +    /// There is always a [INFLUXQL_MEASUREMENT_COLUMN_NAME] and `time` column projected in the LogicalPlan, +    /// therefore the start index is 2 for determining the offsets of the +    /// tag key columns in the column projection list. +    const START_INDEX: usize = 1; + +    // Create a map of tag key columns to their respective index in the projection +    let index_map = fields +        .iter() +        .enumerate() +        .filter_map(|(index, f)| match &f.expr { +            IQLExpr::VarRef { +                name, +                data_type: Some(VarRefDataType::Tag) | None, +            } => Some((name.deref().as_str(), index + START_INDEX)), +            _ => None, +        }) +        .collect::<HashMap<_, _>>(); + +    // tag_set was previously sorted, so tag_key_columns will be in the correct order +    tag_set +        .iter() +        .zip(is_projected) +        .map(|(tag_key, is_projected)| TagKeyColumn { +            tag_key: (*tag_key).to_owned(), +            column_index: *index_map.get(*tag_key).unwrap() as _, +            is_projected: *is_projected, +        }) +        .collect() +} + +/// Create a plan that sorts the input plan. +/// +/// The ordering of the results is as follows: +/// +/// iox::measurement, [group by tag 0, .., group by tag n], time, [projection tag 0, .., projection tag n] +/// +/// ## NOTE +/// +/// Sort expressions referring to tag keys are always specified in lexicographically ascending order. +pub(super) fn plan_with_sort( +    plan: LogicalPlan, +    select: &SelectStatement, +    group_by_tag_set: &[&str], +    projection_tag_set: &[&str], +) -> Result<LogicalPlan> { +    // If there are multiple measurements, we need to sort by the measurement column +    // NOTE: Ideally DataFusion would maintain the order of the UNION ALL, which would eliminate +    // the need to sort by measurement. +    // See: https://github.com/influxdata/influxdb_iox/issues/7062 +    let mut series_sort = if matches!(plan, LogicalPlan::Union(_)) { +        vec![Expr::sort( +            INFLUXQL_MEASUREMENT_COLUMN_NAME.as_expr(), +            true, +            false, +        )] +    } else { +        vec![] +    }; + +    /// Map the fields to DataFusion [`Expr::Sort`] expressions, excluding those columns that +    /// are [`DataType::Null`]'s, as sorting these column types is not supported and unnecessary.
+    fn map_to_expr<'a>( +        schema: &'a DFSchemaRef, +        fields: &'a [&str], +    ) -> impl Iterator<Item = Expr> + 'a { +        fields +            .iter() +            .filter(|f| { +                if let Ok(df) = schema.field_with_unqualified_name(f) { +                    *df.data_type() != DataType::Null +                } else { +                    false +                } +            }) +            .map(|f| Expr::sort(f.as_expr(), true, false)) +    } + +    let schema = plan.schema(); + +    if !group_by_tag_set.is_empty() { +        // Adding `LIMIT` or `OFFSET` with a `GROUP BY tag, ...` clause is not supported +        // +        // See: https://github.com/influxdata/influxdb_iox/issues/6920 +        if select.offset.is_some() || select.limit.is_some() { +            return Err(DataFusionError::NotImplemented( +                "GROUP BY combined with LIMIT or OFFSET clause".to_owned(), +            )); +        } + +        series_sort.extend(map_to_expr(schema, group_by_tag_set)); +    }; + +    series_sort.push(Expr::sort( +        "time".as_expr(), +        match select.order_by { +            // Default behaviour is to sort by time in ascending order if there is no ORDER BY +            None | Some(OrderByClause::Ascending) => true, +            Some(OrderByClause::Descending) => false, +        }, +        false, +    )); + +    series_sort.extend(map_to_expr(schema, projection_tag_set)); + +    LogicalPlanBuilder::from(plan).sort(series_sort)?.build() +} diff --git a/iox_query_influxql/src/plan/planner_time_range_expression.rs b/iox_query_influxql/src/plan/planner_time_range_expression.rs index 573dbfb385..0ad1a70301 100644 --- a/iox_query_influxql/src/plan/planner_time_range_expression.rs +++ b/iox_query_influxql/src/plan/planner_time_range_expression.rs @@ -1,5 +1,6 @@ use crate::plan::timestamp::parse_timestamp; use crate::plan::util::binary_operator_to_df_operator; +use arrow::temporal_conversions::MILLISECONDS_IN_DAY; use datafusion::common::{DataFusionError, Result, ScalarValue}; use datafusion::logical_expr::{binary_expr, lit, now, BinaryExpr, Expr as DFExpr, Operator}; use influxdb_influxql_parser::expression::BinaryOperator; @@ -70,6 +71,48 @@ pub(in crate::plan) fn time_range_to_df_expr(expr: &Expr, tz: Option<chrono_tz::Tz>) -> ExprResult { +pub(super) fn expr_to_df_interval_dt(expr: &Expr) -> ExprResult { +    let v = duration_expr_to_nanoseconds(expr)?; +    if v % 1_000_000 != 0 { +        Err(DataFusionError::NotImplemented("interval limited to a precision of milliseconds. See https://github.com/influxdata/influxdb_iox/issues/7204".to_owned())) +    } else { +        let v = v / 1_000_000; +        let days = v / MILLISECONDS_IN_DAY; +        // keep the sign on `days` and remove it from `millis` +        let millis = (v - days * MILLISECONDS_IN_DAY).abs(); + +        // It is not possible for an InfluxQL duration to overflow an IntervalDayTime. +        // An InfluxQL duration encodes a number of nanoseconds into a 64-bit signed integer, +        // which is a maximum of 15,250.2845 days. An IntervalDayTime can encode days +        // as a signed 32-bit number. +        Ok(lit(ScalarValue::new_interval_dt( +            days as i32, +            millis as i32, +        ))) +    } +} + +/// Reduces an InfluxQL duration `expr` to a nanosecond interval.
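The interval conversion in expr_to_df_interval_dt above hinges on one piece of arithmetic: the duration is reduced to whole milliseconds, whole days are peeled off with their sign intact, and the remainder is kept as a non-negative millisecond component. The standalone sketch below reproduces only that arithmetic so it can be checked in isolation; it deliberately avoids the IOx and DataFusion types, redefines MILLISECONDS_IN_DAY locally rather than importing it from arrow, and the name split_interval_dt and the sample values are illustrative only, not part of this patch.

const MILLISECONDS_IN_DAY: i64 = 86_400_000;

/// Split a duration expressed in nanoseconds into (days, millis), mirroring the
/// IntervalDayTime layout produced above. Durations finer than 1ms are rejected,
/// matching the "interval limited to a precision of milliseconds" error.
fn split_interval_dt(nanos: i64) -> Result<(i32, i32), String> {
    if nanos % 1_000_000 != 0 {
        return Err("interval limited to a precision of milliseconds".to_owned());
    }
    let millis_total = nanos / 1_000_000;
    let days = millis_total / MILLISECONDS_IN_DAY; // truncates toward zero, so the sign stays on `days`
    let millis = (millis_total - days * MILLISECONDS_IN_DAY).abs(); // always non-negative
    Ok((days as i32, millis as i32))
}

fn main() {
    // 10s => (0 days, 10_000 ms); matches the `parse("10s")` expectation in the tests below.
    assert_eq!(split_interval_dt(10_000_000_000), Ok((0, 10_000)));
    // -2d10ms, i.e. -(2d + 10ms) => (-2 days, 10 ms); the sign is carried by the day component.
    assert_eq!(split_interval_dt(-(2 * 86_400_000_000_000 + 10_000_000)), Ok((-2, 10)));
    println!("interval split checks passed");
}

The same reasoning explains why overflow is not a concern in the patch: an InfluxQL duration is a signed 64-bit nanosecond count, so the day component can never exceed the roughly 15,250 days noted in the comment, which fits comfortably in the interval's signed 32-bit day field.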
+pub(super) fn duration_expr_to_nanoseconds(expr: &Expr) -> Result<i64> { +    let df_expr = reduce_expr(expr, None)?; +    match df_expr { +        DFExpr::Literal(ScalarValue::IntervalMonthDayNano(Some(v))) => Ok(v as i64), +        DFExpr::Literal(ScalarValue::Float64(Some(v))) => Ok(v as i64), +        DFExpr::Literal(ScalarValue::Int64(Some(v))) => Ok(v), +        _ => Err(DataFusionError::Plan("invalid duration expression".into())), +    } +} + fn map_expr_err(expr: &Expr) -> impl Fn(DataFusionError) -> DataFusionError + '_ { move |err| { DataFusionError::Plan(format!( @@ -393,6 +436,7 @@ fn parse_timestamp_df_expr(s: &str, tz: Option<chrono_tz::Tz>) -> ExprResult { #[cfg(test)] mod test { use super::*; +    use assert_matches::assert_matches; use influxdb_influxql_parser::expression::ConditionalExpression; use test_helpers::assert_error; @@ -545,4 +589,33 @@ mod test { "TimestampNanosecond(1081505100123456789, None)" // 2004-04-09T10:05:00.123456789Z ); } + +    #[test] +    fn test_expr_to_df_interval_dt() { +        fn parse(s: &str) -> ExprResult { +            let expr = s +                .parse::<ConditionalExpression>() +                .unwrap() +                .expr() +                .unwrap() +                .clone(); +            expr_to_df_interval_dt(&expr) +        } + +        use ScalarValue::IntervalDayTime; + +        assert_matches!(parse("10s").unwrap(), DFExpr::Literal(IntervalDayTime(v)) if IntervalDayTime(v) == ScalarValue::new_interval_dt(0, 10_000)); +        assert_matches!(parse("10s + 1d").unwrap(), DFExpr::Literal(IntervalDayTime(v)) if IntervalDayTime(v) == ScalarValue::new_interval_dt(1, 10_000)); +        assert_matches!(parse("5d10ms").unwrap(), DFExpr::Literal(IntervalDayTime(v)) if IntervalDayTime(v) == ScalarValue::new_interval_dt(5, 10)); +        assert_matches!(parse("-2d10ms").unwrap(), DFExpr::Literal(IntervalDayTime(v)) if IntervalDayTime(v) == ScalarValue::new_interval_dt(-2, 10)); + +        // Fallible + +        use DataFusionError::NotImplemented; + +        // Don't support a precision greater than milliseconds. +        // +        // See: https://github.com/influxdata/influxdb_iox/issues/7204 +        assert_error!(parse("-2d10ns"), NotImplemented(ref s) if s == "interval limited to a precision of milliseconds. See https://github.com/influxdata/influxdb_iox/issues/7204"); +    } } diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs index a49ef51ba9..2ede32d22f 100644 --- a/iox_query_influxql/src/plan/rewriter.rs +++ b/iox_query_influxql/src/plan/rewriter.rs @@ -180,9 +180,11 @@ fn has_wildcards(stmt: &SelectStatement) -> (bool, bool) { /// Rewrite the projection list and GROUP BY of the specified `SELECT` statement. /// -/// Wildcards and regular expressions in the `SELECT` projection list and `GROUP BY` are expanded. -/// Any fields with no type specifier are rewritten with the appropriate type, if they exist in the -/// underlying schema. +/// The following transformations are performed: +/// +/// * Wildcards and regular expressions in the `SELECT` projection list and `GROUP BY` are expanded. +/// * Any fields with no type specifier are rewritten with the appropriate type, if they exist in the +/// underlying schema. /// /// Derived from [Go implementation](https://github.com/influxdata/influxql/blob/1ba470371ec093d57a726b143fe6ccbacf1b452b/ast.go#L1185).
fn rewrite_field_list(s: &dyn SchemaProvider, stmt: &mut SelectStatement) -> Result<()> { diff --git a/iox_query_influxql/src/plan/test_utils.rs b/iox_query_influxql/src/plan/test_utils.rs index 45e22ac9d6..17fdf416ac 100644 --- a/iox_query_influxql/src/plan/test_utils.rs +++ b/iox_query_influxql/src/plan/test_utils.rs @@ -5,7 +5,7 @@ use crate::plan::SchemaProvider; use datafusion::common::{DataFusionError, Result as DataFusionResult}; use datafusion::datasource::empty::EmptyTable; use datafusion::datasource::provider_as_source; -use datafusion::logical_expr::TableSource; +use datafusion::logical_expr::{AggregateUDF, ScalarUDF, TableSource}; use influxdb_influxql_parser::parse_statements; use influxdb_influxql_parser::select::{Field, SelectStatement}; use influxdb_influxql_parser::statement::Statement; @@ -159,6 +159,14 @@ impl SchemaProvider for MockSchemaProvider { .ok_or_else(|| DataFusionError::Plan(format!("measurement does not exist: {name}"))) } +    fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> { +        None +    } + +    fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> { +        None +    } + fn table_names(&self) -> Vec<&'_ str> { self.tables .keys() diff --git a/iox_query_influxql/src/plan/util_copy.rs b/iox_query_influxql/src/plan/util_copy.rs new file mode 100644 index 0000000000..94b8bfd2a1 --- /dev/null +++ b/iox_query_influxql/src/plan/util_copy.rs @@ -0,0 +1,337 @@ +// NOTE: This code is copied from DataFusion, as it is not public, +// so all warnings are disabled. +#![allow(warnings)] +#![allow(clippy::all)] +//! A collection of utility functions copied from DataFusion. +//! +//! If these APIs are stabilised and made public, they can be removed from IOx. +//! +//! NOTE +use datafusion::common::{DataFusionError, Result}; +use datafusion::logical_expr::{ +    expr::{ +        AggregateFunction, Between, BinaryExpr, Case, Cast, Expr, GetIndexedField, GroupingSet, +        Like, Sort, TryCast, WindowFunction, +    }, +    utils::expr_as_column_expr, +    LogicalPlan, +}; + +/// Rebuilds an `Expr` as a projection on top of a collection of `Expr`'s. +/// +/// For example, the expression `a + b < 1` would require, as input, the 2 +/// individual columns, `a` and `b`. But, if the base expressions already +/// contain the `a + b` result, then that may be used in lieu of the `a` and +/// `b` columns. +/// +/// This is useful in the context of a query like: +/// +/// SELECT a + b < 1 ... GROUP BY a + b +/// +/// where post-aggregation, `a + b` need not be a projection against the +/// individual columns `a` and `b`, but rather it is a projection against the +/// `a + b` found in the GROUP BY. +/// +/// Source: +pub(crate) fn rebase_expr(expr: &Expr, base_exprs: &[Expr], plan: &LogicalPlan) -> Result<Expr> { +    clone_with_replacement(expr, &|nested_expr| { +        if base_exprs.contains(nested_expr) { +            Ok(Some(expr_as_column_expr(nested_expr, plan)?)) +        } else { +            Ok(None) +        } +    }) +} + +/// Returns a cloned `Expr`, but any of the `Expr`'s in the tree may be +/// replaced/customized by the replacement function. +/// +/// The replacement function is called repeatedly with `Expr`, starting with +/// the argument `expr`, then descending depth-first through its +/// descendants. The function chooses to replace or keep (clone) each `Expr`. +/// +/// The function's return type is `Result<Option<Expr>>`, where: +/// +/// * `Ok(Some(replacement_expr))`: A replacement `Expr` is provided; it is +/// swapped in at the particular node in the tree. Any nested `Expr` are +/// not subject to cloning/replacement.
+/// * `Ok(None)`: A replacement `Expr` is not provided. The `Expr` is +/// recreated, with all of its nested `Expr`'s subject to +/// cloning/replacement. +/// * `Err(err)`: Any error returned by the function is returned as-is by +/// `clone_with_replacement()`. +/// +/// Source: +fn clone_with_replacement(expr: &Expr, replacement_fn: &F) -> Result +where + F: Fn(&Expr) -> Result>, +{ + let replacement_opt = replacement_fn(expr)?; + + match replacement_opt { + // If we were provided a replacement, use the replacement. Do not + // descend further. + Some(replacement) => Ok(replacement), + // No replacement was provided, clone the node and recursively call + // clone_with_replacement() on any nested expressions. + None => { + match expr { + Expr::AggregateFunction(AggregateFunction { + fun, + args, + distinct, + filter, + }) => Ok(Expr::AggregateFunction(AggregateFunction::new( + fun.clone(), + args.iter() + .map(|e| clone_with_replacement(e, replacement_fn)) + .collect::>>()?, + *distinct, + filter.clone(), + ))), + Expr::WindowFunction(WindowFunction { + fun, + args, + partition_by, + order_by, + window_frame, + }) => Ok(Expr::WindowFunction(WindowFunction::new( + fun.clone(), + args.iter() + .map(|e| clone_with_replacement(e, replacement_fn)) + .collect::>>()?, + partition_by + .iter() + .map(|e| clone_with_replacement(e, replacement_fn)) + .collect::>>()?, + order_by + .iter() + .map(|e| clone_with_replacement(e, replacement_fn)) + .collect::>>()?, + window_frame.clone(), + ))), + Expr::AggregateUDF { fun, args, filter } => Ok(Expr::AggregateUDF { + fun: fun.clone(), + args: args + .iter() + .map(|e| clone_with_replacement(e, replacement_fn)) + .collect::>>()?, + filter: filter.clone(), + }), + Expr::Alias(nested_expr, alias_name) => Ok(Expr::Alias( + Box::new(clone_with_replacement(nested_expr, replacement_fn)?), + alias_name.clone(), + )), + Expr::Between(Between { + expr, + negated, + low, + high, + }) => Ok(Expr::Between(Between::new( + Box::new(clone_with_replacement(expr, replacement_fn)?), + *negated, + Box::new(clone_with_replacement(low, replacement_fn)?), + Box::new(clone_with_replacement(high, replacement_fn)?), + ))), + Expr::InList { + expr: nested_expr, + list, + negated, + } => Ok(Expr::InList { + expr: Box::new(clone_with_replacement(nested_expr, replacement_fn)?), + list: list + .iter() + .map(|e| clone_with_replacement(e, replacement_fn)) + .collect::>>()?, + negated: *negated, + }), + Expr::BinaryExpr(BinaryExpr { left, right, op }) => { + Ok(Expr::BinaryExpr(BinaryExpr::new( + Box::new(clone_with_replacement(left, replacement_fn)?), + *op, + Box::new(clone_with_replacement(right, replacement_fn)?), + ))) + } + Expr::Like(Like { + negated, + expr, + pattern, + escape_char, + }) => Ok(Expr::Like(Like::new( + *negated, + Box::new(clone_with_replacement(expr, replacement_fn)?), + Box::new(clone_with_replacement(pattern, replacement_fn)?), + *escape_char, + ))), + Expr::ILike(Like { + negated, + expr, + pattern, + escape_char, + }) => Ok(Expr::ILike(Like::new( + *negated, + Box::new(clone_with_replacement(expr, replacement_fn)?), + Box::new(clone_with_replacement(pattern, replacement_fn)?), + *escape_char, + ))), + Expr::SimilarTo(Like { + negated, + expr, + pattern, + escape_char, + }) => Ok(Expr::SimilarTo(Like::new( + *negated, + Box::new(clone_with_replacement(expr, replacement_fn)?), + Box::new(clone_with_replacement(pattern, replacement_fn)?), + *escape_char, + ))), + Expr::Case(case) => Ok(Expr::Case(Case::new( + match &case.expr { + Some(case_expr) => { + 
+                            Some(Box::new(clone_with_replacement(case_expr, replacement_fn)?))
+                        }
+                        None => None,
+                    },
+                    case.when_then_expr
+                        .iter()
+                        .map(|(a, b)| {
+                            Ok((
+                                Box::new(clone_with_replacement(a, replacement_fn)?),
+                                Box::new(clone_with_replacement(b, replacement_fn)?),
+                            ))
+                        })
+                        .collect::<Result<Vec<_>>>()?,
+                    match &case.else_expr {
+                        Some(else_expr) => {
+                            Some(Box::new(clone_with_replacement(else_expr, replacement_fn)?))
+                        }
+                        None => None,
+                    },
+                ))),
+                Expr::ScalarFunction { fun, args } => Ok(Expr::ScalarFunction {
+                    fun: fun.clone(),
+                    args: args
+                        .iter()
+                        .map(|e| clone_with_replacement(e, replacement_fn))
+                        .collect::<Result<Vec<Expr>>>()?,
+                }),
+                Expr::ScalarUDF { fun, args } => Ok(Expr::ScalarUDF {
+                    fun: fun.clone(),
+                    args: args
+                        .iter()
+                        .map(|arg| clone_with_replacement(arg, replacement_fn))
+                        .collect::<Result<Vec<Expr>>>()?,
+                }),
+                Expr::Negative(nested_expr) => Ok(Expr::Negative(Box::new(
+                    clone_with_replacement(nested_expr, replacement_fn)?,
+                ))),
+                Expr::Not(nested_expr) => Ok(Expr::Not(Box::new(clone_with_replacement(
+                    nested_expr,
+                    replacement_fn,
+                )?))),
+                Expr::IsNotNull(nested_expr) => Ok(Expr::IsNotNull(Box::new(
+                    clone_with_replacement(nested_expr, replacement_fn)?,
+                ))),
+                Expr::IsNull(nested_expr) => Ok(Expr::IsNull(Box::new(clone_with_replacement(
+                    nested_expr,
+                    replacement_fn,
+                )?))),
+                Expr::IsTrue(nested_expr) => Ok(Expr::IsTrue(Box::new(clone_with_replacement(
+                    nested_expr,
+                    replacement_fn,
+                )?))),
+                Expr::IsFalse(nested_expr) => Ok(Expr::IsFalse(Box::new(clone_with_replacement(
+                    nested_expr,
+                    replacement_fn,
+                )?))),
+                Expr::IsUnknown(nested_expr) => Ok(Expr::IsUnknown(Box::new(
+                    clone_with_replacement(nested_expr, replacement_fn)?,
+                ))),
+                Expr::IsNotTrue(nested_expr) => Ok(Expr::IsNotTrue(Box::new(
+                    clone_with_replacement(nested_expr, replacement_fn)?,
+                ))),
+                Expr::IsNotFalse(nested_expr) => Ok(Expr::IsNotFalse(Box::new(
+                    clone_with_replacement(nested_expr, replacement_fn)?,
+                ))),
+                Expr::IsNotUnknown(nested_expr) => Ok(Expr::IsNotUnknown(Box::new(
+                    clone_with_replacement(nested_expr, replacement_fn)?,
+                ))),
+                Expr::Cast(Cast { expr, data_type }) => Ok(Expr::Cast(Cast::new(
+                    Box::new(clone_with_replacement(expr, replacement_fn)?),
+                    data_type.clone(),
+                ))),
+                Expr::TryCast(TryCast {
+                    expr: nested_expr,
+                    data_type,
+                }) => Ok(Expr::TryCast(TryCast::new(
+                    Box::new(clone_with_replacement(nested_expr, replacement_fn)?),
+                    data_type.clone(),
+                ))),
+                Expr::Sort(Sort {
+                    expr: nested_expr,
+                    asc,
+                    nulls_first,
+                }) => Ok(Expr::Sort(Sort::new(
+                    Box::new(clone_with_replacement(nested_expr, replacement_fn)?),
+                    *asc,
+                    *nulls_first,
+                ))),
+                Expr::Column { .. }
+                | Expr::OuterReferenceColumn(_, _)
+                | Expr::Literal(_)
+                | Expr::ScalarVariable(_, _)
+                | Expr::Exists { .. }
+                | Expr::ScalarSubquery(_) => Ok(expr.clone()),
+                Expr::InSubquery {
+                    expr: nested_expr,
+                    subquery,
+                    negated,
+                } => Ok(Expr::InSubquery {
+                    expr: Box::new(clone_with_replacement(nested_expr, replacement_fn)?),
+                    subquery: subquery.clone(),
+                    negated: *negated,
+                }),
+                Expr::Wildcard => Ok(Expr::Wildcard),
+                Expr::QualifiedWildcard { .. } => Ok(expr.clone()),
+                Expr::GetIndexedField(GetIndexedField { key, expr }) => {
+                    Ok(Expr::GetIndexedField(GetIndexedField::new(
+                        Box::new(clone_with_replacement(expr.as_ref(), replacement_fn)?),
+                        key.clone(),
+                    )))
+                }
+                Expr::GroupingSet(set) => match set {
+                    GroupingSet::Rollup(exprs) => Ok(Expr::GroupingSet(GroupingSet::Rollup(
+                        exprs
+                            .iter()
+                            .map(|e| clone_with_replacement(e, replacement_fn))
+                            .collect::<Result<Vec<Expr>>>()?,
+                    ))),
+                    GroupingSet::Cube(exprs) => Ok(Expr::GroupingSet(GroupingSet::Cube(
+                        exprs
+                            .iter()
+                            .map(|e| clone_with_replacement(e, replacement_fn))
+                            .collect::<Result<Vec<Expr>>>()?,
+                    ))),
+                    GroupingSet::GroupingSets(lists_of_exprs) => {
+                        let mut new_lists_of_exprs = vec![];
+                        for exprs in lists_of_exprs {
+                            new_lists_of_exprs.push(
+                                exprs
+                                    .iter()
+                                    .map(|e| clone_with_replacement(e, replacement_fn))
+                                    .collect::<Result<Vec<Expr>>>()?,
+                            );
+                        }
+                        Ok(Expr::GroupingSet(GroupingSet::GroupingSets(
+                            new_lists_of_exprs,
+                        )))
+                    }
+                },
+                Expr::Placeholder { id, data_type } => Ok(Expr::Placeholder {
+                    id: id.clone(),
+                    data_type: data_type.clone(),
+                }),
+            }
+        }
+    }
+}
diff --git a/schema/src/lib.rs b/schema/src/lib.rs
index 6dd3fa284d..61bb8086fe 100644
--- a/schema/src/lib.rs
+++ b/schema/src/lib.rs
@@ -282,6 +282,12 @@ impl Schema {
         )
     }
 
+    /// Return the InfluxDB data model type, if any, and underlying arrow
+    /// schema field for the column identified by `name`.
+    pub fn field_by_name(&self, name: &str) -> Option<(InfluxColumnType, &ArrowField)> {
+        self.find_index_of(name).map(|index| self.field(index))
+    }
+
     /// Find the index of the column with the given name, if any.
     pub fn find_index_of(&self, name: &str) -> Option<usize> {
         self.inner.index_of(name).ok()
diff --git a/test_helpers_end_to_end/src/client.rs b/test_helpers_end_to_end/src/client.rs
index 818f1795cb..eb844d321b 100644
--- a/test_helpers_end_to_end/src/client.rs
+++ b/test_helpers_end_to_end/src/client.rs
@@ -11,6 +11,7 @@ use influxdb_iox_client::{
 };
 use mutable_batch_lp::lines_to_batches;
 use mutable_batch_pb::encode::encode_write;
+use std::fmt::Display;
 use tonic::IntoRequest;
 
 /// Writes the line protocol to the write_base/api/v2/write endpoint (typically on the router)
@@ -129,11 +130,11 @@ pub async fn run_sql(
 ///
 /// Use [`try_run_influxql`] if you want to check the error manually.
 pub async fn run_influxql(
-    influxql: impl Into<String>,
+    influxql: impl Into<String> + Clone + Display,
     namespace: impl Into<String>,
     querier_connection: Connection,
 ) -> Vec<RecordBatch> {
-    try_run_influxql(influxql, namespace, querier_connection)
+    try_run_influxql(influxql.clone(), namespace, querier_connection)
         .await
-        .expect("Error executing influxql query")
+        .unwrap_or_else(|_| panic!("Error executing InfluxQL query: {influxql}"))
 }
diff --git a/test_helpers_end_to_end/src/snapshot_comparison.rs b/test_helpers_end_to_end/src/snapshot_comparison.rs
index 44a4bc3c92..83861d6d22 100644
--- a/test_helpers_end_to_end/src/snapshot_comparison.rs
+++ b/test_helpers_end_to_end/src/snapshot_comparison.rs
@@ -1,12 +1,14 @@
 mod queries;
 
-use crate::{run_influxql, run_sql, snapshot_comparison::queries::TestQueries, MiniCluster};
+use crate::{run_sql, snapshot_comparison::queries::TestQueries, try_run_influxql, MiniCluster};
+use arrow_flight::error::FlightError;
 use snafu::{OptionExt, ResultExt, Snafu};
 use std::{
     fmt::{Display, Formatter},
     fs,
     path::{Path, PathBuf},
 };
+use tonic::Code;
 
 use self::queries::Query;
 
@@ -227,12 +229,21 @@ async fn run_query(
             .await
         }
         Language::InfluxQL => {
-            run_influxql(
+            match try_run_influxql(
                 query_text,
                 cluster.namespace(),
                 cluster.querier().querier_grpc_connection(),
             )
             .await
+            {
+                Ok(results) => results,
+                Err(influxdb_iox_client::flight::Error::ArrowFlightError(FlightError::Tonic(
+                    status,
+                ))) if status.code() == Code::InvalidArgument => {
+                    return Ok(vec![status.message().to_owned()])
+                }
+                Err(err) => return Ok(vec![err.to_string()]),
+            }
         }
     };
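
For readers skimming the patch, the following standalone sketch illustrates the replace-or-recurse control flow that `clone_with_replacement` and `rebase_expr` in util_copy.rs above implement. It is a toy model under stated assumptions: `TinyExpr` and its variants are hypothetical stand-ins for DataFusion's `Expr`, and the string-named column stands in for what `expr_as_column_expr(nested_expr, plan)` produces in the real code (a column reference to the aggregate/grouping output).

// Minimal, self-contained sketch of the replace-or-recurse pattern.
// `TinyExpr` is a hypothetical stand-in, not DataFusion's `Expr`.
#[derive(Clone, Debug, PartialEq)]
enum TinyExpr {
    Column(String),
    Add(Box<TinyExpr>, Box<TinyExpr>),
}

fn clone_with_replacement<F>(expr: &TinyExpr, replace: &F) -> TinyExpr
where
    F: Fn(&TinyExpr) -> Option<TinyExpr>,
{
    // Ask the replacement function first; if it supplies a node, use it
    // and do not descend into its children.
    if let Some(replacement) = replace(expr) {
        return replacement;
    }
    // Otherwise rebuild the node, recursing into nested expressions.
    match expr {
        TinyExpr::Column(_) => expr.clone(),
        TinyExpr::Add(l, r) => TinyExpr::Add(
            Box::new(clone_with_replacement(l, replace)),
            Box::new(clone_with_replacement(r, replace)),
        ),
    }
}

fn main() {
    // Rebase `a + b` onto an already-computed "a + b" column, which is the
    // essence of what `rebase_expr` does for `SELECT a + b ... GROUP BY a + b`.
    let group_expr = TinyExpr::Add(
        Box::new(TinyExpr::Column("a".into())),
        Box::new(TinyExpr::Column("b".into())),
    );
    let query_expr = group_expr.clone();
    let rebased = clone_with_replacement(&query_expr, &|e| {
        (*e == group_expr).then(|| TinyExpr::Column("a + b".into()))
    });
    assert_eq!(rebased, TinyExpr::Column("a + b".into()));
}

The design point carried over from the copied code: because the replacement function is consulted before recursion, a matched GROUP BY expression is swapped in wholesale and its children are never rewritten, which is exactly the behaviour the planner relies on when projecting post-aggregation expressions.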