From b9024582b0957620e8225c9e722457841edfbe7d Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Fri, 28 Apr 2023 09:35:56 +1000 Subject: [PATCH 001/119] fix: Ensure InfluxQL internal errors have a distinct message Closes #7606 --- iox_query_influxql/src/plan/error.rs | 19 ++++++++++++++++++- iox_query_influxql/src/plan/rewriter.rs | 10 +++++----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/iox_query_influxql/src/plan/error.rs b/iox_query_influxql/src/plan/error.rs index c6797ce5c9..6b12535ec9 100644 --- a/iox_query_influxql/src/plan/error.rs +++ b/iox_query_influxql/src/plan/error.rs @@ -24,7 +24,11 @@ pub(crate) mod map { #[derive(Debug, Error)] enum PlannerError { /// An unexpected error that represents a bug in IOx. - #[error("internal: {0}")] + /// + /// The message is prefixed with `InfluxQL internal error: `, + /// which may be used by clients to identify internal InfluxQL + /// errors. + #[error("InfluxQL internal error: {0}")] Internal(String), } @@ -42,4 +46,17 @@ pub(crate) mod map { pub(crate) fn not_implemented(feature: impl Into) -> DataFusionError { DataFusionError::NotImplemented(feature.into()) } + + #[cfg(test)] + mod test { + use crate::plan::error::map::PlannerError; + + #[test] + fn test_planner_error_display() { + // The InfluxQL internal error: + assert!(PlannerError::Internal("****".to_owned()) + .to_string() + .starts_with("InfluxQL internal error: ")) + } + } } diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs index 08a05f9bc0..da8f0f7f7c 100644 --- a/iox_query_influxql/src/plan/rewriter.rs +++ b/iox_query_influxql/src/plan/rewriter.rs @@ -1530,15 +1530,15 @@ mod test { let sel = parse_select("SELECT count(distinct('foo')) FROM cpu"); assert_error!(select_statement_info(&sel), DataFusionError::Plan(ref s) if s == "expected field argument in distinct()"); let sel = parse_select("SELECT count(distinct foo) FROM cpu"); - assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "internal: unexpected distinct clause in count"); + assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected distinct clause in count"); // Test rules for math functions let sel = parse_select("SELECT abs(usage_idle) FROM cpu"); select_statement_info(&sel).unwrap(); let sel = parse_select("SELECT abs(*) + ceil(foo) FROM cpu"); - assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "internal: unexpected wildcard"); + assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected wildcard"); let sel = parse_select("SELECT abs(/f/) + ceil(foo) FROM cpu"); - assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "internal: unexpected regex"); + assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected regex"); // Fallible @@ -1560,11 +1560,11 @@ mod test { // wildcard expansion is not supported in binary expressions for aggregates let sel = parse_select("SELECT count(*) + count(foo) FROM cpu"); - assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "internal: unexpected wildcard or regex"); + assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected wildcard or regex"); // regex 
expansion is not supported in binary expressions let sel = parse_select("SELECT sum(/foo/) + count(foo) FROM cpu"); - assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "internal: unexpected wildcard or regex"); + assert_error!(select_statement_info(&sel), DataFusionError::External(ref s) if s.to_string() == "InfluxQL internal error: unexpected wildcard or regex"); // aggregate functions require a field reference let sel = parse_select("SELECT sum(1) FROM cpu"); From 482221a7d4d29b7dd90acfb1487059a11ab9ec41 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Fri, 28 Apr 2023 09:56:07 +1000 Subject: [PATCH 002/119] chore: Redundant attribute --- iox_query_influxql/src/plan/rewriter.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs index da8f0f7f7c..220426f79f 100644 --- a/iox_query_influxql/src/plan/rewriter.rs +++ b/iox_query_influxql/src/plan/rewriter.rs @@ -1,5 +1,3 @@ -#![allow(dead_code)] - use crate::plan::expr_type_evaluator::evaluate_type; use crate::plan::field::{field_by_name, field_name}; use crate::plan::field_mapper::{field_and_dimensions, FieldTypeMap, TagSet}; From 89143a72fa427f21a771735df9e8d72d6a1fa749 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Fri, 28 Apr 2023 10:08:30 +1000 Subject: [PATCH 003/119] chore: Remove schema::Schema dependency Subquery support cannot rely on IOx schema, as this metadata does not propagate through DataFusion `LogicalPlan` nodes. --- iox_query_influxql/src/plan/planner.rs | 20 ++++++++-------- .../src/plan/planner_rewrite_expression.rs | 24 +++++++------------ iox_query_influxql/src/plan/util.rs | 23 +++++++----------- iox_query_influxql/src/plan/var_ref.rs | 21 ++++++++++------ 4 files changed, 42 insertions(+), 46 deletions(-) diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index 6e3a16ab58..1cc424ffd9 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -11,7 +11,7 @@ use crate::plan::rewriter::{ rewrite_statement, select_statement_info, ProjectionType, SelectStatementInfo, }; use crate::plan::util::{binary_operator_to_df_operator, rebase_expr, Schemas}; -use crate::plan::var_ref::{column_type_to_var_ref_data_type, var_ref_data_type_to_data_type}; +use crate::plan::var_ref::{data_type_to_var_ref_data_type, var_ref_data_type_to_data_type}; use crate::plan::{error, planner_rewrite_expression}; use arrow::array::{StringBuilder, StringDictionaryBuilder}; use arrow::datatypes::{DataType, Field as ArrowField, Int32Type, Schema as ArrowSchema}; @@ -599,11 +599,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> { // Exclude tags that do not exist in the current table schema. group_by_exprs.extend(group_by_tag_set.iter().filter_map(|name| { - if schemas - .iox_schema - .field_by_name(name) - .map_or(false, |(dt, _)| dt == InfluxColumnType::Tag) - { + if schemas.is_tag_field(name) { Some(name.as_expr()) } else { None @@ -991,7 +987,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> { /// Map an InfluxQL [`IQLExpr`] to a DataFusion [`Expr`]. 
fn expr_to_df_expr(&self, ctx: &Context<'_>, iql: &IQLExpr, schemas: &Schemas) -> Result { - let iox_schema = &schemas.iox_schema; + let schema = &schemas.df_schema; match iql { // rewriter is expected to expand wildcard expressions IQLExpr::Wildcard(_) => error::internal("unexpected wildcard in projection"), @@ -1009,12 +1005,16 @@ impl<'a> InfluxQLToLogicalPlan<'a> { "time".as_expr() } (ExprScope::Projection, "time") => "time".as_expr(), - (_, name) => match iox_schema.field_by_name(name) { - Some((col_type, _)) => { + (_, name) => match schema + .fields_with_unqualified_name(name) + .first() + .map(|f| f.data_type().clone()) + { + Some(col_type) => { let column = name.as_expr(); + let src_type = data_type_to_var_ref_data_type(col_type)?; match opt_dst_type { Some(dst_type) => { - let src_type = column_type_to_var_ref_data_type(col_type); if src_type == *dst_type { column } else if src_type.is_numeric_type() diff --git a/iox_query_influxql/src/plan/planner_rewrite_expression.rs b/iox_query_influxql/src/plan/planner_rewrite_expression.rs index b0b4dc12fb..08a0189a7b 100644 --- a/iox_query_influxql/src/plan/planner_rewrite_expression.rs +++ b/iox_query_influxql/src/plan/planner_rewrite_expression.rs @@ -132,7 +132,6 @@ use datafusion::logical_expr::{ binary_expr, cast, coalesce, lit, BinaryExpr, Expr, ExprSchemable, Operator, }; use datafusion::optimizer::utils::{conjunction, disjunction}; -use schema::{InfluxColumnType, InfluxFieldType}; /// Perform a series of passes to rewrite `expr` in compliance with InfluxQL behavior /// in an effort to ensure the query executes without error. @@ -770,19 +769,17 @@ impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> { op: op @ (Operator::RegexMatch | Operator::RegexNotMatch), right, }) => { - if let Expr::Column(ref col) = *left { - match self.schemas.iox_schema.field_by_name(&col.name) { - Some((InfluxColumnType::Tag, _)) => { + Ok(if let Expr::Column(ref col) = *left { + match self.schemas.df_schema.field_from_column(col)?.data_type() { + DataType::Dictionary(..) 
=> { // Regular expressions expect to be compared with a Utf8 let left = Box::new(left.cast_to(&DataType::Utf8, &self.schemas.df_schema)?); - Ok(Expr::BinaryExpr(BinaryExpr { left, op, right })) - } - Some((InfluxColumnType::Field(InfluxFieldType::String), _)) => { - Ok(Expr::BinaryExpr(BinaryExpr { left, op, right })) + Expr::BinaryExpr(BinaryExpr { left, op, right }) } + DataType::Utf8 => Expr::BinaryExpr(BinaryExpr { left, op, right }), // Any other column type should evaluate to false - _ => Ok(lit(false)), + _ => lit(false), } } else { // If this is not a simple column expression, evaluate to false, @@ -798,8 +795,8 @@ impl<'a> TreeNodeRewriter for FixRegularExpressions<'a> { // Reference example: // // * `SELECT f64 FROM m0 WHERE tag0 = '' + tag0` - Ok(lit(false)) - } + lit(false) + }) } _ => Ok(expr), } @@ -829,10 +826,7 @@ mod test { .build() .expect("schema failed"); let df_schema: DFSchemaRef = Arc::clone(iox_schema.inner()).to_dfschema_ref().unwrap(); - Schemas { - df_schema, - iox_schema, - } + Schemas { df_schema } } #[test] diff --git a/iox_query_influxql/src/plan/util.rs b/iox_query_influxql/src/plan/util.rs index 5ddd9d5894..afd0d507d1 100644 --- a/iox_query_influxql/src/plan/util.rs +++ b/iox_query_influxql/src/plan/util.rs @@ -1,6 +1,6 @@ use crate::plan::{error, util_copy}; use arrow::datatypes::{DataType, TimeUnit}; -use datafusion::common::{DFSchema, DFSchemaRef, Result}; +use datafusion::common::{DFSchemaRef, Result}; use datafusion::logical_expr::utils::expr_as_column_expr; use datafusion::logical_expr::{lit, Expr, ExprSchemable, LogicalPlan, Operator}; use datafusion::scalar::ScalarValue; @@ -9,7 +9,6 @@ use influxdb_influxql_parser::literal::Number; use influxdb_influxql_parser::string::Regex; use query_functions::clean_non_meta_escapes; use query_functions::coalesce_struct::coalesce_struct; -use schema::Schema; use std::sync::Arc; pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Operator { @@ -25,29 +24,25 @@ pub(in crate::plan) fn binary_operator_to_df_operator(op: BinaryOperator) -> Ope } } -/// Return the IOx schema for the specified DataFusion schema. -pub(in crate::plan) fn schema_from_df(schema: &DFSchema) -> Result { - let s: Arc = Arc::new(schema.into()); - s.try_into().map_err(|err| { - error::map::internal(format!( - "unable to convert DataFusion schema to IOx schema: {err}" - )) - }) -} - /// Container for both the DataFusion and equivalent IOx schema. pub(in crate::plan) struct Schemas { pub(in crate::plan) df_schema: DFSchemaRef, - pub(in crate::plan) iox_schema: Schema, } impl Schemas { pub(in crate::plan) fn new(df_schema: &DFSchemaRef) -> Result { Ok(Self { df_schema: Arc::clone(df_schema), - iox_schema: schema_from_df(df_schema)?, }) } + + /// Returns `true` if the field `name` is a tag type. + pub(super) fn is_tag_field(&self, name: &str) -> bool { + self.df_schema + .fields() + .iter() + .any(|f| f.name() == name && matches!(f.data_type(), DataType::Dictionary(..))) + } } /// Sanitize an InfluxQL regular expression and create a compiled [`regex::Regex`]. 
diff --git a/iox_query_influxql/src/plan/var_ref.rs b/iox_query_influxql/src/plan/var_ref.rs index cb5f41db16..0e658072d3 100644 --- a/iox_query_influxql/src/plan/var_ref.rs +++ b/iox_query_influxql/src/plan/var_ref.rs @@ -1,6 +1,8 @@ +use crate::plan::error; use arrow::datatypes::DataType; +use datafusion::common::Result; use influxdb_influxql_parser::expression::VarRefDataType; -use schema::{InfluxColumnType, InfluxFieldType}; +use schema::InfluxFieldType; pub(crate) fn var_ref_data_type_to_data_type(v: VarRefDataType) -> Option { match v { @@ -25,12 +27,17 @@ pub(crate) fn field_type_to_var_ref_data_type(v: InfluxFieldType) -> VarRefDataT } } -/// Maps an [`InfluxColumnType`] to a [`VarRefDataType`]. -pub(crate) fn column_type_to_var_ref_data_type(v: InfluxColumnType) -> VarRefDataType { - match v { - InfluxColumnType::Tag => VarRefDataType::Tag, - InfluxColumnType::Field(ft) => field_type_to_var_ref_data_type(ft), - InfluxColumnType::Timestamp => VarRefDataType::Timestamp, +/// Maps an Arrow [`DataType`] to a [`VarRefDataType`]. +pub(crate) fn data_type_to_var_ref_data_type(dt: DataType) -> Result { + match dt { + DataType::Dictionary(..) => Ok(VarRefDataType::Tag), + DataType::Timestamp(..) => Ok(VarRefDataType::Timestamp), + DataType::Utf8 => Ok(VarRefDataType::String), + DataType::Int64 => Ok(VarRefDataType::Integer), + DataType::UInt64 => Ok(VarRefDataType::Unsigned), + DataType::Float64 => Ok(VarRefDataType::Float), + DataType::Boolean => Ok(VarRefDataType::Boolean), + _ => error::internal(format!("unable to map Arrow type {dt} to VarRefDataType")), } } From 24378bd460ccd167f4717cf198c57be4ccdfe3f1 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Wed, 3 May 2023 07:47:14 +1000 Subject: [PATCH 004/119] chore: `time` is always present and at first position of projection --- .../src/expression/arithmetic.rs | 16 +++ iox_query_influxql/src/plan/rewriter.rs | 127 +++++++++++++++--- 2 files changed, 126 insertions(+), 17 deletions(-) diff --git a/influxdb_influxql_parser/src/expression/arithmetic.rs b/influxdb_influxql_parser/src/expression/arithmetic.rs index 3c3e784c4c..f36037f006 100644 --- a/influxdb_influxql_parser/src/expression/arithmetic.rs +++ b/influxdb_influxql_parser/src/expression/arithmetic.rs @@ -194,6 +194,22 @@ impl Display for Expr { } } +/// Traits to help creating InfluxQL [`Expr`]s containing +/// a [`VarRef`]. +pub trait AsVarRefExpr { + /// Creates an InfluxQL [`VarRef`] expression. + fn to_var_ref_expr(&self) -> Expr; +} + +impl AsVarRefExpr for str { + fn to_var_ref_expr(&self) -> Expr { + Expr::VarRef(VarRef { + name: self.into(), + data_type: None, + }) + } +} + /// Specifies the data type of a wildcard (`*`) when using the `::` operator. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum WildcardType { diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs index 220426f79f..227e3ead89 100644 --- a/iox_query_influxql/src/plan/rewriter.rs +++ b/iox_query_influxql/src/plan/rewriter.rs @@ -5,7 +5,9 @@ use crate::plan::{error, util, SchemaProvider}; use datafusion::common::{DataFusionError, Result}; use influxdb_influxql_parser::common::{MeasurementName, QualifiedMeasurementName}; use influxdb_influxql_parser::expression::walk::{walk_expr, walk_expr_mut}; -use influxdb_influxql_parser::expression::{Call, Expr, VarRef, VarRefDataType, WildcardType}; +use influxdb_influxql_parser::expression::{ + AsVarRefExpr, Call, Expr, VarRef, VarRefDataType, WildcardType, +}; use influxdb_influxql_parser::functions::is_scalar_math_function; use influxdb_influxql_parser::identifier::Identifier; use influxdb_influxql_parser::literal::Literal; @@ -18,6 +20,73 @@ use std::borrow::Borrow; use std::collections::{HashMap, HashSet}; use std::ops::{ControlFlow, Deref}; +/// Recursively rewrite the specified [`SelectStatement`] by performing a series of passes +/// to validate and normalize the statement. +pub(crate) fn rewrite_statement( + s: &dyn SchemaProvider, + q: &SelectStatement, +) -> Result { + let mut stmt = q.clone(); + from_expand_wildcards(s, &mut stmt)?; + field_list_expand_wildcards(s, &mut stmt)?; + from_drop_empty(s, &mut stmt); + field_list_normalize_time(&mut stmt); + field_list_rewrite_aliases(&mut stmt.fields)?; + + Ok(stmt) +} + +/// Ensure the time field is added to all projections, +/// and is moved to the first position, which is a requirement +/// for InfluxQL compatibility. +fn field_list_normalize_time(stmt: &mut SelectStatement) { + fn normalize_time(stmt: &mut SelectStatement, is_subquery: bool) { + let mut fields = stmt.fields.take(); + + if let Some(f) = match fields + .iter() + .find_position( + |f| matches!(&f.expr, Expr::VarRef(VarRef { name, .. }) if name.deref() == "time"), + ) + .map(|(i, _)| i) + { + Some(0) => None, + Some(idx) => Some(fields.remove(idx)), + None => Some(Field { + expr: "time".to_var_ref_expr(), + alias: None, + }), + } { + fields.insert(0, f) + } + + let f = &mut fields[0]; + + // time aliases in subqueries is ignored + if f.alias.is_none() || is_subquery { + f.alias = Some("time".into()) + } + + if let Expr::VarRef(VarRef { + ref mut data_type, .. + }) = f.expr + { + *data_type = Some(VarRefDataType::Timestamp); + } + + stmt.fields.replace(fields); + } + + normalize_time(stmt, false); + + for stmt in stmt.from.iter_mut().filter_map(|ms| match ms { + MeasurementSelection::Subquery(stmt) => Some(stmt), + _ => None, + }) { + normalize_time(stmt, true) + } +} + /// Recursively expand the `from` clause of `stmt` and any subqueries. fn from_expand_wildcards(s: &dyn SchemaProvider, stmt: &mut SelectStatement) -> Result<()> { let mut new_from = Vec::new(); @@ -535,21 +604,6 @@ fn field_list_rewrite_aliases(field_list: &mut FieldList) -> Result<()> { Ok(()) } -/// Recursively rewrite the specified [`SelectStatement`], expanding any wildcards or regular expressions -/// found in the projection list, `FROM` clause or `GROUP BY` clause. 
-pub(crate) fn rewrite_statement( - s: &dyn SchemaProvider, - q: &SelectStatement, -) -> Result { - let mut stmt = q.clone(); - from_expand_wildcards(s, &mut stmt)?; - field_list_expand_wildcards(s, &mut stmt)?; - from_drop_empty(s, &mut stmt); - field_list_rewrite_aliases(&mut stmt.fields)?; - - Ok(stmt) -} - /// Check the length of the arguments slice is within /// the expected bounds. macro_rules! check_exp_args { @@ -1265,13 +1319,52 @@ pub(crate) fn select_statement_info(q: &SelectStatement) -> Result Date: Wed, 3 May 2023 09:23:15 +1000 Subject: [PATCH 005/119] chore: add additional tests --- iox_query_influxql/src/plan/rewriter.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs index 227e3ead89..07721395bc 100644 --- a/iox_query_influxql/src/plan/rewriter.rs +++ b/iox_query_influxql/src/plan/rewriter.rs @@ -1345,6 +1345,7 @@ mod test { "SELECT time::timestamp AS time, foo, bar FROM cpu" ); + // Maintains alias for time column let mut sel = parse_select("SELECT time as ts, foo, bar FROM cpu"); field_list_normalize_time(&mut sel); assert_eq!( @@ -1354,7 +1355,7 @@ mod test { // subqueries - // adds time to to first position + // adds time to to first position of root and subquery let mut sel = parse_select("SELECT foo FROM (SELECT foo, bar FROM cpu)"); field_list_normalize_time(&mut sel); assert_eq!( @@ -1362,7 +1363,15 @@ mod test { "SELECT time::timestamp AS time, foo FROM (SELECT time::timestamp AS time, foo, bar FROM cpu)" ); - // TODO(sgc): add remaining subquery tests + // Removes and ignores alias of time column within subquery, ignores alias in root and adds time column + // + // Whilst confusing, this matching InfluxQL behaviour + let mut sel = parse_select("SELECT ts, foo FROM (SELECT time as ts, foo, bar FROM cpu)"); + field_list_normalize_time(&mut sel); + assert_eq!( + sel.to_string(), + "SELECT time::timestamp AS time, ts, foo FROM (SELECT time::timestamp AS time, foo, bar FROM cpu)" + ); } #[test] From 19ea80390a1aa873d4b65eb49701724562c18912 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Wed, 3 May 2023 11:04:54 +1000 Subject: [PATCH 006/119] chore: ignore time column when processing SELECT --- .../cases/in/issue_6112.influxql.expected | 22 +++--- iox_query_influxql/src/plan/planner.rs | 26 +++---- iox_query_influxql/src/plan/rewriter.rs | 70 ++++++++++--------- 3 files changed, 56 insertions(+), 62 deletions(-) diff --git a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected index 9581b93e23..c69c7137eb 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected +++ b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected @@ -70,17 +70,17 @@ name: m0 +---------------------+------+-------+ -- InfluxQL: SELECT f64, tag0, time FROM m0; name: m0 -+------+-------+---------------------+ -| f64 | tag0 | time | -+------+-------+---------------------+ -| 10.1 | val00 | 2022-10-31T02:00:00 | -| 11.3 | val01 | 2022-10-31T02:00:00 | -| 10.4 | val02 | 2022-10-31T02:00:00 | -| 21.2 | val00 | 2022-10-31T02:00:10 | -| 18.9 | val00 | 2022-10-31T02:00:10 | -| 11.2 | val00 | 2022-10-31T02:00:20 | -| 19.2 | val00 | 2022-10-31T02:00:30 | -+------+-------+---------------------+ ++---------------------+------+-------+ +| time | f64 | tag0 | ++---------------------+------+-------+ +| 2022-10-31T02:00:00 | 10.1 | val00 | +| 2022-10-31T02:00:00 | 11.3 
| val01 | +| 2022-10-31T02:00:00 | 10.4 | val02 | +| 2022-10-31T02:00:10 | 21.2 | val00 | +| 2022-10-31T02:00:10 | 18.9 | val00 | +| 2022-10-31T02:00:20 | 11.2 | val00 | +| 2022-10-31T02:00:30 | 19.2 | val00 | ++---------------------+------+-------+ -- InfluxQL: SELECT f64, f64 * 2, i64, i64 + i64 FROM m0; name: m0 +---------------------+------+-------+-----+---------+ diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index 1cc424ffd9..e5ed74fe67 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -59,7 +59,7 @@ use influxdb_influxql_parser::{ expression::Expr as IQLExpr, identifier::Identifier, literal::Literal, - select::{Field, FieldList, FromMeasurementClause, MeasurementSelection, SelectStatement}, + select::{Field, FromMeasurementClause, MeasurementSelection, SelectStatement}, statement::Statement, }; use iox_query::config::{IoxConfigExt, MetadataCutoff}; @@ -284,18 +284,10 @@ impl<'a> InfluxQLToLogicalPlan<'a> { .with_timezone(select.timezone) .with_group_by_fill(select); - // The `time` column is always present in the result set - let mut fields = if find_time_column_index(&select.fields).is_none() { - vec![Field { - expr: IQLExpr::VarRef(VarRef { - name: "time".into(), - data_type: Some(VarRefDataType::Timestamp), - }), - alias: Some("time".into()), - }] - } else { - vec![] - }; + // Skip the `time` column + let fields_no_time = &select.fields[1..]; + // always start with the time column + let mut fields = vec![select.fields.first().cloned().unwrap()]; // group_by_tag_set : a list of tag columns specified in the GROUP BY clause // projection_tag_set : a list of tag columns specified exclusively in the SELECT projection @@ -304,7 +296,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> { let (group_by_tag_set, projection_tag_set, is_projected) = if let Some(group_by) = &select.group_by { let mut tag_columns = - find_tag_and_unknown_columns(&select.fields).collect::>(); + find_tag_and_unknown_columns(fields_no_time).collect::>(); // Find the list of tag keys specified in the `GROUP BY` clause, and // whether any of the tag keys are also projected in the SELECT list. @@ -344,13 +336,13 @@ impl<'a> InfluxQLToLogicalPlan<'a> { is_projected, ) } else { - let tag_columns = find_tag_and_unknown_columns(&select.fields) + let tag_columns = find_tag_and_unknown_columns(fields_no_time) .sorted() .collect::>(); (vec![], tag_columns, vec![]) }; - fields.extend(select.fields.iter().cloned()); + fields.extend(fields_no_time.iter().cloned()); // Build the first non-empty plan let plan = { @@ -2095,7 +2087,7 @@ fn is_aggregate_field(f: &Field) -> bool { /// Find all the columns where the resolved data type /// is a tag or is [`None`], which is unknown. -fn find_tag_and_unknown_columns(fields: &FieldList) -> impl Iterator { +fn find_tag_and_unknown_columns(fields: &[Field]) -> impl Iterator { fields.iter().filter_map(|f| match &f.expr { IQLExpr::VarRef(VarRef { name, diff --git a/iox_query_influxql/src/plan/rewriter.rs b/iox_query_influxql/src/plan/rewriter.rs index 07721395bc..9042cc04e9 100644 --- a/iox_query_influxql/src/plan/rewriter.rs +++ b/iox_query_influxql/src/plan/rewriter.rs @@ -779,6 +779,8 @@ impl FieldChecker { impl FieldChecker { fn check_expr(&mut self, e: &Expr) -> Result<()> { match e { + // The `time` column is ignored + Expr::VarRef(VarRef { name, .. 
}) if name.deref() == "time" => Ok(()), Expr::VarRef(_) => { self.has_non_aggregate_fields = true; Ok(()) @@ -1679,7 +1681,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_user::float AS usage_user FROM cpu" + "SELECT time::timestamp AS time, usage_user::float AS usage_user FROM cpu" ); // Duplicate columns do not have conflicting aliases @@ -1687,7 +1689,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_user::float AS usage_user, usage_user::float AS usage_user_1 FROM cpu" + "SELECT time::timestamp AS time, usage_user::float AS usage_user, usage_user::float AS usage_user_1 FROM cpu" ); // Multiple aliases with no conflicts @@ -1695,21 +1697,21 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_user::float AS usage_user_1, usage_user::float AS usage_user FROM cpu" + "SELECT time::timestamp AS time, usage_user::float AS usage_user_1, usage_user::float AS usage_user FROM cpu" ); // Multiple aliases with conflicts let stmt = parse_select("SELECT usage_user as usage_user_1, usage_user, usage_user, usage_user as usage_user_2, usage_user, usage_user_2 FROM cpu"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); - assert_eq!(stmt.to_string(), "SELECT usage_user::float AS usage_user_1, usage_user::float AS usage_user, usage_user::float AS usage_user_3, usage_user::float AS usage_user_2, usage_user::float AS usage_user_4, usage_user_2 AS usage_user_2_1 FROM cpu"); + assert_eq!(stmt.to_string(), "SELECT time::timestamp AS time, usage_user::float AS usage_user_1, usage_user::float AS usage_user, usage_user::float AS usage_user_3, usage_user::float AS usage_user_2, usage_user::float AS usage_user_4, usage_user_2 AS usage_user_2_1 FROM cpu"); // Only include measurements with at least one field projection let stmt = parse_select("SELECT usage_idle FROM cpu, disk"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle FROM cpu" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu" ); // Rewriting FROM clause @@ -1719,7 +1721,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer AS bytes_free, bytes_read::integer AS bytes_read FROM disk, diskio" + "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_read::integer AS bytes_read FROM disk, diskio" ); // Regex matches multiple measurement, but only one has a matching field @@ -1727,7 +1729,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer AS bytes_free FROM disk" + "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free FROM disk" ); // Exact, no match @@ -1747,14 +1749,14 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu" + "SELECT time::timestamp AS time, cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu" ); let stmt = parse_select("SELECT * FROM cpu, disk"); let stmt = rewrite_statement(&namespace, 
&stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, cpu::tag AS cpu, device::tag AS device, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk" + "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, cpu::tag AS cpu, device::tag AS device, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk" ); // Regular expression selects fields from multiple measurements @@ -1762,7 +1764,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk" + "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free, bytes_used::integer AS bytes_used, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu, disk" ); // Selective wildcard for tags @@ -1770,7 +1772,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle FROM cpu" + "SELECT time::timestamp AS time, cpu::tag AS cpu, host::tag AS host, region::tag AS region, usage_idle::float AS usage_idle FROM cpu" ); // Selective wildcard for tags only should not select any measurements @@ -1783,7 +1785,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu" ); // Mixed fields and wildcards @@ -1791,7 +1793,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle, cpu::tag AS cpu, host::tag AS host, region::tag AS region FROM cpu" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle, cpu::tag AS cpu, host::tag AS host, region::tag AS region FROM cpu" ); // GROUP BY expansion @@ -1800,14 +1802,14 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle FROM cpu GROUP BY host" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu GROUP BY host" ); let stmt = parse_select("SELECT usage_idle FROM cpu GROUP BY *"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle FROM cpu GROUP BY cpu, host, region" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu GROUP BY cpu, host, region" ); // Does not include tags in projection when expanded in GROUP BY @@ -1815,7 +1817,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region" + "SELECT time::timestamp AS time, usage_idle::float AS 
usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region" ); // Does include explicitly listed tags in projection @@ -1823,7 +1825,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT host::tag AS host, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region" + "SELECT time::timestamp AS time, host::tag AS host, usage_idle::float AS usage_idle, usage_system::float AS usage_system, usage_user::float AS usage_user FROM cpu GROUP BY cpu, host, region" ); // Fallible @@ -1840,7 +1842,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle FROM (SELECT usage_idle::float FROM cpu)" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM (SELECT time::timestamp AS time, usage_idle::float FROM cpu)" ); // Subquery, regex, match @@ -1848,7 +1850,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer AS bytes_free FROM (SELECT bytes_free::integer, bytes_read::integer FROM disk, diskio)" + "SELECT time::timestamp AS time, bytes_free::integer AS bytes_free FROM (SELECT time::timestamp AS time, bytes_free::integer, bytes_read::integer FROM disk, diskio)" ); // Subquery, exact, no match @@ -1866,7 +1868,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_system_usage_idle::float AS usage_system_usage_idle FROM (SELECT usage_system::float + usage_idle::float FROM cpu)" + "SELECT time::timestamp AS time, usage_system_usage_idle::float AS usage_system_usage_idle FROM (SELECT time::timestamp AS time, usage_system::float + usage_idle::float FROM cpu)" ); // Subquery, no fields projected should be dropped @@ -1874,7 +1876,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT usage_idle::float AS usage_idle FROM cpu" + "SELECT time::timestamp AS time, usage_idle::float AS usage_idle FROM cpu" ); // Outer query are permitted to project tags only, as long as there are other fields @@ -1883,7 +1885,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT cpu::tag AS cpu FROM (SELECT cpu::tag, usage_system::float FROM cpu)" + "SELECT time::timestamp AS time, cpu::tag AS cpu FROM (SELECT time::timestamp AS time, cpu::tag, usage_system::float FROM cpu)" ); // Outer FROM should be empty, as the subquery does not project any fields @@ -1896,7 +1898,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT bytes_free::integer + bytes_used::integer AS bytes_free_bytes_used FROM disk" + "SELECT time::timestamp AS time, bytes_free::integer + bytes_used::integer AS bytes_free_bytes_used FROM disk" ); // Unary expressions @@ -1904,7 +1906,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT -1 * bytes_free::integer AS bytes_free FROM disk" + "SELECT time::timestamp AS time, -1 * bytes_free::integer AS bytes_free FROM disk" ); // DISTINCT clause @@ -1914,14 +1916,14 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(distinct(bytes_free::integer)) AS count FROM disk" + "SELECT 
time::timestamp AS time, count(distinct(bytes_free::integer)) AS count FROM disk" ); let stmt = parse_select("SELECT DISTINCT bytes_free FROM disk"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT distinct(bytes_free::integer) AS \"distinct\" FROM disk" + "SELECT time::timestamp AS time, distinct(bytes_free::integer) AS \"distinct\" FROM disk" ); // Call expressions @@ -1930,7 +1932,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(field_i64::integer) AS count FROM temp_01" + "SELECT time::timestamp AS time, count(field_i64::integer) AS count FROM temp_01" ); // Duplicate aggregate columns @@ -1938,14 +1940,14 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(field_i64::integer) AS count, count(field_i64::integer) AS count_1 FROM temp_01" + "SELECT time::timestamp AS time, count(field_i64::integer) AS count, count(field_i64::integer) AS count_1 FROM temp_01" ); let stmt = parse_select("SELECT COUNT(field_f64) FROM temp_01"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(field_f64::float) AS count FROM temp_01" + "SELECT time::timestamp AS time, count(field_f64::float) AS count FROM temp_01" ); // Expands all fields @@ -1953,7 +1955,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_str::string) AS count_field_str, count(field_u64::unsigned) AS count_field_u64, count(shared_field0::float) AS count_shared_field0 FROM temp_01" + "SELECT time::timestamp AS time, count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_str::string) AS count_field_str, count(field_u64::unsigned) AS count_field_u64, count(shared_field0::float) AS count_shared_field0 FROM temp_01" ); // Expands matching fields @@ -1961,7 +1963,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_u64::unsigned) AS count_field_u64 FROM temp_01" + "SELECT time::timestamp AS time, count(field_f64::float) AS count_field_f64, count(field_i64::integer) AS count_field_i64, count(field_u64::unsigned) AS count_field_u64 FROM temp_01" ); // Expands only numeric fields @@ -1969,14 +1971,14 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT sum(field_f64::float) AS sum_field_f64, sum(field_i64::integer) AS sum_field_i64, sum(field_u64::unsigned) AS sum_field_u64, sum(shared_field0::float) AS sum_shared_field0 FROM temp_01" + "SELECT time::timestamp AS time, sum(field_f64::float) AS sum_field_f64, sum(field_i64::integer) AS sum_field_i64, sum(field_u64::unsigned) AS sum_field_u64, sum(shared_field0::float) AS sum_shared_field0 FROM temp_01" ); let stmt = parse_select("SELECT * FROM merge_00, merge_01"); let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT col0::float AS col0, col0::tag AS col0_1, col1::float AS col1, col1::tag AS col1_1, col2::string AS col2, col3::string AS col3 FROM merge_00, merge_01" + "SELECT time::timestamp AS time, col0::float AS col0, col0::tag AS col0_1, col1::float AS col1, col1::tag AS col1_1, 
col2::string AS col2, col3::string AS col3 FROM merge_00, merge_01" ); // This should only select merge_01, as col0 is a tag in merge_00 @@ -1984,7 +1986,7 @@ mod test { let stmt = rewrite_statement(&namespace, &stmt).unwrap(); assert_eq!( stmt.to_string(), - "SELECT col0::float AS col0, col0::tag AS col0_1 FROM merge_01" + "SELECT time::timestamp AS time, col0::float AS col0, col0::tag AS col0_1 FROM merge_01" ); // Fallible cases From 43baecbb1a142b5ccd6e1d4efd53f77648a39dc4 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Wed, 3 May 2023 12:04:24 +1000 Subject: [PATCH 007/119] chore: handle aliased time column in sort expression --- .../query_tests2/cases/in/issue_6112.influxql | 8 ++- .../cases/in/issue_6112.influxql.expected | 26 ++++++++++ iox_query_influxql/src/plan/planner.rs | 50 +++++++++++++++---- iox_query_influxql/src/plan/planner/select.rs | 20 -------- 4 files changed, 74 insertions(+), 30 deletions(-) diff --git a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql index 2af3ffb2ff..b9d94dfeb4 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql +++ b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql @@ -24,9 +24,15 @@ SELECT /64|tag0/ FROM m0; -- Projection specific tags and fields SELECT f64, tag0 FROM m0; --- Explicitly select time column +-- Explicitly select time column, should appear in first column SELECT f64, tag0, time FROM m0; +-- Alias time column +SELECT f64, tag0, time as timestamp FROM m0; + +-- Alias field and tag columns +SELECT f64 as f, tag0 as t FROM m0; + -- arithmetic operators SELECT f64, f64 * 2, i64, i64 + i64 FROM m0; diff --git a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected index c69c7137eb..f813926124 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected +++ b/influxdb_iox/tests/query_tests2/cases/in/issue_6112.influxql.expected @@ -81,6 +81,32 @@ name: m0 | 2022-10-31T02:00:20 | 11.2 | val00 | | 2022-10-31T02:00:30 | 19.2 | val00 | +---------------------+------+-------+ +-- InfluxQL: SELECT f64, tag0, time as timestamp FROM m0; +name: m0 ++---------------------+------+-------+ +| timestamp | f64 | tag0 | ++---------------------+------+-------+ +| 2022-10-31T02:00:00 | 10.1 | val00 | +| 2022-10-31T02:00:00 | 11.3 | val01 | +| 2022-10-31T02:00:00 | 10.4 | val02 | +| 2022-10-31T02:00:10 | 21.2 | val00 | +| 2022-10-31T02:00:10 | 18.9 | val00 | +| 2022-10-31T02:00:20 | 11.2 | val00 | +| 2022-10-31T02:00:30 | 19.2 | val00 | ++---------------------+------+-------+ +-- InfluxQL: SELECT f64 as f, tag0 as t FROM m0; +name: m0 ++---------------------+------+-------+ +| time | f | t | ++---------------------+------+-------+ +| 2022-10-31T02:00:00 | 10.1 | val00 | +| 2022-10-31T02:00:00 | 11.3 | val01 | +| 2022-10-31T02:00:00 | 10.4 | val02 | +| 2022-10-31T02:00:10 | 21.2 | val00 | +| 2022-10-31T02:00:10 | 18.9 | val00 | +| 2022-10-31T02:00:20 | 11.2 | val00 | +| 2022-10-31T02:00:30 | 19.2 | val00 | ++---------------------+------+-------+ -- InfluxQL: SELECT f64, f64 * 2, i64, i64 + i64 FROM m0; name: m0 +---------------------+------+-------+-----+---------+ diff --git a/iox_query_influxql/src/plan/planner.rs b/iox_query_influxql/src/plan/planner.rs index e5ed74fe67..6ee3d756af 100644 --- a/iox_query_influxql/src/plan/planner.rs +++ b/iox_query_influxql/src/plan/planner.rs @@ -1,8 +1,7 @@ mod select; use 
crate::plan::planner::select::{ - check_exprs_satisfy_columns, fields_to_exprs_no_nulls, make_tag_key_column_meta, - plan_with_sort, ToSortExpr, + check_exprs_satisfy_columns, fields_to_exprs_no_nulls, make_tag_key_column_meta, plan_with_sort, }; use crate::plan::planner_time_range_expression::{ duration_expr_to_nanoseconds, expr_to_df_interval_dt, time_range_to_df_expr, @@ -34,7 +33,7 @@ use datafusion::logical_expr::{ use datafusion::prelude::{cast, sum, when, Column}; use datafusion_util::{lit_dict, AsExpr}; use generated_types::influxdata::iox::querier::v1::InfluxQlMetadata; -use influxdb_influxql_parser::common::{LimitClause, OffsetClause}; +use influxdb_influxql_parser::common::{LimitClause, OffsetClause, OrderByClause}; use influxdb_influxql_parser::explain::{ExplainOption, ExplainStatement}; use influxdb_influxql_parser::expression::walk::walk_expr; use influxdb_influxql_parser::expression::{ @@ -419,9 +418,26 @@ impl<'a> InfluxQLToLogicalPlan<'a> { // The UNION operator indicates the result set produces multiple tables or measurements. let is_multiple_measurements = matches!(plan, LogicalPlan::Union(_)); + // the sort planner node must refer to the time column using + // the alias that was specified + let time_alias = fields[0] + .alias + .as_ref() + .map(|id| id.deref().as_str()) + .unwrap_or("time"); + + let time_sort_expr = time_alias.as_expr().sort( + match select.order_by { + // Default behaviour is to sort by time in ascending order if there is no ORDER BY + None | Some(OrderByClause::Ascending) => true, + Some(OrderByClause::Descending) => false, + }, + false, + ); + let plan = plan_with_sort( plan, - vec![select.order_by.to_sort_expr()], + vec![time_sort_expr.clone()], is_multiple_measurements, &group_by_tag_set, &projection_tag_set, @@ -431,7 +447,7 @@ impl<'a> InfluxQLToLogicalPlan<'a> { plan, select.offset, select.limit, - vec![select.order_by.to_sort_expr()], + vec![time_sort_expr], is_multiple_measurements, &group_by_tag_set, &projection_tag_set, @@ -2649,6 +2665,23 @@ mod test { mod select { use super::*; + #[test] + fn test_time_column() { + // validate time column is explicitly projected + assert_snapshot!(plan("SELECT usage_idle, time FROM cpu"), @r###" + Sort: time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS time, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), usage_idle:Float64;N] + TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] + "###); + + // validate time column may be aliased + assert_snapshot!(plan("SELECT usage_idle, time AS timestamp FROM cpu"), @r###" + Sort: timestamp ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), usage_idle:Float64;N] + Projection: Dictionary(Int32, Utf8("cpu")) AS iox::measurement, cpu.time AS timestamp, cpu.usage_idle AS usage_idle [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), usage_idle:Float64;N] + TableScan: cpu [cpu:Dictionary(Int32, Utf8);N, host:Dictionary(Int32, Utf8);N, region:Dictionary(Int32, Utf8);N, time:Timestamp(Nanosecond, None), usage_idle:Float64;N, usage_system:Float64;N, usage_user:Float64;N] + "###); + } + /// Tests for the `DISTINCT` clause and `DISTINCT` 
function #[test] fn test_distinct() { @@ -3477,10 +3510,9 @@ mod test { TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT time as timestamp, f64_field FROM data"), @r###" - Projection: iox::measurement, timestamp, f64_field [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N] - Sort: data.time ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N, time:Timestamp(Nanosecond, None)] - Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS timestamp, data.f64_field AS f64_field, data.time [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N, time:Timestamp(Nanosecond, None)] - TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] + Sort: timestamp ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N] + Projection: Dictionary(Int32, Utf8("data")) AS iox::measurement, data.time AS timestamp, data.f64_field AS f64_field [iox::measurement:Dictionary(Int32, Utf8), timestamp:Timestamp(Nanosecond, None), f64_field:Float64;N] + TableScan: data [TIME:Boolean;N, bar:Dictionary(Int32, Utf8);N, bool_field:Boolean;N, f64_field:Float64;N, foo:Dictionary(Int32, Utf8);N, i64_field:Int64;N, mixedCase:Float64;N, str_field:Utf8;N, time:Timestamp(Nanosecond, None), with space:Float64;N] "###); assert_snapshot!(plan("SELECT foo, f64_field FROM data"), @r###" Sort: time ASC NULLS LAST, foo ASC NULLS LAST [iox::measurement:Dictionary(Int32, Utf8), time:Timestamp(Nanosecond, None), foo:Dictionary(Int32, Utf8);N, f64_field:Float64;N] diff --git a/iox_query_influxql/src/plan/planner/select.rs b/iox_query_influxql/src/plan/planner/select.rs index 0f03ca39c1..dd568c8983 100644 --- a/iox_query_influxql/src/plan/planner/select.rs +++ b/iox_query_influxql/src/plan/planner/select.rs @@ -5,7 +5,6 @@ use datafusion::logical_expr::utils::find_column_exprs; use datafusion::logical_expr::{Expr, LogicalPlan, LogicalPlanBuilder}; use datafusion_util::AsExpr; use generated_types::influxdata::iox::querier::v1::influx_ql_metadata::TagKeyColumn; -use influxdb_influxql_parser::common::OrderByClause; use influxdb_influxql_parser::expression::{Expr as IQLExpr, VarRef, VarRefDataType}; use influxdb_influxql_parser::select::Field; use schema::INFLUXQL_MEASUREMENT_COLUMN_NAME; @@ -121,25 +120,6 @@ pub(super) fn plan_with_sort( LogicalPlanBuilder::from(plan).sort(series_sort)?.build() } -/// Trait to convert the receiver to a [`Expr::Sort`] expression. -pub(super) trait ToSortExpr { - /// Create a sort expression. 
- fn to_sort_expr(&self) -> Expr; -} - -impl ToSortExpr for Option { - fn to_sort_expr(&self) -> Expr { - "time".as_expr().sort( - match self { - // Default behaviour is to sort by time in ascending order if there is no ORDER BY - None | Some(OrderByClause::Ascending) => true, - Some(OrderByClause::Descending) => false, - }, - false, - ) - } -} - /// Map the fields to DataFusion [`Expr::Column`] expressions, excluding those columns that /// are [`DataType::Null`]'s. pub(super) fn fields_to_exprs_no_nulls<'a>( From 231e0f48ab82e0705cc97db3a2c4e445643f8c55 Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Wed, 3 May 2023 14:46:51 +0200 Subject: [PATCH 008/119] test: add test for InfluxQL md queries w/ `FROM ""` (#7728) See https://github.com/influxdata/idpe/issues/17559 . Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- .../cases/in/influxql_metadata.influxql | 3 +++ .../cases/in/influxql_metadata.influxql.expected | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql b/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql index c8a44f8033..0cdf0f0048 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql +++ b/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql @@ -36,6 +36,7 @@ SHOW FIELD KEYS FROM cpu,disk,cpu; SHOW FIELD KEYS FROM /m.*/; SHOW FIELD KEYS FROM /d\isk/; SHOW FIELD KEYS FROM does_not_exist; +SHOW FIELD KEYS FROM ""; -- unimplemented features in `SHOW FIELD KEYS` SHOW FIELD KEYS ON my_db; @@ -59,6 +60,7 @@ SHOW TAG VALUES FROM m1,m0,m1 WITH KEY = "tag0"; SHOW TAG VALUES FROM /m.*/ WITH KEY = "tag0"; SHOW TAG VALUES FROM /d\isk/ WITH KEY = "device"; SHOW TAG VALUES FROM does_not_exist WITH KEY = "tag0"; +SHOW TAG VALUES FROM "" WITH KEY = "tag0"; SHOW TAG VALUES WITH KEY = "tt_tag"; SHOW TAG VALUES WITH KEY = "tt_tag" WHERE time >= '1990-01-01T00:00:00Z'; SHOW TAG VALUES WITH KEY = "tt_tag" WHERE time >= '2022-10-31T02:00:00Z'; @@ -83,6 +85,7 @@ SHOW TAG KEYS FROM cpu,disk,cpu; SHOW TAG KEYS FROM /m.*/; SHOW TAG KEYS FROM /d\isk/; SHOW TAG KEYS FROM does_not_exist; +SHOW TAG KEYS FROM ""; SHOW TAG KEYS FROM time_test WHERE time >= '1990-01-01T00:00:00Z'; SHOW TAG KEYS FROM time_test WHERE time >= '2022-10-31T02:00:00Z'; SHOW TAG KEYS FROM time_test WHERE time >= '1970-01-01T01:00:00Z'; diff --git a/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql.expected b/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql.expected index 1351c6e3ec..015ee6c829 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql.expected +++ b/influxdb_iox/tests/query_tests2/cases/in/influxql_metadata.influxql.expected @@ -408,6 +408,11 @@ name: disk | fieldKey | fieldType | +----------+-----------+ +----------+-----------+ +-- InfluxQL: SHOW FIELD KEYS FROM ""; ++----------+-----------+ +| fieldKey | fieldType | ++----------+-----------+ ++----------+-----------+ -- InfluxQL: SHOW FIELD KEYS ON my_db; Error while planning query: This feature is not implemented: SHOW FIELD KEYS ON -- InfluxQL: SHOW FIELD KEYS FROM x.my_db; @@ -923,6 +928,11 @@ name: disk | key | value | +-----+-------+ +-----+-------+ +-- InfluxQL: SHOW TAG VALUES FROM "" WITH KEY = "tag0"; ++-----+-------+ +| key | value | ++-----+-------+ ++-----+-------+ -- InfluxQL: SHOW TAG VALUES WITH KEY = "tt_tag"; name: time_test +--------+-------------------+ @@ -1284,6 +1294,11 @@ name: disk | tagKey | +--------+ 
+--------+ +-- InfluxQL: SHOW TAG KEYS FROM ""; ++--------+ +| tagKey | ++--------+ ++--------+ -- InfluxQL: SHOW TAG KEYS FROM time_test WHERE time >= '1990-01-01T00:00:00Z'; name: time_test +--------------------------+ From dfa184e296ec92091e721dc579f81abfb44c6001 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Mon, 27 Feb 2023 14:12:21 -0500 Subject: [PATCH 009/119] fix: Make ingester UUID an expected, required field of IngesterPartition --- querier/src/ingester/mod.rs | 44 ++++++++++++++-------------------- querier/src/table/mod.rs | 26 +++++++------------- querier/src/table/test_util.rs | 2 +- 3 files changed, 28 insertions(+), 44 deletions(-) diff --git a/querier/src/ingester/mod.rs b/querier/src/ingester/mod.rs index 4344d8d84c..5339f2fbb0 100644 --- a/querier/src/ingester/mod.rs +++ b/querier/src/ingester/mod.rs @@ -567,16 +567,10 @@ impl IngesterStreamDecoder { // columns that the sort key must cover. let partition_sort_key = None; - let ingester_uuid = if md.ingester_uuid.is_empty() { - // Using the write buffer path, no UUID specified - None - } else { - Some( - Uuid::parse_str(&md.ingester_uuid).context(IngesterUuidSnafu { - ingester_uuid: md.ingester_uuid, - })?, - ) - }; + let ingester_uuid = + Uuid::parse_str(&md.ingester_uuid).context(IngesterUuidSnafu { + ingester_uuid: md.ingester_uuid, + })?; let partition = IngesterPartition::new( ingester_uuid, @@ -767,17 +761,15 @@ impl IngesterConnection for IngesterConnectionImpl { /// more than one IngesterPartition for each table the ingester knows about. #[derive(Debug, Clone)] pub struct IngesterPartition { - /// If using ingester2/rpc write path, the ingester UUID will be present and will identify - /// whether this ingester has restarted since the last time it was queried or not. - /// - /// When we fully switch over to always using the RPC write path, the `Option` in this type can - /// be removed. - ingester_uuid: Option, + /// The ingester UUID that identifies whether this ingester has restarted since the last time + /// it was queried or not, which affects whether we can compare the + /// `completed_persistence_count` with a previous count for this ingester to know if we need + /// to refresh the catalog cache or not. + ingester_uuid: Uuid, partition_id: PartitionId, - /// If using ingester2/rpc write path, this will be the number of Parquet files this ingester - /// UUID has persisted for this partition. + /// The number of Parquet files this ingester UUID has persisted for this partition. 
completed_persistence_count: u64, /// Maximum sequence number of parquet files the ingester has @@ -795,7 +787,7 @@ impl IngesterPartition { /// `RecordBatches` into the correct types #[allow(clippy::too_many_arguments)] pub fn new( - ingester_uuid: Option, + ingester_uuid: Uuid, partition_id: PartitionId, completed_persistence_count: u64, parquet_max_sequence_number: Option, @@ -860,7 +852,7 @@ impl IngesterPartition { Ok(self) } - pub(crate) fn ingester_uuid(&self) -> Option { + pub(crate) fn ingester_uuid(&self) -> Uuid { self.ingester_uuid } @@ -1200,7 +1192,7 @@ mod tests { assert_eq!(p.partition_id.get(), 1); assert_eq!(p.parquet_max_sequence_number, None); assert_eq!(p.chunks.len(), 0); - assert_eq!(p.ingester_uuid.unwrap(), ingester_uuid); + assert_eq!(p.ingester_uuid, ingester_uuid); assert_eq!(p.completed_persistence_count, 5); } @@ -1540,7 +1532,7 @@ mod tests { assert_eq!(partitions.len(), 3); let p1 = &partitions[0]; - assert_eq!(p1.ingester_uuid.unwrap(), ingester_uuid1); + assert_eq!(p1.ingester_uuid, ingester_uuid1); assert_eq!(p1.completed_persistence_count, 0); assert_eq!(p1.partition_id.get(), 1); assert_eq!( @@ -1549,7 +1541,7 @@ mod tests { ); let p2 = &partitions[1]; - assert_eq!(p2.ingester_uuid.unwrap(), ingester_uuid1); + assert_eq!(p2.ingester_uuid, ingester_uuid1); assert_eq!(p2.completed_persistence_count, 42); assert_eq!(p2.partition_id.get(), 2); assert_eq!( @@ -1558,7 +1550,7 @@ mod tests { ); let p3 = &partitions[2]; - assert_eq!(p3.ingester_uuid.unwrap(), ingester_uuid2); + assert_eq!(p3.ingester_uuid, ingester_uuid2); assert_eq!(p3.completed_persistence_count, 9000); assert_eq!(p3.partition_id.get(), 3); assert_eq!( @@ -1824,7 +1816,7 @@ mod tests { let parquet_max_sequence_number = None; // Construct a partition and ensure it doesn't error let ingester_partition = IngesterPartition::new( - Some(ingester_uuid), + ingester_uuid, PartitionId::new(1), 0, parquet_max_sequence_number, @@ -1853,7 +1845,7 @@ mod tests { let parquet_max_sequence_number = None; let err = IngesterPartition::new( - Some(ingester_uuid), + ingester_uuid, PartitionId::new(1), 0, parquet_max_sequence_number, diff --git a/querier/src/table/mod.rs b/querier/src/table/mod.rs index ddc8981693..90b489e76a 100644 --- a/querier/src/table/mod.rs +++ b/querier/src/table/mod.rs @@ -441,15 +441,13 @@ impl QuerierTable { // therefore needs to refresh its view of the catalog. 
fn collect_persisted_file_counts( capacity: usize, - partitions: impl Iterator, u64)>, + partitions: impl Iterator, ) -> HashMap { partitions.fold( HashMap::with_capacity(capacity), |mut map, (uuid, count)| { - if let Some(uuid) = uuid { - let sum = map.entry(uuid).or_default(); - *sum += count; - } + let sum = map.entry(uuid).or_default(); + *sum += count; map }, ) @@ -481,19 +479,13 @@ mod tests { "Expected output to be empty, instead was: {output:?}" ); - // If there's no UUIDs, don't count anything - let input = [(None, 10)]; + let uuid1 = Uuid::new_v4(); + let uuid2 = Uuid::new_v4(); + let input = [(uuid1, 20), (uuid1, 22), (uuid2, 30)]; let output = collect_persisted_file_counts(input.len(), input.into_iter()); - assert!( - output.is_empty(), - "Expected output to be empty, instead was: {output:?}" - ); - - let uuid = Uuid::new_v4(); - let input = [(Some(uuid), 20), (Some(uuid), 22), (None, 10)]; - let output = collect_persisted_file_counts(input.len(), input.into_iter()); - assert_eq!(output.len(), 1); - assert_eq!(*output.get(&uuid).unwrap(), 42); + assert_eq!(output.len(), 2); + assert_eq!(*output.get(&uuid1).unwrap(), 42); + assert_eq!(*output.get(&uuid2).unwrap(), 30); } #[tokio::test] diff --git a/querier/src/table/test_util.rs b/querier/src/table/test_util.rs index ecf7f40873..f1b61b2554 100644 --- a/querier/src/table/test_util.rs +++ b/querier/src/table/test_util.rs @@ -107,7 +107,7 @@ impl IngesterPartitionBuilder { let data = self.lp.iter().map(|lp| lp_to_record_batch(lp)).collect(); IngesterPartition::new( - Some(Uuid::new_v4()), + Uuid::new_v4(), self.partition.partition.id, 0, parquet_max_sequence_number, From 621caab2e92efb71dc50cd78d202511d2f45c9d3 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Mon, 27 Feb 2023 15:03:31 -0500 Subject: [PATCH 010/119] fix: Remove unused parquet_max_sequence_number metadata --- .../src/components/parquet_file_sink/mock.rs | 5 +- .../parquet_file_sink/object_store.rs | 6 +- compactor2_test_utils/src/simulator.rs | 5 +- data_types/src/lib.rs | 23 -- docs/ingester_querier_protocol.md | 70 +++---- garbage_collector/src/objectstore/checker.rs | 3 +- .../iox/catalog/v1/parquet_file.proto | 4 +- .../iox/ingester/v1/parquet_metadata.proto | 6 +- .../influxdata/iox/ingester/v1/query.proto | 19 +- .../aggregate_tsm_schema/update_catalog.rs | 4 - ingester2/src/persist/mod.rs | 7 +- ingester2/src/persist/worker.rs | 3 +- ingester2/src/server/grpc/query.rs | 16 +- ingester2/src/test_util.rs | 1 - iox_catalog/src/interface.rs | 16 +- iox_catalog/src/kafkaless_transition.rs | 16 +- iox_catalog/src/mem.rs | 4 +- iox_catalog/src/postgres.rs | 45 ++-- iox_catalog/src/sqlite.rs | 51 ++--- iox_tests/src/builders.rs | 4 +- iox_tests/src/catalog.rs | 17 +- parquet_file/src/metadata.rs | 12 +- parquet_file/src/serialize.rs | 3 +- parquet_file/src/storage.rs | 3 +- parquet_file/tests/metadata.rs | 9 +- querier/src/cache/parquet_file.rs | 4 +- querier/src/ingester/mod.rs | 198 ++---------------- querier/src/namespace/query_access.rs | 9 - querier/src/parquet/creation.rs | 1 - querier/src/parquet/mod.rs | 11 +- querier/src/table/mod.rs | 34 +-- .../src/table/state_reconciler/interface.rs | 20 +- querier/src/table/test_util.rs | 16 +- service_grpc_catalog/src/lib.rs | 7 +- service_grpc_object_store/src/lib.rs | 5 +- 35 files changed, 138 insertions(+), 519 deletions(-) diff --git a/compactor2/src/components/parquet_file_sink/mock.rs b/compactor2/src/components/parquet_file_sink/mock.rs index f920270573..3166910e48 100644 --- 
a/compactor2/src/components/parquet_file_sink/mock.rs +++ b/compactor2/src/components/parquet_file_sink/mock.rs @@ -4,7 +4,7 @@ use std::{ }; use async_trait::async_trait; -use data_types::{ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp}; +use data_types::{ColumnSet, CompactionLevel, ParquetFileParams, Timestamp}; use datafusion::{ arrow::{datatypes::SchemaRef, record_batch::RecordBatch}, error::DataFusionError, @@ -72,7 +72,6 @@ impl ParquetFileSink for MockParquetFileSink { table_id: partition.table.id, partition_id: partition.partition_id, object_store_id: Uuid::from_u128(guard.len() as u128), - max_sequence_number: SequenceNumber::new(0), min_time: Timestamp::new(0), max_time: Timestamp::new(0), file_size_bytes: 1, @@ -168,7 +167,6 @@ mod tests { table_id: TableId::new(3), partition_id: PartitionId::new(1), object_store_id: Uuid::from_u128(2), - max_sequence_number: SequenceNumber::new(0), min_time: Timestamp::new(0), max_time: Timestamp::new(0), file_size_bytes: 1, @@ -231,7 +229,6 @@ mod tests { table_id: TableId::new(3), partition_id: PartitionId::new(1), object_store_id: Uuid::from_u128(0), - max_sequence_number: SequenceNumber::new(0), min_time: Timestamp::new(0), max_time: Timestamp::new(0), file_size_bytes: 1, diff --git a/compactor2/src/components/parquet_file_sink/object_store.rs b/compactor2/src/components/parquet_file_sink/object_store.rs index c6102f25e0..16af1d3b96 100644 --- a/compactor2/src/components/parquet_file_sink/object_store.rs +++ b/compactor2/src/components/parquet_file_sink/object_store.rs @@ -1,7 +1,7 @@ use std::{fmt::Display, sync::Arc}; use async_trait::async_trait; -use data_types::{CompactionLevel, ParquetFileParams, SequenceNumber}; +use data_types::{CompactionLevel, ParquetFileParams}; use datafusion::{error::DataFusionError, physical_plan::SendableRecordBatchStream}; use iox_time::{Time, TimeProvider}; use parquet_file::{ @@ -15,9 +15,6 @@ use crate::partition_info::PartitionInfo; use super::ParquetFileSink; -// fields no longer used but still exists in the catalog -const MAX_SEQUENCE_NUMBER: i64 = 0; - #[derive(Debug)] pub struct ObjectStoreParquetFileSink { store: ParquetStorage, @@ -57,7 +54,6 @@ impl ParquetFileSink for ObjectStoreParquetFileSink { table_name: partition.table.name.clone().into(), partition_id: partition.partition_id, partition_key: partition.partition_key.clone(), - max_sequence_number: SequenceNumber::new(MAX_SEQUENCE_NUMBER), compaction_level: level, sort_key: partition.sort_key.clone(), max_l0_created_at, diff --git a/compactor2_test_utils/src/simulator.rs b/compactor2_test_utils/src/simulator.rs index 35bfbdd6f5..ce1620bf5f 100644 --- a/compactor2_test_utils/src/simulator.rs +++ b/compactor2_test_utils/src/simulator.rs @@ -7,9 +7,7 @@ use std::{ }; use async_trait::async_trait; -use data_types::{ - ColumnSet, CompactionLevel, ParquetFile, ParquetFileParams, SequenceNumber, Timestamp, -}; +use data_types::{ColumnSet, CompactionLevel, ParquetFile, ParquetFileParams, Timestamp}; use datafusion::physical_plan::SendableRecordBatchStream; use iox_time::Time; use observability_deps::tracing::info; @@ -206,7 +204,6 @@ impl SimulatedFile { table_id: partition_info.table.id, partition_id: partition_info.partition_id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(0), min_time, max_time, file_size_bytes, diff --git a/data_types/src/lib.rs b/data_types/src/lib.rs index 30cf7775b8..4bdd34d3dc 100644 --- a/data_types/src/lib.rs +++ b/data_types/src/lib.rs @@ -830,23 +830,6 @@ pub struct 
Partition { /// relative order of B and C have been reversed. pub sort_key: Vec, - /// The inclusive maximum [`SequenceNumber`] of the most recently persisted - /// data for this partition. - /// - /// All writes with a [`SequenceNumber`] less than and equal to this - /// [`SequenceNumber`] have been persisted to the object store. The inverse - /// is not guaranteed to be true due to update ordering; some files for this - /// partition may exist in the `parquet_files` table that have a greater - /// [`SequenceNumber`] than is specified here - the system will converge so - /// long as the ingester progresses. - /// - /// It is a system invariant that this value monotonically increases over - /// time - wrote another way, it is an invariant that partitions are - /// persisted (or at least made visible) in sequence order. - /// - /// If [`None`] no data has been persisted for this partition. - pub persisted_sequence_number: Option, - /// The time at which the newest file of the partition is created pub new_file_at: Option, } @@ -945,8 +928,6 @@ pub struct ParquetFile { pub partition_id: PartitionId, /// the uuid used in the object store path for this file pub object_store_id: Uuid, - /// the maximum sequence number from a record in this file - pub max_sequence_number: SequenceNumber, /// the min timestamp of data in this file pub min_time: Timestamp, /// the max timestamp of data in this file @@ -1003,7 +984,6 @@ impl ParquetFile { table_id: params.table_id, partition_id: params.partition_id, object_store_id: params.object_store_id, - max_sequence_number: params.max_sequence_number, min_time: params.min_time, max_time: params.max_time, to_delete: None, @@ -1044,8 +1024,6 @@ pub struct ParquetFileParams { pub partition_id: PartitionId, /// the uuid used in the object store path for this file pub object_store_id: Uuid, - /// the maximum sequence number from a record in this file - pub max_sequence_number: SequenceNumber, /// the min timestamp of data in this file pub min_time: Timestamp, /// the max timestamp of data in this file @@ -1071,7 +1049,6 @@ impl From for ParquetFileParams { table_id: value.table_id, partition_id: value.partition_id, object_store_id: value.object_store_id, - max_sequence_number: value.max_sequence_number, min_time: value.min_time, max_time: value.max_time, file_size_bytes: value.file_size_bytes, diff --git a/docs/ingester_querier_protocol.md b/docs/ingester_querier_protocol.md index d562abd1d1..427ded15ec 100644 --- a/docs/ingester_querier_protocol.md +++ b/docs/ingester_querier_protocol.md @@ -3,8 +3,8 @@ This document describes the query protocol that the querier uses to request data The protocol is based on [Apache Flight]. We however only support a single request type: `DoGet`. - ## Request (Querier ⇒ Ingester) + The `DoGet` ticket contains a [Protocol Buffer] message `influxdata.iox.ingester.v1.IngesterQueryRequest` (see our `generated_types` crate). This message contains: @@ -16,20 +16,20 @@ contains: the request and the ingester data). - **predicate:** Predicate for row-filtering on the ingester side. -The request does NOT contain a selection of partitions or shards. The ingester must respond with -all partitions and shards it knows for that specified namespace-table combination. +The request does NOT contain a selection of partitions. The ingester must respond with all +partitions it knows for that specified namespace-table combination. 
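To make the ticket construction concrete, a minimal sketch of how a caller might build it is
shown below. It is illustrative only: the exact field names (`namespace_id`, `table_id`,
`columns`, `predicate`) and the generated module path are assumptions based on the description
above, not a copy of the querier code; the authoritative definition lives in
`influxdata/iox/ingester/v1/query.proto`.

```rust
// Sketch only. Assumes prost-generated types for
// influxdata.iox.ingester.v1.IngesterQueryRequest from the `generated_types` crate;
// the field names are assumptions based on the description above.
use arrow_flight::Ticket;
use generated_types::influxdata::iox::ingester::v1 as proto;
use prost::Message;

fn make_ticket(namespace_id: i64, table_id: i64, columns: Vec<String>) -> Ticket {
    let request = proto::IngesterQueryRequest {
        namespace_id,
        table_id,
        columns,
        // No row filtering in this sketch; a serialized predicate could be attached here.
        predicate: None,
    };

    // The DoGet ticket payload is simply the protobuf-encoded request.
    let mut buf = Vec::new();
    request
        .encode(&mut buf)
        .expect("encoding into a Vec never fails");
    Ticket { ticket: buf.into() }
}
```
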
## Response (Ingester ⇒ Querier) + The goal of the response is to stream the following ingester data hierarchy: -- For each shard: - - For each partition **(A)**: - - Persistence Information: - - Sequence number of max. persisted parquet file - - For each snapshot (contains _persisting_ data) **(B)**: - - Record batches with following operations applied **(C)**: - - selection (i.e. row filter via predicates) - - projection (i.e. column filter) +- For each partition **(A)**: +- Persistence Information: + - Sequence number of max. persisted parquet file +- For each snapshot (contains _persisting_ data) **(B)**: + - Record batches with following operations applied **(C)**: + - selection (i.e. row filter via predicates) + - projection (i.e. column filter) This is mapped to the following stream of Flight messages: @@ -37,35 +37,36 @@ This is mapped to the following stream of Flight messages: `influxdata.iox.ingester.v1.IngesterQueryResponseMetadata`. This message contains: - partition id - Sequence number of max. persisted parquet file -- **B:** `Schema` message that announces the snapshot schema. No app metadata is attached. The snapshot belongs to the - partition that was just announced. Transmitting a schema resets the dictionary information. -- **Between B and C:** `DictionaryBatch` messages that set the dictionary information for the next record batch. -- **C:** `RecordBatch` message that uses the last schema and the current dictionary state. The batch belongs to the - snapshot that was just announced. +- **B:** `Schema` message that announces the snapshot schema. No app metadata is attached. The + snapshot belongs to the partition that was just announced. Transmitting a schema resets the + dictionary information. +- **Between B and C:** `DictionaryBatch` messages that set the dictionary information for the next + record batch. +- **C:** `RecordBatch` message that uses the last schema and the current dictionary state. The + batch belongs to the snapshot that was just announced. -The protocol is stateful and therefore the order of the messages is important. A specific partition and snapshot may only -be announced once. - -All other messages types (at the time of writing these are `Tensor` and `SparseTensor`) are unsupported. +The protocol is stateful and therefore the order of the messages is important. A specific partition +and snapshot may only be announced once. +All other messages types (at the time of writing these are `Tensor` and `SparseTensor`) are +unsupported. ## Example + Imagine the following ingester state: -- shard S1: - - partition P1: - - max. persisted parquet file at `sequence_number=10` - - snapshots C1 and C2 - - partition P2: - - max. persisted parquet file at `sequence_number=1` - - snapshot C3 -- shard S2: - - partition P3: - - no persisted parquet file - - no snapshots (all deleted) - - partition P4: - - no persisted parquet file - - snapshot C4 +- partition P1: + - max. persisted parquet file at `sequence_number=10` + - snapshots C1 and C2 +- partition P2: + - max. persisted parquet file at `sequence_number=1` + - snapshot C3 +- partition P3: + - no persisted parquet file + - no snapshots (all deleted) +- partition P4: + - no persisted parquet file + - snapshot C4 This results in the following response stream: @@ -89,6 +90,5 @@ This results in the following response stream: Note that P3 was skipped because there was no unpersisted data. 
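The ordering guarantees above are what make decoding on the querier side straightforward. The
sketch below is illustrative only (the real decoder in `querier/src/ingester/mod.rs` consumes
`DecodedPayload` values from the Flight client); it shows how the flat message stream regroups
into the partition/snapshot hierarchy, using simplified stand-in types rather than the actual
client API.

```rust
// Illustrative regrouping of the flat Flight stream into the hierarchy above.
// `Payload` is a simplified stand-in for the three message kinds, not the real client API.
use std::sync::Arc;

use arrow::{datatypes::Schema, record_batch::RecordBatch};

enum Payload {
    /// (A) app-metadata-only message announcing a partition.
    Partition { partition_id: i64 },
    /// (B) schema announcing the next snapshot; resets dictionary state.
    Snapshot(Arc<Schema>),
    /// (C) record batch belonging to the most recently announced snapshot.
    Batch(RecordBatch),
}

struct Snapshot {
    schema: Arc<Schema>,
    batches: Vec<RecordBatch>,
}

struct Partition {
    partition_id: i64,
    snapshots: Vec<Snapshot>,
}

/// Groups messages into partitions, relying on the guarantee that each
/// partition and snapshot is announced exactly once and in order.
fn group(messages: impl IntoIterator<Item = Payload>) -> Vec<Partition> {
    let mut partitions: Vec<Partition> = vec![];
    for msg in messages {
        match msg {
            Payload::Partition { partition_id } => partitions.push(Partition {
                partition_id,
                snapshots: vec![],
            }),
            Payload::Snapshot(schema) => partitions
                .last_mut()
                .expect("schema sent before any partition was announced")
                .snapshots
                .push(Snapshot {
                    schema,
                    batches: vec![],
                }),
            Payload::Batch(batch) => partitions
                .last_mut()
                .and_then(|p| p.snapshots.last_mut())
                .expect("batch sent before any snapshot was announced")
                .batches
                .push(batch),
        }
    }
    partitions
}
```

A partition that arrives with no following schema (like P3 above) simply ends up with an empty
`snapshots` list.
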
- [Apache Flight]: https://arrow.apache.org/docs/Format/Flight.html [Protocol Buffer]: https://developers.google.com/protocol-buffers diff --git a/garbage_collector/src/objectstore/checker.rs b/garbage_collector/src/objectstore/checker.rs index af58c73b55..d9b75d1188 100644 --- a/garbage_collector/src/objectstore/checker.rs +++ b/garbage_collector/src/objectstore/checker.rs @@ -138,7 +138,7 @@ mod tests { use chrono::TimeZone; use data_types::{ ColumnId, ColumnSet, CompactionLevel, NamespaceId, ParquetFile, ParquetFileParams, - PartitionId, SequenceNumber, TableId, Timestamp, + PartitionId, TableId, Timestamp, }; use iox_catalog::{interface::Catalog, mem::MemCatalog}; use object_store::path::Path; @@ -178,7 +178,6 @@ mod tests { table_id: partition.table_id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time: Timestamp::new(1), max_time: Timestamp::new(10), file_size_bytes: 1337, diff --git a/generated_types/protos/influxdata/iox/catalog/v1/parquet_file.proto b/generated_types/protos/influxdata/iox/catalog/v1/parquet_file.proto index 6320e9d4bd..a7be9b84e8 100644 --- a/generated_types/protos/influxdata/iox/catalog/v1/parquet_file.proto +++ b/generated_types/protos/influxdata/iox/catalog/v1/parquet_file.proto @@ -9,6 +9,8 @@ message ParquetFile { reserved "sequencer_id"; reserved 17; reserved "shard_id"; + reserved 8; + reserved "max_sequence_number"; // the id of the file in the catalog int64 id = 1; @@ -20,8 +22,6 @@ message ParquetFile { int64 partition_id = 5; // the object store uuid string object_store_id = 6; - // the maximum sequence number from a record in this file - int64 max_sequence_number = 8; // the min timestamp of data in this file int64 min_time = 9; // the max timestamp of data in this file diff --git a/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto b/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto index 36f9c45543..12de13cc22 100644 --- a/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto +++ b/generated_types/protos/influxdata/iox/ingester/v1/parquet_metadata.proto @@ -19,6 +19,9 @@ message IoxMetadata { // shard_id was removed reserved 17; reserved "shard_id"; + // max_sequence_number was removed + reserved 13; + reserved "max_sequence_number"; // Object store ID. Used in the parquet filename. 16 bytes in big-endian order. bytes object_store_id = 1; @@ -44,9 +47,6 @@ message IoxMetadata { // Partition key of the partition that holds this parquet file. string partition_key = 9; - // The maximum sequence number from a shard in this parquet file. - int64 max_sequence_number = 13; - // The sort key of this chunk SortKey sort_key = 15; diff --git a/generated_types/protos/influxdata/iox/ingester/v1/query.proto b/generated_types/protos/influxdata/iox/ingester/v1/query.proto index e99effdf01..c4597b2a94 100644 --- a/generated_types/protos/influxdata/iox/ingester/v1/query.proto +++ b/generated_types/protos/influxdata/iox/ingester/v1/query.proto @@ -73,10 +73,9 @@ message IngesterQueryResponseMetadata { // Partition id for this batch. int64 partition_id = 7; - // Optional partition status. - // - // If this is given, then no schema and no batch will be part of this FlightData object. - PartitionStatus status = 8; + // Was partition status. + reserved "status"; + reserved 8; // UUID of this ingester instance. 
string ingester_uuid = 9; @@ -85,18 +84,6 @@ message IngesterQueryResponseMetadata { uint64 completed_persistence_count = 10; } -// Status of a partition that has unpersisted data. -// -// Note that this structure is specific to a partition (which itself is bound to a table and shard)! -message PartitionStatus { - // Max sequence number persisted - optional int64 parquet_max_sequence_number = 1; - - // Deprecated tombstone support in ingester (#5825). - reserved "tombstone_max_sequence_number"; - reserved 2; -} - // Serialization of `predicate::predicate::Predicate` that contains DataFusion `Expr`s message Predicate { // Optional field restriction. If any are present, restricts the results to only tables which diff --git a/import/src/aggregate_tsm_schema/update_catalog.rs b/import/src/aggregate_tsm_schema/update_catalog.rs index 78caa606fe..024bb03b77 100644 --- a/import/src/aggregate_tsm_schema/update_catalog.rs +++ b/import/src/aggregate_tsm_schema/update_catalog.rs @@ -1023,7 +1023,6 @@ mod tests { let partition = Partition { id: PartitionId::new(1), table_id: TableId::new(1), - persisted_sequence_number: None, partition_key: PartitionKey::from("2022-06-21"), sort_key: Vec::new(), new_file_at: None, @@ -1070,7 +1069,6 @@ mod tests { let partition = Partition { id: PartitionId::new(1), table_id: TableId::new(1), - persisted_sequence_number: None, partition_key: PartitionKey::from("2022-06-21"), // N.B. sort key is already what it will computed to; here we're testing the `adjust_sort_key_columns` code path sort_key: vec!["host".to_string(), "arch".to_string(), "time".to_string()], @@ -1117,7 +1115,6 @@ mod tests { let partition = Partition { id: PartitionId::new(1), table_id: TableId::new(1), - persisted_sequence_number: None, partition_key: PartitionKey::from("2022-06-21"), // N.B. is missing host so will need updating sort_key: vec!["arch".to_string(), "time".to_string()], @@ -1166,7 +1163,6 @@ mod tests { let partition = Partition { id: PartitionId::new(1), table_id: TableId::new(1), - persisted_sequence_number: None, partition_key: PartitionKey::from("2022-06-21"), // N.B. 
is missing arch so will need updating sort_key: vec!["host".to_string(), "time".to_string()], diff --git a/ingester2/src/persist/mod.rs b/ingester2/src/persist/mod.rs index e9fa51cc3a..d841201197 100644 --- a/ingester2/src/persist/mod.rs +++ b/ingester2/src/persist/mod.rs @@ -15,7 +15,7 @@ mod tests { use std::{sync::Arc, time::Duration}; use assert_matches::assert_matches; - use data_types::{CompactionLevel, ParquetFile, SequenceNumber}; + use data_types::{CompactionLevel, ParquetFile}; use dml::DmlOperation; use futures::TryStreamExt; use iox_catalog::{ @@ -252,7 +252,6 @@ mod tests { table_id: got_table_id, partition_id: got_partition_id, object_store_id, - max_sequence_number, row_count, compaction_level, file_size_bytes, @@ -266,7 +265,6 @@ mod tests { assert_eq!(got_namespace_id, namespace_id); assert_eq!(got_table_id, table_id); assert_eq!(got_partition_id, partition_id); - assert_eq!(max_sequence_number, SequenceNumber::new(0)); assert_eq!(row_count, 1); assert_eq!(compaction_level, CompactionLevel::Initial); @@ -402,7 +400,6 @@ mod tests { table_id: got_table_id, partition_id: got_partition_id, object_store_id, - max_sequence_number, row_count, compaction_level, file_size_bytes, @@ -420,8 +417,6 @@ mod tests { assert_eq!(row_count, 1); assert_eq!(compaction_level, CompactionLevel::Initial); - assert_eq!(max_sequence_number.get(), 0); // Unused, dummy value - (object_store_id, file_size_bytes) } ); diff --git a/ingester2/src/persist/worker.rs b/ingester2/src/persist/worker.rs index 17877d2b80..ee69bcce1b 100644 --- a/ingester2/src/persist/worker.rs +++ b/ingester2/src/persist/worker.rs @@ -2,7 +2,7 @@ use std::{ops::ControlFlow, sync::Arc}; use async_channel::RecvError; use backoff::Backoff; -use data_types::{CompactionLevel, ParquetFileParams, SequenceNumber}; +use data_types::{CompactionLevel, ParquetFileParams}; use iox_catalog::interface::{get_table_schema_by_id, CasFailure, Catalog}; use iox_query::exec::Executor; use iox_time::{SystemProvider, TimeProvider}; @@ -263,7 +263,6 @@ where table_name: Arc::clone(&*ctx.table_name().get().await), partition_id: ctx.partition_id(), partition_key: ctx.partition_key().clone(), - max_sequence_number: SequenceNumber::new(0), // TODO: not ordered! compaction_level: CompactionLevel::Initial, sort_key: Some(data_sort_key), max_l0_created_at: time_now, diff --git a/ingester2/src/server/grpc/query.rs b/ingester2/src/server/grpc/query.rs index 4ea7f2551f..de013c7406 100644 --- a/ingester2/src/server/grpc/query.rs +++ b/ingester2/src/server/grpc/query.rs @@ -9,7 +9,7 @@ use arrow_flight::{ use data_types::{NamespaceId, PartitionId, TableId}; use flatbuffers::FlatBufferBuilder; use futures::{Stream, StreamExt, TryStreamExt}; -use generated_types::influxdata::iox::ingester::v1::{self as proto, PartitionStatus}; +use generated_types::influxdata::iox::ingester::v1 as proto; use metric::U64Counter; use observability_deps::tracing::*; use prost::Message; @@ -259,8 +259,6 @@ where fn encode_partition( // Partition ID. partition_id: PartitionId, - // Partition persistence status. - status: PartitionStatus, // Count of persisted Parquet files for the [`PartitionData`] instance this // [`PartitionResponse`] was generated from. 
// @@ -272,9 +270,6 @@ fn encode_partition( let mut bytes = bytes::BytesMut::new(); let app_metadata = proto::IngesterQueryResponseMetadata { partition_id: partition_id.get(), - status: Some(proto::PartitionStatus { - parquet_max_sequence_number: status.parquet_max_sequence_number, - }), ingester_uuid: ingester_id.to_string(), completed_persistence_count, }; @@ -312,14 +307,7 @@ fn encode_response( let partition_id = partition.id(); let completed_persistence_count = partition.completed_persistence_count(); let head = futures::stream::once(async move { - encode_partition( - partition_id, - PartitionStatus { - parquet_max_sequence_number: None, - }, - completed_persistence_count, - ingester_id, - ) + encode_partition(partition_id, completed_persistence_count, ingester_id) }); match partition.into_record_batch_stream() { diff --git a/ingester2/src/test_util.rs b/ingester2/src/test_util.rs index aa5a925ecc..140acefcb0 100644 --- a/ingester2/src/test_util.rs +++ b/ingester2/src/test_util.rs @@ -126,7 +126,6 @@ pub(crate) fn arbitrary_partition() -> Partition { table_id: ARBITRARY_TABLE_ID, partition_key: ARBITRARY_PARTITION_KEY.clone(), sort_key: Default::default(), - persisted_sequence_number: Default::default(), new_file_at: Default::default(), } } diff --git a/iox_catalog/src/interface.rs b/iox_catalog/src/interface.rs index 7a23d4a866..1b8742a6cd 100644 --- a/iox_catalog/src/interface.rs +++ b/iox_catalog/src/interface.rs @@ -792,7 +792,7 @@ pub(crate) mod test_helpers { use super::*; use ::test_helpers::{assert_contains, tracing::TracingCapture}; use assert_matches::assert_matches; - use data_types::{ColumnId, ColumnSet, CompactionLevel, SequenceNumber}; + use data_types::{ColumnId, ColumnSet, CompactionLevel}; use futures::Future; use metric::{Attributes, DurationHistogram, Metric}; use std::{collections::BTreeSet, ops::DerefMut, sync::Arc, time::Duration}; @@ -1803,7 +1803,6 @@ pub(crate) mod test_helpers { table_id: partition.table_id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time: Timestamp::new(1), max_time: Timestamp::new(10), file_size_bytes: 1337, @@ -1839,7 +1838,6 @@ pub(crate) mod test_helpers { table_id: other_partition.table_id, partition_id: other_partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(200), min_time: Timestamp::new(50), max_time: Timestamp::new(60), ..parquet_file_params.clone() @@ -1987,7 +1985,6 @@ pub(crate) mod test_helpers { object_store_id: Uuid::new_v4(), min_time: Timestamp::new(1), max_time: Timestamp::new(10), - max_sequence_number: SequenceNumber::new(10), ..parquet_file_params }; let f1 = repos @@ -2000,7 +1997,6 @@ pub(crate) mod test_helpers { object_store_id: Uuid::new_v4(), min_time: Timestamp::new(50), max_time: Timestamp::new(60), - max_sequence_number: SequenceNumber::new(11), ..f1_params.clone() }; let f2 = repos @@ -2019,7 +2015,6 @@ pub(crate) mod test_helpers { object_store_id: Uuid::new_v4(), min_time: Timestamp::new(50), max_time: Timestamp::new(60), - max_sequence_number: SequenceNumber::new(12), ..f2_params }; let f3 = repos @@ -2223,7 +2218,6 @@ pub(crate) mod test_helpers { table_id: table_1.id, partition_id: partition_1.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time: Timestamp::new(1), max_time: Timestamp::new(10), file_size_bytes: 1337, @@ -2238,7 +2232,6 @@ pub(crate) mod test_helpers { table_id: table_2.id, partition_id: partition_2.id, object_store_id: Uuid::new_v4(), - 
max_sequence_number: SequenceNumber::new(140), min_time: Timestamp::new(1), max_time: Timestamp::new(10), file_size_bytes: 1337, @@ -2327,7 +2320,6 @@ pub(crate) mod test_helpers { table_id: partition1.table_id, partition_id: partition1.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time: Timestamp::new(1), max_time: Timestamp::new(10), file_size_bytes: 1337, @@ -2683,7 +2675,6 @@ pub(crate) mod test_helpers { table_id: partition.table_id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time, max_time, file_size_bytes: 1337, @@ -2794,7 +2785,6 @@ pub(crate) mod test_helpers { table_id: partition.table_id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(140), min_time: query_min_time + 1, max_time: query_max_time - 1, file_size_bytes: 1337, @@ -2884,7 +2874,6 @@ pub(crate) mod test_helpers { table_id: partition_1.table_id, partition_id: partition_1.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(1), min_time: Timestamp::new(100), max_time: Timestamp::new(250), file_size_bytes: 1337, @@ -2901,7 +2890,6 @@ pub(crate) mod test_helpers { .unwrap(); let parquet_file_params_2 = ParquetFileParams { object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(3), min_time: Timestamp::new(200), max_time: Timestamp::new(300), ..parquet_file_params @@ -2941,7 +2929,6 @@ pub(crate) mod test_helpers { table_id: partition_2.table_id, partition_id: partition_2.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(1), min_time: Timestamp::new(100), max_time: Timestamp::new(250), file_size_bytes: 1337, @@ -2958,7 +2945,6 @@ pub(crate) mod test_helpers { .unwrap(); let parquet_file_params_2 = ParquetFileParams { object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(3), min_time: Timestamp::new(200), max_time: Timestamp::new(300), ..parquet_file_params diff --git a/iox_catalog/src/kafkaless_transition.rs b/iox_catalog/src/kafkaless_transition.rs index c290507322..408848b794 100644 --- a/iox_catalog/src/kafkaless_transition.rs +++ b/iox_catalog/src/kafkaless_transition.rs @@ -1,4 +1,4 @@ -use data_types::{SequenceNumber, TopicId}; +use data_types::TopicId; /// Magic number to be used shard indices and shard ids in "kafkaless". pub(crate) const TRANSITION_SHARD_NUMBER: i32 = 1234; @@ -66,18 +66,4 @@ pub(crate) struct Shard { /// the shard index of the shard the sequence numbers are coming from, sharded by the router /// and write buffer pub(crate) shard_index: ShardIndex, - /// The minimum unpersisted sequence number. Because different tables - /// can be persisted at different times, it is possible some data has been persisted - /// with a higher sequence number than this. However, all data with a sequence number - /// lower than this must have been persisted to Parquet. 
- pub(crate) min_unpersisted_sequence_number: SequenceNumber, -} - -/// Shard index plus offset -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub(crate) struct Sequence { - /// The shard index - pub(crate) shard_index: ShardIndex, - /// The sequence number - pub(crate) sequence_number: SequenceNumber, } diff --git a/iox_catalog/src/mem.rs b/iox_catalog/src/mem.rs index e0e40a3eef..e9de4af7be 100644 --- a/iox_catalog/src/mem.rs +++ b/iox_catalog/src/mem.rs @@ -16,7 +16,7 @@ use async_trait::async_trait; use data_types::{ Column, ColumnId, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile, ParquetFileId, ParquetFileParams, Partition, PartitionId, PartitionKey, QueryPool, QueryPoolId, - SequenceNumber, SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, + SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, }; use iox_time::{SystemProvider, TimeProvider}; use observability_deps::tracing::warn; @@ -149,7 +149,6 @@ impl Catalog for MemCatalog { id: TRANSITION_SHARD_ID, topic_id: topic.id, shard_index: TRANSITION_SHARD_INDEX, - min_unpersisted_sequence_number: SequenceNumber::new(0), }; stage.shards.push(shard); transaction.commit_inplace().await?; @@ -701,7 +700,6 @@ impl PartitionRepo for MemTxn { table_id, partition_key: key, sort_key: vec![], - persisted_sequence_number: None, new_file_at: None, }; stage.partitions.push(p); diff --git a/iox_catalog/src/postgres.rs b/iox_catalog/src/postgres.rs index 9550965ce0..5951d75958 100644 --- a/iox_catalog/src/postgres.rs +++ b/iox_catalog/src/postgres.rs @@ -1097,7 +1097,7 @@ VALUES ( $1, $2, $3, '{}') ON CONFLICT ON CONSTRAINT partition_key_unique DO UPDATE SET partition_key = partition.partition_key -RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; +RETURNING id, table_id, partition_key, sort_key, new_file_at; "#, ) .bind(key) // $1 @@ -1119,7 +1119,7 @@ RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_ async fn get_by_id(&mut self, partition_id: PartitionId) -> Result> { let rec = sqlx::query_as::<_, Partition>( r#" -SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +SELECT id, table_id, partition_key, sort_key, new_file_at FROM partition WHERE id = $1; "#, @@ -1140,7 +1140,7 @@ WHERE id = $1; async fn list_by_table_id(&mut self, table_id: TableId) -> Result> { sqlx::query_as::<_, Partition>( r#" -SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +SELECT id, table_id, partition_key, sort_key, new_file_at FROM partition WHERE table_id = $1; "#, @@ -1181,7 +1181,7 @@ WHERE table_id = $1; UPDATE partition SET sort_key = $1 WHERE id = $2 AND sort_key = $3 -RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; +RETURNING id, table_id, partition_key, sort_key, new_file_at; "#, ) .bind(new_sort_key) // $1 @@ -1358,7 +1358,6 @@ impl ParquetFileRepo for PostgresTxn { table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, file_size_bytes, @@ -1373,12 +1372,12 @@ impl ParquetFileRepo for PostgresTxn { r#" INSERT INTO parquet_file ( shard_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, file_size_bytes, + min_time, max_time, file_size_bytes, row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at ) -VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14 ) +VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 ) 
RETURNING id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at; "#, ) @@ -1386,16 +1385,15 @@ RETURNING .bind(table_id) // $2 .bind(partition_id) // $3 .bind(object_store_id) // $4 - .bind(max_sequence_number) // $5 - .bind(min_time) // $6 - .bind(max_time) // $7 - .bind(file_size_bytes) // $8 - .bind(row_count) // $9 - .bind(compaction_level) // $10 - .bind(created_at) // $11 - .bind(namespace_id) // $12 - .bind(column_set) // $13 - .bind(max_l0_created_at) // $14 + .bind(min_time) // $5 + .bind(max_time) // $6 + .bind(file_size_bytes) // $7 + .bind(row_count) // $8 + .bind(compaction_level) // $9 + .bind(created_at) // $10 + .bind(namespace_id) // $11 + .bind(column_set) // $12 + .bind(max_l0_created_at) // $13 .fetch_one(&mut self.inner) .await .map_err(|e| { @@ -1462,7 +1460,7 @@ RETURNING id; r#" SELECT parquet_file.id, parquet_file.namespace_id, parquet_file.table_id, parquet_file.partition_id, parquet_file.object_store_id, - parquet_file.max_sequence_number, parquet_file.min_time, + parquet_file.min_time, parquet_file.max_time, parquet_file.to_delete, parquet_file.file_size_bytes, parquet_file.row_count, parquet_file.compaction_level, parquet_file.created_at, parquet_file.column_set, parquet_file.max_l0_created_at @@ -1482,7 +1480,7 @@ WHERE table_name.namespace_id = $1 sqlx::query_as::<_, ParquetFile>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE table_id = $1 AND to_delete IS NULL; @@ -1544,7 +1542,7 @@ RETURNING id; sqlx::query_as::<_, ParquetFile>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE parquet_file.partition_id = $1 @@ -1612,7 +1610,7 @@ RETURNING id; let rec = sqlx::query_as::<_, ParquetFile>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE object_store_id = $1; @@ -1670,7 +1668,7 @@ mod tests { use super::*; use crate::create_or_get_default_records; use assert_matches::assert_matches; - use data_types::{ColumnId, ColumnSet, SequenceNumber}; + use data_types::{ColumnId, ColumnSet}; use metric::{Attributes, DurationHistogram, Metric}; use rand::Rng; use sqlx::migrate::MigrateDatabase; @@ -2209,7 +2207,6 @@ mod tests { table_id, partition_id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(100), min_time: Timestamp::new(1), max_time: Timestamp::new(5), file_size_bytes: 1337, diff --git a/iox_catalog/src/sqlite.rs b/iox_catalog/src/sqlite.rs index ab8a9e0075..d9ffa2fceb 100644 --- a/iox_catalog/src/sqlite.rs +++ b/iox_catalog/src/sqlite.rs @@ -15,7 +15,7 @@ use async_trait::async_trait; use data_types::{ Column, ColumnId, ColumnSet, ColumnType, CompactionLevel, Namespace, NamespaceId, ParquetFile, ParquetFileId, ParquetFileParams, Partition, 
PartitionId, PartitionKey, QueryPool, QueryPoolId, - SequenceNumber, SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, + SkippedCompaction, Table, TableId, Timestamp, TopicId, TopicMetadata, }; use serde::{Deserialize, Serialize}; use std::ops::Deref; @@ -871,7 +871,6 @@ struct PartitionPod { table_id: TableId, partition_key: PartitionKey, sort_key: Json>, - persisted_sequence_number: Option, new_file_at: Option, } @@ -882,7 +881,6 @@ impl From for Partition { table_id: value.table_id, partition_key: value.partition_key, sort_key: value.sort_key.0, - persisted_sequence_number: value.persisted_sequence_number, new_file_at: value.new_file_at, } } @@ -903,7 +901,7 @@ VALUES ( $1, $2, $3, '[]') ON CONFLICT (table_id, partition_key) DO UPDATE SET partition_key = partition.partition_key -RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; +RETURNING id, table_id, partition_key, sort_key, new_file_at; "#, ) .bind(key) // $1 @@ -925,7 +923,7 @@ RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_ async fn get_by_id(&mut self, partition_id: PartitionId) -> Result> { let rec = sqlx::query_as::<_, PartitionPod>( r#" -SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +SELECT id, table_id, partition_key, sort_key, new_file_at FROM partition WHERE id = $1; "#, @@ -946,7 +944,7 @@ WHERE id = $1; async fn list_by_table_id(&mut self, table_id: TableId) -> Result> { Ok(sqlx::query_as::<_, PartitionPod>( r#" -SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +SELECT id, table_id, partition_key, sort_key, new_file_at FROM partition WHERE table_id = $1; "#, @@ -990,7 +988,7 @@ WHERE table_id = $1; UPDATE partition SET sort_key = $1 WHERE id = $2 AND sort_key = $3 -RETURNING id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at; +RETURNING id, table_id, partition_key, sort_key, new_file_at; "#, ) .bind(Json(new_sort_key)) // $1 @@ -1129,7 +1127,7 @@ RETURNING * async fn most_recent_n(&mut self, n: usize) -> Result> { Ok(sqlx::query_as::<_, PartitionPod>( r#" -SELECT id, table_id, partition_key, sort_key, persisted_sequence_number, new_file_at +SELECT id, table_id, partition_key, sort_key, new_file_at FROM partition ORDER BY id DESC LIMIT $1; @@ -1185,7 +1183,6 @@ struct ParquetFilePod { table_id: TableId, partition_id: PartitionId, object_store_id: Uuid, - max_sequence_number: SequenceNumber, min_time: Timestamp, max_time: Timestamp, to_delete: Option, @@ -1205,7 +1202,6 @@ impl From for ParquetFile { table_id: value.table_id, partition_id: value.partition_id, object_store_id: value.object_store_id, - max_sequence_number: value.max_sequence_number, min_time: value.min_time, max_time: value.max_time, to_delete: value.to_delete, @@ -1227,7 +1223,6 @@ impl ParquetFileRepo for SqliteTxn { table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, file_size_bytes, @@ -1242,12 +1237,12 @@ impl ParquetFileRepo for SqliteTxn { r#" INSERT INTO parquet_file ( shard_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, file_size_bytes, + min_time, max_time, file_size_bytes, row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at ) -VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14 ) +VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 ) RETURNING id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, 
max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, namespace_id, column_set, max_l0_created_at; "#, ) @@ -1255,16 +1250,15 @@ RETURNING .bind(table_id) // $2 .bind(partition_id) // $3 .bind(object_store_id) // $4 - .bind(max_sequence_number) // $5 - .bind(min_time) // $6 - .bind(max_time) // $7 - .bind(file_size_bytes) // $8 - .bind(row_count) // $9 - .bind(compaction_level) // $10 - .bind(created_at) // $11 - .bind(namespace_id) // $12 - .bind(from_column_set(&column_set)) // $13 - .bind(max_l0_created_at) // $14 + .bind(min_time) // $5 + .bind(max_time) // $6 + .bind(file_size_bytes) // $7 + .bind(row_count) // $8 + .bind(compaction_level) // $9 + .bind(created_at) // $10 + .bind(namespace_id) // $11 + .bind(from_column_set(&column_set)) // $12 + .bind(max_l0_created_at) // $13 .fetch_one(self.inner.get_mut()) .await .map_err(|e| { @@ -1332,7 +1326,7 @@ RETURNING id; Ok(sqlx::query_as::<_, ParquetFilePod>( r#" SELECT parquet_file.id, parquet_file.namespace_id, parquet_file.table_id, - parquet_file.partition_id, parquet_file.object_store_id, parquet_file.max_sequence_number, + parquet_file.partition_id, parquet_file.object_store_id, parquet_file.min_time, parquet_file.max_time, parquet_file.to_delete, parquet_file.file_size_bytes, parquet_file.row_count, parquet_file.compaction_level, parquet_file.created_at, parquet_file.column_set, parquet_file.max_l0_created_at @@ -1355,7 +1349,7 @@ WHERE table_name.namespace_id = $1 Ok(sqlx::query_as::<_, ParquetFilePod>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE table_id = $1 AND to_delete IS NULL; @@ -1423,7 +1417,7 @@ RETURNING id; Ok(sqlx::query_as::<_, ParquetFilePod>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE parquet_file.partition_id = $1 @@ -1494,7 +1488,7 @@ RETURNING id; let rec = sqlx::query_as::<_, ParquetFilePod>( r#" SELECT id, namespace_id, table_id, partition_id, object_store_id, - max_sequence_number, min_time, max_time, to_delete, file_size_bytes, + min_time, max_time, to_delete, file_size_bytes, row_count, compaction_level, created_at, column_set, max_l0_created_at FROM parquet_file WHERE object_store_id = $1; @@ -1863,7 +1857,6 @@ mod tests { table_id, partition_id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(100), min_time: Timestamp::new(1), max_time: Timestamp::new(5), file_size_bytes: 1337, diff --git a/iox_tests/src/builders.rs b/iox_tests/src/builders.rs index 48ecc201f1..7e9a20be1c 100644 --- a/iox_tests/src/builders.rs +++ b/iox_tests/src/builders.rs @@ -1,6 +1,6 @@ use data_types::{ ColumnSet, CompactionLevel, NamespaceId, ParquetFile, ParquetFileId, Partition, PartitionId, - PartitionKey, SequenceNumber, SkippedCompaction, Table, TableId, Timestamp, + PartitionKey, SkippedCompaction, Table, TableId, Timestamp, }; use uuid::Uuid; @@ -21,7 +21,6 @@ impl ParquetFileBuilder { table_id: TableId::new(0), partition_id: PartitionId::new(0), object_store_id: Uuid::from_u128(id.try_into().expect("invalid id")), - max_sequence_number: 
SequenceNumber::new(0), min_time: Timestamp::new(0), max_time: Timestamp::new(0), to_delete: None, @@ -157,7 +156,6 @@ impl PartitionBuilder { table_id: TableId::new(0), partition_key: PartitionKey::from("key"), sort_key: vec![], - persisted_sequence_number: None, new_file_at: None, }, } diff --git a/iox_tests/src/catalog.rs b/iox_tests/src/catalog.rs index c4a3f5ad4a..0d3cd9507e 100644 --- a/iox_tests/src/catalog.rs +++ b/iox_tests/src/catalog.rs @@ -6,8 +6,8 @@ use arrow::{ }; use data_types::{ Column, ColumnSet, ColumnType, CompactionLevel, Namespace, NamespaceSchema, ParquetFile, - ParquetFileParams, Partition, PartitionId, QueryPool, SequenceNumber, Table, TableId, - TableSchema, Timestamp, TopicMetadata, + ParquetFileParams, Partition, PartitionId, QueryPool, Table, TableId, TableSchema, Timestamp, + TopicMetadata, }; use datafusion::physical_plan::metrics::Count; use datafusion_util::MemoryStream; @@ -456,7 +456,6 @@ impl TestPartition { record_batch, table, schema, - max_sequence_number, min_time, max_time, file_size_bytes, @@ -497,7 +496,6 @@ impl TestPartition { table_name: self.table.table.name.clone().into(), partition_id: self.partition.id, partition_key: self.partition.partition_key.clone(), - max_sequence_number, compaction_level: CompactionLevel::Initial, sort_key: Some(sort_key.clone()), max_l0_created_at: Time::from_timestamp_nanos(max_l0_created_at), @@ -516,7 +514,6 @@ impl TestPartition { record_batch: Some(record_batch), table: Some(table), schema: Some(schema), - max_sequence_number, min_time, max_time, file_size_bytes: Some(file_size_bytes.unwrap_or(real_file_size_bytes as u64)), @@ -543,7 +540,6 @@ impl TestPartition { ) -> TestParquetFile { let TestParquetFileBuilder { record_batch, - max_sequence_number, min_time, max_time, file_size_bytes, @@ -585,7 +581,6 @@ impl TestPartition { table_id: self.table.table.id, partition_id: self.partition.id, object_store_id: object_store_id.unwrap_or_else(Uuid::new_v4), - max_sequence_number, min_time: Timestamp::new(min_time), max_time: Timestamp::new(max_time), file_size_bytes: file_size_bytes.unwrap_or(0) as i64, @@ -628,7 +623,6 @@ pub struct TestParquetFileBuilder { record_batch: Option, table: Option, schema: Option, - max_sequence_number: SequenceNumber, min_time: i64, max_time: i64, file_size_bytes: Option, @@ -647,7 +641,6 @@ impl Default for TestParquetFileBuilder { record_batch: None, table: None, schema: None, - max_sequence_number: SequenceNumber::new(100), min_time: now().timestamp_nanos(), max_time: now().timestamp_nanos(), file_size_bytes: None, @@ -690,12 +683,6 @@ impl TestParquetFileBuilder { self } - /// Specify the maximum sequence number for the parquet file metadata. - pub fn with_max_seq(mut self, max_seq: i64) -> Self { - self.max_sequence_number = SequenceNumber::new(max_seq); - self - } - /// Specify the minimum time for the parquet file metadata. 
pub fn with_min_time(mut self, min_time: i64) -> Self { self.min_time = min_time; diff --git a/parquet_file/src/metadata.rs b/parquet_file/src/metadata.rs index 5bf26cec17..ea18d486a6 100644 --- a/parquet_file/src/metadata.rs +++ b/parquet_file/src/metadata.rs @@ -90,8 +90,7 @@ use base64::{prelude::BASE64_STANDARD, Engine}; use bytes::Bytes; use data_types::{ ColumnId, ColumnSet, ColumnSummary, CompactionLevel, InfluxDbType, NamespaceId, - ParquetFileParams, PartitionId, PartitionKey, SequenceNumber, StatValues, Statistics, TableId, - Timestamp, + ParquetFileParams, PartitionId, PartitionKey, StatValues, Statistics, TableId, Timestamp, }; use generated_types::influxdata::iox::ingester::v1 as proto; use iox_time::Time; @@ -274,9 +273,6 @@ pub struct IoxMetadata { /// partition key of the data pub partition_key: PartitionKey, - /// sequence number of the last write - pub max_sequence_number: SequenceNumber, - /// The compaction level of the file. /// /// * 0 (`CompactionLevel::Initial`): represents a level-0 file that is persisted by an @@ -340,7 +336,6 @@ impl IoxMetadata { table_name: self.table_name.to_string(), partition_id: self.partition_id.get(), partition_key: self.partition_key.to_string(), - max_sequence_number: self.max_sequence_number.get(), sort_key, compaction_level: self.compaction_level as i32, max_l0_created_at: Some(self.max_l0_created_at.date_time().into()), @@ -392,7 +387,6 @@ impl IoxMetadata { table_name, partition_id: PartitionId::new(proto_msg.partition_id), partition_key, - max_sequence_number: SequenceNumber::new(proto_msg.max_sequence_number), sort_key, compaction_level: proto_msg.compaction_level.try_into().context( InvalidCompactionLevelSnafu { @@ -417,7 +411,6 @@ impl IoxMetadata { table_name: table_name.into(), partition_id: PartitionId::new(1), partition_key: "unknown".into(), - max_sequence_number: SequenceNumber::new(1), compaction_level: CompactionLevel::Initial, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(creation_timestamp_ns), @@ -499,7 +492,6 @@ impl IoxMetadata { table_id: self.table_id, partition_id: self.partition_id, object_store_id: self.object_store_id, - max_sequence_number: self.max_sequence_number, min_time, max_time, file_size_bytes: file_size_bytes as i64, @@ -1017,7 +1009,6 @@ mod tests { table_name: Arc::from("weather"), partition_id: PartitionId::new(4), partition_key: PartitionKey::from("part"), - max_sequence_number: SequenceNumber::new(6), compaction_level: CompactionLevel::Initial, sort_key: Some(sort_key), max_l0_created_at: create_time, @@ -1041,7 +1032,6 @@ mod tests { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(42), diff --git a/parquet_file/src/serialize.rs b/parquet_file/src/serialize.rs index dfb8b094a2..0a631bfe69 100644 --- a/parquet_file/src/serialize.rs +++ b/parquet_file/src/serialize.rs @@ -197,7 +197,7 @@ mod tests { record_batch::RecordBatch, }; use bytes::Bytes; - use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId}; + use data_types::{CompactionLevel, NamespaceId, PartitionId, TableId}; use datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use datafusion_util::MemoryStream; use iox_time::Time; @@ -214,7 +214,6 @@ mod tests { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - 
max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(42), diff --git a/parquet_file/src/storage.rs b/parquet_file/src/storage.rs index 52adf34302..7cb04283ad 100644 --- a/parquet_file/src/storage.rs +++ b/parquet_file/src/storage.rs @@ -323,7 +323,7 @@ mod tests { array::{ArrayRef, Int64Array, StringArray}, record_batch::RecordBatch, }; - use data_types::{CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId}; + use data_types::{CompactionLevel, NamespaceId, PartitionId, TableId}; use datafusion::common::DataFusionError; use datafusion_util::MemoryStream; use iox_time::Time; @@ -579,7 +579,6 @@ mod tests { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(42), diff --git a/parquet_file/tests/metadata.rs b/parquet_file/tests/metadata.rs index f8a8d0df9d..ec673bea8f 100644 --- a/parquet_file/tests/metadata.rs +++ b/parquet_file/tests/metadata.rs @@ -4,9 +4,7 @@ use arrow::{ array::{ArrayRef, StringArray, TimestampNanosecondArray}, record_batch::RecordBatch, }; -use data_types::{ - ColumnId, CompactionLevel, NamespaceId, PartitionId, SequenceNumber, TableId, Timestamp, -}; +use data_types::{ColumnId, CompactionLevel, NamespaceId, PartitionId, TableId, Timestamp}; use datafusion_util::MemoryStream; use iox_time::Time; use object_store::DynObjectStore; @@ -57,7 +55,6 @@ async fn test_decoded_iox_metadata() { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(42), @@ -198,7 +195,6 @@ async fn test_empty_parquet_file_panic() { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(42), @@ -292,7 +288,6 @@ async fn test_decoded_many_columns_with_null_cols_iox_metadata() { table_name: "platanos".into(), partition_id: PartitionId::new(4), partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: Some(sort_key), max_l0_created_at: Time::from_timestamp_nanos(42), @@ -380,7 +375,6 @@ async fn test_derive_parquet_file_params() { table_name: "platanos".into(), partition_id, partition_key: "potato".into(), - max_sequence_number: SequenceNumber::new(11), compaction_level: CompactionLevel::FileNonOverlapped, sort_key: None, max_l0_created_at: Time::from_timestamp_nanos(1234), @@ -424,7 +418,6 @@ async fn test_derive_parquet_file_params() { assert_eq!(catalog_data.table_id, meta.table_id); assert_eq!(catalog_data.partition_id, meta.partition_id); assert_eq!(catalog_data.object_store_id, meta.object_store_id); - assert_eq!(catalog_data.max_sequence_number, meta.max_sequence_number); assert_eq!(catalog_data.file_size_bytes, file_size as i64); assert_eq!(catalog_data.compaction_level, meta.compaction_level); assert_eq!(catalog_data.created_at, Timestamp::new(1234)); diff --git a/querier/src/cache/parquet_file.rs b/querier/src/cache/parquet_file.rs index f231093ad0..e7d3eb909e 100644 --- 
a/querier/src/cache/parquet_file.rs +++ b/querier/src/cache/parquet_file.rs @@ -347,8 +347,8 @@ mod tests { partition.create_parquet_file(builder).await; let table_id = table.table.id; - let single_file_size = 224; - let two_file_size = 408; + let single_file_size = 216; + let two_file_size = 392; assert!(single_file_size < two_file_size); let cache = make_cache(&catalog); diff --git a/querier/src/ingester/mod.rs b/querier/src/ingester/mod.rs index 5339f2fbb0..d55435580e 100644 --- a/querier/src/ingester/mod.rs +++ b/querier/src/ingester/mod.rs @@ -13,8 +13,7 @@ use async_trait::async_trait; use backoff::{Backoff, BackoffConfig, BackoffError}; use client_util::connection; use data_types::{ - ChunkId, ChunkOrder, DeletePredicate, NamespaceId, PartitionId, SequenceNumber, TableSummary, - TimestampMinMax, + ChunkId, ChunkOrder, DeletePredicate, NamespaceId, PartitionId, TableSummary, TimestampMinMax, }; use datafusion::error::DataFusionError; use futures::{stream::FuturesUnordered, TryStreamExt}; @@ -105,14 +104,6 @@ pub enum Error { source: Box, }, - #[snafu(display( - "Partition status missing for partition {partition_id}, ingestger: {ingester_address}" - ))] - PartitionStatusMissing { - partition_id: PartitionId, - ingester_address: String, - }, - #[snafu(display("Got batch without chunk information from ingester: {ingester_address}"))] BatchWithoutChunk { ingester_address: String }, @@ -550,10 +541,6 @@ impl IngesterStreamDecoder { self.flush_partition()?; let partition_id = PartitionId::new(md.partition_id); - let status = md.status.context(PartitionStatusMissingSnafu { - partition_id, - ingester_address: self.ingester_address.as_ref(), - })?; ensure!( !self.finished_partitions.contains_key(&partition_id), DuplicatePartitionInfoSnafu { @@ -576,7 +563,6 @@ impl IngesterStreamDecoder { ingester_uuid, partition_id, md.completed_persistence_count, - status.parquet_max_sequence_number.map(SequenceNumber::new), partition_sort_key, ); self.current_partition = Some(partition); @@ -772,10 +758,6 @@ pub struct IngesterPartition { /// The number of Parquet files this ingester UUID has persisted for this partition. completed_persistence_count: u64, - /// Maximum sequence number of parquet files the ingester has - /// persisted for this partition - parquet_max_sequence_number: Option, - /// Partition-wide sort key. 
partition_sort_key: Option>, @@ -790,14 +772,12 @@ impl IngesterPartition { ingester_uuid: Uuid, partition_id: PartitionId, completed_persistence_count: u64, - parquet_max_sequence_number: Option, partition_sort_key: Option>, ) -> Self { Self { ingester_uuid, partition_id, completed_persistence_count, - parquet_max_sequence_number, partition_sort_key, chunks: vec![], } @@ -864,10 +844,6 @@ impl IngesterPartition { self.completed_persistence_count } - pub(crate) fn parquet_max_sequence_number(&self) -> Option { - self.parquet_max_sequence_number - } - pub(crate) fn chunks(&self) -> &[IngesterChunk] { &self.chunks } @@ -1077,7 +1053,6 @@ mod tests { }; use assert_matches::assert_matches; use data_types::TableId; - use generated_types::influxdata::iox::ingester::v1::PartitionStatus; use influxdb_iox_client::flight::generated_types::IngesterQueryResponseMetadata; use iox_tests::TestCatalog; use metric::Attributes; @@ -1171,14 +1146,7 @@ mod tests { MockFlightClient::new([( "addr1", Ok(MockQueryData { - results: vec![metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: None, - }), - ingester_uuid.to_string(), - 5, - )], + results: vec![metadata(1, ingester_uuid.to_string(), 5)], }), )]) .await, @@ -1190,30 +1158,11 @@ mod tests { let p = &partitions[0]; assert_eq!(p.partition_id.get(), 1); - assert_eq!(p.parquet_max_sequence_number, None); assert_eq!(p.chunks.len(), 0); assert_eq!(p.ingester_uuid, ingester_uuid); assert_eq!(p.completed_persistence_count, 5); } - #[tokio::test] - async fn test_flight_err_partition_status_missing() { - let ingester_uuid = Uuid::new_v4(); - - let mock_flight_client = Arc::new( - MockFlightClient::new([( - "addr1", - Ok(MockQueryData { - results: vec![metadata(1, None, ingester_uuid.to_string(), 5)], - }), - )]) - .await, - ); - let ingester_conn = mock_flight_client.ingester_conn().await; - let err = get_partitions(&ingester_conn).await.unwrap_err(); - assert_matches!(err, Error::PartitionStatusMissing { .. 
}); - } - #[tokio::test] async fn test_flight_err_duplicate_partition_info() { let ingester_uuid = Uuid::new_v4(); @@ -1223,30 +1172,9 @@ mod tests { "addr1", Ok(MockQueryData { results: vec![ - metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: None, - }), - ingester_uuid.to_string(), - 3, - ), - metadata( - 2, - Some(PartitionStatus { - parquet_max_sequence_number: None, - }), - ingester_uuid.to_string(), - 4, - ), - metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: None, - }), - ingester_uuid.to_string(), - 5, - ), + metadata(1, ingester_uuid.to_string(), 3), + metadata(2, ingester_uuid.to_string(), 4), + metadata(1, ingester_uuid.to_string(), 5), ], }), )]) @@ -1320,14 +1248,7 @@ mod tests { "addr1", Ok(MockQueryData { results: vec![ - metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: Some(11), - }), - ingester_uuid1.to_string(), - 3, - ), + metadata(1, ingester_uuid1.to_string(), 3), Ok(( DecodedPayload::Schema(Arc::clone(&schema_1_1)), IngesterQueryResponseMetadata::default(), @@ -1348,14 +1269,7 @@ mod tests { DecodedPayload::RecordBatch(record_batch_1_2), IngesterQueryResponseMetadata::default(), )), - metadata( - 2, - Some(PartitionStatus { - parquet_max_sequence_number: Some(21), - }), - ingester_uuid1.to_string(), - 4, - ), + metadata(2, ingester_uuid1.to_string(), 4), Ok(( DecodedPayload::Schema(Arc::clone(&schema_2_1)), IngesterQueryResponseMetadata::default(), @@ -1371,14 +1285,7 @@ mod tests { "addr2", Ok(MockQueryData { results: vec![ - metadata( - 3, - Some(PartitionStatus { - parquet_max_sequence_number: Some(31), - }), - ingester_uuid2.to_string(), - 5, - ), + metadata(3, ingester_uuid2.to_string(), 5), Ok(( DecodedPayload::Schema(Arc::clone(&schema_3_1)), IngesterQueryResponseMetadata::default(), @@ -1400,10 +1307,6 @@ mod tests { let p1 = &partitions[0]; assert_eq!(p1.partition_id.get(), 1); - assert_eq!( - p1.parquet_max_sequence_number, - Some(SequenceNumber::new(11)) - ); assert_eq!(p1.chunks.len(), 2); assert_eq!(p1.chunks[0].schema().as_arrow(), schema_1_1); assert_eq!(p1.chunks[0].batches.len(), 2); @@ -1415,10 +1318,6 @@ mod tests { let p2 = &partitions[1]; assert_eq!(p2.partition_id.get(), 2); - assert_eq!( - p2.parquet_max_sequence_number, - Some(SequenceNumber::new(21)) - ); assert_eq!(p2.chunks.len(), 1); assert_eq!(p2.chunks[0].schema().as_arrow(), schema_2_1); assert_eq!(p2.chunks[0].batches.len(), 1); @@ -1426,10 +1325,6 @@ mod tests { let p3 = &partitions[2]; assert_eq!(p3.partition_id.get(), 3); - assert_eq!( - p3.parquet_max_sequence_number, - Some(SequenceNumber::new(31)) - ); assert_eq!(p3.chunks.len(), 1); assert_eq!(p3.chunks[0].schema().as_arrow(), schema_3_1); assert_eq!(p3.chunks[0].batches.len(), 1); @@ -1442,14 +1337,7 @@ mod tests { MockFlightClient::new([( "addr1", Ok(MockQueryData { - results: vec![metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: Some(11), - }), - "not-a-valid-uuid", - 42, - )], + results: vec![metadata(1, "not-a-valid-uuid", 42)], }), )]) .await, @@ -1481,36 +1369,15 @@ mod tests { "addr1", Ok(MockQueryData { results: vec![ - metadata( - 1, - Some(PartitionStatus { - parquet_max_sequence_number: Some(11), - }), - ingester_uuid1.to_string(), - 0, - ), - metadata( - 2, - Some(PartitionStatus { - parquet_max_sequence_number: Some(21), - }), - ingester_uuid1.to_string(), - 42, - ), + metadata(1, ingester_uuid1.to_string(), 0), + metadata(2, ingester_uuid1.to_string(), 42), ], }), ), ( "addr2", Ok(MockQueryData { - results: vec![metadata( - 3, - 
Some(PartitionStatus { - parquet_max_sequence_number: Some(31), - }), - ingester_uuid2.to_string(), - 9000, - )], + results: vec![metadata(3, ingester_uuid2.to_string(), 9000)], }), ), ]) @@ -1535,28 +1402,16 @@ mod tests { assert_eq!(p1.ingester_uuid, ingester_uuid1); assert_eq!(p1.completed_persistence_count, 0); assert_eq!(p1.partition_id.get(), 1); - assert_eq!( - p1.parquet_max_sequence_number, - Some(SequenceNumber::new(11)) - ); let p2 = &partitions[1]; assert_eq!(p2.ingester_uuid, ingester_uuid1); assert_eq!(p2.completed_persistence_count, 42); assert_eq!(p2.partition_id.get(), 2); - assert_eq!( - p2.parquet_max_sequence_number, - Some(SequenceNumber::new(21)) - ); let p3 = &partitions[2]; assert_eq!(p3.ingester_uuid, ingester_uuid2); assert_eq!(p3.completed_persistence_count, 9000); assert_eq!(p3.partition_id.get(), 3); - assert_eq!( - p3.parquet_max_sequence_number, - Some(SequenceNumber::new(31)) - ); } #[tokio::test] @@ -1694,7 +1549,6 @@ mod tests { fn metadata( partition_id: i64, - status: Option, ingester_uuid: impl Into, completed_persistence_count: u64, ) -> MockFlightResult { @@ -1702,7 +1556,6 @@ mod tests { DecodedPayload::None, IngesterQueryResponseMetadata { partition_id, - status, ingester_uuid: ingester_uuid.into(), completed_persistence_count, }, @@ -1813,17 +1666,11 @@ mod tests { ]; for case in cases { - let parquet_max_sequence_number = None; // Construct a partition and ensure it doesn't error - let ingester_partition = IngesterPartition::new( - ingester_uuid, - PartitionId::new(1), - 0, - parquet_max_sequence_number, - None, - ) - .try_add_chunk(ChunkId::new(), expected_schema.clone(), vec![case]) - .unwrap(); + let ingester_partition = + IngesterPartition::new(ingester_uuid, PartitionId::new(1), 0, None) + .try_add_chunk(ChunkId::new(), expected_schema.clone(), vec![case]) + .unwrap(); for batch in &ingester_partition.chunks[0].batches { assert_eq!(batch.schema(), expected_schema.as_arrow()); @@ -1843,16 +1690,9 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("b", int64_array()), ("time", ts_array())]).unwrap(); - let parquet_max_sequence_number = None; - let err = IngesterPartition::new( - ingester_uuid, - PartitionId::new(1), - 0, - parquet_max_sequence_number, - None, - ) - .try_add_chunk(ChunkId::new(), expected_schema, vec![batch]) - .unwrap_err(); + let err = IngesterPartition::new(ingester_uuid, PartitionId::new(1), 0, None) + .try_add_chunk(ChunkId::new(), expected_schema, vec![batch]) + .unwrap_err(); assert_matches!(err, Error::RecordBatchType { .. 
}); } diff --git a/querier/src/namespace/query_access.rs b/querier/src/namespace/query_access.rs index 4ce9353935..c6b1fd4d85 100644 --- a/querier/src/namespace/query_access.rs +++ b/querier/src/namespace/query_access.rs @@ -244,7 +244,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(1)) .with_line_protocol("cpu,host=a load=1 11") - .with_max_seq(1) .with_min_time(11) .with_max_time(11); partition_cpu_a_1.create_parquet_file(builder).await; @@ -252,7 +251,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(2)) .with_line_protocol("cpu,host=a load=2 22") - .with_max_seq(2) .with_min_time(22) .with_max_time(22); partition_cpu_a_1 @@ -264,7 +262,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(3)) .with_line_protocol("cpu,host=z load=0 0") - .with_max_seq(2) .with_min_time(22) .with_max_time(22); partition_cpu_a_1.create_parquet_file(builder).await; @@ -272,7 +269,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(4)) .with_line_protocol("cpu,host=a load=3 33") - .with_max_seq(3) .with_min_time(33) .with_max_time(33); partition_cpu_a_1.create_parquet_file(builder).await; @@ -280,7 +276,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(5)) .with_line_protocol("cpu,host=a load=4 10001") - .with_max_seq(4) .with_min_time(10_001) .with_max_time(10_001); partition_cpu_a_2.create_parquet_file(builder).await; @@ -288,7 +283,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_creation_time(Time::from_timestamp_nanos(6)) .with_line_protocol("cpu,host=b load=5 11") - .with_max_seq(5) .with_min_time(11) .with_max_time(11); partition_cpu_b_1.create_parquet_file(builder).await; @@ -302,7 +296,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(7)) .with_line_protocol(&lp) - .with_max_seq(6) .with_min_time(11) .with_max_time(14); partition_mem_c_1.create_parquet_file(builder).await; @@ -310,7 +303,6 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_max_l0_created_at(Time::from_timestamp_nanos(8)) .with_line_protocol("mem,host=c perc=50 1001") - .with_max_seq(7) .with_min_time(1001) .with_max_time(1001); partition_mem_c_2 @@ -523,7 +515,6 @@ mod tests { .with_max_l0_created_at(Time::from_timestamp_nanos(10)) // duplicate row with different field value (load=14) .with_line_protocol("cpu,host=a load=14 10001") - .with_max_seq(2_000) .with_min_time(10_001) .with_max_time(10_001); partition_cpu_a_2.create_parquet_file(builder).await; diff --git a/querier/src/parquet/creation.rs b/querier/src/parquet/creation.rs index a49cb53c67..29a9f1990e 100644 --- a/querier/src/parquet/creation.rs +++ b/querier/src/parquet/creation.rs @@ -226,7 +226,6 @@ impl ChunkAdapter { order, sort_key: Some(sort_key), partition_id: parquet_file.partition_id, - max_sequence_number: parquet_file.max_sequence_number, compaction_level: parquet_file.compaction_level, }); diff --git a/querier/src/parquet/mod.rs b/querier/src/parquet/mod.rs index cf0823d669..9d76fdd47b 100644 --- a/querier/src/parquet/mod.rs +++ b/querier/src/parquet/mod.rs @@ -1,8 +1,7 @@ //! 
Querier Chunks use data_types::{ - ChunkId, ChunkOrder, CompactionLevel, DeletePredicate, PartitionId, SequenceNumber, - TableSummary, + ChunkId, ChunkOrder, CompactionLevel, DeletePredicate, PartitionId, TableSummary, }; use iox_query::util::create_basic_summary; use parquet_file::chunk::ParquetChunk; @@ -29,9 +28,6 @@ pub struct QuerierParquetChunkMeta { /// Partition ID. partition_id: PartitionId, - /// The maximum sequence number within this chunk. - max_sequence_number: SequenceNumber, - /// Compaction level of the parquet file of the chunk compaction_level: CompactionLevel, } @@ -52,11 +48,6 @@ impl QuerierParquetChunkMeta { self.partition_id } - /// The maximum sequence number within this chunk. - pub fn max_sequence_number(&self) -> SequenceNumber { - self.max_sequence_number - } - /// Compaction level of the parquet file of the chunk pub fn compaction_level(&self) -> CompactionLevel { self.compaction_level diff --git a/querier/src/table/mod.rs b/querier/src/table/mod.rs index 90b489e76a..58c89c41ba 100644 --- a/querier/src/table/mod.rs +++ b/querier/src/table/mod.rs @@ -461,7 +461,7 @@ mod tests { table::test_util::{querier_table, IngesterPartitionBuilder}, }; use arrow_util::assert_batches_eq; - use data_types::{ChunkId, ColumnType, SequenceNumber}; + use data_types::{ChunkId, ColumnType}; use iox_query::exec::IOxSessionContext; use iox_tests::{TestCatalog, TestParquetFileBuilder, TestTable}; use iox_time::TimeProvider; @@ -522,7 +522,6 @@ mod tests { ); let builder = TestParquetFileBuilder::default() .with_line_protocol(&lp) - .with_max_seq(1) .with_min_time(outside_retention) .with_max_time(inside_retention); let file_partially_inside = partition.create_parquet_file(builder).await; @@ -536,7 +535,6 @@ mod tests { ); let builder = TestParquetFileBuilder::default() .with_line_protocol(&lp) - .with_max_seq(2) .with_min_time(inside_retention) .with_max_time(inside_retention); let file_fully_inside = partition.create_parquet_file(builder).await; @@ -550,7 +548,6 @@ mod tests { ); let builder = TestParquetFileBuilder::default() .with_line_protocol(&lp) - .with_max_seq(3) .with_min_time(outside_retention) .with_max_time(outside_retention); let _file_fully_outside = partition.create_parquet_file(builder).await; @@ -601,63 +598,54 @@ mod tests { let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=1 11") - .with_max_seq(2) .with_min_time(11) .with_max_time(11); let file111 = partition11.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=2 22") - .with_max_seq(4) .with_min_time(22) .with_max_time(22); let file112 = partition11.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=3 33") - .with_max_seq(6) .with_min_time(33) .with_max_time(33); let file113 = partition11.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=4 44") - .with_max_seq(8) .with_min_time(44) .with_max_time(44); let file114 = partition11.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=5 55") - .with_max_seq(10) .with_min_time(55) .with_max_time(55); let file115 = partition11.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=5 55") - .with_max_seq(2) .with_min_time(55) .with_max_time(55); let file121 = partition12.create_parquet_file(builder).await; let builder = 
TestParquetFileBuilder::default() .with_line_protocol("table1 foo=10 100") - .with_max_seq(2) .with_min_time(99) .with_max_time(99); let file122 = partition12.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table1 foo=10 100") - .with_max_seq(2) .with_min_time(100) .with_max_time(100); let _file123 = partition12.create_parquet_file(builder).await; let builder = TestParquetFileBuilder::default() .with_line_protocol("table2 foo=6 66") - .with_max_seq(2) .with_min_time(66) .with_max_time(66); let _file211 = partition21.create_parquet_file(builder).await; @@ -716,8 +704,7 @@ mod tests { let builder = IngesterPartitionBuilder::new(schema, &partition) .with_lp(["table,tag1=val1,tag2=val2 foo=3,bar=4 11"]); - let ingester_partition = - builder.build_with_max_parquet_sequence_number(Some(SequenceNumber::new(1))); + let ingester_partition = builder.build(); let querier_table = TestQuerierTable::new(&catalog, &table) .await @@ -783,13 +770,10 @@ mod tests { let builder = IngesterPartitionBuilder::new(schema, &partition).with_lp(["table foo=1 1"]); // Parquet file between with max sequence number 2 - let pf_builder = TestParquetFileBuilder::default() - .with_line_protocol("table1 foo=1 11") - .with_max_seq(2); + let pf_builder = TestParquetFileBuilder::default().with_line_protocol("table1 foo=1 11"); partition.create_parquet_file(pf_builder).await; - let ingester_partition = - builder.build_with_max_parquet_sequence_number(Some(SequenceNumber::new(2))); + let ingester_partition = builder.build(); let querier_table = TestQuerierTable::new(&catalog, &table) .await @@ -800,9 +784,7 @@ mod tests { assert_eq!(chunks.len(), 2); // Now, make a second chunk with max sequence number 3 - let pf_builder = TestParquetFileBuilder::default() - .with_line_protocol("table1 foo=1 22") - .with_max_seq(3); + let pf_builder = TestParquetFileBuilder::default().with_line_protocol("table1 foo=1 22"); partition.create_parquet_file(pf_builder).await; // With the same ingester response, still expect 2 chunks: one @@ -810,10 +792,8 @@ mod tests { let chunks = querier_table.chunks().await.unwrap(); assert_eq!(chunks.len(), 2); - // update the ingester response to return a new max parquet - // sequence number that includes the new file (3) - let ingester_partition = - builder.build_with_max_parquet_sequence_number(Some(SequenceNumber::new(3))); + // update the ingester response + let ingester_partition = builder.build(); let querier_table = querier_table .clear_ingester_partitions() diff --git a/querier/src/table/state_reconciler/interface.rs b/querier/src/table/state_reconciler/interface.rs index ba7bd95afe..5c7b4ed8e4 100644 --- a/querier/src/table/state_reconciler/interface.rs +++ b/querier/src/table/state_reconciler/interface.rs @@ -1,7 +1,7 @@ //! Interface for reconciling Ingester and catalog state use crate::{ingester::IngesterPartition, parquet::QuerierParquetChunk}; -use data_types::{CompactionLevel, ParquetFile, PartitionId, SequenceNumber}; +use data_types::{CompactionLevel, ParquetFile, PartitionId}; use std::{ops::Deref, sync::Arc}; /// Information about an ingester partition. @@ -9,17 +9,12 @@ use std::{ops::Deref, sync::Arc}; /// This is mostly the same as [`IngesterPartition`] but allows easier mocking. 
pub trait IngesterPartitionInfo { fn partition_id(&self) -> PartitionId; - fn parquet_max_sequence_number(&self) -> Option; } impl IngesterPartitionInfo for IngesterPartition { fn partition_id(&self) -> PartitionId { self.deref().partition_id() } - - fn parquet_max_sequence_number(&self) -> Option { - self.deref().parquet_max_sequence_number() - } } impl IngesterPartitionInfo for Arc @@ -29,10 +24,6 @@ where fn partition_id(&self) -> PartitionId { self.deref().partition_id() } - - fn parquet_max_sequence_number(&self) -> Option { - self.deref().parquet_max_sequence_number() - } } /// Information about a parquet file. @@ -40,7 +31,6 @@ where /// This is mostly the same as [`ParquetFile`] but allows easier mocking. pub trait ParquetFileInfo { fn partition_id(&self) -> PartitionId; - fn max_sequence_number(&self) -> SequenceNumber; fn compaction_level(&self) -> CompactionLevel; } @@ -49,10 +39,6 @@ impl ParquetFileInfo for Arc { self.partition_id } - fn max_sequence_number(&self) -> SequenceNumber { - self.max_sequence_number - } - fn compaction_level(&self) -> CompactionLevel { self.compaction_level } @@ -63,10 +49,6 @@ impl ParquetFileInfo for QuerierParquetChunk { self.meta().partition_id() } - fn max_sequence_number(&self) -> SequenceNumber { - self.meta().max_sequence_number() - } - fn compaction_level(&self) -> CompactionLevel { self.meta().compaction_level() } diff --git a/querier/src/table/test_util.rs b/querier/src/table/test_util.rs index f1b61b2554..e5a96135a7 100644 --- a/querier/src/table/test_util.rs +++ b/querier/src/table/test_util.rs @@ -4,7 +4,7 @@ use crate::{ IngesterPartition, }; use arrow::record_batch::RecordBatch; -use data_types::{ChunkId, SequenceNumber}; +use data_types::ChunkId; use iox_catalog::interface::{get_schema_by_name, SoftDeletedRows}; use iox_tests::{TestCatalog, TestPartition, TestTable}; use mutable_batch_lp::test_helpers::lp_to_mutable_batch; @@ -91,26 +91,14 @@ impl IngesterPartitionBuilder { self } - /// Create a ingester partition with the specified max parquet sequence number - pub(crate) fn build_with_max_parquet_sequence_number( - &self, - parquet_max_sequence_number: Option, - ) -> IngesterPartition { - self.build(parquet_max_sequence_number) - } - /// Create an ingester partition with the specified field values - pub(crate) fn build( - &self, - parquet_max_sequence_number: Option, - ) -> IngesterPartition { + pub(crate) fn build(&self) -> IngesterPartition { let data = self.lp.iter().map(|lp| lp_to_record_batch(lp)).collect(); IngesterPartition::new( Uuid::new_v4(), self.partition.partition.id, 0, - parquet_max_sequence_number, self.partition_sort_key.clone(), ) .try_add_chunk( diff --git a/service_grpc_catalog/src/lib.rs b/service_grpc_catalog/src/lib.rs index e19cba4762..22331ee194 100644 --- a/service_grpc_catalog/src/lib.rs +++ b/service_grpc_catalog/src/lib.rs @@ -172,7 +172,6 @@ fn to_parquet_file(p: data_types::ParquetFile) -> ParquetFile { table_id: p.table_id.get(), partition_id: p.partition_id.get(), object_store_id: p.object_store_id.to_string(), - max_sequence_number: p.max_sequence_number.get(), min_time: p.min_time.get(), max_time: p.max_time.get(), to_delete: p.to_delete.map(|t| t.get()).unwrap_or(0), @@ -198,9 +197,7 @@ fn to_partition(p: data_types::Partition) -> Partition { #[cfg(test)] mod tests { use super::*; - use data_types::{ - ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp, - }; + use data_types::{ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, Timestamp}; use 
generated_types::influxdata::iox::catalog::v1::catalog_service_server::CatalogService; use iox_catalog::mem::MemCatalog; use uuid::Uuid; @@ -241,7 +238,6 @@ mod tests { table_id: table.id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(40), min_time: Timestamp::new(1), max_time: Timestamp::new(5), file_size_bytes: 2343, @@ -253,7 +249,6 @@ mod tests { }; let p2params = ParquetFileParams { object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(70), ..p1params.clone() }; p1 = repos.parquet_files().create(p1params).await.unwrap(); diff --git a/service_grpc_object_store/src/lib.rs b/service_grpc_object_store/src/lib.rs index 1abd7f825a..26dfc38bcc 100644 --- a/service_grpc_object_store/src/lib.rs +++ b/service_grpc_object_store/src/lib.rs @@ -96,9 +96,7 @@ impl object_store_service_server::ObjectStoreService for ObjectStoreService { mod tests { use super::*; use bytes::Bytes; - use data_types::{ - ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, SequenceNumber, Timestamp, - }; + use data_types::{ColumnId, ColumnSet, CompactionLevel, ParquetFileParams, Timestamp}; use generated_types::influxdata::iox::object_store::v1::object_store_service_server::ObjectStoreService; use iox_catalog::mem::MemCatalog; use object_store::{memory::InMemory, ObjectStore}; @@ -138,7 +136,6 @@ mod tests { table_id: table.id, partition_id: partition.id, object_store_id: Uuid::new_v4(), - max_sequence_number: SequenceNumber::new(40), min_time: Timestamp::new(1), max_time: Timestamp::new(5), file_size_bytes: 2343, From 05688799c42bc386fcadb695eb630b7299bd8333 Mon Sep 17 00:00:00 2001 From: "Christopher M. Wolff" Date: Wed, 3 May 2023 08:20:14 -0700 Subject: [PATCH 011/119] fix: handle aliases in gapfill aggregate columns (#7725) Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- .../tests/query_tests2/cases/in/gapfill.sql | 8 + .../cases/in/gapfill.sql.expected | 161 +++++++++------- .../src/logical_optimizer/handle_gapfill.rs | 176 ++++++++++++++---- 3 files changed, 238 insertions(+), 107 deletions(-) diff --git a/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql b/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql index fde419cf6c..404edc97fe 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql +++ b/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql @@ -100,3 +100,11 @@ from cpu where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:44:00Z' group by four_minute; +-- With an aliased aggregate column +SELECT + region, + date_bin_gapfill('10 minute', time) as minute, + locf(avg(cpu.user)) as locf_avg_user +from cpu +where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' +group by region, minute; diff --git a/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql.expected b/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql.expected index a0b63e1b76..11c67bb74c 100644 --- a/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql.expected +++ b/influxdb_iox/tests/query_tests2/cases/in/gapfill.sql.expected @@ -103,11 +103,11 @@ ---------- | plan_type | plan | ---------- -| logical_plan | Projection: cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time) AS minute, AVG(cpu.user) | +| logical_plan | Projection: cpu.region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time) AS minute, AVG(cpu.user) AS locf(AVG(cpu.user)) | | | GapFill: groupBy=[[cpu.region, 
date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[LOCF(AVG(cpu.user))]], time_column=date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time), stride=IntervalMonthDayNano("600000000000"), range=Included(TimestampNanosecond(957528000000000000, None))..Included(TimestampNanosecond(957531540000000000, None)) | | | Aggregate: groupBy=[[cpu.region, datebin(IntervalMonthDayNano("600000000000"), cpu.time) AS date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)]], aggr=[[AVG(cpu.user)]] | | | TableScan: cpu projection=[region, time, user], full_filters=[cpu.time >= TimestampNanosecond(957528000000000000, None), cpu.time <= TimestampNanosecond(957531540000000000, None)] | -| physical_plan | ProjectionExec: expr=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 as minute, AVG(cpu.user)@2 as AVG(cpu.user)] | +| physical_plan | ProjectionExec: expr=[region@0 as region, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 as minute, AVG(cpu.user)@2 as locf(AVG(cpu.user))] | | | GapFillExec: group_expr=[region@0, date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1], aggr_expr=[LOCF(AVG(cpu.user)@2)], stride=600000000000, time_range=Included("957528000000000000")..Included("957531540000000000") | | | SortPreservingMergeExec: [region@0 ASC,date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 ASC] | | | SortExec: expr=[region@0 ASC,date_bin_gapfill(IntervalMonthDayNano("600000000000"),cpu.time)@1 ASC] | @@ -122,76 +122,93 @@ | | | ---------- -- SQL: SELECT region, date_bin_gapfill(interval '5 minute', time) as minute, locf(min(cpu.user)) from cpu where time between timestamp '2000-05-05T12:15:00Z' and timestamp '2000-05-05T12:59:00Z' group by region, minute; -+--------+----------------------+---------------+ -| region | minute | MIN(cpu.user) | -+--------+----------------------+---------------+ -| a | 2000-05-05T12:15:00Z | | -| a | 2000-05-05T12:20:00Z | 23.2 | -| a | 2000-05-05T12:25:00Z | 23.2 | -| a | 2000-05-05T12:30:00Z | 23.2 | -| a | 2000-05-05T12:35:00Z | 23.2 | -| a | 2000-05-05T12:40:00Z | 21.0 | -| a | 2000-05-05T12:45:00Z | 21.0 | -| a | 2000-05-05T12:50:00Z | 21.0 | -| a | 2000-05-05T12:55:00Z | 21.0 | -| b | 2000-05-05T12:15:00Z | | -| b | 2000-05-05T12:20:00Z | | -| b | 2000-05-05T12:25:00Z | | -| b | 2000-05-05T12:30:00Z | 25.2 | -| b | 2000-05-05T12:35:00Z | 28.9 | -| b | 2000-05-05T12:40:00Z | 28.9 | -| b | 2000-05-05T12:45:00Z | 28.9 | -| b | 2000-05-05T12:50:00Z | 28.9 | -| b | 2000-05-05T12:55:00Z | 28.9 | -+--------+----------------------+---------------+ ++--------+----------------------+---------------------+ +| region | minute | locf(MIN(cpu.user)) | ++--------+----------------------+---------------------+ +| a | 2000-05-05T12:15:00Z | | +| a | 2000-05-05T12:20:00Z | 23.2 | +| a | 2000-05-05T12:25:00Z | 23.2 | +| a | 2000-05-05T12:30:00Z | 23.2 | +| a | 2000-05-05T12:35:00Z | 23.2 | +| a | 2000-05-05T12:40:00Z | 21.0 | +| a | 2000-05-05T12:45:00Z | 21.0 | +| a | 2000-05-05T12:50:00Z | 21.0 | +| a | 2000-05-05T12:55:00Z | 21.0 | +| b | 2000-05-05T12:15:00Z | | +| b | 2000-05-05T12:20:00Z | | +| b | 2000-05-05T12:25:00Z | | +| b | 2000-05-05T12:30:00Z | 25.2 | +| b | 2000-05-05T12:35:00Z | 28.9 | +| b | 2000-05-05T12:40:00Z | 28.9 | +| b | 2000-05-05T12:45:00Z | 28.9 | +| b | 2000-05-05T12:50:00Z | 28.9 | +| b | 2000-05-05T12:55:00Z | 28.9 | ++--------+----------------------+---------------------+ -- SQL: SELECT date_bin_gapfill(interval '1 minute', time) as minute, 
locf(min(cpu.idle)) from cpu where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:40:00Z' group by minute; -+----------------------+---------------+ -| minute | MIN(cpu.idle) | -+----------------------+---------------+ -| 2000-05-05T12:19:00Z | | -| 2000-05-05T12:20:00Z | 70.0 | -| 2000-05-05T12:21:00Z | 70.0 | -| 2000-05-05T12:22:00Z | 70.0 | -| 2000-05-05T12:23:00Z | 70.0 | -| 2000-05-05T12:24:00Z | 70.0 | -| 2000-05-05T12:25:00Z | 70.0 | -| 2000-05-05T12:26:00Z | 70.0 | -| 2000-05-05T12:27:00Z | 70.0 | -| 2000-05-05T12:28:00Z | 70.0 | -| 2000-05-05T12:29:00Z | 70.0 | -| 2000-05-05T12:30:00Z | 70.0 | -| 2000-05-05T12:31:00Z | 70.0 | -| 2000-05-05T12:32:00Z | 70.0 | -| 2000-05-05T12:33:00Z | 70.0 | -| 2000-05-05T12:34:00Z | 70.0 | -| 2000-05-05T12:35:00Z | 70.0 | -| 2000-05-05T12:36:00Z | 70.0 | -| 2000-05-05T12:37:00Z | 70.0 | -| 2000-05-05T12:38:00Z | 70.0 | -| 2000-05-05T12:39:00Z | 60.0 | -| 2000-05-05T12:40:00Z | 60.0 | -+----------------------+---------------+ ++----------------------+---------------------+ +| minute | locf(MIN(cpu.idle)) | ++----------------------+---------------------+ +| 2000-05-05T12:19:00Z | | +| 2000-05-05T12:20:00Z | 70.0 | +| 2000-05-05T12:21:00Z | 70.0 | +| 2000-05-05T12:22:00Z | 70.0 | +| 2000-05-05T12:23:00Z | 70.0 | +| 2000-05-05T12:24:00Z | 70.0 | +| 2000-05-05T12:25:00Z | 70.0 | +| 2000-05-05T12:26:00Z | 70.0 | +| 2000-05-05T12:27:00Z | 70.0 | +| 2000-05-05T12:28:00Z | 70.0 | +| 2000-05-05T12:29:00Z | 70.0 | +| 2000-05-05T12:30:00Z | 70.0 | +| 2000-05-05T12:31:00Z | 70.0 | +| 2000-05-05T12:32:00Z | 70.0 | +| 2000-05-05T12:33:00Z | 70.0 | +| 2000-05-05T12:34:00Z | 70.0 | +| 2000-05-05T12:35:00Z | 70.0 | +| 2000-05-05T12:36:00Z | 70.0 | +| 2000-05-05T12:37:00Z | 70.0 | +| 2000-05-05T12:38:00Z | 70.0 | +| 2000-05-05T12:39:00Z | 60.0 | +| 2000-05-05T12:40:00Z | 60.0 | ++----------------------+---------------------+ -- SQL: SELECT date_bin_gapfill(interval '4 minutes', time) as four_minute, interpolate(min(cpu.idle)), interpolate(min(cpu."user")), count(*) from cpu where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:40:00Z' group by four_minute; -+----------------------+---------------+---------------+-----------------+ -| four_minute | MIN(cpu.idle) | MIN(cpu.user) | COUNT(UInt8(1)) | -+----------------------+---------------+---------------+-----------------+ -| 2000-05-05T12:16:00Z | | | | -| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 | -| 2000-05-05T12:24:00Z | 67.5 | 24.2 | | -| 2000-05-05T12:28:00Z | 65.0 | 25.2 | 1 | -| 2000-05-05T12:32:00Z | 62.5 | 27.05 | | -| 2000-05-05T12:36:00Z | 60.0 | 28.9 | 1 | -| 2000-05-05T12:40:00Z | | 21.0 | 1 | -+----------------------+---------------+---------------+-----------------+ ++----------------------+----------------------------+----------------------------+-----------------+ +| four_minute | interpolate(MIN(cpu.idle)) | interpolate(MIN(cpu.user)) | COUNT(UInt8(1)) | ++----------------------+----------------------------+----------------------------+-----------------+ +| 2000-05-05T12:16:00Z | | | | +| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 | +| 2000-05-05T12:24:00Z | 67.5 | 24.2 | | +| 2000-05-05T12:28:00Z | 65.0 | 25.2 | 1 | +| 2000-05-05T12:32:00Z | 62.5 | 27.05 | | +| 2000-05-05T12:36:00Z | 60.0 | 28.9 | 1 | +| 2000-05-05T12:40:00Z | | 21.0 | 1 | ++----------------------+----------------------------+----------------------------+-----------------+ -- SQL: SELECT date_bin_gapfill(interval '4 minutes 1 nanosecond', time, timestamp 
'2000-05-05T12:15:59.999999999') as four_minute, interpolate(min(cpu.idle)), interpolate(min(cpu."user")), count(*) from cpu where time between timestamp '2000-05-05T12:19:00Z' and timestamp '2000-05-05T12:44:00Z' group by four_minute; -+--------------------------------+---------------+---------------+-----------------+ -| four_minute | MIN(cpu.idle) | MIN(cpu.user) | COUNT(UInt8(1)) | -+--------------------------------+---------------+---------------+-----------------+ -| 2000-05-05T12:15:59.999999999Z | | | | -| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 | -| 2000-05-05T12:24:00.000000001Z | 67.5 | 24.2 | | -| 2000-05-05T12:28:00.000000002Z | 65.0 | 25.2 | 1 | -| 2000-05-05T12:32:00.000000003Z | 62.5 | 23.1 | | -| 2000-05-05T12:36:00.000000004Z | 60.0 | 21.0 | 2 | -| 2000-05-05T12:40:00.000000005Z | | | | -+--------------------------------+---------------+---------------+-----------------+ \ No newline at end of file ++--------------------------------+----------------------------+----------------------------+-----------------+ +| four_minute | interpolate(MIN(cpu.idle)) | interpolate(MIN(cpu.user)) | COUNT(UInt8(1)) | ++--------------------------------+----------------------------+----------------------------+-----------------+ +| 2000-05-05T12:15:59.999999999Z | | | | +| 2000-05-05T12:20:00Z | 70.0 | 23.2 | 1 | +| 2000-05-05T12:24:00.000000001Z | 67.5 | 24.2 | | +| 2000-05-05T12:28:00.000000002Z | 65.0 | 25.2 | 1 | +| 2000-05-05T12:32:00.000000003Z | 62.5 | 23.1 | | +| 2000-05-05T12:36:00.000000004Z | 60.0 | 21.0 | 2 | +| 2000-05-05T12:40:00.000000005Z | | | | ++--------------------------------+----------------------------+----------------------------+-----------------+ +-- SQL: SELECT region, date_bin_gapfill('10 minute', time) as minute, locf(avg(cpu.user)) as locf_avg_user from cpu where time between timestamp '2000-05-05T12:00:00Z' and timestamp '2000-05-05T12:59:00Z' group by region, minute; ++--------+----------------------+--------------------+ +| region | minute | locf_avg_user | ++--------+----------------------+--------------------+ +| a | 2000-05-05T12:00:00Z | | +| a | 2000-05-05T12:10:00Z | | +| a | 2000-05-05T12:20:00Z | 23.2 | +| a | 2000-05-05T12:30:00Z | 23.2 | +| a | 2000-05-05T12:40:00Z | 21.0 | +| a | 2000-05-05T12:50:00Z | 21.0 | +| b | 2000-05-05T12:00:00Z | | +| b | 2000-05-05T12:10:00Z | | +| b | 2000-05-05T12:20:00Z | | +| b | 2000-05-05T12:30:00Z | 27.049999999999997 | +| b | 2000-05-05T12:40:00Z | 27.049999999999997 | +| b | 2000-05-05T12:50:00Z | 27.049999999999997 | ++--------+----------------------+--------------------+ \ No newline at end of file diff --git a/iox_query/src/logical_optimizer/handle_gapfill.rs b/iox_query/src/logical_optimizer/handle_gapfill.rs index 23d346dcc7..6c7526e5e1 100644 --- a/iox_query/src/logical_optimizer/handle_gapfill.rs +++ b/iox_query/src/logical_optimizer/handle_gapfill.rs @@ -14,6 +14,7 @@ use datafusion::{ optimizer::{optimizer::ApplyOrder, OptimizerConfig, OptimizerRule}, prelude::{col, Expr}, }; +use hashbrown::{hash_map, HashMap}; use query_functions::gapfill::{DATE_BIN_GAPFILL_UDF_NAME, INTERPOLATE_UDF_NAME, LOCF_UDF_NAME}; use std::{ collections::HashSet, @@ -205,9 +206,6 @@ fn build_gapfill_node( .collect(); let aggr_expr = new_group_expr.split_off(aggr.group_expr.len()); - // For now, we can only fill with null values. - // In the future, this rule will allow a projection to be pushed into the - // GapFill node, e.g., if it contains an item like `LOCF()`. 
let fill_behavior = aggr_expr .iter() .cloned() @@ -365,6 +363,16 @@ fn udf_to_fill_strategy(name: &str) -> Option { } } +fn fill_strategy_to_udf(fs: &FillStrategy) -> Result<&'static str> { + match fs { + FillStrategy::PrevNullAsMissing => Ok(LOCF_UDF_NAME), + FillStrategy::LinearInterpolate => Ok(INTERPOLATE_UDF_NAME), + _ => Err(DataFusionError::Internal(format!( + "unknown UDF for fill strategy {fs:?}" + ))), + } +} + fn handle_projection(proj: &Projection) -> Result> { let Projection { input, @@ -381,49 +389,32 @@ fn handle_projection(proj: &Projection) -> Result> { return Ok(None) }; - let fill_cols: Vec<(&Expr, FillStrategy, &str)> = proj_exprs + let mut fill_fn_rewriter = FillFnRewriter { + aggr_col_fill_map: HashMap::new(), + }; + let new_proj_exprs = proj_exprs .iter() - .filter_map(|e| match e { - Expr::ScalarUDF { fun, args } => { - if let Some(strategy) = udf_to_fill_strategy(&fun.name) { - let col = &args[0]; - Some((col, strategy, fun.name.as_str())) - } else { - None - } - } - _ => None, - }) - .collect(); - if fill_cols.is_empty() { - // No special gap-filling functions, nothing to do. + .map(|e| e.clone().rewrite(&mut fill_fn_rewriter)) + .collect::>>()?; + + let FillFnRewriter { aggr_col_fill_map } = fill_fn_rewriter; + if aggr_col_fill_map.is_empty() { return Ok(None); } // Clone the existing GapFill node, then modify it in place // to reflect the new fill strategy. let mut new_gapfill = child_gapfill.clone(); - for (e, fs, fn_name) in fill_cols { - if new_gapfill.replace_fill_strategy(e, fs).is_none() { + for (e, fs) in aggr_col_fill_map { + let udf = fill_strategy_to_udf(&fs)?; + if new_gapfill.replace_fill_strategy(&e, fs).is_none() { // There was a gap filling function called on a non-aggregate column. return Err(DataFusionError::Plan(format!( - "{fn_name} must be called on an aggregate column in a gap-filling query" + "{udf} must be called on an aggregate column in a gap-filling query", ))); } } - // Remove the gap filling functions from the projection. - let new_proj_exprs: Vec = proj_exprs - .iter() - .cloned() - .map(|e| match e { - Expr::ScalarUDF { fun, mut args } if udf_to_fill_strategy(&fun.name).is_some() => { - args.remove(0) - } - _ => e, - }) - .collect(); - let new_proj = { let mut proj = proj.clone(); proj.expr = new_proj_exprs; @@ -437,6 +428,58 @@ fn handle_projection(proj: &Projection) -> Result> { Ok(Some(new_proj)) } +/// Implements `TreeNodeRewriter`: +/// - Traverses over the expressions in a projection node +/// - If it finds `locf(col)` or `interpolate(col)`, +/// it replaces them with `col AS ` +/// - Collects into [`Self::aggr_col_fill_map`] which correlates +/// aggregate columns to their [`FillStrategy`]. +struct FillFnRewriter { + aggr_col_fill_map: HashMap, +} + +impl TreeNodeRewriter for FillFnRewriter { + type N = Expr; + fn pre_visit(&mut self, expr: &Expr) -> Result { + match expr { + Expr::ScalarUDF { fun, .. } if udf_to_fill_strategy(&fun.name).is_some() => { + Ok(RewriteRecursion::Mutate) + } + _ => Ok(RewriteRecursion::Continue), + } + } + + fn mutate(&mut self, expr: Expr) -> Result { + let orig_name = expr.display_name()?; + match expr { + Expr::ScalarUDF { ref fun, .. 
} if udf_to_fill_strategy(&fun.name).is_none() => { + Ok(expr) + } + Expr::ScalarUDF { fun, mut args } => { + let fs = udf_to_fill_strategy(&fun.name).expect("must be a fill fn"); + let arg = args.remove(0); + self.add_fill_strategy(arg.clone(), fs)?; + Ok(arg.alias(orig_name)) + } + _ => Ok(expr), + } + } +} + +impl FillFnRewriter { + fn add_fill_strategy(&mut self, e: Expr, fs: FillStrategy) -> Result<()> { + match self.aggr_col_fill_map.entry(e) { + hash_map::Entry::Occupied(_) => Err(DataFusionError::NotImplemented( + "multiple fill strategies for the same column".to_string(), + )), + hash_map::Entry::Vacant(ve) => { + ve.insert(fs); + Ok(()) + } + } + } +} + fn count_udf(e: &Expr, name: &str) -> Result { let mut count = 0; e.apply(&mut |expr| { @@ -659,6 +702,35 @@ mod test { Ok(()) } + #[test] + fn different_fill_strategies_one_col() -> Result<()> { + let plan = LogicalPlanBuilder::from(table_scan()?) + .filter( + col("time") + .gt_eq(lit_timestamp_nano(1000)) + .and(col("time").lt(lit_timestamp_nano(2000))), + )? + .aggregate( + vec![ + col("loc"), + date_bin_gapfill(lit(ScalarValue::IntervalDayTime(Some(60_000))), col("time"))?, + ], + vec![avg(col("temp")), min(col("temp"))], + )? + .project(vec![ + locf(col("loc"))?, + col("date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)"), + locf(col("AVG(temps.temp)"))?, + interpolate(col("AVG(temps.temp)"))?, + ])? + .build()?; + assert_optimizer_err( + &plan, + "This feature is not implemented: multiple fill strategies for the same column", + ); + Ok(()) + } + #[test] fn nonscalar_origin() -> Result<()> { let plan = LogicalPlanBuilder::from(table_scan()?) @@ -928,7 +1000,7 @@ mod test { format_optimized_plan(&plan)?, @r###" --- - - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp), MIN(temps.temp)" + - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp) AS locf(AVG(temps.temp)), MIN(temps.temp) AS locf(MIN(temps.temp))" - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[LOCF(AVG(temps.temp)), LOCF(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" @@ -937,6 +1009,40 @@ mod test { Ok(()) } + #[test] + fn with_locf_aliased() -> Result<()> { + let plan = LogicalPlanBuilder::from(table_scan()?) + .filter( + col("time") + .gt_eq(lit_timestamp_nano(1000)) + .and(col("time").lt(lit_timestamp_nano(2000))), + )? + .aggregate( + vec![date_bin_gapfill( + lit(ScalarValue::IntervalDayTime(Some(60_000))), + col("time"), + )?], + vec![avg(col("temp")), min(col("temp"))], + )? + .project(vec![ + col("date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)"), + locf(col("MIN(temps.temp)"))?.alias("locf_min_temp"), + ])? 
+ .build()?; + + insta::assert_yaml_snapshot!( + format_optimized_plan(&plan)?, + @r###" + --- + - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), MIN(temps.temp) AS locf(MIN(temps.temp)) AS locf_min_temp" + - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), LOCF(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" + - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" + - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" + - " TableScan: temps" + "###); + Ok(()) + } + #[test] fn with_interpolate() -> Result<()> { let plan = LogicalPlanBuilder::from(table_scan()?) @@ -963,7 +1069,7 @@ mod test { format_optimized_plan(&plan)?, @r###" --- - - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp), MIN(temps.temp)" + - "Projection: date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), AVG(temps.temp) AS interpolate(AVG(temps.temp)), MIN(temps.temp) AS interpolate(MIN(temps.temp))" - " GapFill: groupBy=[[date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[INTERPOLATE(AVG(temps.temp)), INTERPOLATE(MIN(temps.temp))]], time_column=date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time), stride=IntervalDayTime(\"60000\"), range=Included(TimestampNanosecond(1000, None))..Excluded(TimestampNanosecond(2000, None))" - " Aggregate: groupBy=[[datebin(IntervalDayTime(\"60000\"), temps.time) AS date_bin_gapfill(IntervalDayTime(\"60000\"),temps.time)]], aggr=[[AVG(temps.temp), MIN(temps.temp)]]" - " Filter: temps.time >= TimestampNanosecond(1000, None) AND temps.time < TimestampNanosecond(2000, None)" From abe5d26d2f52d660c9a1ea17923ea5e9172fa5be Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Wed, 3 May 2023 18:40:00 +0200 Subject: [PATCH 012/119] chore: update DataFusion to `2787e7a36a6be83d91201df20827d3695f933300` (#7732) Required to get: - https://github.com/apache/arrow-datafusion/pull/6199 --- Cargo.lock | 18 +++++++++--------- Cargo.toml | 4 ++-- workspace-hack/Cargo.toml | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b2685fc3a..dd26a90177 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1432,7 +1432,7 @@ dependencies = [ [[package]] name = "datafusion" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "ahash 0.8.3", "arrow", @@ -1481,7 +1481,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "arrow", "arrow-array", @@ -1495,7 +1495,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "23.0.0" -source = 
"git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "dashmap", "datafusion-common", @@ -1512,7 +1512,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "ahash 0.8.3", "arrow", @@ -1523,7 +1523,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "arrow", "async-trait", @@ -1540,7 +1540,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "ahash 0.8.3", "arrow", @@ -1572,7 +1572,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "arrow", "chrono", @@ -1586,7 +1586,7 @@ dependencies = [ [[package]] name = "datafusion-row" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "arrow", "datafusion-common", @@ -1597,7 +1597,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "23.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=beee1d91303d5eff220fadd08b2c28404c2b3e5a#beee1d91303d5eff220fadd08b2c28404c2b3e5a" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2787e7a36a6be83d91201df20827d3695f933300#2787e7a36a6be83d91201df20827d3695f933300" dependencies = [ "arrow", "arrow-schema", diff --git a/Cargo.toml b/Cargo.toml index 31910e771a..ea50bde0f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -115,8 +115,8 @@ license = "MIT OR Apache-2.0" [workspace.dependencies] arrow = { version = "38.0.0" } arrow-flight = { version = "38.0.0" } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="beee1d91303d5eff220fadd08b2c28404c2b3e5a", default-features = false } -datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="beee1d91303d5eff220fadd08b2c28404c2b3e5a" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", 
rev="2787e7a36a6be83d91201df20827d3695f933300", default-features = false } +datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev="2787e7a36a6be83d91201df20827d3695f933300" } hashbrown = { version = "0.13.2" } parquet = { version = "38.0.0" } tonic = { version = "0.9.2", features = ["tls", "tls-webpki-roots"] } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 2531f168b1..763595b17e 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -30,9 +30,9 @@ bytes = { version = "1" } chrono = { version = "0.4", default-features = false, features = ["alloc", "clock", "serde"] } crossbeam-utils = { version = "0.8" } crypto-common = { version = "0.1", default-features = false, features = ["std"] } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "beee1d91303d5eff220fadd08b2c28404c2b3e5a" } -datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "beee1d91303d5eff220fadd08b2c28404c2b3e5a", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } -datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "beee1d91303d5eff220fadd08b2c28404c2b3e5a", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2787e7a36a6be83d91201df20827d3695f933300" } +datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2787e7a36a6be83d91201df20827d3695f933300", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } +datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "2787e7a36a6be83d91201df20827d3695f933300", default-features = false, features = ["crypto_expressions", "regex_expressions", "unicode_expressions"] } digest = { version = "0.10", features = ["mac", "std"] } either = { version = "1" } fixedbitset = { version = "0.4" } From 6de18b6544978c287ac76156b40bc9a805df0b56 Mon Sep 17 00:00:00 2001 From: Joe-Blount <73478756+Joe-Blount@users.noreply.github.com> Date: Wed, 3 May 2023 15:09:00 -0500 Subject: [PATCH 013/119] chore: conditionally parse shard_id from HOSTNAME (#7733) * chore: conditionally parse shard_id from HOSTNAME * chore: remove HOSTNAME env from test case relying on it not being there. --- clap_blocks/src/compactor2.rs | 8 +++++++ compactor2/src/components/hardcoded.rs | 5 ++++ influxdb_iox/src/commands/run/all_in_one.rs | 1 + .../tests/end_to_end_cases/compactor.rs | 15 ++++++++++++ ioxd_compactor2/src/lib.rs | 24 +++++++++++++++++-- 5 files changed, 51 insertions(+), 2 deletions(-) diff --git a/clap_blocks/src/compactor2.rs b/clap_blocks/src/compactor2.rs index 64584a802f..6b6cf5d64f 100644 --- a/clap_blocks/src/compactor2.rs +++ b/clap_blocks/src/compactor2.rs @@ -235,6 +235,7 @@ pub struct Compactor2Config { /// Number of shards. /// /// If this is set then the shard ID MUST also be set. If both are not provided, sharding is disabled. + /// (shard ID can be provided by the host name) #[clap( long = "compaction-shard-count", env = "INFLUXDB_IOX_COMPACTION_SHARD_COUNT", @@ -254,6 +255,13 @@ pub struct Compactor2Config { )] pub shard_id: Option, + /// Host Name + /// + /// comprised of leading text (e.g. 'iox-shared-compactor-'), ending with shard_id (e.g. '0'). 
+ /// When shard_count is specified, but shard_id is not specified, the id is extracted from hostname. + #[clap(long = "hostname", env = "HOSTNAME", action)] + pub hostname: Option, + /// Minimum number of L1 files to compact to L2. /// /// If there are more than this many L1 (by definition non diff --git a/compactor2/src/components/hardcoded.rs b/compactor2/src/components/hardcoded.rs index 4a6c04f17b..491e1ad7a0 100644 --- a/compactor2/src/components/hardcoded.rs +++ b/compactor2/src/components/hardcoded.rs @@ -6,6 +6,7 @@ use std::{sync::Arc, time::Duration}; use data_types::CompactionLevel; use object_store::memory::InMemory; +use observability_deps::tracing::info; use crate::{ config::{CompactionType, Config, PartitionsSourceConfig}, @@ -156,6 +157,10 @@ fn make_partitions_source_commit_partition_sink( let mut id_only_partition_filters: Vec> = vec![]; if let Some(shard_config) = &config.shard_config { // add shard filter before performing any catalog IO + info!( + "starting compactor {} of {}", + shard_config.shard_id, shard_config.n_shards + ); id_only_partition_filters.push(Arc::new(ShardPartitionFilter::new( shard_config.n_shards, shard_config.shard_id, diff --git a/influxdb_iox/src/commands/run/all_in_one.rs b/influxdb_iox/src/commands/run/all_in_one.rs index ae136ee291..ca519aecf5 100644 --- a/influxdb_iox/src/commands/run/all_in_one.rs +++ b/influxdb_iox/src/commands/run/all_in_one.rs @@ -505,6 +505,7 @@ impl Config { ignore_partition_skip_marker: false, shard_count: None, shard_id: None, + hostname: None, min_num_l1_files_to_compact: 1, process_once: false, process_all_partitions: false, diff --git a/influxdb_iox/tests/end_to_end_cases/compactor.rs b/influxdb_iox/tests/end_to_end_cases/compactor.rs index d9490a11d4..7744c011ad 100644 --- a/influxdb_iox/tests/end_to_end_cases/compactor.rs +++ b/influxdb_iox/tests/end_to_end_cases/compactor.rs @@ -66,6 +66,7 @@ fn num_shards_without_shard_id_is_invalid() { .arg("compactor2") .env("INFLUXDB_IOX_COMPACTION_SHARD_COUNT", "1") // only provide shard count .env("INFLUXDB_IOX_CATALOG_TYPE", "memory") + .env_remove("HOSTNAME") .assert() .failure() .stderr(predicate::str::contains( @@ -73,6 +74,20 @@ fn num_shards_without_shard_id_is_invalid() { )); } +#[test] +fn num_shards_with_hostname_is_valid() { + Command::cargo_bin("influxdb_iox") + .unwrap() + .arg("run") + .arg("compactor2") + .env("INFLUXDB_IOX_COMPACTION_SHARD_COUNT", "3") // provide shard count + .env("HOSTNAME", "iox-shared-compactor-8") // provide shard id via hostname + .env("INFLUXDB_IOX_CATALOG_TYPE", "memory") + .assert() + .failure() + .stderr(predicate::str::contains("shard_id out of range")); +} + #[tokio::test] async fn sharded_compactor_0_always_compacts_partition_1() { test_helpers::maybe_start_logging(); diff --git a/ioxd_compactor2/src/lib.rs b/ioxd_compactor2/src/lib.rs index 5f4901cc1d..397e4aa24e 100644 --- a/ioxd_compactor2/src/lib.rs +++ b/ioxd_compactor2/src/lib.rs @@ -143,11 +143,31 @@ pub async fn create_compactor2_server_type( ) -> Arc { let backoff_config = BackoffConfig::default(); + // if shard_count is specified, shard_id must be provided also. + // shard_id may be specified explicitly or extracted from the host name. 
+ let mut shard_id = compactor_config.shard_id; + if shard_id.is_none() + && compactor_config.shard_count.is_some() + && compactor_config.hostname.is_some() + { + let parsed_id = compactor_config + .hostname + .unwrap() + .chars() + .skip_while(|ch| !ch.is_ascii_digit()) + .take_while(|ch| ch.is_ascii_digit()) + .fold(None, |acc, ch| { + ch.to_digit(10).map(|b| acc.unwrap_or(0) * 10 + b) + }); + if parsed_id.is_some() { + shard_id = Some(parsed_id.unwrap() as usize); + } + } assert!( - compactor_config.shard_id.is_some() == compactor_config.shard_count.is_some(), + shard_id.is_some() == compactor_config.shard_count.is_some(), "must provide or not provide shard ID and count" ); - let shard_config = compactor_config.shard_id.map(|shard_id| ShardConfig { + let shard_config = shard_id.map(|shard_id| ShardConfig { shard_id, n_shards: compactor_config.shard_count.expect("just checked"), }); From 2d601bf21159d6201c5cc3bb7d338ebeb2be4c35 Mon Sep 17 00:00:00 2001 From: Nga Tran Date: Wed, 3 May 2023 18:29:26 -0400 Subject: [PATCH 014/119] test: num files to read exceed the max_parquet_fanout and all sorted files are resorted (#7737) Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com> --- influxdb_iox/tests/query_tests2/cases.rs | 12 +++++++++ .../cases/in/duplicates_parquet_50_files.sql | 14 +++++++++++ .../duplicates_parquet_50_files.sql.expected | 25 +++++++++++++++++++ influxdb_iox/tests/query_tests2/setups.rs | 22 ++++++++++++++++ 4 files changed, 73 insertions(+) create mode 100644 influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql create mode 100644 influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql.expected diff --git a/influxdb_iox/tests/query_tests2/cases.rs b/influxdb_iox/tests/query_tests2/cases.rs index 7113ae1eca..6c8d803a6f 100644 --- a/influxdb_iox/tests/query_tests2/cases.rs +++ b/influxdb_iox/tests/query_tests2/cases.rs @@ -121,6 +121,18 @@ async fn duplicates_parquet_many() { .await; } +#[tokio::test] +async fn duplicates_parquet_50() { + test_helpers::maybe_start_logging(); + + TestCase { + input: "cases/in/duplicates_parquet_50_files.sql", + chunk_stage: ChunkStage::Parquet, + } + .run() + .await; +} + #[tokio::test] async fn gapfill() { test_helpers::maybe_start_logging(); diff --git a/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql new file mode 100644 index 0000000000..704756cd9d --- /dev/null +++ b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql @@ -0,0 +1,14 @@ +-- Test setup for running with 50 parquet files +-- IOX_SETUP: FiftySortedSameParquetFiles + + +-- each parquet file has either 2 rows, one with f=1 and the other with f=2 +-- and then there are 50 that have a single row with f=3 +select count(1), sum(f1) from m; + +-- All 50 files are sorted but since it is larger than max_parquet_fanout which is set 40, +-- we do not use the presort and add a SortExec +-- WHen running this test, a warning "cannot use pre-sorted parquet files, fan-out too wide" is printed +-- IOX_COMPARE: uuid +EXPLAIN select count(1), sum(f1) from m; + diff --git a/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql.expected b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql.expected new file mode 100644 index 0000000000..0e397532e6 --- /dev/null +++ b/influxdb_iox/tests/query_tests2/cases/in/duplicates_parquet_50_files.sql.expected @@ -0,0 +1,25 @@ +-- Test 
Setup: FiftySortedSameParquetFiles +-- SQL: select count(1), sum(f1) from m; ++-----------------+-----------+ +| COUNT(Int64(1)) | SUM(m.f1) | ++-----------------+-----------+ +| 1 | 1.0 | ++-----------------+-----------+ +-- SQL: EXPLAIN select count(1), sum(f1) from m; +-- Results After Normalizing UUIDs +---------- +| plan_type | plan | +---------- +| logical_plan | Aggregate: groupBy=[[]], aggr=[[COUNT(Int64(1)), SUM(m.f1)]] | +| | TableScan: m projection=[f1] | +| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[COUNT(Int64(1)), SUM(m.f1)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[COUNT(Int64(1)), SUM(m.f1)] | +| | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | +| | ProjectionExec: expr=[f1@1 as f1] | +| | DeduplicateExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC] | +| | SortPreservingMergeExec: [tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] | +| | SortExec: expr=[tag1@2 ASC,tag2@3 ASC,tag3@4 ASC,tag4@5 ASC,time@6 ASC,__chunk_order@0 ASC] | +| | ParquetExec: limit=None, partitions={4 groups: [[1/1/1/00000000-0000-0000-0000-000000000000.parquet, 1/1/1/00000000-0000-0000-0000-000000000001.parquet, 1/1/1/00000000-0000-0000-0000-000000000002.parquet, 1/1/1/00000000-0000-0000-0000-000000000003.parquet, 1/1/1/00000000-0000-0000-0000-000000000004.parquet, 1/1/1/00000000-0000-0000-0000-000000000005.parquet, 1/1/1/00000000-0000-0000-0000-000000000006.parquet, 1/1/1/00000000-0000-0000-0000-000000000007.parquet, 1/1/1/00000000-0000-0000-0000-000000000008.parquet, 1/1/1/00000000-0000-0000-0000-000000000009.parquet, 1/1/1/00000000-0000-0000-0000-00000000000a.parquet, 1/1/1/00000000-0000-0000-0000-00000000000b.parquet, 1/1/1/00000000-0000-0000-0000-00000000000c.parquet], [1/1/1/00000000-0000-0000-0000-00000000000d.parquet, 1/1/1/00000000-0000-0000-0000-00000000000e.parquet, 1/1/1/00000000-0000-0000-0000-00000000000f.parquet, 1/1/1/00000000-0000-0000-0000-000000000010.parquet, 1/1/1/00000000-0000-0000-0000-000000000011.parquet, 1/1/1/00000000-0000-0000-0000-000000000012.parquet, 1/1/1/00000000-0000-0000-0000-000000000013.parquet, 1/1/1/00000000-0000-0000-0000-000000000014.parquet, 1/1/1/00000000-0000-0000-0000-000000000015.parquet, 1/1/1/00000000-0000-0000-0000-000000000016.parquet, 1/1/1/00000000-0000-0000-0000-000000000017.parquet, 1/1/1/00000000-0000-0000-0000-000000000018.parquet, 1/1/1/00000000-0000-0000-0000-000000000019.parquet], [1/1/1/00000000-0000-0000-0000-00000000001a.parquet, 1/1/1/00000000-0000-0000-0000-00000000001b.parquet, 1/1/1/00000000-0000-0000-0000-00000000001c.parquet, 1/1/1/00000000-0000-0000-0000-00000000001d.parquet, 1/1/1/00000000-0000-0000-0000-00000000001e.parquet, 1/1/1/00000000-0000-0000-0000-00000000001f.parquet, 1/1/1/00000000-0000-0000-0000-000000000020.parquet, 1/1/1/00000000-0000-0000-0000-000000000021.parquet, 1/1/1/00000000-0000-0000-0000-000000000022.parquet, 1/1/1/00000000-0000-0000-0000-000000000023.parquet, 1/1/1/00000000-0000-0000-0000-000000000024.parquet, 1/1/1/00000000-0000-0000-0000-000000000025.parquet], [1/1/1/00000000-0000-0000-0000-000000000026.parquet, 1/1/1/00000000-0000-0000-0000-000000000027.parquet, 1/1/1/00000000-0000-0000-0000-000000000028.parquet, 1/1/1/00000000-0000-0000-0000-000000000029.parquet, 1/1/1/00000000-0000-0000-0000-00000000002a.parquet, 1/1/1/00000000-0000-0000-0000-00000000002b.parquet, 1/1/1/00000000-0000-0000-0000-00000000002c.parquet, 1/1/1/00000000-0000-0000-0000-00000000002d.parquet, 
1/1/1/00000000-0000-0000-0000-00000000002e.parquet, 1/1/1/00000000-0000-0000-0000-00000000002f.parquet, 1/1/1/00000000-0000-0000-0000-000000000030.parquet, 1/1/1/00000000-0000-0000-0000-000000000031.parquet]]}, projection=[__chunk_order, f1, tag1, tag2, tag3, tag4, time] | +| | | +---------- \ No newline at end of file diff --git a/influxdb_iox/tests/query_tests2/setups.rs b/influxdb_iox/tests/query_tests2/setups.rs index 74d5b2df59..c87a275942 100644 --- a/influxdb_iox/tests/query_tests2/setups.rs +++ b/influxdb_iox/tests/query_tests2/setups.rs @@ -269,6 +269,28 @@ pub static SETUPS: Lazy> = Lazy::new(|| { }) .collect::>(), ), + ( + "FiftySortedSameParquetFiles", + (0..50) + .flat_map(|_i| { + + let write = Step::WriteLineProtocol( + "m,tag1=A,tag2=B,tag3=C,tag4=D f1=1,f2=2 2001".into(), // duplicated across all chunks + ); + + [ + Step::RecordNumParquetFiles, + write, + Step::Persist, + Step::WaitForPersisted2 { + expected_increase: 1, + }, + ] + .into_iter() + }) + .collect::>(), + ), + ( "OneMeasurementManyFields", vec![ From ccacd7e78eb54353bf213e32e282c962b2a66294 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Thu, 4 May 2023 18:25:19 +1000 Subject: [PATCH 015/119] chore: Fix doc --- influxdb_influxql_parser/src/select.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/influxdb_influxql_parser/src/select.rs b/influxdb_influxql_parser/src/select.rs index 0a138e1b89..0ef658ba57 100644 --- a/influxdb_influxql_parser/src/select.rs +++ b/influxdb_influxql_parser/src/select.rs @@ -44,7 +44,7 @@ pub struct SelectStatement { /// Expressions used for grouping the selection. pub group_by: Option, - /// The [fill clause] specifies the fill behaviour for the selection. If the value is [`None`], + /// The [fill] clause specifies the fill behaviour for the selection. If the value is [`None`], /// it is the same behavior as `fill(none)`. /// /// [fill]: https://docs.influxdata.com/influxdb/v1.8/query_language/explore-data/#group-by-time-intervals-and-fill From b47e0efc85888d33afced07d8eafdfe838217222 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Thu, 4 May 2023 18:27:32 +1000 Subject: [PATCH 016/119] feat: Step 1 on N for an intermediate representation of `SELECT` The end goal is that each `Select` node will contain a schema to be referenced directly by the InfluxQL planner. Additionally, further refinement of the field data types used by the `Select` node are expected, to remove ambiguity from the planner. 
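As a reader's aid (not part of the patch): the shape this commit moves toward can be sketched as follows. The rewriter lowers the `FROM` clause into intermediate table references, and the type evaluator resolves a field's type by folding over them, keeping the higher-precedence type when the same field appears in several measurements. The names below are simplified stand-ins for the real `ir::TableReference`, `ir::Select`, and `evaluate_type` shown in the diff that follows.

// Precedence mirrors the `res < existing` comparison in the evaluator: a lower
// variant wins, so Float is the highest-precedence result type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum VarRefDataType {
    Float,
    Integer,
    Unsigned,
    String,
    Boolean,
}

enum TableReference {
    Name(String),
    Subquery(Box<Select>),
}

struct Select {
    from: Vec<TableReference>,
    // fields, condition, group_by, ... elided for brevity
}

/// Resolve the data type of `field` across every table reference in `from`.
fn resolve_field_type(
    lookup: &impl Fn(&str, &str) -> Option<VarRefDataType>,
    from: &[TableReference],
    field: &str,
) -> Option<VarRefDataType> {
    let mut data_type: Option<VarRefDataType> = None;
    for tr in from {
        let res = match tr {
            TableReference::Name(name) => lookup(name, field),
            // for a subquery, recurse into its own FROM list
            TableReference::Subquery(select) => resolve_field_type(lookup, &select.from, field),
        };
        data_type = match (data_type, res) {
            // keep whichever type has the higher precedence
            (Some(existing), Some(new)) if new < existing => Some(new),
            (None, Some(new)) => Some(new),
            (existing, _) => existing,
        };
    }
    data_type
}

#[test]
fn same_field_resolves_to_highest_precedence() {
    // shared_field0 is a float in temp_01 and an integer in temp_02,
    // so the combined selection resolves it to float.
    let lookup = |table: &str, field: &str| match (table, field) {
        ("temp_01", "shared_field0") => Some(VarRefDataType::Float),
        ("temp_02", "shared_field0") => Some(VarRefDataType::Integer),
        _ => None,
    };
    let from = vec![
        TableReference::Name("temp_01".into()),
        TableReference::Name("temp_02".into()),
    ];
    assert_eq!(
        resolve_field_type(&lookup, &from, "shared_field0"),
        Some(VarRefDataType::Float)
    );
}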
--- .../src/plan/expr_type_evaluator.rs | 257 ++++++++++++------ iox_query_influxql/src/plan/field.rs | 27 +- iox_query_influxql/src/plan/ir.rs | 92 +++++++ iox_query_influxql/src/plan/mod.rs | 1 + iox_query_influxql/src/plan/rewriter.rs | 199 +++++--------- 5 files changed, 362 insertions(+), 214 deletions(-) create mode 100644 iox_query_influxql/src/plan/ir.rs diff --git a/iox_query_influxql/src/plan/expr_type_evaluator.rs b/iox_query_influxql/src/plan/expr_type_evaluator.rs index ae87158002..b563ef97b1 100644 --- a/iox_query_influxql/src/plan/expr_type_evaluator.rs +++ b/iox_query_influxql/src/plan/expr_type_evaluator.rs @@ -1,33 +1,33 @@ use crate::plan::field::field_by_name; use crate::plan::field_mapper::map_type; +use crate::plan::ir::TableReference; use crate::plan::{error, SchemaProvider}; use datafusion::common::Result; -use influxdb_influxql_parser::common::{MeasurementName, QualifiedMeasurementName}; use influxdb_influxql_parser::expression::{ Binary, BinaryOperator, Call, Expr, VarRef, VarRefDataType, }; use influxdb_influxql_parser::literal::Literal; -use influxdb_influxql_parser::select::{Dimension, FromMeasurementClause, MeasurementSelection}; +use influxdb_influxql_parser::select::Dimension; use itertools::Itertools; /// Evaluate the type of the specified expression. /// /// Derived from [Go implementation](https://github.com/influxdata/influxql/blob/1ba470371ec093d57a726b143fe6ccbacf1b452b/ast.go#L4796-L4797). -pub(crate) fn evaluate_type( +pub(super) fn evaluate_type( s: &dyn SchemaProvider, expr: &Expr, - from: &FromMeasurementClause, + from: &[TableReference], ) -> Result> { TypeEvaluator::new(from, s).eval_type(expr) } struct TypeEvaluator<'a> { s: &'a dyn SchemaProvider, - from: &'a FromMeasurementClause, + from: &'a [TableReference], } impl<'a> TypeEvaluator<'a> { - fn new(from: &'a FromMeasurementClause, s: &'a dyn SchemaProvider) -> Self { + fn new(from: &'a [TableReference], s: &'a dyn SchemaProvider) -> Self { Self { from, s } } @@ -98,14 +98,11 @@ impl<'a> TypeEvaluator<'a> { } _ => { let mut data_type: Option = None; - for ms in self.from.iter() { - match ms { - MeasurementSelection::Name(QualifiedMeasurementName { - name: MeasurementName::Name(ident), - .. - }) => match ( + for tr in self.from.iter() { + match tr { + TableReference::Name(name) => match ( data_type, - map_type(self.s, ident.as_str(), expr.name.as_str())?, + map_type(self.s, name.as_str(), expr.name.as_str())?, ) { (Some(existing), Some(res)) => { if res < existing { @@ -115,9 +112,9 @@ impl<'a> TypeEvaluator<'a> { (None, Some(res)) => data_type = Some(res), _ => continue, }, - MeasurementSelection::Subquery(select) => { + TableReference::Subquery(select) => { // find the field by name - if let Some(field) = field_by_name(select, expr.name.as_str()) { + if let Some(field) = field_by_name(&select.fields, expr.name.as_str()) { match (data_type, evaluate_type(self.s, &field.expr, &select.from)?) 
{ (Some(existing), Some(res)) => { @@ -140,9 +137,6 @@ impl<'a> TypeEvaluator<'a> { } } } - _ => { - return error::internal("eval_var_ref: Unexpected MeasurementSelection") - } } } @@ -252,6 +246,7 @@ fn binary_data_type( #[cfg(test)] mod test { use crate::plan::expr_type_evaluator::{binary_data_type, evaluate_type}; + use crate::plan::rewriter::map_select; use crate::plan::test_utils::{parse_select, MockSchemaProvider}; use assert_matches::assert_matches; use datafusion::common::DataFusionError; @@ -314,51 +309,72 @@ mod test { fn test_evaluate_type() { let namespace = MockSchemaProvider::default(); - let stmt = parse_select("SELECT shared_field0 FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT shared_field0 FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT shared_tag0 FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = + map_select(&namespace, &parse_select("SELECT shared_tag0 FROM temp_01")).unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Tag); // Unknown - let stmt = parse_select("SELECT not_exists FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select(&namespace, &parse_select("SELECT not_exists FROM temp_01")).unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from).unwrap(); assert!(res.is_none()); - let stmt = parse_select("SELECT shared_field0 FROM temp_02"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT shared_field0 FROM temp_02"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT shared_field0 FROM temp_02"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT shared_field0 FROM temp_02"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); // Same field across multiple measurements resolves to the highest precedence (float) - let stmt = parse_select("SELECT shared_field0 FROM temp_01, temp_02"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT shared_field0 FROM temp_01, temp_02"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); // Explicit cast of integer field to float - let stmt = parse_select("SELECT SUM(field_i64::float) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT SUM(field_i64::float) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); @@ -368,15 +384,23 @@ mod test { // Binary expressions // - let stmt = parse_select("SELECT field_f64 + field_i64 FROM all_types"); - let field = 
stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT field_f64 + field_i64 FROM all_types"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT field_bool | field_bool FROM all_types"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT field_bool | field_bool FROM all_types"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); @@ -385,98 +409,154 @@ mod test { // Fallible // Verify incompatible operators and operator error - let stmt = parse_select("SELECT field_f64 & field_i64 FROM all_types"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT field_f64 & field_i64 FROM all_types"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from); assert_matches!(res, Err(DataFusionError::Plan(ref s)) if s == "incompatible operands for operator &: float and integer"); // data types for functions - let stmt = parse_select("SELECT SUM(field_f64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT SUM(field_f64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT SUM(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT SUM(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT SUM(field_u64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT SUM(field_u64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Unsigned); - let stmt = parse_select("SELECT MIN(field_f64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT MIN(field_f64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT MAX(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT MAX(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT FIRST(field_str) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT FIRST(field_str) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, 
&stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::String); - let stmt = parse_select("SELECT LAST(field_str) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT LAST(field_str) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::String); - let stmt = parse_select("SELECT MEAN(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT MEAN(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT MEAN(field_u64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT MEAN(field_u64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select("SELECT COUNT(field_f64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT COUNT(field_f64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT COUNT(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT COUNT(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT COUNT(field_u64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT COUNT(field_u64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); - let stmt = parse_select("SELECT COUNT(field_str) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT COUNT(field_str) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); @@ -501,8 +581,12 @@ mod test { "holt_winters", "holt_winters_with_fit", ] { - let stmt = parse_select(&format!("SELECT {name}(field_i64) FROM temp_01")); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select(&format!("SELECT {name}(field_i64) FROM temp_01")), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); @@ -510,16 +594,24 @@ mod test { } // Integer functions - let stmt = parse_select("SELECT elapsed(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT elapsed(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = 
stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Integer); // Invalid function - let stmt = parse_select("SELECT not_valid(field_i64) FROM temp_01"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT not_valid(field_i64) FROM temp_01"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .is_none(); @@ -527,25 +619,34 @@ mod test { // subqueries - let stmt = parse_select("SELECT inner FROM (SELECT field_f64 as inner FROM temp_01)"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select("SELECT inner FROM (SELECT field_f64 as inner FROM temp_01)"), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = - parse_select("SELECT inner FROM (SELECT shared_tag0, field_f64 as inner FROM temp_01)"); - let field = stmt.fields.head().unwrap(); + let stmt = map_select( + &namespace, + &parse_select( + "SELECT inner FROM (SELECT shared_tag0, field_f64 as inner FROM temp_01)", + ), + ) + .unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); assert_matches!(res, VarRefDataType::Float); - let stmt = parse_select( + let stmt = map_select(&namespace, &parse_select( "SELECT shared_tag0, inner FROM (SELECT shared_tag0, field_f64 as inner FROM temp_01)", - ); - let field = stmt.fields.head().unwrap(); + )).unwrap(); + let field = stmt.fields.first().unwrap(); let res = evaluate_type(&namespace, &field.expr, &stmt.from) .unwrap() .unwrap(); diff --git a/iox_query_influxql/src/plan/field.rs b/iox_query_influxql/src/plan/field.rs index 238739be4c..c7cad73ccc 100644 --- a/iox_query_influxql/src/plan/field.rs +++ b/iox_query_influxql/src/plan/field.rs @@ -1,5 +1,5 @@ use influxdb_influxql_parser::expression::{Call, Expr, VarRef}; -use influxdb_influxql_parser::select::{Field, SelectStatement}; +use influxdb_influxql_parser::select::Field; use influxdb_influxql_parser::visit::{Recursion, Visitable, Visitor}; use std::ops::Deref; @@ -40,8 +40,8 @@ pub(crate) fn field_name(f: &Field) -> String { /// This implementation duplicates the behavior of the original implementation, including skipping the /// first argument. It is likely the original intended to skip the _last_ argument, which is the number /// of rows. 
-pub(crate) fn field_by_name<'a>(select: &'a SelectStatement, name: &str) -> Option<&'a Field> { - select.fields +pub(crate) fn field_by_name<'a>(fields: &'a [Field], name: &str) -> Option<&'a Field> { + fields .iter() .find(|f| { field_name(f) == name || match &f.expr { @@ -128,47 +128,50 @@ mod test { fn test_field_by_name() { let stmt = parse_select("SELECT usage, idle FROM cpu"); assert_eq!( - format!("{}", field_by_name(&stmt, "usage").unwrap()), + format!("{}", field_by_name(&stmt.fields, "usage").unwrap()), "usage" ); let stmt = parse_select("SELECT usage as foo, usage FROM cpu"); assert_eq!( - format!("{}", field_by_name(&stmt, "foo").unwrap()), + format!("{}", field_by_name(&stmt.fields, "foo").unwrap()), "usage AS foo" ); let stmt = parse_select("SELECT top(idle, usage, 5), usage FROM cpu"); assert_eq!( - format!("{}", field_by_name(&stmt, "usage").unwrap()), + format!("{}", field_by_name(&stmt.fields, "usage").unwrap()), "top(idle, usage, 5)" ); let stmt = parse_select("SELECT bottom(idle, usage, 5), usage FROM cpu"); assert_eq!( - format!("{}", field_by_name(&stmt, "usage").unwrap()), + format!("{}", field_by_name(&stmt.fields, "usage").unwrap()), "bottom(idle, usage, 5)" ); let stmt = parse_select("SELECT top(idle, usage, 5) as foo, usage FROM cpu"); assert_eq!( - format!("{}", field_by_name(&stmt, "usage").unwrap()), + format!("{}", field_by_name(&stmt.fields, "usage").unwrap()), "top(idle, usage, 5) AS foo" ); assert_eq!( - format!("{}", field_by_name(&stmt, "foo").unwrap()), + format!("{}", field_by_name(&stmt.fields, "foo").unwrap()), "top(idle, usage, 5) AS foo" ); // Not exists let stmt = parse_select("SELECT usage, idle FROM cpu"); - assert_matches!(field_by_name(&stmt, "bar"), None); + assert_matches!(field_by_name(&stmt.fields, "bar"), None); // Does not match name by first argument to top or bottom, per // bug in original implementation. let stmt = parse_select("SELECT top(foo, usage, 5), idle FROM cpu"); - assert_matches!(field_by_name(&stmt, "foo"), None); - assert_eq!(format!("{}", field_by_name(&stmt, "idle").unwrap()), "idle"); + assert_matches!(field_by_name(&stmt.fields, "foo"), None); + assert_eq!( + format!("{}", field_by_name(&stmt.fields, "idle").unwrap()), + "idle" + ); } } diff --git a/iox_query_influxql/src/plan/ir.rs b/iox_query_influxql/src/plan/ir.rs new file mode 100644 index 0000000000..11547c8342 --- /dev/null +++ b/iox_query_influxql/src/plan/ir.rs @@ -0,0 +1,92 @@ +//! Defines data structures which represent an InfluxQL +//! statement after it has been processed + +use influxdb_influxql_parser::common::{ + LimitClause, MeasurementName, OffsetClause, OrderByClause, QualifiedMeasurementName, + WhereClause, +}; +use influxdb_influxql_parser::expression::ConditionalExpression; +use influxdb_influxql_parser::select::{ + Field, FieldList, FillClause, FromMeasurementClause, GroupByClause, MeasurementSelection, + SelectStatement, TimeZoneClause, +}; + +#[derive(Debug, Default, Clone)] +pub(super) struct Select { + /// The schema of the selection. + // pub(super) schema: Todo, + + /// Projection clause of the selection. + pub(super) fields: Vec, + + /// A list of tables or subqueries used as the source data for the selection. + pub(super) from: Vec, + + /// A conditional expression to filter the selection. + pub(super) condition: Option, + + /// The GROUP BY clause of the selection. + pub(super) group_by: Option, + + /// The [fill] clause specifies the fill behaviour for the selection. 
If the value is [`None`],
+    /// it is the same behavior as `fill(none)`.
+    ///
+    /// [fill]: https://docs.influxdata.com/influxdb/v1.8/query_language/explore-data/#group-by-time-intervals-and-fill
+    pub(super) fill: Option<FillClause>,
+
+    /// Configures the ordering of the selection by time.
+    pub(super) order_by: Option<OrderByClause>,
+
+    /// A value to restrict the number of rows returned.
+    pub(super) limit: Option<LimitClause>,
+
+    /// A value to specify an offset to start retrieving rows.
+    pub(super) offset: Option<OffsetClause>,
+
+    /// The timezone for the query, specified as [`tz('